parse.py 2.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950
  1. #!/usr/bin/env python
  2. def parse(file, store):
  3. f = open(file, 'r')
  4. dic = {}
  5. for i in f:
  6. i = i.strip("\n")
  7. val = i.split("\t")
  8. try:
  9. dic[val[0]] = dic[val[0]] + ";"+ val[1]+"-"+val[2]
  10. except KeyError:
  11. dic[val[0]] = val[1]+"-"+val[2]
  12. f.close()
  13. f = open(store, 'w')
  14. for i in dic.keys():
  15. string = i+"\t"+dic[i]+"\t0"
  16. f.write(string+"\n")
  17. f.close
  18. if __name__=="__main__":
  19. import sys
  20. if len(sys.argv) > 1:
  21. file = sys.argv[1]
  22. store = sys.argv[2]
  23. parse(file, store)
  24. else:
  25. sys.exit("No input")
#### INPUT (sample; in the real file the columns are tab-separated: identifier, GO ID, GO term description)
#Eucgr.A00001 GO:0008565 protein transporter activity
#Eucgr.A00001 GO:0031204 posttranslational protein targeting to membrane, translocation
#Eucgr.A00004 GO:0005634 nucleus
#Eucgr.A00006 GO:0003677 DNA binding
#Eucgr.A00006 GO:0003824 catalytic activity
#Eucgr.A00012 GO:0015031 protein transport
#Eucgr.A00012 GO:0006457 protein folding
#Eucgr.A00014 GO:0003852 2-isopropylmalate synthase activity
#Eucgr.A00014 GO:0009098 leucine biosynthetic process
#Eucgr.A00017 GO:0008312 7S RNA binding
#### OUTPUT (sample; one line per identifier. The script also appends a tab and "0" to every line, which is not shown here)
#Eucgr.L00569 GO:0004672-protein kinase activity;GO:0006468-protein phosphorylation;GO:0005524-ATP binding
#Eucgr.K00395 GO:0003723-RNA binding
#Eucgr.A02469 GO:0004672-protein kinase activity;GO:0006468-protein phosphorylation
#Eucgr.E01168 GO:0005089-Rho guanyl-nucleotide exchange factor activity
#Eucgr.A02467 GO:0007275-multicellular organismal development;GO:0005634-nucleus;GO:0006511-ubiquitin-dependent protein catabolic process
#Eucgr.E01166 GO:0016747-transferase activity, transferring acyl groups other than amino-acyl groups
#Eucgr.A02465 GO:0006950-response to stress;GO:0051087-chaperone binding;GO:0001671-ATPase activator activity
#Eucgr.A02464 GO:0006662-glycerol ether metabolic process;GO:0045454-cell redox homeostasis;GO:0015035-protein disulfide oxidoreductase activity
#Eucgr.E01163 GO:0016747-transferase activity, transferring acyl groups other than amino-acyl groups
#Eucgr.A02462 GO:0005634-nucleus;GO:0006355-regulation of transcription, DNA-templated;GO:0003677-DNA binding;GO:0003700-sequence-specific DNA binding transcription factor activity;GO:004356