Matrix Science Mascot Parser toolkit
 
Loading...
Searching...
No Matches
resfile_ms2quant_to_custom.py

Example program converting ms_ms2quantitation to ms_customquantitation.

1#!/usr/bin/python
2
15
16import msparser
17import sys
18
19# If no arguments supplied, print help and exit
20
21if len(sys.argv) != 5:
22 print("Usage: %s <quantitation schema 1 filepath> <quantitation schema 2 filepath> <unimod schema filepath> <results file>" % sys.argv[0])
23 print("The location of schema files should be e.g.")
24 print(" ../html/xmlns/schema/quantitation_1/quantitation_1.xsd")
25 print(" ../html/xmlns/schema/quantitation_2/quantitation_2.xsd")
26 print(" ../html/xmlns/schema/unimod_2/unimod_2.xsd")
27 print("if running from the Mascot cgi directory.")
28 sys.exit(1)
29
30quant_schema1_filepath = sys.argv[1]
31quant_schema2_filepath = sys.argv[2]
32unimod_schema_filepath = sys.argv[3]
33
34resfile = msparser.ms_mascotresfilebase.createResfile(sys.argv[4], 1)
35
36if not resfile.isValid():
37 print(resfile.getLastErrorString())
38 sys.exit(1)
39
40# ms_range function has been added to make the range supplied consistent with other languages such as Perl and C#
41def ms_range(start, stop, step=1):
42 i = start
43 while i <= stop:
44 yield i
45 i += step
46
47def dump_warnings(err):
48
49 for i in ms_range(1, err.getNumberOfErrors()):
50 print('Warning: %s' %(err.getErrorString(i)))
51
52# And print out related warnings, if any (we know these are only warnings,
53# because isValid() returned true earlier):
54
55dump_warnings(resfile.getErrorHandler())
56resfile.clearAllErrors()
57
58resfile.setXMLschemaFilePath(msparser.ms_mascotresfilebase.XML_SCHEMA_QUANTITATION, "http://www.matrixscience.com/xmlns/schema/quantitation_2 " + quant_schema2_filepath + " http://www.matrixscience.com/xmlns/schema/quantitation_1 " + quant_schema1_filepath)
59resfile.setXMLschemaFilePath(msparser.ms_mascotresfilebase.XML_SCHEMA_UNIMOD, "http://www.unimod.org/xmlns/schema/unimod_2 " + unimod_schema_filepath)
60
61qmethod = msparser.ms_quant_method()
62
63if not resfile.getQuantitationMethod(qmethod):
64 print(resfile.getLastErrorString())
65 sys.exit(1)
66
67datfile = msparser.ms_datfile()
68undef, flags, minprob, maxhits, iisb, minpeplen, use_pepsum, flags2 = resfile.get_ms_mascotresults_params(datfile.getMascotOptions())
69
70if not use_pepsum:
71 print("Results File cannot be opened as a peptide summary")
72 sys.exit(1)
73
74pepsum = msparser.ms_peptidesummary(resfile, flags, minprob, maxhits, '', iisb, minpeplen, '', flags2)
75
76if not resfile.isValid():
77 print(resfile.getLastErrorString)
78 sys.exit(1)
79
80dump_warnings(resfile.getErrorHandler())
81resfile.clearAllErrors()
82
83# Quantitate the file using ms_ms2quantitation:
84
85ms2quant = msparser.ms_ms2quantitation(pepsum, qmethod)
86
87if not ms2quant.isValid():
88 print(ms2quant.getLastErrorString())
89 sys.exit(1)
90
91if ms2quant.hasIntensityNormalisation():
92 ms2quant.normaliseIntensities()
93elif ms2quant.hasPeptideRatioNormalisation():
94 ms2quant.normalisePeptideRatios()
95
96dump_warnings(ms2quant.getErrorHandler())
97ms2quant.clearAllErrors()
98
99# Then import the data into ms_customquantitation:
100customquant = msparser.ms_ms1quantitation(pepsum, qmethod)
101
102if not customquant.isValid():
103 print(customquant.getLastErrorString())
104 sys.exit(1)
105
106err = customquant.getErrorHandler()
107# And print out related warnings, if any (we know these are only warnings,
108# because isValid() returned true earlier):
109
110for i in range(1, err.getNumberOfErrors()):
111 print("Warning: %s" %(err.getErrorString(i)))
112
113customquant.clearAllErrors()
114
115# Refresh the quantitation method object, in case customquant altered
116# some of the parameters during conversion:
117
118qmethod = customquant.getQuantitationMethod()
119
120# Now we can inspect peptide and protein data in customquant.
121# But first, dump quantitation method parameters to see what we have:
122
123comps = []
124
125for i in ms_range(0, qmethod.getNumberOfComponents()-1):
126 comp = qmethod.getComponentByNumber(i)
127 comps.append(comp.getName())
128
129print("Components: %s" %(comps))
130
131rationames = []
132
133for i in ms_range(0, qmethod.getNumberOfReportRatios()-1):
134 ratioByNumber = qmethod.getReportRatioByNumber(i)
135 rationames.append(ratioByNumber.getName())
136
137print("Report Ratios: %s" %(rationames))
138print("Protein ratio type = %s" %(qmethod.getProteinRatioType()))
139print("Min. number of peptides = %d" %(qmethod.getMinNumPeptides()))
140
141if qmethod.haveQuality():
142 q = qmethod.getQuality()
143 print("Quality: min. precursor charge = %s" %(q.getMinPrecursorCharge()))
144 print("Quality: pep. threshold type = %s" %(q.getPepThresholdType()))
145 print("Quality: pep. threshold value = %s" %(q.getPepThresholdValue()))
146else:
147 print("Quality: no restrictions")
148
149if qmethod.haveNormalisation():
150 q = qmethod.getNormalisation()
151 print("Normalisation = %s" %(q.getMethod()))
152 if q.haveProteins():
153 p = q.getProteins()
154 print("getProteins")
155 for i in ms_range(0, p.getNumberOfProteins()-1):
156 proteins = p.getProteins(i)
157 protein = proteins.getAccession()
158 print(" - Accessions = %s" %(protein))
159
160 if q.havePeptides():
161 p = q.getPeptides()
162 peptides = p.getPeptides()
163 for i in ms_range(0, p.getNumberOfPeptides()-1):
164 peptides = p.getPeptides(i)
165 peptide = peptides.getSequence()
166 print(" - Sequences = %s" %(peptide))
167
168 component_bases = []
169 for i in ms_range(0, qmethod.getNumberOfComponents()-1):
170 gcbn = qmethod.getComponentByNumber(i)
171 base = ms2quant.getIntensityNormalisationBase(gcbn.getName())
172 component_bases.append(base)
173
174 print("Intensity normalisation constants: %s" %(component_bases))
175 print("Ration normalisation constants: %s" %(rationames))
176
177else:
178 print("Normalisation: none")
179
180if qmethod.haveOutliers():
181 q = qmethod.getOutliers()
182 print("Outliers = %s" %(q.getMethod()))
183else:
184 print("Outliers: none")
185
186print("\n")
187
188# Collect proteins we're interested in:
189
190proteins = []
191
192for i in ms_range(1, pepsum.getNumberOfHits()):
193 hit = pepsum.getHit(i)
194 proteins.append(hit)
195 j = 0
196 protein = pepsum.getNextFamilyProtein(i, j)
197 while protein !=None:
198 proteins.append(protein)
199 j += 1
200
201for protein in proteins:
202 print("%d::%s:" %(protein.getDB(), protein.getAccession()))
203
204 for rationame in rationames:
205 ratio = customquant.getProteinRatio(protein.getAccession(), protein.getDB(), rationame)
206
207 if ratio.isMissing():
208 print("%10s: ---" %(rationame))
209 else:
210 print("%10s = %10g, stderr = %10g, N = %d, normal-p = %6g, hyp-p = %6g" %(rationame, ratio.getValue(), ratio.getStandardError_only(), ratio.getSampleSize(), ratio.getNormalityPvalue_only(), ratio.getHypothesisPvalue_only()))
211
212 print(" %10s %s" %('Peptide:', rationame))
213
214 for i in ms_range(1, protein.getNumPeptides()):
215 if protein.getPeptideDuplicate(i) == msparser.ms_protein.DUPE_DuplicateSameQuery:
216 continue
217 q = protein.getPeptideQuery(i)
218 p = protein.getPeptideP(i)
219
220 peptide = pepsum.getPeptide(q, p)
221
222 if not peptide:
223 continue
224
225 values = ''
226 for rationame in rationames:
227 # Note that the q,p arguments to ms_peptide_quant_key must be
228 # integers. Otherwise the wrong constructor is chosen. Here they
229 # are integers, as they are return values from Parser functions
230 # that return ints.
231 quantKey = msparser.ms_peptide_quant_key(q, p)
232 ratio = ms2quant.getPeptideRatio(quantKey, rationame)
233
234 if ratio.isMissing():
235 values = "---"
236 continue
237
238 if ratio.isInfinite():
239 values = "###"
240 else:
241 values = ratio.getValue()
242
243 print(" %-9s %-10s" %("q%d_p%d" %(q, p), str(values)))
244
245 print("\n")
246
247
248