Matrix Science Mascot Parser toolkit
 
Loading...
Searching...
No Matches
resfile_ms2quantitation.py

Example program extracting peptide ratios from a Reporter or Multiplex file.

1#!/usr/bin/python
2
16
17import msparser
18import sys
19
20# If no arguments supplied, print help and exit
21
22if len(sys.argv) != 5:
23 print("Usage: %s <quantitation schema 1 filepath> <quantitation schema 2 filepath> <unimod schema filepath> <results file>" % sys.argv[0])
24 print("The location of schema files should be e.g.")
25 print(" ../html/xmlns/schema/quantitation_1/quantitation_1.xsd")
26 print(" ../html/xmlns/schema/quantitation_2/quantitation_2.xsd")
27 print(" ../html/xmlns/schema/unimod_2/unimod_2.xsd")
28 print("if running from the Mascot cgi directory.")
29 sys.exit(1)
30
31quant_schema1_filepath = sys.argv[1]
32quant_schema2_filepath = sys.argv[2]
33unimod_schema_filepath = sys.argv[3]
34
35resfile = msparser.ms_mascotresfilebase.createResfile(sys.argv[4], 1)
36
37if not resfile.isValid():
38 print(resfile.getLastErrorString())
39 sys.exit(1)
40
41def dump_warnings(err):
42
43 for i in ms_range(1, err.getNumberOfErrors()):
44 print('Warning: %s' %(err.getErrorString(i)))
45
46# ms_range function has been added to make the range supplied consistent with other languages such as Perl and C#
47def ms_range(start, stop, step=1):
48 i = start
49 while i <= stop:
50 yield i
51 i += step
52
53# And print out related warnings, if any (we know these are only warnings,
54# because isValid() returned true earlier):
55
56dump_warnings(resfile.getErrorHandler())
57resfile.clearAllErrors()
58
59resfile.setXMLschemaFilePath(msparser.ms_mascotresfilebase.XML_SCHEMA_QUANTITATION, "http://www.matrixscience.com/xmlns/schema/quantitation_2 " + quant_schema2_filepath + " http://www.matrixscience.com/xmlns/schema/quantitation_1 " + quant_schema1_filepath)
60resfile.setXMLschemaFilePath(msparser.ms_mascotresfilebase.XML_SCHEMA_UNIMOD, "http://www.unimod.org/xmlns/schema/unimod_2 " + unimod_schema_filepath)
61
62qmethod = msparser.ms_quant_method()
63
64if not resfile.getQuantitationMethod(qmethod):
65 print(resfile.getLastErrorString())
66 sys.exit(1)
67
68
69datfile = msparser.ms_datfile()
70undef, flags, minprob, maxhits, iisb, minpeplen, use_pepsum, flags2 = resfile.get_ms_mascotresults_params(datfile.getMascotOptions())
71
72if not use_pepsum:
73 print("Results File cannot be opened as a peptide summary")
74 sys.exit(1)
75
76pepsum = msparser.ms_peptidesummary(resfile, flags, minprob, maxhits, '', iisb, minpeplen, '', flags2)
77
78if not resfile.isValid():
79 print(resfile.getLastErrorString)
80 sys.exit(1)
81
82dump_warnings(resfile.getErrorHandler())
83resfile.clearAllErrors()
84
85# Quantitate the file using ms_ms2quantitation:
86
87ms2quant = msparser.ms_ms2quantitation(pepsum, qmethod)
88
89if not ms2quant.isValid():
90 sys.exit(1)
91
92if ms2quant.hasIntensityNormalisation():
93 ms2quant.normaliseIntensities()
94elif ms2quant.hasPeptideRatioNormalisation():
95 ms2quant.normalisePeptideRatios()
96
97# Update the quantitation method in case ms_ms2quantitation changed any of
98# the parameters:
99qmethod = ms2quant.getQuantitationMethod()
100
101dump_warnings(ms2quant.getErrorHandler())
102ms2quant.clearAllErrors()
103
104# Now we can inspect peptide and protein data in ms2quant.
105# But first, dump quantitation method parameters to see what we have:
106
107comps = []
108for i in ms_range(0, qmethod.getNumberOfComponents()-1):
109 comp = qmethod.getComponentByNumber(i)
110 comps.append(comp.getName())
111
112print("Components: %s" %(comps))
113
114rationames = []
115for i in ms_range(0, qmethod.getNumberOfReportRatios()-1):
116 reportratio = qmethod.getReportRatioByNumber(i)
117 reportrationame = reportratio.getName()
118 rationames.append(reportrationame)
119print("Report ratios: %s" %(rationames))
120
121print("Protein ratio type = %s" %(qmethod.getProteinRatioType()))
122print("Min. number of peptides = %d" %(qmethod.getMinNumPeptides()))
123
124if qmethod.haveQuality():
125 q = qmethod.getQuality()
126 print("Quality: min. precursor charge = %s" %(q.getMinPrecursorCharge()))
127 print("Quality: pep. threshold type = %s" %(q.getPepThresholdType()))
128 print("Quality: pep. threshold value = %s" %(q.getPepThresholdValue()))
129else:
130 print("Quality: no restrictions")
131
132# Check for Normalisation
133
134if qmethod.haveNormalisation():
135 q = qmethod.getNormalisation()
136 print("Normalisation = %s" %(q.getMethod()))
137
138 if q.haveProteins():
139 p = q.getProteins()
140 for i in ms_range(0, p.getNumberOfProteins()-1):
141 accessions = []
142 accessions.append(p.getProtein().getAccession())
143 print(" - Accessions = %s" %(accessions))
144
145 if q.havePeptides():
146 p = q.getPeptides()
147 for i in ms_range(0, p.getNumberOfPeptides()-1):
148 peptides = []
149 peptides.append(p.getPeptide().getSequence())
150 print(" - Sequences = %s" %(peptides))
151
152 component_bases = []
153
154 for i in ms_range(0, qmethod.getNumberOfComponents()-1):
155 component = qmethod.getComponentByNumber(i)
156 compname = component.getName()
157 baseNormalisation = ms2quant.getIntensityNormalisationBase(compname)
158 component_bases.append(baseNormalisation)
159 for rationame in rationames:
160 ms2gprn = ms2quant.getPeptideRatioNormalisationBase(rationame)
161 print("Intensity normalisation constants: %s" %(component_bases))
162 print("Ratio normalisation constants: %s" %(ms2gprn))
163
164else:
165 print("Normalisation: none")
166
167if qmethod.haveOutliers():
168 q = qmethod.getOutliers()
169 print("Outliers = %s" %(q.getMethod()))
170else:
171 print("Outliers: none")
172
173print("\n")
174
175# Collect proteins we're interested in:
176proteins = []
177
178for i in ms_range(1, pepsum.getNumberOfHits()):
179 hit = pepsum.getHit(i)
180 proteins.append(hit)
181 j = 0
182 protein = pepsum.getNextFamilyProtein(i, j)
183 while protein !=None:
184 proteins.append(protein)
185 j += 1
186
187protein = ''
188for protein in proteins:
189 print("%d::%s:" %(protein.getDB(), protein.getAccession()))
190
191 for rationame in rationames:
192 ratio = ms2quant.getProteinRatio(protein.getAccession(), protein.getDB(), rationame)
193 print(rationame)
194 if ratio.isMissing():
195 print("%10s: ---" %(rationame))
196 else:
197 print("%10s = %10g, stderr = %10g, N = %d, normal-p = %6g, hypo-p = %6g" %(rationame, ratio.getValue(), ratio.getStandardError_only(), ratio.getSampleSize(), ratio.getNormalityPvalue_only(), ratio.getHypothesisPvalue_only()))
198 print(rationame)
199 print(" %10s %s" %('Peptide:', rationame))
200 for i in ms_range(1, protein.getNumPeptides()):
201 if protein.getPeptideDuplicate(i) == msparser.ms_protein.DUPE_DuplicateSameQuery:
202 continue
203
204 q = protein.getPeptideQuery(i)
205 p = protein.getPeptideP(i)
206
207 peptide = pepsum.getPeptide(q, p)
208
209 if not peptide:
210 continue
211
212 values = ''
213 for rationame in rationames:
214 # Note that the q,p arguments to ms_peptide_quant_key must be
215 # integers. Otherwise the wrong constructor is chosen. Here they
216 # are integers, as they are return values from Parser functions
217 # that return ints.
218 quantKey = msparser.ms_peptide_quant_key(q, p)
219 ratio = ms2quant.getPeptideRatio(quantKey, rationame)
220
221 if ratio.isMissing():
222 values = "---"
223 continue
224
225 if ratio.isInfinite():
226 values = "###"
227 else:
228 values = ratio.getValue()
229
230 print(" %-9s %-10s" %("q%d_p%d" %(q, p), str(values)))
231
232 print("\n")
233
234
235
236