Matrix Science Mascot Parser toolkit
 
Loading...
Searching...
No Matches
tools_aahelper.py

For calculating peptide and fragment masses.

1#!/usr/bin/python
2
15
16import msparser
17import sys
18
def main():
    """Demonstrate ms_aahelper: digest a protein, then build and fragment a peptide.

    Expects three command-line arguments (enzymes file, unimod.xml file or
    URL, unimod schema file). With fewer arguments the usage text is printed
    and the process exits with status 1.

    Returns 1 when the 'Trypsin' enzyme cannot be found or when the
    modification lists are rejected by the helper; otherwise falls off the
    end and returns None.
    """
    if len(sys.argv) < 4:
        usage_lines = (
            "Usage: %s ENZYMES_FILE UNIMOD SCHEMA" % sys.argv[0],
            "ENZYMES_FILE is the enzymes file path, e.g., ../config/enzymes",
            "UNIMOD is the file path to the unimod.xml file, e.g., ../config/unimod.xml or a fully formed URL such as http://www.matrixscience.com/cgi .",
            "SCHEMA is the unimod schema file path, e.g., ../html/xmlns/schema/unimod_2/unimod_2.xsd",
        )
        for usage_line in usage_lines:
            print(usage_line)
        sys.exit(1)

    # NOTE: enzyme_source must stay referenced for as long as `trypsin` is
    # used. See "Using the toolkit from Perl, Java and Python" in the Mascot
    # Parser manual.
    enzyme_source = open_enzymefile(sys.argv[1])
    trypsin = enzyme_source.getEnzymeByName('Trypsin')
    if not trypsin:
        print("Cannot find 'Trypsin' in the enzyme file. Cannot continue.")
        return 1

    mod_source = open_modfile(sys.argv[2], sys.argv[3])

    # NOTE: mod_source and trypsin must stay referenced for as long as the
    # helper is used (same manual section as above).
    helper = msparser.ms_aahelper()
    helper.setMasses(mod_source.getMassFile())
    helper.setEnzyme(trypsin)

    # RL29_METTP (50S ribosomal protein L29P OS=Methanosaeta thermophila
    # (strain DSM 6194 / PT) GN=rpl29p PE=3 SV=1) from SwissProt 2010_09.
    protein = "MAIFRIDEIRNMSSEELEEELRKLEVELIRERGAVRAGGAPEKPGRIREIRRTIARMKTVQRERVRK"
    protein_length = len(protein)

    # Third argument 0: no missed cleavages are allowed.
    helper.startIteratePeptides(protein, protein_length, 0)

    print("List of peptides")
    while helper.getNextPeptide():
        first = helper.getPepStart()
        last = helper.getPepEnd()
        # getPepStart()/getPepEnd() are one-based, hence the -1 on the
        # lower bound only.
        print(protein[first - 1:last])
    print("End of list")

    # Fixed modifications.
    fixed_mods = msparser.ms_modvector()
    fixed_mods.appendModification(mod_source.getModificationByName('Phospho (Y)'))

    # Variable modifications; their 1-based position in this list is what
    # the per-site vector further down refers to.
    variable_mods = msparser.ms_modvector()
    for mod_name in ('Oxidation (M)', 'Acetyl (N-term)'):
        variable_mods.appendModification(mod_source.getModificationByName(mod_name))

    # NOTE: fixed_mods and variable_mods must stay referenced for as long as
    # the helper is used (same manual section as above).
    helper.setAvailableModifications(fixed_mods, variable_mods)

    # The helper itself records errors raised while applying modifications,
    # e.g. two modifications competing for the same residue or terminus.
    if not helper.isValid():
        print("Error while setting available modifications: %s" % helper.getLastErrorString())
        return 1

    # Separate error collector for peptide-specific problems.
    err = msparser.ms_errs()

    # Example call of calcPeptideMZ(). Creating an ms_peptide and calling
    # getMrCalc() on it is often more convenient.
    required_counts = msparser.vectori()
    required_counts.append(1)  # 1 acetylNterm modification
    required_counts.append(1)  # 1 site is oxidised

    mr = helper.calcPeptideMZ(
        protein,
        protein_length,
        1,
        10,                      # peptide ends (1-based)
        required_counts,
        0,                       # no charge - i.e. Mr
        msparser.MASS_TYPE_MONO,
        err,
    )

    if not err.isValid():
        print("Error while calculating peptide mass: %s" % err.getLastErrorString())
        # Not fatal: reset the collector and carry on.
        err.clearAllErrors()
    else:
        print("Peptide mass calculated using 'calcPeptideMZ' is %8.3f" % mr)

    # Per-site modification choices, laid out like a modification string:
    # N-term, the 10 residues M A I F R I D E I R, then C-term.
    #   2 -> "Acetyl (N-term)", 1 -> "Oxidation (M)", 0 -> unmodified.
    site_mods = msparser.vectori()
    for mod_index in (2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0):
        site_mods.append(mod_index)

    # Which neutral loss to use at each site (a vector must be supplied even
    # if empty). Only the modified M selects one; the original example
    # describes it as the first of two losses (-98) - confirm against the
    # unimod entry in use.
    site_neutral_loss = msparser.vectori()
    for loss_index in (0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0):
        site_neutral_loss.append(loss_index)

    peptide = helper.createPeptide(
        protein,
        protein_length,
        1,
        10,                      # end positions
        site_mods,               # modification string-like vector
        site_neutral_loss,       # which neutral loss to use
        0,                       # no charge
        msparser.MASS_TYPE_MONO,
        err,
    )

    if err.isValid():
        print("Peptide has been created successfully: %s" % peptide.getPeptideStr())
    else:
        print("Error while creating a peptide: %s" % err.getLastErrorString())
        # Not fatal: reset the collector and carry on.
        err.clearAllErrors()

    # Accumulates the fragments of every series computed below.
    all_fragments = msparser.ms_fragmentvector()

    b_ions = fragmentPeptide(
        helper,
        peptide,
        msparser.ms_fragmentationrules.FRAG_B_SERIES,
        'b-ion series',
        0,   # single-charged ions only
        mr,  # maximal fragment mass to return
    )

    # copyFrom() can only be used to populate the list for the first time;
    # later series are appended fragment by fragment.
    all_fragments.copyFrom(b_ions)

    # (series id, label, doubleCharged argument, maximal fragment mass)
    remaining_series = (
        (msparser.ms_fragmentationrules.FRAG_Y_SERIES, 'y-ion series', 0, mr),
        (msparser.ms_fragmentationrules.FRAG_Y_SERIES, 'y++-ion series', 2, mr),
        (msparser.ms_fragmentationrules.FRAG_INTERNAL_YB, 'internal yb-ion series', 0, 700),
    )
    for series_id, label, double_charged, mass_limit in remaining_series:
        series_fragments = fragmentPeptide(
            helper, peptide, series_id, label, double_charged, mass_limit
        )
        for index in range(series_fragments.getNumberOfFragments()):
            all_fragments.appendFragment(series_fragments.getFragmentByNumber(index))

    print("Paste the following into a Mascot search query window to verify this output:")

    displayMascotTestSearch(
        fixed_mods,
        variable_mods,
        trypsin,
        peptide.getMrCalc(),
        b_ions,  # or you can use all_fragments
    )
234
235
def open_enzymefile(filename):
    """Load the Mascot enzymes file at *filename*.

    Prints the parser's last error string and exits the process with
    status 1 if the file could not be loaded; otherwise returns the
    ms_enzymefile object.
    """
    enzymefile = msparser.ms_enzymefile(filename)

    # isValid is a method and must be called: the bare attribute is a bound
    # method object, which is always truthy, so the check could never fire.
    if not enzymefile.isValid():
        print("Error while opening enzyme file: %s " % enzymefile.getLastErrorString())
        sys.exit(1)

    return enzymefile
244
245
def open_modfile(umod_filename, schema_filename):
    """Build an ms_modfile from a unimod file and its schema.

    Exits the process with status 1 (after printing a message) when the
    resulting mod file is invalid, or when any of the three modifications
    this example relies on is missing from it. Returns the ms_modfile
    otherwise.
    """
    unimod_config = msparser.ms_umod_configfile(umod_filename, schema_filename)
    mods = msparser.ms_modfile(unimod_config)

    if not mods.isValid():
        print("Error while opening mod file: %s" % mods.getLastErrorString())
        sys.exit(1)

    # Report only the first required modification that cannot be found.
    required_names = ('Oxidation (M)', 'Acetyl (N-term)', 'Phospho (Y)')
    first_missing = next(
        (name for name in required_names if not mods.getModificationByName(name)),
        None,
    )
    if first_missing is not None:
        print("Cannot find '%s' in the mod file. Cannot continue." % first_missing)
        sys.exit(1)

    return mods
260
261
def fragmentPeptide(aahelper, peptide, series, series_label, doubleCharged, mass_max):
    """Calculate one fragment series for *peptide*, print it and return it.

    aahelper      -- configured ms_aahelper used to do the calculation
    peptide       -- peptide object to fragment
    series        -- fragment series identifier passed to calcFragments()
    series_label  -- human-readable series name used in the printed output
    doubleCharged -- forwarded unchanged to calcFragments()
    mass_max      -- maximal fragment mass to return

    Any error reported by calcFragments() is printed and cleared; it does
    not stop the program, so the returned vector may then be incomplete.
    Returns the ms_fragmentvector that was filled in.
    """
    fragments = msparser.ms_fragmentvector()
    err = msparser.ms_errs()

    aahelper.calcFragments(
        peptide,
        series,
        doubleCharged,
        100.0,     # presumably the lower mass bound; the next argument is the upper one
        mass_max,
        msparser.MASS_TYPE_MONO,
        fragments,
        err,
    )

    # Previously the error object was filled in but never looked at, so a
    # failed calculation was indistinguishable from an empty series.
    if not err.isValid():
        print("Error while calculating %s fragments: %s" % (series_label, err.getLastErrorString()))
        # Treated as non-fatal, like the peptide-level errors in main().
        err.clearAllErrors()

    print(series_label + " fragments:")
    printFragmentsTable(fragments)

    return fragments
283
284
def printFragmentsTable(fragments):
    """Print a fragment count, a column header and one table row per fragment.

    *fragments* must offer getNumberOfFragments() and getFragmentByNumber(i);
    each fragment is queried for column, start, end, label, mass, neutral
    loss, series name and its immonium/internal/regular flags. The table is
    followed by a line holding a single space.
    """
    header_template = "%5s %5s %5s %-10s %7s %7s %4s %5s %6s %4s"
    row_template = "%5s %5s %5s %-10s %7.2f %7.2f %4s %5s %6s %4s"
    column_titles = ("Col", "Start", "End", "Label", "Mass", "NL", "Name", "Immon", "Intern", "Reg")

    print("Number of fragments: %d" % fragments.getNumberOfFragments())
    print(header_template % column_titles)

    for index in range(fragments.getNumberOfFragments()):
        item = fragments.getFragmentByNumber(index)
        row_values = (
            item.getColumn(),
            item.getStart(),
            item.getEnd(),
            item.getLabel(),
            item.getMass(),
            item.getNeutralLoss(),
            item.getSeriesName(),
            item.isImmonium(),
            item.isInternal(),
            item.isRegular(),
        )
        print(row_template % row_values)

    # Spacer line (one blank character) after the table.
    print(" ")
311
312
def displayMascotTestSearch(vecFixed, vecVariable, enzyme, mr, fragments):
    """Print a query that can be pasted into a Mascot search form.

    vecFixed    -- fixed modifications applied to the peptide (one MODS= line each)
    vecVariable -- variable modifications applied to the peptide (one IT_MODS= line each)
    enzyme      -- enzyme whose title is written to the CLE= line
    mr          -- the peptide Mr(calc), printed to three decimals
    fragments   -- fragment list whose masses form the ions(...) list
    """
    # Fixed modifications first, then variable ones, each under its own key.
    for line_template, mod_list in (("MODS=%s", vecFixed), ("IT_MODS=%s", vecVariable)):
        for index in range(mod_list.getNumberOfModifications()):
            print(line_template % mod_list.getModificationByNumber(index).getTitle())

    print("CHARGE=Mr")
    print("CLE=%s" % enzyme.getTitle())
    print("INSTRUMENT=MALDI-TOF-TOF")

    ion_list = ", ".join(
        "%.3f" % fragments.getFragmentByNumber(index).getMass()
        for index in range(fragments.getNumberOfFragments())
    )
    print("%.3f ions(%s)" % (mr, ion_list))
341
342
# Script entry point: run the demo and hand main()'s return value to the
# interpreter as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
345
346
347"""
348
349Running the program as
350
351python tools_aahelper.py /usr/local/mascot/config/enzymes /usr/local/mascot/config/unimod.xml /usr/local/mascot/html/xmlns/schema/unimod_2/unimod_2.xsd
352
353will give the following output under Mascot 2.3:
354
355
356List of peptides
357M
358MAIFR
359AIFR
360IDEIR
361NMSSEELEEELR
362K
363LEVELIR
364ER
365GAVR
366AGGAPEKPGR
367IR
368EIR
369R
370TIAR
371MK
372TVQR
373ER
374VR
375K
376End of list
377Peptide mass calculated using 'calcPeptideMZ' is 1320.686
378Peptide has been created successfully: MAIFRIDEIR
379b-ion series fragments:
380Number of fragments: 9
381 Col Start End Label Mass NL Name Immon Intern Reg
382 1 1 -1 b(1) 190.05 0.00 b False False True
383 2 2 -1 b(2) 261.09 0.00 b False False True
384 3 3 -1 b(3) 374.17 0.00 b False False True
385 4 4 -1 b(4) 521.24 0.00 b False False True
386 5 5 -1 b(5) 677.34 0.00 b False False True
387 6 6 -1 b(6) 790.43 0.00 b False False True
388 7 7 -1 b(7) 905.45 0.00 b False False True
389 8 8 -1 b(8) 1034.50 0.00 b False False True
390 9 9 -1 b(9) 1147.58 0.00 b False False True
391
392y-ion series fragments:
393Number of fragments: 9
394 Col Start End Label Mass NL Name Immon Intern Reg
395 9 9 -1 y(9) 1132.65 0.00 y False False True
396 8 8 -1 y(8) 1061.61 0.00 y False False True
397 7 7 -1 y(7) 948.53 0.00 y False False True
398 6 6 -1 y(6) 801.46 0.00 y False False True
399 5 5 -1 y(5) 645.36 0.00 y False False True
400 4 4 -1 y(4) 532.27 0.00 y False False True
401 3 3 -1 y(3) 417.25 0.00 y False False True
402 2 2 -1 y(2) 288.20 0.00 y False False True
403 1 1 -1 y(1) 175.12 0.00 y False False True
404
405y++-ion series fragments:
406Number of fragments: 8
407 Col Start End Label Mass NL Name Immon Intern Reg
408 9 9 -1 y(9)++ 566.83 0.00 y False False True
409 8 8 -1 y(8)++ 531.31 0.00 y False False True
410 7 7 -1 y(7)++ 474.77 0.00 y False False True
411 6 6 -1 y(6)++ 401.23 0.00 y False False True
412 5 5 -1 y(5)++ 323.18 0.00 y False False True
413 4 4 -1 y(4)++ 266.64 0.00 y False False True
414 3 3 -1 y(3)++ 209.13 0.00 y False False True
415 2 2 -1 y(2)++ 144.61 0.00 y False False True
416
417internal yb-ion series fragments:
418Number of fragments: 22
419 Col Start End Label Mass NL Name Immon Intern Reg
420 2 2 3 AI 185.13 0.00 yb False True False
421 2 2 4 AIF 332.20 0.00 yb False True False
422 2 2 5 AIFR 488.30 0.00 yb False True False
423 2 2 6 AIFRI 601.38 0.00 yb False True False
424 3 3 4 IF 261.16 0.00 yb False True False
425 3 3 5 IFR 417.26 0.00 yb False True False
426 3 3 6 IFRI 530.34 0.00 yb False True False
427 3 3 7 IFRID 645.37 0.00 yb False True False
428 4 4 5 FR 304.18 0.00 yb False True False
429 4 4 6 FRI 417.26 0.00 yb False True False
430 4 4 7 FRID 532.29 0.00 yb False True False
431 4 4 8 FRIDE 661.33 0.00 yb False True False
432 5 5 6 RI 270.19 0.00 yb False True False
433 5 5 7 RID 385.22 0.00 yb False True False
434 5 5 8 RIDE 514.26 0.00 yb False True False
435 5 5 9 RIDEI 627.35 0.00 yb False True False
436 6 6 7 ID 229.12 0.00 yb False True False
437 6 6 8 IDE 358.16 0.00 yb False True False
438 6 6 9 IDEI 471.24 0.00 yb False True False
439 7 7 8 DE 245.08 0.00 yb False True False
440 7 7 9 DEI 358.16 0.00 yb False True False
441 8 8 9 EI 243.13 0.00 yb False True False
442
443Paste the following into a Mascot search query window to verify this output:
444MODS=Phospho (Y)
445IT_MODS=Oxidation (M)
446IT_MODS=Acetyl (N-term)
447CHARGE=Mr
448CLE=Trypsin
449INSTRUMENT=MALDI-TOF-TOF
4501320.686 ions(190.053, 261.090, 374.174, 521.243, 677.344, 790.428, 905.455, 1034.498, 1147.582)
451
452"""
453