# For calculating peptide and fragment masses.
16import msparser
17import sys
18
def _int_vector(values):
    """Return an ``msparser.vectori`` holding *values* in the given order."""
    vec = msparser.vectori()
    for value in values:
        vec.append(value)
    return vec


def _append_fragments(target, source):
    """Append every fragment of *source* to the fragment vector *target*."""
    for i in range(source.getNumberOfFragments()):
        target.appendFragment(source.getFragmentByNumber(i))


def main():
    """
    Demonstrate peptide and fragment mass calculation with ms_aahelper.

    Reads the enzymes file, unimod file and unimod schema named by
    sys.argv[1:4], lists the peptides ms_aahelper iterates over for a sample
    protein, calculates the mass of a modified peptide, creates that peptide,
    prints several fragment ion series for it and finally prints a query that
    can be pasted into a Mascot search window.

    Returns 0 on success and 1 when Trypsin is missing, the modifications
    are rejected or the peptide cannot be created.  Exits with status 1
    (via sys.exit) when fewer than three arguments are supplied.
    """
    if len(sys.argv) < 4:
        print("Usage: %s ENZYMES_FILE UNIMOD SCHEMA" % sys.argv[0])
        print("ENZYMES_FILE is the enzymes file path, e.g., ../config/enzymes")
        print("UNIMOD is the file path to the unimod.xml file, e.g., ../config/unimod.xml or a fully formed URL such as http://www.matrixscience.com/cgi .")
        print("SCHEMA is the unimod schema file path, e.g., ../html/xmlns/schema/unimod_2/unimod_2.xsd")
        sys.exit(1)

    enzymefile = open_enzymefile(sys.argv[1])

    Trypsin = enzymefile.getEnzymeByName('Trypsin')
    if not Trypsin:
        print("Cannot find 'Trypsin' in the enzyme file. Cannot continue.")
        return 1

    modfile = open_modfile(sys.argv[2], sys.argv[3])

    # The helper needs both the residue masses and the enzyme before it can
    # iterate over peptides or calculate masses.
    aahelper = msparser.ms_aahelper()
    aahelper.setMasses(modfile.getMassFile())
    aahelper.setEnzyme(Trypsin)

    proteinStr = "MAIFRIDEIRNMSSEELEEELRKLEVELIRERGAVRAGGAPEKPGRIREIRRTIARMKTVQRERVRK"

    aahelper.startIteratePeptides(proteinStr, len(proteinStr), 0)

    print("List of peptides")
    while aahelper.getNextPeptide():
        start = aahelper.getPepStart()
        end = aahelper.getPepEnd()
        # The slice treats `start` as 1-based and `end` as inclusive.
        print(proteinStr[start - 1:end])
    print("End of list")

    # Fixed and variable modifications made available to the helper.
    # open_modfile() has already verified that all three names exist.
    vecFixed = msparser.ms_modvector()
    vecFixed.appendModification(modfile.getModificationByName('Phospho (Y)'))

    vecVariable = msparser.ms_modvector()
    vecVariable.appendModification(modfile.getModificationByName('Oxidation (M)'))
    vecVariable.appendModification(modfile.getModificationByName('Acetyl (N-term)'))

    aahelper.setAvailableModifications(vecFixed, vecVariable)

    if not aahelper.isValid():
        print("Error while setting available modifications: %s" % aahelper.getLastErrorString())
        return 1

    # One error object is reused for the calls below and cleared in between.
    err = msparser.ms_errs()

    # NOTE(review): presumably one count per variable modification, in
    # vecVariable order -- confirm against the calcPeptideMZ documentation.
    numThatMustBeModded = _int_vector([1, 1])

    mr = aahelper.calcPeptideMZ(
        proteinStr,
        len(proteinStr),
        1,
        10,
        numThatMustBeModded,
        0,
        msparser.MASS_TYPE_MONO,
        err
    )

    if err.isValid():
        print("Peptide mass calculated using 'calcPeptideMZ' is %8.3f" % mr)
    else:
        # Kept non-fatal as in the original: `mr` is only used further down
        # as the upper mass limit handed to fragmentPeptide().
        print("Error while calculating peptide mass: %s" % err.getLastErrorString())

    err.clearAllErrors()

    # Twelve entries for the ten-residue span 1..10 requested below.
    # NOTE(review): presumably N-terminus + one entry per residue +
    # C-terminus, where a non-zero value is the 1-based index into
    # vecVariable (2 = 'Acetyl (N-term)', 1 = 'Oxidation (M)') -- confirm
    # against the createPeptide documentation.
    numModded = _int_vector([2, 1] + [0] * 10)

    # Same layout as numModded.
    # NOTE(review): presumably selects the neutral loss to use at each
    # position -- confirm against the createPeptide documentation.
    whichNl = _int_vector([0, 1] + [0] * 10)

    peptide = aahelper.createPeptide(
        proteinStr,
        len(proteinStr),
        1,
        10,
        numModded,
        whichNl,
        0,
        msparser.MASS_TYPE_MONO,
        err
    )

    if not err.isValid():
        print("Error while creating a peptide: %s" % err.getLastErrorString())
        err.clearAllErrors()
        # Everything below fragments `peptide` and reads its Mr(calc), so
        # there is nothing meaningful left to do without a valid peptide.
        return 1

    print("Peptide has been created successfully: %s" % peptide.getPeptideStr())

    # Collect every series into one vector, first by copying the b-ions and
    # then by appending the remaining series fragment by fragment.
    all_fragments = msparser.ms_fragmentvector()

    b_ions = fragmentPeptide(
        aahelper,
        peptide,
        msparser.ms_fragmentationrules.FRAG_B_SERIES,
        'b-ion series',
        0,
        mr
    )
    all_fragments.copyFrom(b_ions)

    fragments = fragmentPeptide(
        aahelper,
        peptide,
        msparser.ms_fragmentationrules.FRAG_Y_SERIES,
        'y-ion series',
        0,
        mr
    )
    _append_fragments(all_fragments, fragments)

    fragments = fragmentPeptide(
        aahelper,
        peptide,
        msparser.ms_fragmentationrules.FRAG_Y_SERIES,
        'y++-ion series',
        2,
        mr
    )
    _append_fragments(all_fragments, fragments)

    # The internal series uses a fixed upper mass limit of 700 instead of mr.
    fragments = fragmentPeptide(
        aahelper,
        peptide,
        msparser.ms_fragmentationrules.FRAG_INTERNAL_YB,
        'internal yb-ion series',
        0,
        700
    )
    _append_fragments(all_fragments, fragments)

    print("Paste the following into a Mascot search query window to verify this output:")

    displayMascotTestSearch(
        vecFixed, vecVariable, Trypsin, peptide.getMrCalc(),
        b_ions
    )

    return 0
235
def open_enzymefile(filename):
    """
    Load the enzymes file at *filename* and return the ms_enzymefile object.

    Prints the parser's last error string and exits with status 1 when the
    file is reported as invalid.
    """
    enzymefile = msparser.ms_enzymefile(filename)

    # isValid has to be *called*: the bare bound method is always truthy, so
    # `not enzymefile.isValid` could never trigger the error branch.
    if not enzymefile.isValid():
        print("Error while opening enzyme file: %s " % enzymefile.getLastErrorString())
        sys.exit(1)

    return enzymefile
244
245
def open_modfile(umod_filename, schema_filename):
    """
    Build an ms_modfile from a unimod file and its schema and return it.

    Exits with status 1 after printing a message when the resulting mod
    file is invalid, or when any of the modifications this example relies on
    cannot be looked up by name.
    """
    required_mods = ('Oxidation (M)', 'Acetyl (N-term)', 'Phospho (Y)')

    unimod_config = msparser.ms_umod_configfile(umod_filename, schema_filename)
    mods = msparser.ms_modfile(unimod_config)

    if mods.isValid():
        for mod_name in required_mods:
            if mods.getModificationByName(mod_name):
                continue
            print("Cannot find '%s' in the mod file. Cannot continue." % mod_name)
            sys.exit(1)
        return mods

    print("Error while opening mod file: %s" % mods.getLastErrorString())
    sys.exit(1)
260
261
def fragmentPeptide(aahelper, peptide, series, series_label, doubleCharged, mass_max):
    """
    Calculate one fragment series for *peptide*, print it and return it.

    aahelper      -- helper whose calcFragments() does the calculation
    peptide       -- the peptide to fragment
    series        -- fragmentation series constant passed to calcFragments()
    series_label  -- text used in the printed heading and in error messages
    doubleCharged -- passed through to calcFragments() unchanged
    mass_max      -- upper mass limit passed to calcFragments()

    Returns the ms_fragmentvector filled by calcFragments().  A failure
    reported through the error object is printed, and the vector is still
    printed and returned so callers behave as before.
    """
    fragments = msparser.ms_fragmentvector()
    err = msparser.ms_errs()

    aahelper.calcFragments(
        peptide,
        series,
        doubleCharged,
        100.0,  # NOTE(review): presumably the lower mass limit -- confirm
        mass_max,
        msparser.MASS_TYPE_MONO,
        fragments,
        err
    )

    # The error object was previously filled in but never looked at, so a
    # failed calculation was indistinguishable from an empty series.
    if not err.isValid():
        print("Error while calculating %s fragments: %s" % (series_label, err.getLastErrorString()))

    print(series_label + " fragments:")
    printFragmentsTable(fragments)

    return fragments
283
284
def printFragmentsTable(fragments):
    """
    Print the number of fragments followed by a table with one row each.

    Every row shows the fragment's column, start, end, label, mass, neutral
    loss, series name and its immonium/internal/regular flags.  A line
    holding a single space is printed after the table.
    """
    column_titles = ("Col", "Start", "End", "Label", "Mass", "NL", "Name", "Immon", "Intern", "Reg")
    title_layout = "%5s %5s %5s %-10s %7s %7s %4s %5s %6s %4s"
    row_layout = "%5s %5s %5s %-10s %7.2f %7.2f %4s %5s %6s %4s"

    print("Number of fragments: %d" % fragments.getNumberOfFragments())
    print(title_layout % column_titles)

    for index in range(fragments.getNumberOfFragments()):
        entry = fragments.getFragmentByNumber(index)
        cells = (
            entry.getColumn(),
            entry.getStart(),
            entry.getEnd(),
            entry.getLabel(),
            entry.getMass(),
            entry.getNeutralLoss(),
            entry.getSeriesName(),
            entry.isImmonium(),
            entry.isInternal(),
            entry.isRegular(),
        )
        print(row_layout % cells)

    print(" ")
311
312
def displayMascotTestSearch(vecFixed, vecVariable, enzyme, mr, fragments):
    """
    Print a test search that can be run on Mascot.

    vecFixed    -- list of fixed mods applied to the peptide (printed as MODS=)
    vecVariable -- list of variable mods applied to the peptide (printed as
                   IT_MODS=)
    enzyme      -- the enzyme used in fragmentation (its title is printed as
                   CLE=)
    mr          -- the peptide Mr(calc)
    fragments   -- list of b-ions from a peptide; their masses are listed
                   inside ions(...)
    """
    for i in range(vecFixed.getNumberOfModifications()):
        print("MODS=%s" % vecFixed.getModificationByNumber(i).getTitle())

    for i in range(vecVariable.getNumberOfModifications()):
        print("IT_MODS=%s" % vecVariable.getModificationByNumber(i).getTitle())

    print("CHARGE=Mr")
    print("CLE=%s" % enzyme.getTitle())
    print("INSTRUMENT=MALDI-TOF-TOF")

    # Query line: the peptide mass followed by the fragment masses, all
    # formatted to three decimal places.
    masses = ", ".join(
        "%.3f" % fragments.getFragmentByNumber(i).getMass()
        for i in range(fragments.getNumberOfFragments())
    )
    print("%.3f ions(%s)" % (mr, masses))
341
342
# Run the example only when executed as a script; the value returned by
# main() becomes the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
345
346
347"""
348
349Running the program as
350
python tools_aahelper.py /usr/local/mascot/config/enzymes /usr/local/mascot/config/unimod.xml /usr/local/mascot/html/xmlns/schema/unimod_2/unimod_2.xsd
352
353will give the following output under Mascot 2.3:
354
355
356List of peptides
357M
358MAIFR
359AIFR
360IDEIR
361NMSSEELEEELR
362K
363LEVELIR
364ER
365GAVR
366AGGAPEKPGR
367IR
368EIR
369R
370TIAR
371MK
372TVQR
373ER
374VR
375K
376End of list
377Peptide mass calculated using 'calcPeptideMZ' is 1320.686
378Peptide has been created successfully: MAIFRIDEIR
379b-ion series fragments:
380Number of fragments: 9
381 Col Start End Label Mass NL Name Immon Intern Reg
382 1 1 -1 b(1) 190.05 0.00 b False False True
383 2 2 -1 b(2) 261.09 0.00 b False False True
384 3 3 -1 b(3) 374.17 0.00 b False False True
385 4 4 -1 b(4) 521.24 0.00 b False False True
386 5 5 -1 b(5) 677.34 0.00 b False False True
387 6 6 -1 b(6) 790.43 0.00 b False False True
388 7 7 -1 b(7) 905.45 0.00 b False False True
389 8 8 -1 b(8) 1034.50 0.00 b False False True
390 9 9 -1 b(9) 1147.58 0.00 b False False True
391
392y-ion series fragments:
393Number of fragments: 9
394 Col Start End Label Mass NL Name Immon Intern Reg
395 9 9 -1 y(9) 1132.65 0.00 y False False True
396 8 8 -1 y(8) 1061.61 0.00 y False False True
397 7 7 -1 y(7) 948.53 0.00 y False False True
398 6 6 -1 y(6) 801.46 0.00 y False False True
399 5 5 -1 y(5) 645.36 0.00 y False False True
400 4 4 -1 y(4) 532.27 0.00 y False False True
401 3 3 -1 y(3) 417.25 0.00 y False False True
402 2 2 -1 y(2) 288.20 0.00 y False False True
403 1 1 -1 y(1) 175.12 0.00 y False False True
404
405y++-ion series fragments:
406Number of fragments: 8
407 Col Start End Label Mass NL Name Immon Intern Reg
408 9 9 -1 y(9)++ 566.83 0.00 y False False True
409 8 8 -1 y(8)++ 531.31 0.00 y False False True
410 7 7 -1 y(7)++ 474.77 0.00 y False False True
411 6 6 -1 y(6)++ 401.23 0.00 y False False True
412 5 5 -1 y(5)++ 323.18 0.00 y False False True
413 4 4 -1 y(4)++ 266.64 0.00 y False False True
414 3 3 -1 y(3)++ 209.13 0.00 y False False True
415 2 2 -1 y(2)++ 144.61 0.00 y False False True
416
417internal yb-ion series fragments:
418Number of fragments: 22
419 Col Start End Label Mass NL Name Immon Intern Reg
420 2 2 3 AI 185.13 0.00 yb False True False
421 2 2 4 AIF 332.20 0.00 yb False True False
422 2 2 5 AIFR 488.30 0.00 yb False True False
423 2 2 6 AIFRI 601.38 0.00 yb False True False
424 3 3 4 IF 261.16 0.00 yb False True False
425 3 3 5 IFR 417.26 0.00 yb False True False
426 3 3 6 IFRI 530.34 0.00 yb False True False
427 3 3 7 IFRID 645.37 0.00 yb False True False
428 4 4 5 FR 304.18 0.00 yb False True False
429 4 4 6 FRI 417.26 0.00 yb False True False
430 4 4 7 FRID 532.29 0.00 yb False True False
431 4 4 8 FRIDE 661.33 0.00 yb False True False
432 5 5 6 RI 270.19 0.00 yb False True False
433 5 5 7 RID 385.22 0.00 yb False True False
434 5 5 8 RIDE 514.26 0.00 yb False True False
435 5 5 9 RIDEI 627.35 0.00 yb False True False
436 6 6 7 ID 229.12 0.00 yb False True False
437 6 6 8 IDE 358.16 0.00 yb False True False
438 6 6 9 IDEI 471.24 0.00 yb False True False
439 7 7 8 DE 245.08 0.00 yb False True False
440 7 7 9 DEI 358.16 0.00 yb False True False
441 8 8 9 EI 243.13 0.00 yb False True False
442
443Paste the following into a Mascot search query window to verify this output:
444MODS=Phospho (Y)
445IT_MODS=Oxidation (M)
446IT_MODS=Acetyl (N-term)
447CHARGE=Mr
448CLE=Trypsin
449INSTRUMENT=MALDI-TOF-TOF
4501320.686 ions(190.053, 261.090, 374.174, 521.243, 677.344, 790.428, 905.455, 1034.498, 1147.582)
451
452"""
453