Matrix Science Mascot Parser toolkit
 
Loading...
Searching...
No Matches
resfile_summary.py

Example program for extracting the peptide or protein summary.

1#!/usr/bin/python
2
17
18import msparser
19import sys
20import re
21
def main() :
    """
    Load a Mascot results file and print its peptide or protein summary.

    The results filename is read from sys.argv[1].  Depending on the
    usePeptideSummary value returned by get_ms_mascotresults_params(), either
    one peptide summary report is printed, or a full protein summary report
    followed by a concise (grouped) protein summary report.

    Returns 0 when every report was produced and checkErrors() reported the
    results file as valid afterwards; returns 1 otherwise, including when no
    filename was given on the command line.
    """
    returnValue = 1

    if len(sys.argv) < 2 :
        print("Must specify results filename as parameter" )
        return returnValue

    resfile = msparser.ms_mascotresfilebase.createResfile(sys.argv[1])

    if not checkErrors(resfile) :
        return returnValue

    # The parameters passed to ms_peptidesummary or ms_proteinsummary determine
    # the type of grouping and the number of proteins and peptides displayed.
    # Default parameters can be returned using ms_mascotresfilebase::get_ms_mascotresults_params().
    # The return values from this function depend on the type of search,
    # and also on values in the mascot.dat configuration file if that is available.

    # You may need to change this path
    datfile = msparser.ms_datfile("../config/mascot.dat")

    # if the mascot.dat isn't available, use defaults
    mascotOptions = msparser.ms_mascotoptions()

    if datfile.isValid() :
        mascotOptions = datfile.getMascotOptions()

    # scriptName is returned as part of the tuple but is not needed here.
    (scriptName,
     flags,
     minProbability,
     maxHitsToReport,
     ignoreIonsScoreBelow,
     minPepLenInPepSummary,
     usePeptideSummary,
     flags2) = resfile.get_ms_mascotresults_params(mascotOptions)

    rule = "-------------------------------------------------------------"

    def report(titleLine, reportFlags) :
        # Print the three-line banner, then the report itself.  Only the
        # flags differ between the reports; everything else is shared.
        print(rule)
        print(titleLine)
        print(rule)
        return show_results(resfile,
                            usePeptideSummary,
                            reportFlags,
                            minProbability,
                            maxHitsToReport,
                            ignoreIonsScoreBelow,
                            minPepLenInPepSummary,
                            flags2)

    bResult = False
    if usePeptideSummary :

        # For peptide summary
        #
        # Flags defined for hierarchical clustering algorithm:
        #     flags: ms_mascotresults::MSRES_CLUSTER_PROTEINS
        #            | ms_mascotresults::MSRES_SHOW_SUBSETS
        #            | ms_mascotresults::MSRES_MUDPIT_PROTEIN_SCORE;
        #     flags2: ms_peptidesummary::MSPEPSUM_USE_HOMOLOGY_THRESH;
        #
        # Flags defined for original simple parsimony algorithm:
        #     flags: ms_mascotresults::MSRES_GROUP_PROTEINS | ms_mascotresults::MSRES_SHOW_SUBSETS;

        bResult = report("--- Peptide summary report ---", flags)
    else :
        # Show results from full protein summary, remove grouping
        flags &= ~msparser.ms_mascotresults.MSRES_GROUP_PROTEINS
        flags &= ~msparser.ms_mascotresults.MSRES_SHOW_SUBSETS

        bResult = report("--- Full Protein summary report ---", flags)

        # The concise report is only attempted when the full one succeeded.
        if bResult and checkErrors(resfile) :
            print("")

            # Show results from concise protein summary, add grouping
            flags |= msparser.ms_mascotresults.MSRES_GROUP_PROTEINS
            flags |= msparser.ms_mascotresults.MSRES_SHOW_SUBSETS

            bResult = report("--- Concise Protein summary report ---", flags)

    if bResult and checkErrors(resfile) :
        returnValue = 0

    return returnValue
123
124
def _printRelatedProtein(prot, showDbPrefix, matchedLabel, lead=None) :
    """
    Print one line describing a same-set or subset protein.

    - prot is the protein object to describe
    - showDbPrefix: when true and prot.getDB() > 1, the line carries a
      "<db>::" prefix
    - matchedLabel is the text placed in front of the peptide count
    - lead is an optional leading fragment (used to offset nested lines)

    The fragments are joined with single spaces and printed with one print
    call.  The original code used "print(...)," which only suppresses the
    newline under Python 2; under Python 3 every fragment ended up on its
    own line.  Joining with a space mirrors the Python 2 output.
    """
    parts = []
    if lead is not None :
        parts.append(lead)

    dbIdx = prot.getDB()
    if showDbPrefix and dbIdx > 1 :
        parts.append(str(dbIdx) + "::")

    parts.append(prot.getAccession() + " - Total score:" + str(prot.getScore()))
    parts.append(matchedLabel + str(prot.getNumDisplayPeptides()))
    print(" ".join(parts))


def show_results(resfile, usePeptideSummary, flags, minProteinProb, maxHits, minIonsScore, minPepLenInPepSummary, flags2) :
    """
    Build a peptide or protein summary from resfile and print it.

    - resfile is the results file object
    - usePeptideSummary selects ms_peptidesummary (true) or
      ms_proteinsummary (false)
    - flags, minProteinProb, maxHits are passed to both summary types
    - minIonsScore, minPepLenInPepSummary, flags2 are passed to
      ms_peptidesummary only

    For every protein hit the header fields and matching peptides are
    printed, followed (depending on flags) by the proteins matching the same
    set or a subset of the peptides.  The unassigned peptide list is printed
    afterwards and, for a peptide summary, the popup-style table for query 1.

    Returns False if checkErrors() reports the results file as invalid after
    the summary object has been created, True otherwise.
    """
    if usePeptideSummary :
        results = msparser.ms_peptidesummary(
            resfile, flags, minProteinProb, maxHits, "", minIonsScore, minPepLenInPepSummary, "", flags2
        )
    else :
        results = msparser.ms_proteinsummary(
            resfile, flags, minProteinProb, maxHits
        )

    if not checkErrors(resfile) :
        return False

    # The flags never change inside this function, so test them once.
    clusterProteins = flags & msparser.ms_mascotresults.MSRES_CLUSTER_PROTEINS
    groupProteins = flags & msparser.ms_mascotresults.MSRES_GROUP_PROTEINS
    showSubsets = flags & msparser.ms_mascotresults.MSRES_SHOW_SUBSETS

    family = 1
    hit = 1
    prot = results.getHit(hit)

    while prot :
        accession = prot.getAccession()
        description = results.getProteinDescription(accession)
        mass = results.getProteinMass(accession)
        dbIdx = prot.getDB()

        protein_hit = "Protein Hit %d" % hit
        if clusterProteins :
            protein_hit = protein_hit + "." + str(family)

        print( protein_hit )
        print("===================")
        print("Accession : %s" % accession)
        print("Description : %s" % description)
        print("Score : %s" % prot.getScore())
        print("Mass : %s" % mass)
        print("Frame : %s" % prot.getFrame())
        print("Coverage : %s" % prot.getCoverage())
        print("RMS error : %s" % prot.getRMSDeltas(results))
        print("Peptides : %s" % prot.getNumDisplayPeptides())

        # Each protein has a number of peptides that matched - list them:
        num_peps = prot.getNumPeptides()

        for i in range(1, 1 + num_peps) :
            query = prot.getPeptideQuery(i)
            p = prot.getPeptideP(i)

            isDupSameQuery = prot.getPeptideDuplicate(i) == msparser.ms_protein.DUPE_DuplicateSameQuery
            if p == -1 or query == -1 or isDupSameQuery :
                continue

            pep = results.getPeptide(query, p)
            if not pep :
                continue

            displayPeptideInfo(
                0, pep, results,
                prot.getPeptideDuplicate(i) == msparser.ms_protein.DUPE_Duplicate,
                prot.getPeptideIsBold(i),
                prot.getPeptideShowCheckbox(i)
            )

        # Now display list of all proteins that contain a subset or a same set of the matching peptides

        if groupProteins or clusterProteins :
            print("Proteins matching the same set of peptides:")

            i = 1
            similar_prot = results.getNextSimilarProteinOf(accession, dbIdx, 1)
            while similar_prot :
                _printRelatedProtein(similar_prot, clusterProteins, " - Peptides matched:")
                i += 1
                similar_prot = results.getNextSimilarProteinOf(accession, dbIdx, i)

        if showSubsets :
            print("Proteins matching a subset of these peptides:")

            i = 1
            subset_prot = results.getNextSubsetProteinOf(accession, dbIdx, 1)
            while subset_prot :
                subset_accession = subset_prot.getAccession()
                subset_dbIdx = subset_prot.getDB()
                _printRelatedProtein(subset_prot, clusterProteins, " - Peptides matched:")

                if clusterProteins :
                    j = 1
                    similar_subset_prot = results.getNextSimilarProteinOf(subset_accession, subset_dbIdx, j)
                    if similar_subset_prot :
                        print(" Proteins matching the same set of peptides for this subset:")
                    while similar_subset_prot :
                        # Nested lines start with a lone-space fragment and use a
                        # label without the leading dash, as in the original.
                        _printRelatedProtein(similar_subset_prot, True, " Peptides matched:", " ")
                        j += 1
                        similar_subset_prot = results.getNextSimilarProteinOf(subset_accession, subset_dbIdx, j)

                i += 1
                subset_prot = results.getNextSubsetProteinOf(accession, dbIdx, i)

        # Advance: with clustering, walk the family members of the current
        # hit first and only move to the next hit when they are exhausted.
        if clusterProteins :
            prot = results.getNextFamilyProtein(hit, family)
            family += 1
            if not prot :
                hit += 1
                prot = results.getHit(hit)
                family = 1
        else :
            hit += 1
            prot = results.getHit(hit)

        print(" ")

    results.createUnassignedList(msparser.ms_mascotresults.SCORE)

    if results.getNumberOfUnassigned() :
        print("Unassigned list")
        print("---------------")

        for u in range(1, 1 + results.getNumberOfUnassigned()) :
            pep = results.getUnassigned(u)
            displayPeptideInfo(0, pep, results, 0, 1, 1)

    if usePeptideSummary :
        print(" ")
        displayYellowPopupInfo(results, 1)

    return True
261
262
def displayYellowPopupInfo(results, q) :
    """
    Shows the equivalent of the yellow popup box for given query
    - results is the results object
    - q is the query number

    Prints a header row, then one row for each of ranks 1 to 10 that has a
    peptide with a non-empty sequence, and finally the raw list of
    accessions that matched rank 1 of the query.
    """

    fmt = "%5s %5s %9s %7s %7s"
    print(fmt % ("Score", "Delta", "Hit", "Protein", "Peptide"))

    # "<hit>:<accession>" followed by whatever else is on the line.  Raw
    # string so that \d is not treated as a (invalid) string escape.
    firstProtein = re.compile(r'(\d+):([^ ]*)[ ]*(.*)')

    for p in range(1, 11) :
        pep = results.getPeptide(q, p)
        if not pep :
            continue

        seq = pep.getPeptideStr()
        if not seq :
            continue

        tmp = results.getProteinsWithThisPepMatch(q, p)

        (hit, protein) = ('', '')

        if tmp :
            found = firstProtein.search(tmp)
            # Leave hit/protein blank rather than crash when the string does
            # not have the expected "<hit>:<accession>" shape.
            if found :
                hit, protein, morethan = found.groups()

                # Anything after the first protein means there are more
                # matches than the one shown.
                if morethan :
                    hit += "+"

        print(fmt % (pep.getIonsScore(), pep.getDelta(), hit, protein, seq))

    p = 1
    print("Accessions that matched query %s rank %s :- %s" % (q, p, results.getProteinsWithThisPepMatch(q, p)))
294
295
def displayPeptideInfo(showFullDetails, p, results, isDuplicate, isBold, showCheckBox) :
    """
    Print the details of one peptide match.

    - showFullDetails: false prints a single summary line, true prints one
      labelled line per field
    - p is the peptide object
    - results is the results object (used for readable modifications and
      thresholds)
    - isDuplicate: when true the ions score is wrapped in parentheses
      (summary line only)
    - isBold: selects "BOLD" instead of "dim" (summary line only)
    - showCheckBox: selects "CB" instead of "--" (summary line only)
    """
    q = p.getQuery()

    if not showFullDetails :
        fmt = "%2s %4s %4d %11f %4d(%4d) %-20s %s%3.2f%s %3s"

        cb = "CB" if showCheckBox else "--"
        bold = "BOLD" if isBold else "dim"

        paren1, paren2 = "", ""
        if isDuplicate :
            paren1, paren2 = "(", ")"

        print(fmt % (
            cb,
            bold,
            q,
            p.getObserved(),
            p.getRank(),
            p.getPrettyRank(),
            p.getPeptideStr(),
            paren1,
            p.getIonsScore(),
            paren2,
            results.getReadableVarMods(q, p.getRank())
        ))

        return

    print("Peptide hit")

    if not p.getAnyMatch() :
        print(" No match")
        return

    fmt = " %-12s: %s"
    print(fmt % ('Query'       , q))
    print(fmt % ('Rank'        , p.getRank()))
    print(fmt % ('Matched'     , p.getAnyMatch()))
    print(fmt % ('missedCleave', p.getMissedCleavages()))
    print(fmt % ('mrCalc'      , p.getMrCalc()))
    print(fmt % ('delta'       , p.getDelta()))
    print(fmt % ('observed'    , p.getObserved()))
    print(fmt % ('charge'      , p.getCharge()))
    print(fmt % ('mrExp'       , p.getMrExperimental()))
    print(fmt % ('ionsMatched' , p.getNumIonsMatched()))
    print(fmt % ('peptideStr'  , p.getPeptideStr()))
    print(fmt % ('peaksUsed1'  , p.getPeaksUsedFromIons1()))
    print(fmt % ('varModsStr'  , p.getVarModsStr()))
    # getRank must be called: the summary branch above passes the rank
    # value, and the original passed the bound method here by mistake.
    print(fmt % ('readable mod', results.getReadableVarMods(q, p.getRank())))
    print(fmt % ('ionsScore'   , p.getIonsScore()))
    print(fmt % ('seriesUsedS' , p.getSeriesUsedStr()))
    print(fmt % ('peaksUsed2'  , p.getPeaksUsedFromIons2()))
    print(fmt % ('peaksUsed3'  , p.getPeaksUsedFromIons3()))

    # str.join takes a single iterable of strings, so the three values are
    # collected and converted first.
    # NOTE(review): the literal 20 is presumably a "1 in 20" probability
    # argument - confirm against the Mascot Parser documentation.
    thresholds = (
        results.getPeptideIdentityThreshold(q, 20),
        results.getHomologyThreshold(q, 20),
        results.getProbOfPepBeingRandomMatch(p.getIonsScore(), q)
    )
    print(fmt % ('idth, hth, p', ', '.join([str(value) for value in thresholds])))
    print(" ")
356
def checkErrors(resfile) :
    """
    Print any errors recorded on resfile, clear them, and report validity.

    - resfile is the results file object

    Returns the value of resfile.isValid() as sampled before the errors
    were cleared.
    """
    if resfile.getLastError() :
        # Error indices are 1-based.
        for i in range(1, 1 + resfile.getNumberOfErrors()) :
            print("Error number: %s : %s" % (resfile.getErrorNumber(i), resfile.getErrorString(i)))

    #Call isValid before clearAllErrors, otherwise this method always returns true
    bIsValid = resfile.isValid()
    resfile.clearAllErrors()
    return bIsValid
366
# Run main() only when executed as a script; its return value becomes the
# process exit status.
if __name__ == "__main__" :
    sys.exit(main())
369
370
371"""
372
373Running the program as
374
375python resfile_summary.py F981123.dat
376
377will give the following output under Mascot Server 2.3:
378
379
380-------------------------------------------------------------
381--- Peptide summary report ---
382-------------------------------------------------------------
383Protein Hit 1
384===================
385Accession : CH60_HUMAN
386Description : 60 kDa heat shock protein, mitochondrial precursor (Hsp60) (60 kDa chaperonin) (CPN60) (Heat shock
387Score : 1225.18623377
388Mass : 61016.38
389Frame : 0
390Coverage : 283
391RMS error : 30.4200726378
392Peptides : 31
393-- dim 52 1065.039917 2( 2) ALMLQGVDLLADAVAVTMGPK 57.90 Oxidation (M)
394-- dim 53 1065.062256 2( 2) ALMLQGVDLLADAVAVTMGPK 7.49 Oxidation (M)
395Proteins matching the same set of peptides:
396Proteins matching a subset of these peptides:
397CH60_PONPY Total score: 1007.90623377 Peptides matched: 25
398CH60_CRIGR Total score: 951.166233769 Peptides matched: 23
399CH60_MOUSE Total score: 951.166233769 Peptides matched: 23
400CH60_RAT Total score: 951.166233769 Peptides matched: 23
401CH60_BOVIN Total score: 917.682467539 Peptides matched: 22
402CH60_CHICK Total score: 875.976233769 Peptides matched: 19
403CH60C_DROME Total score: 120.5 Peptides matched: 2
404CH60C_ARATH Total score: 90.68 Peptides matched: 2
405HSP60_CANAL Total score: 45.35 Peptides matched: 1
406HSP60_PARBR Total score: 45.35 Peptides matched: 1
407HSP60_YEAST Total score: 45.35 Peptides matched: 1
408CH602_VIBPA Total score: 45.35 Peptides matched: 1
409CH602_VIBVU Total score: 45.35 Peptides matched: 1
410CH602_VIBVY Total score: 45.35 Peptides matched: 1
411CH60_EUGGR Total score: 45.33 Peptides matched: 1
412
413Protein Hit 2
414===================
415Accession : CH60_DROME
416Description : 60 kDa heat shock protein, mitochondrial precursor (Hsp60) (60 kDa chaperonin) (CPN60) (Heat shock
417Score : 174.39
418Mass : 60770.89
419Frame : 0
420Coverage : 67
421RMS error : 29.5905072791
422Peptides : 4
423Proteins matching the same set of peptides:
424Proteins matching a subset of these peptides:
425HSP60_SCHPO Total score: 87.04 Peptides matched: 2
426
427Protein Hit 3
428===================
429Accession : CH60_CAEEL
430Description : Chaperonin homolog Hsp-60, mitochondrial precursor (Heat shock protein 60) (HSP-60) - Caenorhabditi
431Score : 134.91
432Mass : 60063.75
433Frame : 0
434Coverage : 21
435RMS error : 36.5383063194
436Peptides : 3
437Proteins matching the same set of peptides:
438Proteins matching a subset of these peptides:
439
440Protein Hit 4
441===================
442Accession : CH60_XANAC
443Description : 60 kDa chaperonin (Protein Cpn60) (groEL protein) - Xanthomonas axonopodis pv. citri
444Score : 42.2
445Mass : 57130.83
446Frame : 0
447Coverage : 9
448RMS error : 76.9237696062
449Peptides : 1
450Proteins matching the same set of peptides:
451CH60_XANAC Total score: 42.2 Peptides matched: 1
452CH60_XANAC Total score: 42.2 Peptides matched: 1
453CH60_XANAC Total score: 42.2 Peptides matched: 1
454CH60_XANAC Total score: 42.2 Peptides matched: 1
455CH60_XANAC Total score: 42.2 Peptides matched: 1
456CH60_XANAC Total score: 42.2 Peptides matched: 1
457Proteins matching a subset of these peptides:
458
459Protein Hit 5
460===================
461Accession : NMDE4_HUMAN
462Description :
463Score : 37.24
464Mass : 0.0
465Frame : 0
466Coverage : 10
467RMS error : 9.41906700791
468Peptides : 1
469Proteins matching the same set of peptides:
470NMDE4_HUMAN Total score: 37.24 Peptides matched: 1
471NMDE4_HUMAN Total score: 36.2762337693 Peptides matched: 1
472Proteins matching a subset of these peptides:
473
474Protein Hit 6
475===================
476Accession : YF81_THET2
477Description :
478Score : 34.76
479Mass : 0.0
480Frame : 0
481Coverage : 9
482RMS error : 37.0214184966
483Peptides : 1
484Proteins matching the same set of peptides:
485YF81_THET2 Total score: 34.76 Peptides matched: 1
486Proteins matching a subset of these peptides:
487
488Protein Hit 7
489===================
490Accession : F4ST_FLACH
491Description :
492Score : 33.85
493Mass : 0.0
494Frame : 0
495Coverage : 9
496RMS error : 87.8815544839
497Peptides : 1
498Proteins matching the same set of peptides:
499Proteins matching a subset of these peptides:
500
501Protein Hit 8
502===================
503Accession : ZN711_HUMAN
504Description : Zinc finger protein 711 (Zinc finger protein 6) - Homo sapiens (Human)
505Score : 30.84
506Mass : 87153.77
507Frame : 0
508Coverage : 13
509RMS error : 69.4028633218
510Peptides : 1
511Proteins matching the same set of peptides:
512Proteins matching a subset of these peptides:
513
514Unassigned list
515---------------
516CB BOLD 14 442.228302 1( 1) LIAQTPLK 25.09
517CB BOLD 9 747.396179 1( 1) EGETRR 15.03
518CB BOLD 4 662.275574 1( 1) KNAMAK 14.09
519CB BOLD 23 1101.621704 1( 1) QLLMVAGVDR 12.04
520CB BOLD 5 662.417175 1( 1) AIACER 11.79
521CB BOLD 8 714.372498 1( 1) LAPAQSK 10.69
522CB BOLD 6 673.349487 1( 1) AVNDVR 10.63
523CB BOLD 22 1101.536621 1( 1) ENVIPADSEK 8.65
524CB BOLD 55 1099.094727 1( 1) LNAEAVRTLLSANGQKPSEAK 8.05
525CB BOLD 29 642.353577 1( 1) VVGVAGQGASALVR 7.91
526CB BOLD 28 642.352600 1( 1) KNVSVSQGPDPR 7.22
527CB BOLD 30 663.837891 1( 1) TPLLVGVAKGESR 7.20
528CB BOLD 50 1048.561523 1( 1) ALDEILEYQNYPVVCAKK 5.70
529CB BOLD 57 747.036072 1( 1) VMGSAFTALLDANEDAQKAMR 4.83
530CB BOLD 49 1020.987915 1( 1) HQRLSGLMQTALEEQQR 4.11 Oxidation (M)
531CB BOLD 19 932.364380 1( 1) TGMTRNPR 4.09
532CB BOLD 2 500.256012 1( 1) LAVPT 3.87
533CB BOLD 38 749.383972 1( 1) IDLLADMMWDDK 3.43 2 Oxidation (M)
534CB BOLD 20 933.499023 1( 1) SRDPGMVR 3.21 Oxidation (M)
535CB BOLD 41 886.405884 1( 1) DRVALNQEVMAPEATK 1.85
536CB BOLD 10 747.412476 1( 1) MAPSTPK 1.68 Oxidation (M)
537CB BOLD 18 930.703003 1( 1) LGSGIKAER 1.60
538CB BOLD 7 711.364685 1( 1) GGAHEIK 1.34
539CB BOLD 17 930.683105 1( 1) KIQAEITK 1.00
540CB BOLD 44 949.550720 1( 1) LLSWDSVFFIKNITSK 0.30
541CB BOLD 1 498.272888 1( 1) 0.00
542CB BOLD 3 575.558411 1( 1) 0.00
543CB BOLD 32 711.370728 1( 1) 0.00
544CB BOLD 42 932.460815 1( 1) 0.00
545CB BOLD 43 933.003784 1( 1) 0.00
546CB BOLD 47 665.009583 1( 1) 0.00
547CB BOLD 56 1119.045166 1( 1) 0.00
548CB BOLD 63 832.798584 1( 1) 0.00
549CB BOLD 66 1113.894653 1( 1) 0.00
550CB BOLD 67 1116.177490 1( 1) 0.00
551
552Score Delta Hit Protein Peptide
553
554Accessions that matched query 97 rank 1 :-
555
556"""
557