# Example program for extracting the peptide or protein summary.
import msparser
import sys
import re
def main():
    """Print the summary report(s) for the results file named in sys.argv[1].

    The default report parameters are taken from
    resfile.get_ms_mascotresults_params(); when they select the peptide
    summary a single report is printed, otherwise a full protein summary is
    printed followed by a concise one.

    Returns:
        0 when the last report was produced and checkErrors() still reports
        the results file as valid, 1 otherwise (including when no filename
        argument was supplied).
    """
    if len(sys.argv) < 2:
        print("Must specify results filename as parameter" )
        return 1

    resfile = msparser.ms_mascotresfile(sys.argv[1])
    if not checkErrors(resfile):
        return 1

    # Options come from mascot.dat when that file is valid; otherwise a
    # default-constructed ms_mascotoptions object is used.
    datfile = msparser.ms_datfile("../config/mascot.dat")
    fallback_options = msparser.ms_mascotoptions()
    options = datfile.getMascotOptions() if datfile.isValid() else fallback_options

    defaults = resfile.get_ms_mascotresults_params(options)
    (_script_name,
     flags,
     minProbability,
     maxHitsToReport,
     ignoreIonsScoreBelow,
     minPepLenInPepSummary,
     usePeptideSummary,
     flags2) = defaults

    def report(title, report_flags):
        # Print the three-line banner, then the report itself.
        print("-------------------------------------------------------------")
        print(title)
        print("-------------------------------------------------------------")
        return show_results(resfile,
                            usePeptideSummary,
                            report_flags,
                            minProbability,
                            maxHitsToReport,
                            ignoreIonsScoreBelow,
                            minPepLenInPepSummary,
                            flags2)

    if usePeptideSummary:
        succeeded = report("--- Peptide summary report ---", flags)
    else:
        group_bit = msparser.ms_mascotresults.MSRES_GROUP_PROTEINS
        subsets_bit = msparser.ms_mascotresults.MSRES_SHOW_SUBSETS

        # Full protein summary: both grouping bits cleared.
        ungrouped_flags = flags & ~group_bit & ~subsets_bit
        succeeded = report("--- Full Protein summary report ---", ungrouped_flags)

        if succeeded and checkErrors(resfile):
            print("")
            # Concise protein summary: both grouping bits set again.
            grouped_flags = ungrouped_flags | group_bit | subsets_bit
            succeeded = report("--- Concise Protein summary report ---", grouped_flags)

    return 0 if (succeeded and checkErrors(resfile)) else 1
def _iter_until_empty(fetch):
    """Yield fetch(1), fetch(2), ... and stop at the first false result."""
    index = 1
    item = fetch(index)
    while item:
        yield item
        index += 1
        item = fetch(index)


def _related_protein_line(prot, show_db_prefix, matched_label, lead=None):
    """Format the one-line description of a similar/subset protein.

    The pieces are joined with single spaces, which is what the original
    Python 2 ``print(...),`` (trailing comma) sequence wrote on one line.
    """
    pieces = [] if lead is None else [lead]
    accession = prot.getAccession()
    db_idx = prot.getDB()
    if show_db_prefix and db_idx > 1:
        pieces.append(str(db_idx) + "::")
    pieces.append(accession + " - Total score:" + str(prot.getScore()))
    pieces.append(matched_label + str(prot.getNumDisplayPeptides()))
    return " ".join(pieces)


def _print_hit_peptides(prot, results):
    """Print the one-line summary of every displayable peptide of `prot`."""
    for i in range(1, 1 + prot.getNumPeptides()):
        query = prot.getPeptideQuery(i)
        rank = prot.getPeptideP(i)
        duplicate = prot.getPeptideDuplicate(i)

        # Entries with a -1 query or rank, and duplicates from the same
        # query, are not listed.
        if rank == -1 or query == -1:
            continue
        if duplicate == msparser.ms_protein.DUPE_DuplicateSameQuery:
            continue

        pep = results.getPeptide(query, rank)
        if not pep:
            continue

        displayPeptideInfo(
            0, pep, results,
            duplicate == msparser.ms_protein.DUPE_Duplicate,
            prot.getPeptideIsBold(i),
            prot.getPeptideShowCheckbox(i)
        )


def _print_same_set_proteins(results, accession, dbIdx, clustered):
    """List the proteins that match the same set of peptides as the hit."""
    print("Proteins matching the same set of peptides:")
    similar_proteins = _iter_until_empty(
        lambda n: results.getNextSimilarProteinOf(accession, dbIdx, n)
    )
    for similar in similar_proteins:
        print(_related_protein_line(similar, clustered, " - Peptides matched:"))


def _print_subset_proteins(results, accession, dbIdx, clustered):
    """List the proteins that match a subset of the hit's peptides.

    When `clustered` is true, each subset protein is followed by the
    proteins matching the same set of peptides as that subset protein.
    """
    print("Proteins matching a subset of these peptides:")
    subset_proteins = _iter_until_empty(
        lambda n: results.getNextSubsetProteinOf(accession, dbIdx, n)
    )
    for subset in subset_proteins:
        subset_accession = subset.getAccession()
        subset_dbIdx = subset.getDB()
        print(_related_protein_line(subset, clustered, " - Peptides matched:"))

        if not clustered:
            continue

        header_pending = True
        same_as_subset = _iter_until_empty(
            lambda n: results.getNextSimilarProteinOf(subset_accession, subset_dbIdx, n)
        )
        for similar in same_as_subset:
            if header_pending:
                # The header appears only when there is at least one entry.
                print(" Proteins matching the same set of peptides for this subset:")
                header_pending = False
            print(_related_protein_line(similar, True, " Peptides matched:", lead=" "))


def show_results(resfile, usePeptideSummary, flags, minProteinProb, maxHits, minIonsScore, minPepLenInPepSummary, flags2):
    """Print every protein hit, its peptides and the unassigned peptide list.

    Args:
        resfile: results file object passed to the summary constructor and
            to checkErrors().
        usePeptideSummary: when true an ms_peptidesummary is built,
            otherwise an ms_proteinsummary.
        flags: bit flags passed to the summary constructor; the
            MSRES_CLUSTER_PROTEINS, MSRES_GROUP_PROTEINS and
            MSRES_SHOW_SUBSETS bits also select which sections are printed.
        minProteinProb, maxHits: passed through to the summary constructor.
        minIonsScore, minPepLenInPepSummary, flags2: passed through to
            ms_peptidesummary only.

    Returns:
        False when checkErrors(resfile) fails right after the summary is
        constructed, True once the whole report has been printed.
    """
    if usePeptideSummary:
        results = msparser.ms_peptidesummary(
            resfile, flags, minProteinProb, maxHits, "", minIonsScore, minPepLenInPepSummary, "", flags2
        )
    else:
        results = msparser.ms_proteinsummary(
            resfile, flags, minProteinProb, maxHits
        )

    if not checkErrors(resfile):
        return False

    clustered = flags & msparser.ms_mascotresults.MSRES_CLUSTER_PROTEINS
    grouped = flags & msparser.ms_mascotresults.MSRES_GROUP_PROTEINS
    show_subsets = flags & msparser.ms_mascotresults.MSRES_SHOW_SUBSETS

    family = 1
    hit = 1
    prot = results.getHit(hit)

    while prot:
        accession = prot.getAccession()
        description = results.getProteinDescription(accession)
        mass = results.getProteinMass(accession)
        dbIdx = prot.getDB()

        protein_hit = "Protein Hit %d" % hit
        if clustered:
            protein_hit = protein_hit + "." + str(family)

        # print() as a function so the script also parses on Python 3.
        print(protein_hit)
        print("===================")
        print("Accession : %s" % accession)
        print("Description : %s" % description)
        print("Score : %s" % prot.getScore())
        print("Mass : %s" % mass)
        print("Frame : %s" % prot.getFrame())
        print("Coverage : %s" % prot.getCoverage())
        print("RMS error : %s" % prot.getRMSDeltas(results))
        print("Peptides : %s" % prot.getNumDisplayPeptides())

        _print_hit_peptides(prot, results)

        if grouped or clustered:
            _print_same_set_proteins(results, accession, dbIdx, clustered)

        if show_subsets:
            _print_subset_proteins(results, accession, dbIdx, clustered)

        if clustered:
            # Walk the members of the current family first; move on to the
            # next hit only when the family is exhausted.
            prot = results.getNextFamilyProtein(hit, family)
            family += 1
            if not prot:
                hit += 1
                prot = results.getHit(hit)
                family = 1
        else:
            hit += 1
            prot = results.getHit(hit)

        print(" ")

    results.createUnassignedList(msparser.ms_mascotresults.SCORE)

    if results.getNumberOfUnassigned():
        print("Unassigned list")
        print("---------------")

    for u in range(1, 1 + results.getNumberOfUnassigned()):
        pep = results.getUnassigned(u)
        displayPeptideInfo(0, pep, results, 0, 1, 1)

    if usePeptideSummary:
        print(" ")
        displayYellowPopupInfo(results, 1)

    return True
def displayYellowPopupInfo(results, q):
    """
    Shows the equivalent of the yellow popup box for given query
    - results is the results object
    - q is the query number

    One row is printed for each rank 1..10 that has a peptide with a
    non-empty sequence, followed by the accession list for rank 1.
    """
    fmt = "%5s %5s %9s %7s %7s"
    print(fmt % ("Score", "Delta", "Hit", "Protein", "Peptide"))

    # "<hit number>:<accession>" optionally followed by further entries.
    # Raw string so that \d is not treated as a (invalid) string escape.
    first_protein_pattern = r'(\d+):([^ ]*)[ ]*(.*)'

    for p in range(1, 11):
        pep = results.getPeptide(q, p)
        if not pep:
            continue

        seq = pep.getPeptideStr()
        if not seq:
            continue

        hit, protein = '', ''
        tmp = results.getProteinsWithThisPepMatch(q, p)
        if tmp:
            found = re.search(first_protein_pattern, tmp)
            # A string that does not have the expected shape leaves the
            # Hit/Protein columns blank instead of raising AttributeError.
            if found:
                hit, protein, morethan = found.groups()
                if morethan:
                    # "+" marks that more proteins follow the first one.
                    hit += "+"

        print(fmt % (pep.getIonsScore(), pep.getDelta(), hit, protein, seq))

    p = 1
    print("Accessions that matched query %s rank %s :- %s" % (q, p, results.getProteinsWithThisPepMatch(q, p)))
def displayPeptideInfo(showFullDetails, p, results, isDuplicate, isBold, showCheckBox):
    """Print one peptide match, either as a single line or in full detail.

    Args:
        showFullDetails: false prints the one-line form; true prints one
            labelled line per attribute (or " No match").
        p: peptide object whose getters supply the printed values.
        results: results object used for the readable modification string
            and, in the detailed form, the threshold/probability values.
        isDuplicate: true wraps the ions score in parentheses (one-line form).
        isBold: true prints "BOLD" instead of "dim" (one-line form).
        showCheckBox: true prints "CB" instead of "--" (one-line form).

    Returns:
        None.
    """
    q = p.getQuery()

    if not showFullDetails:
        fmt = "%2s %4s %4d %11f %4d(%4d) %-20s %s%3.2f%s %3s"
        cb = "CB" if showCheckBox else "--"
        bold = "BOLD" if isBold else "dim"
        paren1, paren2 = ("(", ")") if isDuplicate else ("", "")

        print(fmt % (
            cb,
            bold,
            q,
            p.getObserved(),
            p.getRank(),
            p.getPrettyRank(),
            p.getPeptideStr(),
            paren1,
            p.getIonsScore(),
            paren2,
            results.getReadableVarMods(q, p.getRank())
        ))
        return

    print("Peptide hit")
    if not p.getAnyMatch():
        print(" No match")
        return

    fmt = " %-12s: %s"
    print(fmt % ('Query', q))
    print(fmt % ('Rank', p.getRank()))
    print(fmt % ('Matched', p.getAnyMatch()))
    print(fmt % ('missedCleave', p.getMissedCleavages()))
    print(fmt % ('mrCalc', p.getMrCalc()))
    print(fmt % ('delta', p.getDelta()))
    print(fmt % ('observed', p.getObserved()))
    print(fmt % ('charge', p.getCharge()))
    print(fmt % ('mrExp', p.getMrExperimental()))
    print(fmt % ('ionsMatched', p.getNumIonsMatched()))
    print(fmt % ('peptideStr', p.getPeptideStr()))
    print(fmt % ('peaksUsed1', p.getPeaksUsedFromIons1()))
    print(fmt % ('varModsStr', p.getVarModsStr()))
    # getRank must be *called*; the one-line form above passes the rank
    # value, not the bound method.
    print(fmt % ('readable mod', results.getReadableVarMods(q, p.getRank())))
    print(fmt % ('ionsScore', p.getIonsScore()))
    print(fmt % ('seriesUsedS', p.getSeriesUsedStr()))
    print(fmt % ('peaksUsed2', p.getPeaksUsedFromIons2()))
    print(fmt % ('peaksUsed3', p.getPeaksUsedFromIons3()))

    # str.join takes a single iterable of strings, so the three values are
    # converted and passed as one sequence.
    thresholds = (
        results.getPeptideIdentityThreshold(q, 20),
        results.getHomologyThreshold(q, 20),
        results.getProbOfPepBeingRandomMatch(p.getIonsScore(), q),
    )
    print(fmt % ('idth, hth, p', ', '.join(str(value) for value in thresholds)))
    print(" ")
def checkErrors(resfile):
    """Print any errors recorded on `resfile`, then clear them.

    Returns the value of resfile.isValid(), read before the errors are
    cleared.
    """
    if resfile.getLastError():
        error_count = resfile.getNumberOfErrors()
        index = 1
        # Errors are addressed by 1-based index.
        while index <= error_count:
            number = resfile.getErrorNumber(index)
            message = resfile.getErrorString(index)
            print("Error number: %s : %s" % (number, message))
            index += 1

    still_valid = resfile.isValid()
    resfile.clearAllErrors()
    return still_valid
if __name__ == "__main__":
    # Script entry point: the value returned by main() becomes the process
    # exit status.
    raise SystemExit(main())
"""
Running the program as
python resfile_summary.py F981123.dat
will give the following output under Mascot Server 2.3:
-------------------------------------------------------------
--- Peptide summary report ---
-------------------------------------------------------------
Protein Hit 1
===================
Accession : CH60_HUMAN
Description : 60 kDa heat shock protein, mitochondrial precursor (Hsp60) (60 kDa chaperonin) (CPN60) (Heat shock
Score : 1225.18623377
Mass : 61016.38
Frame : 0
Coverage : 283
RMS error : 30.4200726378
Peptides : 31
-- dim 52 1065.039917 2( 2) ALMLQGVDLLADAVAVTMGPK 57.90 Oxidation (M)
-- dim 53 1065.062256 2( 2) ALMLQGVDLLADAVAVTMGPK 7.49 Oxidation (M)
Proteins matching the same set of peptides:
Proteins matching a subset of these peptides:
CH60_PONPY Total score: 1007.90623377 Peptides matched: 25
CH60_CRIGR Total score: 951.166233769 Peptides matched: 23
CH60_MOUSE Total score: 951.166233769 Peptides matched: 23
CH60_RAT Total score: 951.166233769 Peptides matched: 23
CH60_BOVIN Total score: 917.682467539 Peptides matched: 22
CH60_CHICK Total score: 875.976233769 Peptides matched: 19
CH60C_DROME Total score: 120.5 Peptides matched: 2
CH60C_ARATH Total score: 90.68 Peptides matched: 2
HSP60_CANAL Total score: 45.35 Peptides matched: 1
HSP60_PARBR Total score: 45.35 Peptides matched: 1
HSP60_YEAST Total score: 45.35 Peptides matched: 1
CH602_VIBPA Total score: 45.35 Peptides matched: 1
CH602_VIBVU Total score: 45.35 Peptides matched: 1
CH602_VIBVY Total score: 45.35 Peptides matched: 1
CH60_EUGGR Total score: 45.33 Peptides matched: 1
Protein Hit 2
===================
Accession : CH60_DROME
Description : 60 kDa heat shock protein, mitochondrial precursor (Hsp60) (60 kDa chaperonin) (CPN60) (Heat shock
Score : 174.39
Mass : 60770.89
Frame : 0
Coverage : 67
RMS error : 29.5905072791
Peptides : 4
Proteins matching the same set of peptides:
Proteins matching a subset of these peptides:
HSP60_SCHPO Total score: 87.04 Peptides matched: 2
Protein Hit 3
===================
Accession : CH60_CAEEL
Description : Chaperonin homolog Hsp-60, mitochondrial precursor (Heat shock protein 60) (HSP-60) - Caenorhabditi
Score : 134.91
Mass : 60063.75
Frame : 0
Coverage : 21
RMS error : 36.5383063194
Peptides : 3
Proteins matching the same set of peptides:
Proteins matching a subset of these peptides:
Protein Hit 4
===================
Accession : CH60_XANAC
Description : 60 kDa chaperonin (Protein Cpn60) (groEL protein) - Xanthomonas axonopodis pv. citri
Score : 42.2
Mass : 57130.83
Frame : 0
Coverage : 9
RMS error : 76.9237696062
Peptides : 1
Proteins matching the same set of peptides:
CH60_XANAC Total score: 42.2 Peptides matched: 1
CH60_XANAC Total score: 42.2 Peptides matched: 1
CH60_XANAC Total score: 42.2 Peptides matched: 1
CH60_XANAC Total score: 42.2 Peptides matched: 1
CH60_XANAC Total score: 42.2 Peptides matched: 1
CH60_XANAC Total score: 42.2 Peptides matched: 1
Proteins matching a subset of these peptides:
Protein Hit 5
===================
Accession : NMDE4_HUMAN
Description :
Score : 37.24
Mass : 0.0
Frame : 0
Coverage : 10
RMS error : 9.41906700791
Peptides : 1
Proteins matching the same set of peptides:
NMDE4_HUMAN Total score: 37.24 Peptides matched: 1
NMDE4_HUMAN Total score: 36.2762337693 Peptides matched: 1
Proteins matching a subset of these peptides:
Protein Hit 6
===================
Accession : YF81_THET2
Description :
Score : 34.76
Mass : 0.0
Frame : 0
Coverage : 9
RMS error : 37.0214184966
Peptides : 1
Proteins matching the same set of peptides:
YF81_THET2 Total score: 34.76 Peptides matched: 1
Proteins matching a subset of these peptides:
Protein Hit 7
===================
Accession : F4ST_FLACH
Description :
Score : 33.85
Mass : 0.0
Frame : 0
Coverage : 9
RMS error : 87.8815544839
Peptides : 1
Proteins matching the same set of peptides:
Proteins matching a subset of these peptides:
Protein Hit 8
===================
Accession : ZN711_HUMAN
Description : Zinc finger protein 711 (Zinc finger protein 6) - Homo sapiens (Human)
Score : 30.84
Mass : 87153.77
Frame : 0
Coverage : 13
RMS error : 69.4028633218
Peptides : 1
Proteins matching the same set of peptides:
Proteins matching a subset of these peptides:
Unassigned list
---------------
CB BOLD 14 442.228302 1( 1) LIAQTPLK 25.09
CB BOLD 9 747.396179 1( 1) EGETRR 15.03
CB BOLD 4 662.275574 1( 1) KNAMAK 14.09
CB BOLD 23 1101.621704 1( 1) QLLMVAGVDR 12.04
CB BOLD 5 662.417175 1( 1) AIACER 11.79
CB BOLD 8 714.372498 1( 1) LAPAQSK 10.69
CB BOLD 6 673.349487 1( 1) AVNDVR 10.63
CB BOLD 22 1101.536621 1( 1) ENVIPADSEK 8.65
CB BOLD 55 1099.094727 1( 1) LNAEAVRTLLSANGQKPSEAK 8.05
CB BOLD 29 642.353577 1( 1) VVGVAGQGASALVR 7.91
CB BOLD 28 642.352600 1( 1) KNVSVSQGPDPR 7.22
CB BOLD 30 663.837891 1( 1) TPLLVGVAKGESR 7.20
CB BOLD 50 1048.561523 1( 1) ALDEILEYQNYPVVCAKK 5.70
CB BOLD 57 747.036072 1( 1) VMGSAFTALLDANEDAQKAMR 4.83
CB BOLD 49 1020.987915 1( 1) HQRLSGLMQTALEEQQR 4.11 Oxidation (M)
CB BOLD 19 932.364380 1( 1) TGMTRNPR 4.09
CB BOLD 2 500.256012 1( 1) LAVPT 3.87
CB BOLD 38 749.383972 1( 1) IDLLADMMWDDK 3.43 2 Oxidation (M)
CB BOLD 20 933.499023 1( 1) SRDPGMVR 3.21 Oxidation (M)
CB BOLD 41 886.405884 1( 1) DRVALNQEVMAPEATK 1.85
CB BOLD 10 747.412476 1( 1) MAPSTPK 1.68 Oxidation (M)
CB BOLD 18 930.703003 1( 1) LGSGIKAER 1.60
CB BOLD 7 711.364685 1( 1) GGAHEIK 1.34
CB BOLD 17 930.683105 1( 1) KIQAEITK 1.00
CB BOLD 44 949.550720 1( 1) LLSWDSVFFIKNITSK 0.30
CB BOLD 1 498.272888 1( 1) 0.00
CB BOLD 3 575.558411 1( 1) 0.00
CB BOLD 32 711.370728 1( 1) 0.00
CB BOLD 42 932.460815 1( 1) 0.00
CB BOLD 43 933.003784 1( 1) 0.00
CB BOLD 47 665.009583 1( 1) 0.00
CB BOLD 56 1119.045166 1( 1) 0.00
CB BOLD 63 832.798584 1( 1) 0.00
CB BOLD 66 1113.894653 1( 1) 0.00
CB BOLD 67 1116.177490 1( 1) 0.00
Score Delta Hit Protein Peptide
Accessions that matched query 97 rank 1 :-
"""