Example program extracting peptide ratios from a Reporter or Multiplex file.
import msparser
import sys
if len(sys.argv) != 5:
print("Usage: %s <quantitation schema 1 filepath> <quantitation schema 2 filepath> <unimod schema filepath> <results file>" % sys.argv[0])
print("The location of schema files should be e.g.")
print(" ../html/xmlns/schema/quantitation_1/quantitation_1.xsd")
print(" ../html/xmlns/schema/quantitation_2/quantitation_2.xsd")
print(" ../html/xmlns/schema/unimod_2/unimod_2.xsd")
print("if running from the Mascot cgi directory.")
sys.exit(1)
quant_schema1_filepath = sys.argv[1]
quant_schema2_filepath = sys.argv[2]
unimod_schema_filepath = sys.argv[3]
resfile = msparser.ms_mascotresfile(sys.argv[4], 1)
if not resfile.isValid():
print(resfile.getLastErrorString())
sys.exit(1)
def dump_warnings(err):
for i in ms_range(1, err.getNumberOfErrors()):
print('Warning: %s' %(err.getErrorString(i)))
def ms_range(start, stop, step=1):
i = start
while i <= stop:
yield i
i += step
dump_warnings(resfile.getErrorHandler())
resfile.clearAllErrors()
resfile.setXMLschemaFilePath(msparser.ms_mascotresfile.XML_SCHEMA_QUANTITATION, "http://www.matrixscience.com/xmlns/schema/quantitation_2 " + quant_schema2_filepath + " http://www.matrixscience.com/xmlns/schema/quantitation_1 " + quant_schema1_filepath)
resfile.setXMLschemaFilePath(msparser.ms_mascotresfile.XML_SCHEMA_UNIMOD, "http://www.unimod.org/xmlns/schema/unimod_2 " + unimod_schema_filepath)
qmethod = msparser.ms_quant_method()
if not resfile.getQuantitationMethod(qmethod):
print(resfile.getLastErrorString())
sys.exit(1)
datfile = msparser.ms_datfile()
undef, flags, minprob, maxhits, iisb, minpeplen, use_pepsum, flags2 = resfile.get_ms_mascotresults_params(datfile.getMascotOptions())
if not use_pepsum:
print("Results File cannot be opened as a peptide summary")
sys.exit(1)
pepsum = msparser.ms_peptidesummary(resfile, flags, minprob, maxhits, '', iisb, minpeplen, '', flags2)
if not resfile.isValid():
print(resfile.getLastErrorString)
sys.exit(1)
dump_warnings(resfile.getErrorHandler())
resfile.clearAllErrors()
ms2quant = msparser.ms_ms2quantitation(pepsum, qmethod)
if not ms2quant.isValid():
sys.exit(1)
if ms2quant.hasIntensityNormalisation():
ms2quant.normaliseIntensities()
elif ms2quant.hasPeptideRatioNormalisation():
ms2quant.normalisePeptideRatios()
qmethod = ms2quant.getQuantitationMethod()
dump_warnings(ms2quant.getErrorHandler())
ms2quant.clearAllErrors()
comps = []
for i in ms_range(0, qmethod.getNumberOfComponents()-1):
comp = qmethod.getComponentByNumber(i)
comps.append(comp.getName())
print("Components: %s" %(comps))
rationames = []
for i in ms_range(0, qmethod.getNumberOfReportRatios()-1):
reportratio = qmethod.getReportRatioByNumber(i)
reportrationame = reportratio.getName()
rationames.append(reportrationame)
print("Report ratios: %s" %(rationames))
print("Protein ratio type = %s" %(qmethod.getProteinRatioType()))
print("Min. number of peptides = %d" %(qmethod.getMinNumPeptides()))
if qmethod.haveQuality():
q = qmethod.getQuality()
print("Quality: min. precursor charge = %s" %(q.getMinPrecursorCharge()))
print("Quality: pep. threshold type = %s" %(q.getPepThresholdType()))
print("Quality: pep. threshold value = %s" %(q.getPepThresholdValue()))
else:
print("Quality: no restrictions")
if qmethod.haveNormalisation():
q = qmethod.getNormalisation()
print("Normalisation = %s" %(q.getMethod()))
if q.haveProteins():
p = q.getProteins()
for i in ms_range(0, p.getNumberOfProteins()-1):
accessions = []
accessions.append(p.getProtein().getAccession())
print(" - Accessions = %s" %(accessions))
if q.havePeptides():
p = q.getPeptides()
for i in ms_range(0, p.getNumberOfPeptides()-1):
peptides = []
peptides.append(p.getPeptide().getSequence())
print(" - Sequences = %s" %(peptides))
component_bases = []
for i in ms_range(0, qmethod.getNumberOfComponents()-1):
component = qmethod.getComponentByNumber(i)
compname = component.getName()
baseNormalisation = ms2quant.getIntensityNormalisationBase(compname)
component_bases.append(baseNormalisation)
for rationame in rationames:
ms2gprn = ms2quant.getPeptideRatioNormalisationBase(rationame)
print("Intensity normalisation constants: %s" %(component_bases))
print("Ratio normalisation constants: %s" %(ms2gprn))
else:
print("Normalisation: none")
if qmethod.haveOutliers():
q = qmethod.getOutliers()
print("Outliers = %s" %(q.getMethod()))
else:
print("Outliers: none")
print("\n")
proteins = []
for i in ms_range(1, pepsum.getNumberOfHits()):
hit = pepsum.getHit(i)
proteins.append(hit)
j = 0
protein = pepsum.getNextFamilyProtein(i, j)
while protein !=None:
proteins.append(protein)
j += 1
protein = ''
for protein in proteins:
print("%d::%s:" %(protein.getDB(), protein.getAccession()))
for rationame in rationames:
ratio = ms2quant.getProteinRatio(protein.getAccession(), protein.getDB(), rationame)
print(rationame)
if ratio.isMissing():
print("%10s: ---" %(rationame))
else:
print("%10s = %10g, stderr = %10g, N = %d, normal-p = %6g, hypo-p = %6g" %(rationame, ratio.getValue(), ratio.getStandardError_only(), ratio.getSampleSize(), ratio.getNormalityPvalue_only(), ratio.getHypothesisPvalue_only()))
print(rationame)
print(" %10s %s" %('Peptide:', rationame))
for i in ms_range(1, protein.getNumPeptides()):
if protein.getPeptideDuplicate(i) == msparser.ms_protein.DUPE_DuplicateSameQuery:
continue
q = protein.getPeptideQuery(i)
p = protein.getPeptideP(i)
peptide = pepsum.getPeptide(q, p)
if not peptide:
continue
values = ''
for rationame in rationames:
quantKey = msparser.ms_peptide_quant_key(q, p)
ratio = ms2quant.getPeptideRatio(quantKey, rationame)
if ratio.isMissing():
values = "---"
continue
if ratio.isInfinite():
values = "###"
else:
values = ratio.getValue()
print(" %-9s %-10s" %("q%d_p%d" %(q, p), str(values)))
print("\n")