Testing peptides for quantifiability.
import msparser
import sys
# Schema location strings passed to setSchemaFileName() on the config-file
# objects below. Each is a space-separated sequence of
# "<namespace URI> <relative path to .xsd>" pairs.
QUANT_SCHEMA = (
    'http://www.matrixscience.com/xmlns/schema/quantitation_2 '
    '../html/xmlns/schema/quantitation_2/quantitation_2.xsd '
    'http://www.matrixscience.com/xmlns/schema/quantitation_1 '
    '../html/xmlns/schema/quantitation_1/quantitation_1.xsd'
)
UNIMOD_SCHEMA = (
    'http://www.unimod.org/xmlns/schema/unimod_2 '
    '../html/xmlns/schema/unimod_2/unimod_2.xsd'
)
def ms_range(start, stop, step=1):
    """Yield ``start``, ``start + step``, ... up to and including ``stop``.

    Unlike the builtin ``range``, the upper bound is inclusive. Nothing is
    yielded when ``start > stop``. A ``step`` of zero or less never
    terminates when ``start <= stop``.
    """
    current = start
    while True:
        if current > stop:
            return
        yield current
        current += step
def load_method_or_exit(resfile):
    """Load the quantitation configuration stored in a results file.

    Prints a message and exits the process with status 1 when the search
    parameters name no quantitation method (empty or "none"), when the
    quantitation section cannot be read from the results file, when the
    configuration object is invalid or fails document validation, or when
    the named method is not present in the configuration.

    Args:
        resfile: The results-file object to read from; it is passed to
            ``msparser.ms_searchparams`` and asked for its quantitation
            section.

    Returns:
        A ``(qf, qmethod)`` tuple: the populated
        ``msparser.ms_quant_configfile`` and the method object looked up by
        the name found in the search parameters.
    """
    params = msparser.ms_searchparams(resfile)
    quant_method_name = params.getQUANTITATION()
    if not quant_method_name or quant_method_name.lower() == 'none':
        print("File has no quantitation method")
        sys.exit(1)

    qf = msparser.ms_quant_configfile()
    qf.setSchemaFileName(QUANT_SCHEMA)
    if not resfile.getQuantitation(qf):
        print("File has no quantitation method (%s)" % (resfile.getLastErrorString()))
        sys.exit(1)
    if not qf.isValid():
        print("Quantitation file is not valid (%s)" % (qf.getLastErrorString()))
        sys.exit(1)

    # validateDocument() returns an empty string on success; anything else
    # is reported as the validation failure text.
    validation_errors = qf.validateDocument()
    if validation_errors != '':
        print("Quantitation file does not validate (%s)" % (validation_errors))
        sys.exit(1)

    qmethod = qf.getMethodByName(quant_method_name)
    if not qmethod:
        print("Quantitation file has no method called %s" % (quant_method_name))
        sys.exit(1)
    return (qf, qmethod)
def load_umod_configfile_or_exit(resfile):
    """Load the Unimod configuration stored in a results file.

    Prints a message and exits the process with status 1 when the results
    file has no Unimod section, or when the loaded configuration is invalid
    or fails document validation.

    Args:
        resfile: The results-file object asked for its Unimod section.

    Returns:
        The populated ``msparser.ms_umod_configfile``.
    """
    umodfile = msparser.ms_umod_configfile()
    umodfile.setSchemaFileName(UNIMOD_SCHEMA)
    if not resfile.getUnimod(umodfile):
        print("Results file does not have a Unimod section")
        sys.exit(1)
    if not umodfile.isValid():
        # getLastErrorString must be *called*; without the parentheses the
        # message would contain the bound-method repr, not the error text.
        print("Unimod file is not valid (%s)" % (umodfile.getLastErrorString()))
        sys.exit(1)

    # validateDocument() returns an empty string on success.
    validation_errors = umodfile.validateDocument()
    if not validation_errors == '':
        print("Unimod file does not validate (%s)" % (validation_errors))
        sys.exit(1)
    return umodfile
def open_peptidesummary_or_exit(resfile):
    """Open a results file as a peptide summary.

    The constructor arguments are obtained from
    ``resfile.get_ms_mascotresults_params`` using a default-constructed
    ``msparser.ms_mascotoptions``. Prints a message and exits the process
    with status 1 when those parameters say a peptide summary should not be
    used, or when the results file reports itself invalid after the summary
    has been constructed.

    Args:
        resfile: The results-file object to summarise.

    Returns:
        The constructed ``msparser.ms_peptidesummary``.
    """
    opts = msparser.ms_mascotoptions()
    (_unused, flags, minprob, maxhits, iisb, minpeplen,
     use_pepsum, flags2) = resfile.get_ms_mascotresults_params(opts)
    if not use_pepsum:
        print("Results file cannot be opened as a peptide summary")
        sys.exit(1)

    pepsum = msparser.ms_peptidesummary(
        resfile, flags, minprob, maxhits, '', iisb, minpeplen, '', flags2)
    if not resfile.isValid():
        # getLastErrorString must be *called*; printing the attribute alone
        # would show the bound-method repr instead of the error text.
        print(resfile.getLastErrorString())
        sys.exit(1)
    return pepsum
def pull_proteins_from(pepsum):
    """Collect each hit and its family member proteins into one flat list.

    For every hit number visited, the hit itself is appended first, followed
    by the proteins returned by ``getNextFamilyProtein(hit, 1)``,
    ``getNextFamilyProtein(hit, 2)``, ... until that call returns ``None``.

    Args:
        pepsum: The peptide summary to read hits from.

    Returns:
        A list of protein objects in the order described above.
    """
    proteins = []
    # NOTE(review): ms_range is inclusive, so this visits hits
    # 1 .. getNumberOfHits()-1. If hit numbers run 1 .. getNumberOfHits()
    # the last hit is skipped -- confirm against the Mascot Parser docs
    # before changing; the sample output at the end of this file was
    # presumably produced with these bounds.
    for hit_number in ms_range(1, pepsum.getNumberOfHits() - 1):
        proteins.append(pepsum.getHit(hit_number))

        member = 1
        protein = pepsum.getNextFamilyProtein(hit_number, member)
        while protein is not None:
            proteins.append(protein)
            member += 1
            protein = pepsum.getNextFamilyProtein(hit_number, member)
    return proteins
def dump_quant_method(qmethod):
    """Print a short summary of a quantitation method to stdout.

    Lists the component names, the protein ratio type and the minimum
    number of peptides, then the quality, normalisation and outlier
    settings (or a placeholder line when a section is absent).

    Args:
        qmethod: The quantitation method object to describe.
    """
    component_names = [
        qmethod.getComponentByNumber(index).getName()
        for index in ms_range(0, qmethod.getNumberOfComponents() - 1)
    ]
    print("Components: %s" % component_names)
    print("Protein ratio type = %s" % qmethod.getProteinRatioType())
    print("Min. number of peptides = %d" % qmethod.getMinNumPeptides())

    if not qmethod.haveQuality():
        print("Quality: no restrictions")
    else:
        quality = qmethod.getQuality()
        print("Quality: min. precursor charge = %s" % quality.getMinPrecursorCharge())
        print("Quality: pep. threshold type = %s" % quality.getPepThresholdType())
        print("Quality: pep. threshold value = %s" % quality.getPepThresholdValue())

    if not qmethod.haveNormalisation():
        print("Normalisation: none")
    else:
        normalisation = qmethod.getNormalisation()
        print("Normalisation = %s" % normalisation.getMethod())

    if not qmethod.haveOutliers():
        print("Outliers: none")
    else:
        outliers = qmethod.getOutliers()
        print("Outliers = %s" % outliers.getMethod())
# Script entry point: the first command-line argument is the path of the
# results file to inspect.
if not len(sys.argv) >= 2:
    usage_text = "Usage: %s <quantitation results.dat>" % sys.argv[0]
    print(usage_text)
    sys.exit(1)

# Open the results file and stop immediately if it cannot be read.
resfile = msparser.ms_mascotresfile(sys.argv[1], 1)
if not resfile.isValid():
    print(resfile.getLastErrorString())
    sys.exit(1)

# Each loader prints its own diagnostic and exits on failure.
(quant_config_file, qmethod) = load_method_or_exit(resfile)
umodfile = load_umod_configfile_or_exit(resfile)
pepsum = open_peptidesummary_or_exit(resfile)

quant_helper = msparser.ms_quant_helper(pepsum, qmethod, umodfile)
if not quant_helper.isValid():
    helper_error = quant_helper.getLastErrorString()
    print("ms_quant_helper is not valid: %s" % (helper_error))
    sys.exit(1)

proteins = pull_proteins_from(pepsum)

# Header: file name, method name and protein count, then the method details.
summary_line = "File %s uses %s and has %d family proteins\n" % (
    sys.argv[1], qmethod.getName(), len(proteins))
print(summary_line)
dump_quant_method(qmethod)
print("\n")
# Maps each ms_quant_helper quantifiability constant to its own name so the
# value returned by isPeptideQuantifiable() can be printed readably.
peptide_quant_str = {
    getattr(msparser.ms_quant_helper, constant_name): constant_name
    for constant_name in (
        "PEPTIDE_HAS_EXCLUDED_FIXEDMOD",
        "PEPTIDE_HAS_EXCLUDED_LOCAL_FIXEDMOD",
        "PEPTIDE_HAS_EXCLUDED_LOCAL_VARMOD",
        "PEPTIDE_HAS_EXCLUDED_VARMOD",
        "PEPTIDE_HAS_NO_REQUIRED_FIXEDMOD",
        "PEPTIDE_HAS_NO_REQUIRED_VARMOD",
        "PEPTIDE_HAS_UNMODIFIED_SITE",
        "PEPTIDE_IS_QUANTIFIABLE",
        "PEPTIDE_QUANTIFIABILITY_UNAVAILABLE",
    )
}
# Maps each ms_quant_helper quality constant to its own name so the value
# returned by isPeptideQualityOK() can be printed readably.
peptide_quality_str = {
    getattr(msparser.ms_quant_helper, constant_name): constant_name
    for constant_name in (
        "PEPTIDE_EXPECT_ABOVE_THRESHOLD",
        "PEPTIDE_CHARGE_BELOW_PRECURSOR_MIN",
        "PEPTIDE_HAS_NO_EXCLUSIVE_MODS",
        "PEPTIDE_NOT_UNIQUE",
        "PEPTIDE_QUALITY_IS_OK",
        "PEPTIDE_QUALITY_UNAVAILABLE",
        "PEPTIDE_SCORE_BELOW_HOMOLOGY_THR",
        "PEPTIDE_SCORE_BELOW_IDENTITY_THR",
        "PEPTIDE_SCORE_BELOW_IDENTITY_THR_NOHOM",
        "PEPTIDE_SCORE_BELOW_SCORE_THR",
    )
}
# For every collected protein, print its database number and accession, then
# the quantifiability and quality status of each peptide match visited.
for protein in proteins:
    print("Protein %d::%s" % (protein.getDB(), protein.getAccession()))

    # NOTE(review): ms_range is inclusive, so this visits peptide numbers
    # 1 .. getNumPeptides()-1. If peptide numbers run 1 .. getNumPeptides()
    # the last one is skipped -- confirm against the Mascot Parser docs
    # before changing; the sample output at the end of this file lists
    # some proteins with no peptide lines at all.
    for i in ms_range(1, protein.getNumPeptides() - 1):
        # Matches flagged as duplicates within the same query are not reported.
        if protein.getPeptideDuplicate(i) == msparser.ms_protein.DUPE_DuplicateSameQuery:
            continue

        q = protein.getPeptideQuery(i)
        p = protein.getPeptideP(i)
        peptide = pepsum.getPeptide(q, p)
        if not peptide:
            continue

        # Both helper calls return a (status code, reason text) pair; the
        # reason is replaced by a placeholder when it comes back as None.
        ok, reason = quant_helper.isPeptideQuantifiable(q, p, protein, i)
        if reason is None:
            reason = '<undef>'
        print("\tq%d_p%d quantifiable? %s (%s)" % (q, p, peptide_quant_str[ok], reason))

        ok, reason = quant_helper.isPeptideQualityOK(q, p)
        if reason is None:
            reason = '<undef>'
        print("\tq%d_p%d quality? %s (%s)" % (q, p, peptide_quality_str[ok], reason))
'''
When run from bin, tools_quant_helper.py ../data/F981133.dat
Will give the following output:
File ../data/F981133.dat uses SILAC K+6 R+6 multiplex and has 21 family proteins
Components: ['light', 'heavy']
Protein ratio type = weighted
Min. number of peptides = 2
Quality: min. precursor charge = 1
Quality: pep. threshold type = at least homology
Quality: pep. threshold value = 0.05
Normalisation: none
Outliers = auto
Protein 1::K2C1_PANTR
q18_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q18_p1 quality? PEPTIDE_QUALITY_IS_OK ()
q28_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q28_p1 quality? PEPTIDE_QUALITY_IS_OK ()
q33_p2 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q33_p2 quality? PEPTIDE_QUALITY_IS_OK ()
q38_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q38_p1 quality? PEPTIDE_QUALITY_IS_OK ()
q39_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q39_p1 quality? PEPTIDE_QUALITY_IS_OK ()
q40_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q40_p1 quality? PEPTIDE_QUALITY_IS_OK ()
Protein 1::TRYP_PIG
q1_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q1_p1 quality? PEPTIDE_QUALITY_IS_OK ()
q2_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q2_p1 quality? PEPTIDE_SCORE_BELOW_HOMOLOGY_THR (Peptide score is below homology threshold)
q3_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q3_p1 quality? PEPTIDE_QUALITY_IS_OK ()
q9_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q9_p1 quality? PEPTIDE_QUALITY_IS_OK ()
q72_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q72_p1 quality? PEPTIDE_QUALITY_IS_OK ()
q73_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q73_p1 quality? PEPTIDE_QUALITY_IS_OK ()
q74_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q74_p1 quality? PEPTIDE_QUALITY_IS_OK ()
q75_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q75_p1 quality? PEPTIDE_QUALITY_IS_OK ()
q76_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q76_p1 quality? PEPTIDE_QUALITY_IS_OK ()
q77_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q77_p1 quality? PEPTIDE_QUALITY_IS_OK ()
q78_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q78_p1 quality? PEPTIDE_QUALITY_IS_OK ()
q81_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q81_p1 quality? PEPTIDE_SCORE_BELOW_HOMOLOGY_THR (Peptide score is below homology threshold)
q82_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q82_p1 quality? PEPTIDE_QUALITY_IS_OK ()
q90_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q90_p1 quality? PEPTIDE_QUALITY_IS_OK ()
Protein 1::IGG2B_MOUSE
q12_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q12_p1 quality? PEPTIDE_QUALITY_IS_OK ()
q57_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q57_p1 quality? PEPTIDE_SCORE_BELOW_HOMOLOGY_THR (Peptide score is below homology threshold)
q58_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q58_p1 quality? PEPTIDE_QUALITY_IS_OK ()
q62_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q62_p1 quality? PEPTIDE_QUALITY_IS_OK ()
Protein 1::ALBU_BOVIN
q16_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q16_p1 quality? PEPTIDE_QUALITY_IS_OK ()
q30_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q30_p1 quality? PEPTIDE_QUALITY_IS_OK ()
q46_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q46_p1 quality? PEPTIDE_QUALITY_IS_OK ()
q49_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q49_p1 quality? PEPTIDE_QUALITY_IS_OK ()
Protein 1::ENPL_MOUSE
q4_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q4_p1 quality? PEPTIDE_SCORE_BELOW_HOMOLOGY_THR (Peptide score is below homology threshold)
q19_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q19_p1 quality? PEPTIDE_SCORE_BELOW_HOMOLOGY_THR (Peptide score is below homology threshold)
q20_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q20_p1 quality? PEPTIDE_SCORE_BELOW_HOMOLOGY_THR (Peptide score is below homology threshold)
q24_p2 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q24_p2 quality? PEPTIDE_QUALITY_IS_OK ()
q26_p2 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q26_p2 quality? PEPTIDE_QUALITY_IS_OK ()
q41_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q41_p1 quality? PEPTIDE_QUALITY_IS_OK ()
Protein 1::NUCL_MOUSE
q5_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q5_p1 quality? PEPTIDE_QUALITY_IS_OK ()
q6_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q6_p1 quality? PEPTIDE_QUALITY_IS_OK ()
q7_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q7_p1 quality? PEPTIDE_SCORE_BELOW_HOMOLOGY_THR (Peptide score is below homology threshold)
q8_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q8_p1 quality? PEPTIDE_SCORE_BELOW_HOMOLOGY_THR (Peptide score is below homology threshold)
q44_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q44_p1 quality? PEPTIDE_SCORE_BELOW_HOMOLOGY_THR (Peptide score is below homology threshold)
q45_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q45_p1 quality? PEPTIDE_QUALITY_IS_OK ()
q92_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q92_p1 quality? PEPTIDE_QUALITY_IS_OK ()
Protein 1::EPHB2_HUMAN
q13_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q13_p1 quality? PEPTIDE_QUALITY_IS_OK ()
q21_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q21_p1 quality? PEPTIDE_SCORE_BELOW_HOMOLOGY_THR (Peptide score is below homology threshold)
q53_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q53_p1 quality? PEPTIDE_QUALITY_IS_OK ()
Protein 1::K2C1_RAT
q33_p4 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q33_p4 quality? PEPTIDE_SCORE_BELOW_HOMOLOGY_THR (Peptide score is below homology threshold)
q38_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q38_p1 quality? PEPTIDE_QUALITY_IS_OK ()
q39_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q39_p1 quality? PEPTIDE_QUALITY_IS_OK ()
Protein 1::K2C75_BOVIN
q28_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q28_p1 quality? PEPTIDE_QUALITY_IS_OK ()
q38_p4 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q38_p4 quality? PEPTIDE_SCORE_BELOW_HOMOLOGY_THR (Peptide score is below homology threshold)
Protein 1::HNRPU_HUMAN
q32_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q32_p1 quality? PEPTIDE_SCORE_BELOW_HOMOLOGY_THR (Peptide score is below homology threshold)
q34_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q34_p1 quality? PEPTIDE_SCORE_BELOW_HOMOLOGY_THR (Peptide score is below homology threshold)
q51_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q51_p1 quality? PEPTIDE_QUALITY_IS_OK ()
Protein 1::SFPQ_HUMAN
q14_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q14_p1 quality? PEPTIDE_QUALITY_IS_OK ()
q15_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q15_p1 quality? PEPTIDE_SCORE_BELOW_HOMOLOGY_THR (Peptide score is below homology threshold)
q22_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q22_p1 quality? PEPTIDE_SCORE_BELOW_HOMOLOGY_THR (Peptide score is below homology threshold)
q69_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q69_p1 quality? PEPTIDE_QUALITY_IS_OK ()
Protein 1::CAPR1_MOUSE
q23_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q23_p1 quality? PEPTIDE_QUALITY_IS_OK ()
q36_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q36_p1 quality? PEPTIDE_QUALITY_IS_OK ()
Protein 1::K2C1B_HUMAN
q18_p2 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q18_p2 quality? PEPTIDE_SCORE_BELOW_HOMOLOGY_THR (Peptide score is below homology threshold)
q38_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q38_p1 quality? PEPTIDE_QUALITY_IS_OK ()
q40_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q40_p1 quality? PEPTIDE_QUALITY_IS_OK ()
Protein 1::ENPL_ARATH
q24_p2 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q24_p2 quality? PEPTIDE_QUALITY_IS_OK ()
q26_p2 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q26_p2 quality? PEPTIDE_QUALITY_IS_OK ()
q41_p2 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q41_p2 quality? PEPTIDE_SCORE_BELOW_HOMOLOGY_THR (Peptide score is below homology threshold)
q42_p10 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q42_p10 quality? PEPTIDE_SCORE_BELOW_HOMOLOGY_THR (Peptide score is below homology threshold)
Protein 1::VIME_CRIGR
q17_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q17_p1 quality? PEPTIDE_QUALITY_IS_OK ()
Protein 1::BCAR1_MOUSE
q10_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q10_p1 quality? PEPTIDE_SCORE_BELOW_HOMOLOGY_THR (Peptide score is below homology threshold)
q11_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q11_p1 quality? PEPTIDE_SCORE_BELOW_HOMOLOGY_THR (Peptide score is below homology threshold)
Protein 1::HTPG_ALKEH
q24_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q24_p1 quality? PEPTIDE_QUALITY_IS_OK ()
q26_p1 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q26_p1 quality? PEPTIDE_QUALITY_IS_OK ()
Protein 1::FAK1_MOUSE
Protein 1::HTPG_BDEBA
q24_p3 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q24_p3 quality? PEPTIDE_QUALITY_IS_OK ()
Protein 1::K2C8_MOUSE
Protein 1::TRY1_RAT
q72_p10 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q72_p10 quality? PEPTIDE_SCORE_BELOW_HOMOLOGY_THR (Peptide score is below homology threshold)
q73_p2 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q73_p2 quality? PEPTIDE_SCORE_BELOW_HOMOLOGY_THR (Peptide score is below homology threshold)
q74_p2 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q74_p2 quality? PEPTIDE_QUALITY_IS_OK ()
q75_p2 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q75_p2 quality? PEPTIDE_SCORE_BELOW_IDENTITY_THR_NOHOM (Peptide score is below identity threshold (no homology threshold))
q76_p2 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q76_p2 quality? PEPTIDE_SCORE_BELOW_IDENTITY_THR_NOHOM (Peptide score is below identity threshold (no homology threshold))
q78_p2 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q78_p2 quality? PEPTIDE_SCORE_BELOW_HOMOLOGY_THR (Peptide score is below homology threshold)
q81_p4 quantifiable? PEPTIDE_IS_QUANTIFIABLE ()
q81_p4 quality? PEPTIDE_SCORE_BELOW_HOMOLOGY_THR (Peptide score is below homology threshold)
'''