"""Repeating a search from a Mascot results file."""
import msparser
import sys
import subprocess
import re
def main():
    """Command-line entry point.

    Returns:
        The result of repeatSearch() for the results file named by the first
        command-line argument, or 1 (after printing the usage text) when no
        argument was given.
    """
    if len(sys.argv) >= 2:
        return repeatSearch(sys.argv[1])

    # No results file on the command line: show help and signal failure.
    usage()
    return 1
def repeatSearch(filename):
    """Re-run the search stored in a Mascot results file and compare scores.

    The search parameters and every query are read from ``filename`` and
    assembled into a form-data style request, which is piped to
    ``./nph-mascot.exe`` (started from the current working directory). The
    program's standard output is captured in ``tmp.txt``. For every line of
    that output containing ``SUCCESS``, the line that follows is passed to
    compareResults() as the name of the repeated search's results file.

    Args:
        filename: Path of the Mascot results file whose search is repeated.

    Returns:
        0 when the search ran and its output contained no ``ERROR`` line;
        1 when the results file is invalid, ``tmp.txt`` cannot be opened,
        ``nph-mascot.exe`` cannot be started, or the output reports an error.
    """
    resfile = msparser.ms_mascotresfile(filename)
    if not resfile.isValid():
        print("Cannot open results file %s : %s" % (filename, resfile.getLastErrorString()))
        return 1

    # Request body: opening boundary, one form field named QUE, blank line.
    request = [
        "----12345",
        'Content-Disposition: form-data; name="QUE"',
        '',
    ]

    # Copy every non-empty search parameter except the ones that are either
    # dropped or replaced with explicit values below.
    skipped_keys = ("INTERMEDIATE", "RULES", "INTERNALS", "SEARCH")
    sec_params = msparser.ms_mascotresfile.SEC_PARAMETERS
    count = 1
    key = resfile.enumerateSectionKeys(sec_params, count)
    while key:  # an empty key ends the enumeration
        val = resfile.getSectionValueStr(sec_params, key)
        if val and key not in skipped_keys:
            request.append("%s=%s" % (key, val))
        count += 1
        key = resfile.enumerateSectionKeys(sec_params, count)

    request.append("SEARCH=SQ")
    request.append("INTERMEDIATE=" + filename)

    # Queries are numbered from 1.
    for q in range(1, 1 + resfile.getNumQueries()):
        request.append(resfile.getRepeatSearchString(q))

    request.append("----12345--")

    # Every request line is newline-terminated, including the last one.
    payload = ("\n".join(request) + "\n").encode('utf-8')

    try:
        tmp = open("tmp.txt", "w")
    except IOError as err:
        print("Cannot open tmp.txt for writing: %s" % err.strerror)
        return 1

    # The with-block closes tmp.txt on every path, including a failed launch.
    with tmp:
        try:
            proc = subprocess.Popen(
                ['./nph-mascot.exe', '4', '-commandline'],
                stdin=subprocess.PIPE,
                stdout=tmp
            )
        except OSError as e:
            print("Cannot run nph-mascot.exe: %s" % e)
            return 1

        proc.stdin.write(payload)
        proc.stdin.close()
        proc.wait()

    try:
        output = open("tmp.txt", "r")
    except IOError as err:
        print("Cannot open tmp.txt for reading: %s" % err.strerror)
        return 1

    status = 0
    with output:
        while True:
            line = output.readline()
            if not line:
                break

            if re.match('.*SUCCESS.*', line):
                # The line after a SUCCESS marker is handed to
                # compareResults() as the new results file name.
                line = output.readline()
                compareResults(resfile, line.rstrip('\n'))
                continue

            if re.match('.*ERROR.*', line):
                print("Search failed:", line.rstrip('\n'))
                # Echo the remainder of the output, then stop scanning.
                rest = output.readline()
                while rest:
                    print(rest.rstrip('\n'))
                    rest = output.readline()
                status = 1
                break

    return status
def compareResults(originalSearch, repeatedSearchFileName):
    """Print where the repeated search scored noticeably higher than the original.

    Opens ``repeatedSearchFileName`` as a Mascot results file. If the
    original search reports ``anyPMF()``, the scores of protein hit 1 from
    both protein summaries are compared; otherwise the ions scores of the
    rank-1 peptide of every query are compared. A line is printed for each
    case where the repeated score exceeds the original by more than 10.
    Cases where the repeated score is lower are not reported. When nothing
    is reported, a single "Similar results" line is printed instead.

    Args:
        originalSearch: The already opened results file object of the
            original search.
        repeatedSearchFileName: Path of the results file produced by the
            repeated search.

    Returns:
        None. If the repeated results file is invalid, an error message is
        printed and no comparison is made.
    """
    repeatedSearch = msparser.ms_mascotresfile(repeatedSearchFileName)
    if not repeatedSearch.isValid():
        print("Invalid repeat search: %s" % repeatedSearch.getLastErrorString())
        return

    originalFileName = originalSearch.getFileName()
    anyReport = False

    if originalSearch.anyPMF():
        originalResults = msparser.ms_proteinsummary(originalSearch)
        repeatedResults = msparser.ms_proteinsummary(repeatedSearch)

        originalProt = originalResults.getHit(1)
        repeatedProt = repeatedResults.getHit(1)

        # Compare only when both summaries returned a first hit.
        if originalProt and repeatedProt:
            diff = repeatedProt.getScore() - originalProt.getScore()
            if diff > 10.0:
                # diff is repeated minus original, so the repeated search is
                # the higher-scoring one and must be named first.
                print("Protein score is %d higher for search" % diff)
                print("%s than %s " % (repeatedSearchFileName, originalFileName))
                anyReport = True
    else:
        originalResults = msparser.ms_peptidesummary(originalSearch)
        repeatedResults = msparser.ms_peptidesummary(repeatedSearch)

        # Queries are numbered from 1; compare the rank-1 peptide of each.
        for q in range(1, 1 + originalSearch.getNumQueries()):
            pepOriginal = originalResults.getPeptide(q, 1)
            pepRepeated = repeatedResults.getPeptide(q, 1)
            diff = pepRepeated.getIonsScore() - pepOriginal.getIonsScore()
            if diff > 10.0:
                # Positive diff: the repeated search scored higher.
                print("Query %d has score %d higher for search %s than %s" % (q, diff, repeatedSearchFileName, originalFileName))
                anyReport = True

    if not anyReport:
        print("Similar results for %s and %s" % (originalFileName, repeatedSearchFileName))
def usage():
    """Print the command-line help text for this script to standard output."""
    # The leading and trailing empty entries reproduce the blank lines that
    # surround the help text.
    help_lines = (
        "",
        "Usage: repeat_search.py <results file>",
        "Given an mascot results file name, repeat the search against the same data.",
        "This program must be run in the Mascot Server cgi directory.",
        "",
    )
    print("\n".join(help_lines))
# Run main() only when executed as a script; its return value becomes the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())