# Read in the mascot.dat file.
import msparser
import sys
# The mascot.dat location is a mandatory first command-line argument; without
# it, print the usage text and exit with status 1.
if not sys.argv[1:]:
    usage = (
        "\n"
        "Location of mascot.dat has to be specified as a parameter.\n"
        "The location should either be the full path to the mascot.dat file\n"
        "or a URL to a Mascot server - e.g. http://mascot-server/mascot/cgi\n"
    )
    print(usage)
    sys.exit(1)
# Open the configuration source named by the first argument.  When a second
# argument is present it is installed as the session ID on a connection
# settings object, which is then passed to ms_datfile.
# NOTE(review): the literal 0 given as the second ms_datfile argument is
# presumably a timeout/default placeholder -- confirm against the msparser docs.
if len(sys.argv) <= 2:
    file = msparser.ms_datfile(sys.argv[1])
else:
    cs = msparser.ms_connection_settings()
    cs.setSessionID(sys.argv[2])
    file = msparser.ms_datfile(sys.argv[1], 0, cs)

# Nothing below can work on an invalid file object: report the last error
# description and exit with status 1.
if not file.isValid():
    print("There are errors. Cannot continue. The last error description:")
    last_error = file.getLastErrorString()
    print(last_error)
    sys.exit(1)
# "Databases" section: print the number of configured databases, then one line
# per database with its name padded to a 20-character column followed by
# "active" or "inactive".
dbs = file.getDatabases()
if not dbs.isSectionAvailable():
    print("Section 'Databases' is missing")
else:
    db_count = dbs.getNumberOfDatabases()
    print("There are %d databases configured:" % db_count)
    for index in range(db_count):
        entry = dbs.getDatabase(index)
        db_name = entry.getName()
        status = "active" if entry.isActive() else "inactive"
        print('{:<20}{}'.format(db_name, status))
# Blank separator line after the section.
print("")
# "Parse" section: print the total number of parse rule slots, then list only
# those slots whose rule reports itself as available.
parseOptions = file.getParseOptions()
if not parseOptions.isSectionAvailable():
    print("Section 'Parse' is missing")
else:
    rule_count = parseOptions.getNumberOfParseRules()
    print(
        "There are %d parse rules in the file; the following are specified:"
        % rule_count
    )
    for rule_index in range(rule_count):
        rule = parseOptions.getParseRule(rule_index)
        if not rule.isAvailable():
            continue
        print("Rule_%d : %s" % (rule_index, rule.getRuleStr()))
# Blank separator line after the section.
print("")
# "WWW" section: print the number of sequence/report sources, then one line
# per entry in the form <name>_SEQ or <name>_REP.
wwwOptions = file.getWWWOptions()
if wwwOptions.isSectionAvailable():
    n = wwwOptions.getNumberOfEntries()
    print("There are %d sequence report sources configured:" % n)
    for i in range(n):
        # Fetch the entry once and reuse it for both the type and the name.
        entry = wwwOptions.getEntry(i)
        # Entries whose type equals msparser.WWW_SEQ are tagged "SEQ"; every
        # other type is tagged "REP".  The tag is kept in `suffix` rather than
        # `type` so the builtin type() is not shadowed at module scope.
        suffix = "SEQ" if entry.getType() == msparser.WWW_SEQ else "REP"
        print("%s_%s" % (entry.getName(), suffix))
else:
    print("Section 'WWW' is missing")
# Blank separator line after the section.
print("")
# Taxonomy sources: walk rule indices 1..maxtax (inclusive) and list every
# index for which the file returns a rules object, or "(none)" if there are
# no such indices.
print("Available taxonomy sources:")
maxtax = file.getMaxTaxonomyRules()
activetax = 0
for taxind in range(1, 1 + maxtax):
    # Look the rules object up once; a false value means this index is unused.
    rules = file.getTaxonomyRules(taxind)
    if not rules:
        continue
    activetax += 1
    # Label and identifier go on one line so each identifier stays attached
    # to its TAXONOMY_<n> label instead of appearing on a separate line.
    print("TAXONOMY_%d %s" % (taxind, rules.getIdentifier()))
if activetax == 0:
    print("(none)")
# Blank separator line after the section.
print("")
# "Cluster" section: report whether cluster mode is enabled or disabled, or
# that the section is missing.
clusterParams = file.getClusterParams()
if not clusterParams.isSectionAvailable():
    print("Section 'Cluster' is missing")
elif clusterParams.isEnabled():
    print("Cluster mode: enabled")
else:
    print("Cluster mode: disabled")
# Blank separator line after the section.
print("")
# "Processor" section: report the configured processor count, or that the
# section is missing.
procOptions = file.getProcessors()
if not procOptions.isSectionAvailable():
    print("Section 'Processor' is missing")
else:
    cpu_count = procOptions.getNumberOfProcessors()
    print("%d CPU(s) configured" % cpu_count)
# Blank separator line after the section.
print("")
# "Options" section: print the MascotCmdLine value, or note that the section
# is missing.
mascotOptions = file.getMascotOptions()
if not mascotOptions.isSectionAvailable():
    print("Section 'Options' is missing")
else:
    cmd_line = mascotOptions.getMascotCmdLine()
    print("MascotCmdLine: %s" % cmd_line)
# Blank separator line after the section.
print("")
# "Cron" section: when the section exists and cron is enabled, print the job
# count followed by each job's command string; otherwise say whether cron is
# disabled or the section is missing.
cronOptions = file.getCronOptions()
if not cronOptions.isSectionAvailable():
    print("Section 'Cron' is missing")
elif not cronOptions.isCronEnabled():
    print("Cron functionality is disabled")
else:
    job_count = cronOptions.getNumberOfCronJobs()
    # The header ends with a colon only when a job list follows it.
    if job_count != 0:
        print("There are %d cron jobs configured:" % job_count)
    else:
        print("There are %d cron jobs configured" % job_count)
    for job_index in range(job_count):
        job = cronOptions.getCronJob(job_index)
        print(job.getCommandStr())
# Blank separator line after the section.
print("")
"""
Running the program as
python config_mascotdat.py /usr/local/mascot/config/mascot.dat
will give the following output under Mascot Server 2.3 (exact details depend
on how the server has been configured):
There are 29 databases configured:
contaminants : active
cRAP : active
EST_human : inactive
EST_mouse : inactive
EST_others : inactive
IPI_arabidopsis : inactive
IPI_bovine : inactive
IPI_chicken : inactive
IPI_human : inactive
IPI_mouse : inactive
IPI_rat : inactive
IPI_zebrafish : inactive
MSIPI_human : inactive
MSIPI_mouse : inactive
NCBInr : active
SwissProt : active
Trembl : inactive
UniRef100 : inactive
Environmental_EST : inactive
Fungi_EST : inactive
Human_EST : inactive
Invertebrates_EST : inactive
Mammals_EST : inactive
Mus_EST : inactive
Plants_EST : inactive
Prokaryotes_EST : inactive
Rodents_EST : inactive
Unclassified_EST : inactive
Vertebrates_EST : inactive
There are 256 parse rules in the file; the following are specified:
Rule_1 : >owl[^ ]*|\([^ ]*\)
Rule_2 : >owl[^ ]*|[^ ]*[ ]\(.*\)
Rule_3 : >[A-Z][0-9];\([^ ]*\)
Rule_4 : >\([^ ]*\)
Rule_5 : >[^ ]* \(.*\)
Rule_6 : >\(gi|[0-9]*\)
Rule_7 : >[^ ]* \(.*\)
Rule_8 : \*\(.*\)>
Rule_9 : \*.*\(>[A-Z][0-9];.*\)
Rule_10 : \(LOCUS .*\)ORIGIN
Rule_11 : \(LOCUS .*\)
Rule_12 : >\([^ ]*\)
Rule_13 : >[^ ]* \(.*\)
Rule_14 : <pre>\(.*\)</pre>
Rule_15 : ^ID \([^ ]*\)
Rule_16 : \*.*\(ID [A-Z0-9]*_[A-Z0-9]* .*\)
Rule_17 : >\([^ ]*\)
Rule_18 : >[^ ]* \(.*\)
Rule_19 : >[A-Z][0-9];\([^ ]*\)[ ]*
Rule_20 : >\(.*\)
Rule_21 : >IPI:\([^| .]*\)
Rule_22 : \*.*\(ID IPI[0-9]* .*\)
Rule_23 : \(.*\)
Rule_24 : \*.*\(ID [-A-Z0-9_].*\)
Rule_25 : >[^(]*.\([^)]*\)
Rule_26 : ^AC \([^ ;]*\)
Rule_27 : \*.*\(AC \S.*\)
Rule_28 : ^ID \([^ .]*\)
Rule_29 : \*.*\(ID IPI[0-9.]* .*\)
Rule_30 : >UniRef100_\([^ ]*\)
Rule_31 : >[^|]*|\([^ ]*\)
Rule_32 : >\([^|]*\)
Rule_33 : >..|[^|]*|\([^ ]*\)
Rule_34 : >..|\([^|]*\)
Rule_35 : >sp|\([^|]*\)
Rule_36 : >IPI:CON_\([^|]*\)
Rule_37 : >MSIPI:s*p*|*\([^| .]*\)
Rule_38 : >EM_EST:\([A-Z0-9]*\);
Rule_39 : >EM_ENV:\([A-Z0-9]*\);
There are 56 sequence report sources configured:
contaminants_SEQ
cRAP_SEQ
EST_human_REP
EST_human_SEQ
EST_mouse_REP
EST_mouse_SEQ
EST_others_REP
EST_others_SEQ
IPI_arabidopsis_REP
IPI_arabidopsis_SEQ
IPI_bovine_REP
IPI_bovine_SEQ
IPI_chicken_REP
IPI_chicken_SEQ
IPI_human_REP
IPI_human_SEQ
IPI_mouse_REP
IPI_mouse_SEQ
IPI_rat_REP
IPI_rat_SEQ
IPI_zebrafish_REP
IPI_zebrafish_SEQ
MSIPI_human_REP
MSIPI_human_SEQ
MSIPI_mouse_REP
MSIPI_mouse_SEQ
NCBInr_REP
NCBInr_SEQ
SwissProt_REP
SwissProt_SEQ
Trembl_REP
Trembl_SEQ
UniRef100_REP
UniRef100_SEQ
Environmental_EST_SEQ
Environmental_EST_REP
Fungi_EST_SEQ
Fungi_EST_REP
Human_EST_SEQ
Human_EST_REP
Invertebrates_EST_SEQ
Invertebrates_EST_REP
Mammals_EST_SEQ
Mammals_EST_REP
Mus_EST_SEQ
Mus_EST_REP
Plants_EST_SEQ
Plants_EST_REP
Prokaryotes_EST_SEQ
Prokaryotes_EST_REP
Rodents_EST_SEQ
Rodents_EST_REP
Unclassified_EST_SEQ
Unclassified_EST_REP
Vertebrates_EST_SEQ
Vertebrates_EST_REP
Available taxonomy sources:
TAXONOMY_1 Obsolete
TAXONOMY_2 OWL REF
TAXONOMY_3 SwissProt FASTA
TAXONOMY_4 Obsolete
TAXONOMY_5 Swiss-prot DAT
TAXONOMY_6 MSDB REF (pre 20000621)
TAXONOMY_7 MSDB REF
TAXONOMY_8 NCBI nr FASTA using GI2TAXID
TAXONOMY_9 dbEST FASTA using GI2TAXID
TAXONOMY_10 EST_human FASTA with TaxID
TAXONOMY_11 EST_mouse FASTA with TaxID
TAXONOMY_12 UniRef Fasta
TAXONOMY_13 EMBL EST Fasta
Cluster mode : disabled
Section 'Processor' is missing
MascotCmdLine : ../cgi/nph-mascot.exe
Cron functionality is disabled
"""