Read in the mascot.dat file.
#!/usr/local/bin/perl
##############################################################################
# file: config_mascotdat.pl #
# 'msparser' toolkit example code #
##############################################################################
# COPYRIGHT NOTICE #
# Copyright 1998-2010 Matrix Science Limited All Rights Reserved. #
# #
##############################################################################
# $Source: parser/examples/test_perl/config_mascotdat.pl $ #
# $Author: villek@matrixscience.com $ #
# $Date: 2018-07-30 16:23:53 +0100 $ #
# $Revision: 1b450440f9c97e1e41d0fc6016a27d68951d4532 | MSPARSER_REL_3_0_0-2024-09-24-0-g93ebaeb4f4 $ #
# $NoKeywords:: $ #
##############################################################################
use strict;
##############################################################################
use msparser;
# The first command-line argument (path or URL of mascot.dat) is mandatory.
# Without it, show the usage text and stop with a non-zero exit status.
unless (defined $ARGV[0]) {
    print <<'USAGE';
Location of mascot.dat has to be specified as a parameter.
The location should either be the full path to the mascot.dat file
or a URL to a Mascot server - e.g. http://mascot-server/mascot/cgi
USAGE
    exit 1;
}
# Note: $cs is declared here, alongside $file, and NOT inside the 'if'
# block below. It has to stay in scope for as long as $file is used. See
# "Using the toolkit from Perl, Java and Python" in Mascot Parser manual.
my ($file, $cs);

# A sessionID can optionally be passed as the second parameter.
# This will only be required if the 'file' is a URL.
#
# Constructors are called with direct method syntax (Class->new) rather
# than the indirect object form (new Class), which is ambiguous to the
# Perl parser.
if (defined($ARGV[1])) {
    $cs = msparser::ms_connection_settings->new;
    $cs->setSessionID($ARGV[1]);
    $file = msparser::ms_datfile->new($ARGV[0], 0, $cs);
} else {
    $file = msparser::ms_datfile->new($ARGV[0]);
}

# Nothing below can work without a valid configuration object, so report
# the last error and exit with a non-zero status.
if (!$file->isValid) {
    print "There are errors. Cannot continue. The last error description:\n";
    print $file->getLastErrorString(), "\n";
    exit 1;
}
# Section 'Databases': list each configured database with its active state.
my $dbs = $file->getDatabases;

# The section may be absent from the file, so test for it before reading.
if (!$dbs->isSectionAvailable) {
    print "Section 'Databases' is missing\n";
} else {
    my $db_count = $dbs->getNumberOfDatabases;
    print "There are ", $db_count, " databases configured:\n";

    for my $idx (0 .. $db_count - 1) {
        my $db = $dbs->getDatabase($idx);
        print $db->getName, " : ",
            ($db->isActive() ? "active\n" : "inactive\n");
    }
}
print "\n";
# Section 'Parse': print the parse rules that are actually specified.
my $parseOptions = $file->getParseOptions();

# The section may be absent from the file, so test for it before reading.
if (!$parseOptions->isSectionAvailable) {
    print "Section 'Parse' is missing\n";
} else {
    my $rule_count = $parseOptions->getNumberOfParseRules();
    print "There are ", $rule_count, " parse rules in the file; the following are specified:\n";

    RULE:
    for my $idx (0 .. $rule_count - 1) {
        my $rule = $parseOptions->getParseRule($idx);

        # Not every rule slot has to be specified in the file; skip the
        # ones that are not available.
        next RULE unless $rule->isAvailable();

        print "Rule_", $idx, " : ", $rule->getRuleStr(), "\n";
    }
}
print "\n";
# Section 'WWW': list the configured sequence report sources.
my $wwwOptions = $file->getWWWOptions();

# The section may be absent from the file, so test for it before reading.
if (!$wwwOptions->isSectionAvailable) {
    print "Section 'WWW' is missing\n";
} else {
    my $entry_count = $wwwOptions->getNumberOfEntries();
    print "There are ", $entry_count, " sequence report sources configured:\n";

    for my $idx (0 .. $entry_count - 1) {
        my $entry = $wwwOptions->getEntry($idx);

        # An entry is labelled SEQ when its type equals WWW_SEQ; any other
        # type is labelled REP.
        print $entry->getName(), "_",
            ($entry->getType() == $msparser::WWW_SEQ ? "SEQ\n" : "REP\n");
    }
}
print "\n";
# Section 'Taxonomy': print the identifier of every taxonomy rule set that
# exists, numbered from 1 up to the maximum reported by the file object.
print "Available taxonomy sources:\n";
my $maxtax = $file->getMaxTaxonomyRules();
my $activetax = 0;

TAXONOMY:
for my $taxind (1 .. $maxtax) {
    # A false return value means there is no taxonomy section at this index.
    my $rules = $file->getTaxonomyRules($taxind);
    next TAXONOMY unless $rules;

    ++$activetax;
    print "TAXONOMY_", $taxind, " ", $rules->getIdentifier(), "\n";
}

# Make it explicit when no taxonomy section was found at all.
print "(none)\n" if $activetax == 0;
print "\n";
# Section 'Cluster': report whether cluster mode is switched on.
my $clusterParams = $file->getClusterParams();

# The section may be absent from the file, so test for it before reading.
if (!$clusterParams->isSectionAvailable) {
    print "Section 'Cluster' is missing\n";
} else {
    print "Cluster mode : ",
        ($clusterParams->isEnabled() ? "enabled\n" : "disabled\n");
}
print "\n";
# Section 'Processor': print the number of configured CPUs.
my $procOptions = $file->getProcessors();

# The section may be absent from the file, so test for it before reading.
if (!$procOptions->isSectionAvailable) {
    print "Section 'Processor' is missing\n";
} else {
    my $cpu_count = $procOptions->getNumberOfProcessors;
    print $cpu_count, " CPU(s) configured\n";
}
print "\n";
# Section 'Options': print the configured Mascot command line.
my $mascotOptions = $file->getMascotOptions();

# The section may be absent from the file, so test for it before reading.
if (!$mascotOptions->isSectionAvailable) {
    print "Section 'Options' is missing\n";
} else {
    my $cmd_line = $mascotOptions->getMascotCmdLine();
    print "MascotCmdLine : ", $cmd_line, "\n";
}
print "\n";
# Section 'Cron': print the command string of every configured cron job,
# or say why there is nothing to list.
my $cronOptions = $file->getCronOptions();

if (!$cronOptions->isSectionAvailable) {
    # No 'Cron' section in the file at all.
    print "Section 'Cron' is missing\n";
} elsif (!$cronOptions->isCronEnabled) {
    # The section exists, but cron is switched off.
    print "Cron functionality is disabled\n";
} else {
    my $job_count = $cronOptions->getNumberOfCronJobs;

    # The heading only ends in a colon when a job list follows it.
    print "There are ", $job_count,
        ($job_count == 0 ? " cron jobs configured\n" : " cron jobs configured:\n");

    for my $idx (0 .. $job_count - 1) {
        my $job = $cronOptions->getCronJob($idx);
        print $job->getCommandStr(), "\n";
    }
}
print "\n";
=pod
Running the program as
perl -I../bin config_mascotdat.pl ../config/mascot.dat
will give the following output under Mascot Server 2.3 (exact details depend
on how the server has been configured):
There are 29 databases configured:
contaminants : active
cRAP : active
EST_human : inactive
EST_mouse : inactive
EST_others : inactive
IPI_arabidopsis : inactive
IPI_bovine : inactive
IPI_chicken : inactive
IPI_human : inactive
IPI_mouse : inactive
IPI_rat : inactive
IPI_zebrafish : inactive
MSIPI_human : inactive
MSIPI_mouse : inactive
NCBInr : active
SwissProt : active
Trembl : inactive
UniRef100 : inactive
Environmental_EST : inactive
Fungi_EST : inactive
Human_EST : inactive
Invertebrates_EST : inactive
Mammals_EST : inactive
Mus_EST : inactive
Plants_EST : inactive
Prokaryotes_EST : inactive
Rodents_EST : inactive
Unclassified_EST : inactive
Vertebrates_EST : inactive
There are 256 parse rules in the file; the following are specified:
Rule_1 : >owl[^ ]*|\([^ ]*\)
Rule_2 : >owl[^ ]*|[^ ]*[ ]\(.*\)
Rule_3 : >[A-Z][0-9];\([^ ]*\)
Rule_4 : >\([^ ]*\)
Rule_5 : >[^ ]* \(.*\)
Rule_6 : >\(gi|[0-9]*\)
Rule_7 : >[^ ]* \(.*\)
Rule_8 : \*\(.*\)>
Rule_9 : \*.*\(>[A-Z][0-9];.*\)
Rule_10 : \(LOCUS .*\)ORIGIN
Rule_11 : \(LOCUS .*\)
Rule_12 : >\([^ ]*\)
Rule_13 : >[^ ]* \(.*\)
Rule_14 : <pre>\(.*\)</pre>
Rule_15 : ^ID \([^ ]*\)
Rule_16 : \*.*\(ID [A-Z0-9]*_[A-Z0-9]* .*\)
Rule_17 : >\([^ ]*\)
Rule_18 : >[^ ]* \(.*\)
Rule_19 : >[A-Z][0-9];\([^ ]*\)[ ]*
Rule_20 : >\(.*\)
Rule_21 : >IPI:\([^| .]*\)
Rule_22 : \*.*\(ID IPI[0-9]* .*\)
Rule_23 : \(.*\)
Rule_24 : \*.*\(ID [-A-Z0-9_].*\)
Rule_25 : >[^(]*.\([^)]*\)
Rule_26 : ^AC \([^ ;]*\)
Rule_27 : \*.*\(AC \S.*\)
Rule_28 : ^ID \([^ .]*\)
Rule_29 : \*.*\(ID IPI[0-9.]* .*\)
Rule_30 : >UniRef100_\([^ ]*\)
Rule_31 : >[^|]*|\([^ ]*\)
Rule_32 : >\([^|]*\)
Rule_33 : >..|[^|]*|\([^ ]*\)
Rule_34 : >..|\([^|]*\)
Rule_35 : >sp|\([^|]*\)
Rule_36 : >IPI:CON_\([^|]*\)
Rule_37 : >MSIPI:s*p*|*\([^| .]*\)
Rule_38 : >EM_EST:\([A-Z0-9]*\);
Rule_39 : >EM_ENV:\([A-Z0-9]*\);
There are 56 sequence report sources configured:
contaminants_SEQ
cRAP_SEQ
EST_human_REP
EST_human_SEQ
EST_mouse_REP
EST_mouse_SEQ
EST_others_REP
EST_others_SEQ
IPI_arabidopsis_REP
IPI_arabidopsis_SEQ
IPI_bovine_REP
IPI_bovine_SEQ
IPI_chicken_REP
IPI_chicken_SEQ
IPI_human_REP
IPI_human_SEQ
IPI_mouse_REP
IPI_mouse_SEQ
IPI_rat_REP
IPI_rat_SEQ
IPI_zebrafish_REP
IPI_zebrafish_SEQ
MSIPI_human_REP
MSIPI_human_SEQ
MSIPI_mouse_REP
MSIPI_mouse_SEQ
NCBInr_REP
NCBInr_SEQ
SwissProt_REP
SwissProt_SEQ
Trembl_REP
Trembl_SEQ
UniRef100_REP
UniRef100_SEQ
Environmental_EST_SEQ
Environmental_EST_REP
Fungi_EST_SEQ
Fungi_EST_REP
Human_EST_SEQ
Human_EST_REP
Invertebrates_EST_SEQ
Invertebrates_EST_REP
Mammals_EST_SEQ
Mammals_EST_REP
Mus_EST_SEQ
Mus_EST_REP
Plants_EST_SEQ
Plants_EST_REP
Prokaryotes_EST_SEQ
Prokaryotes_EST_REP
Rodents_EST_SEQ
Rodents_EST_REP
Unclassified_EST_SEQ
Unclassified_EST_REP
Vertebrates_EST_SEQ
Vertebrates_EST_REP
Available taxonomy sources:
TAXONOMY_1 Obsolete
TAXONOMY_2 OWL REF
TAXONOMY_3 SwissProt FASTA
TAXONOMY_4 Obsolete
TAXONOMY_5 Swiss-prot DAT
TAXONOMY_6 MSDB REF (pre 20000621)
TAXONOMY_7 MSDB REF
TAXONOMY_8 NCBI nr FASTA using GI2TAXID
TAXONOMY_9 dbEST FASTA using GI2TAXID
TAXONOMY_10 EST_human FASTA with TaxID
TAXONOMY_11 EST_mouse FASTA with TaxID
TAXONOMY_12 UniRef Fasta
TAXONOMY_13 EMBL EST Fasta
Cluster mode : disabled
Section 'Processor' is missing
MascotCmdLine : ../cgi/nph-mascot.exe
Cron functionality is disabled
=cut