Class for parsing and reading files in dat28 format. More...
#include <ms_mascotresfile_dat.hpp>
Public Member Functions | |
ms_mascotresfile_dat (const char *szFileName, const int keepAliveInterval=0, const char *keepAliveText="<!-- %d seconds -->\n", const unsigned int flags=RESFILE_NOFLAG, const char *cacheDirectory="../data/cache/%Y/%m", const char *XMLschemaDirectory=0, ms_progress_info *progressMonitor=0) | |
Constructor to open a Mascot results file in dat28 format. | |
bool | anyCrosslinkedMatches (const bool isDecoy=false) const override |
Returns true if there are any Crosslinked matches. | |
bool | anyErrorTolerantMatches (const bool isDecoy=false) const override |
Returns true if there are any Error Tolerant matches. | |
bool | anyFastaMatches (const bool isDecoy=false) const override |
Returns true if there are any FASTA matches. | |
bool | anyMSMS () const override |
Returns true if any of the queries in the search contain ions data. | |
bool | anyPeptideSummaryMatches (const section sec=SEC_PEPTIDES) const |
Returns true if there is a peptides section, and if there are any results in it. | |
bool | anyPMF () const override |
Returns true if any of the queries in the search just contain a single peptide mass. | |
bool | anySpectralLibraryMatches (const bool isDecoy=false) const override |
Returns true if there are any Spectral Library matches. | |
bool | anySQ () const override |
Returns true if any of the queries in the search contain seq or comp commands. | |
bool | anyTag () const override |
Returns true if any of the queries in the search contain tag or etag commands. | |
void | appendErrors (const ms_errors &src) |
Copies all errors from another instance and appends them at the end of own list. | |
int | appendResfile (const char *filename, int flags=RESFILE_USE_PARENT_PARAMS, const char *cacheDirectory=0) override |
Multiple results files can be summed together and treated as 'one'. | |
void | clearAllErrors () |
Remove all errors from the current list of errors. | |
void | copyFrom (const ms_errors *right) |
Use this member to make a copy of another instance. | |
bool | doesSectionExist (const section sec) const |
Returns true if there is an entry for the passed section. | |
std::string | enumerateQuerySectionKeys (const int query, const int num, int *pPreviousNum=0, OFFSET64_T *pPreviousOffset=0) const |
Get the key name for each item in a query section. | |
std::string | enumerateSectionKeys (const section sec, const int num, int *pPreviousNum=0, OFFSET64_T *pPreviousOffset=0) const |
Get the key name for each item in a section. | |
std::string | get_ms_mascotresults_params (const ms_mascotoptions &opts, ms_mascotresults_params &params) const |
Return default flags and parameters for creating an ms_peptidesummary or ms_proteinsummary object. | |
std::string | get_ms_mascotresults_params (const ms_mascotoptions &opts, unsigned int *gpFlags, double *gpMinProbability, int *gpMaxHitsToReport, double *gpIgnoreIonsScoreBelow, unsigned int *gpMinPepLenInPepSummary, bool *gpUsePeptideSummary, unsigned int *gpFlags2) const |
[Deprecated] Return default flags and parameters for creating an ms_peptidesummary or ms_proteinsummary object. | |
std::string | getCacheDirectory (bool processed=true) const override |
Returns the directory being used for cache files (if any). | |
std::string | getCacheFileName () const override |
Returns the filename of the cache file. | |
bool | getCrosslinkingMethod (ms_crosslinking_method *method) const |
Return the crosslinking method object from the embedded crosslinking file. | |
DATABASE_TYPE | getDatabaseType (const int idx) const override |
Return database type if available. | |
int | getDate () const override |
Returns the date and time of the search in seconds since midnight January 1st 1970. | |
ms_mascotoptions::DECOY_ALGORITHM | getDecoyTypeForDB (const int idx=1) const override |
Returns the decoy algorithm type for a given database. | |
bool | getEnzyme (ms_enzymefile *efile, const char *enzymeFileName=0) const override |
Returns an object that represents the embedded enzyme file as a reduced enzymes file. | |
const ms_errs * | getErrorHandler () const |
Retrieve the error object using this function to get access to all errors and error parameters. | |
int | getErrorNumber (const int num=-1) const |
Return a specific error number - or ms_errs::ERR_NO_ERROR. | |
std::string | getErrorString (const int num) const |
Return a specific error as a string. | |
int | getExecTime () const override |
Returns the time taken for the search. | |
std::string | getFastaPath (int idx=1) const override |
Returns the path to the FASTA file used. | |
std::string | getFastaVer (int idx=1) const override |
Returns the FASTA file version. | |
std::string | getFileName (const int id=1) const override |
Returns the name of the results file passed into the constructor. | |
double | getFirstPassThreshold () const override |
Return the threshold value for the first pass of an automated error tolerant search. | |
void | getHeaderKeyValues (std::vector< std::string > &keys, std::vector< std::string > &values) const override |
Return all the header key-value pairs. | |
virtual std::string | getHeaderValue (const std::string &key) const override |
Return the header value for the given key. | |
ms_inputquery | getInputQuery (const int queryNum) const override |
Return the ms_inputquery object for the query given as argument. | |
int | getJobNumber (const int resfileID=1) const override |
Return the job number for this file - obtained from the file name. | |
void | getKeepAlive (KA_TASK &kaTask, int &kaPercentage, std::string &kaAccession, int &kaHit, int &kaQuery, std::string &kaText) const |
Return the progress indicators used by the keepAlive functions. | |
int | getLastError () const |
Return the last error number - or ms_errs::ERR_NO_ERROR. | |
std::string | getLastErrorString () const |
Return the last error as a string - or an empty string. | |
int | getLibraryMods (std::vector< std::string > &modNames, std::vector< double > &modDeltas) const override |
Return all the library mod names and deltas. | |
std::string | getMascotVer () const override |
Returns the version of Mascot used to perform the search. | |
bool | getMasses (ms_masses *masses) const |
Returns an ms_masses object from the mass values in the results file. | |
void | getMassesKeyValues (std::vector< std::string > &keys, std::vector< double > &values) const override |
Return all the residue and modification masses as key-value pairs. | |
virtual double | getMassValue (const std::string &key) const override |
Return the residue or modification mass for the given key. | |
const ms_modification * | getMonoLinkModification (const int modNum, const int monoLink) const |
Returns an ms_modification object that represents a monolink variable modification. | |
std::string | getMSParserVersion () const |
Returns the version number of the Mascot Parser library. | |
int | getMultiFileQueryNumber (const int localQuery, const int fileId) const override |
Return the multi-file query number from the local query number in an appended file. | |
int | getNumberOfErrors () const |
Return the number of errors since the last call to clearAllErrors. | |
int | getNumberOfResfiles () const override |
Multiple results files can be summed together and treated as 'one'. | |
int | getNumEtSeqsSearched (const int idx=0) const override |
Returns the number of sequences searched in the second pass of an integrated error tolerant search. | |
int | getNumHits (const section sec=SEC_SUMMARY) const |
Returns the maximum number of hits possible for a protein summary. | |
int | getNumLibraryEntries (const int idx=0) const override |
Returns the number of entries in the spectral library searched. | |
int | getNumQueries (const int resfileID=0) const override |
Returns the number of queries (peptide masses or ms-ms spectra). | |
double | getNumResidues (const int idx=0) const override |
Returns the number of residues in the FASTA file(s) searched. | |
int | getNumSeqs (const int idx=0) const override |
Returns the number of sequences in the FASTA file(s) searched. | |
int | getNumSeqsAfterTax (const int idx=0) const override |
Returns the number of sequences that passed the taxonomy filter in the FASTA file(s) searched. | |
int | getObservedCharge (const int query, const bool decoy=false) const override |
The 'charge' returned will be 0 for Mr, otherwise it will be 1, -1, 2, -2, 3, -3 etc. and -100 for an error. | |
double | getObservedIntensity (const int query) const override |
Returns the experimental intensity for the peptide. | |
double | getObservedMass (const int query) const override |
Returns the experimental mass value as entered by the user. | |
double | getObservedMrValue (const int query, const bool decoy=false) const override |
Returns the experimental mass value (as a relative mass) as entered by the user. | |
std::vector< std::string > | getPercolatorFileNames () const |
Retrieve the filenames used for percolator input and output. | |
ms_progress_info * | getProgressInfo (bool forPeptideSummary=false) const |
If a matrix_science::ms_progress_info object is passed to the constructor, this is returned here. | |
int64_t | getQmatch (const int query, const ms_peptide::PSM_TYPE pepType) const override |
Return the number of peptide masses within precursor tolerance of this query. | |
double | getQplughole (const int query, const ms_peptide::PSM_TYPE pepType) const override |
Return the threshold score for homologous peptide match (MIS only). | |
bool | getQuantitation (ms_quant_configfile *qfile) const override |
Returns an object that represents the embedded quantitation file as a reduced quantitation.xml file. | |
bool | getQuantitationMethod (ms_quant_method *qmethod) const |
Return the quantitation method object from the embedded quantitation file. | |
int | getQuerySectionValue (const int query, const char *key, char *str, int maxLen) const |
Return the string value from a query in the results file. | |
double | getQuerySectionValueDouble (const int query, const char *key) const |
Return the floating point value from a query in the results file. | |
int | getQuerySectionValueInt (const int query, const char *key) const |
Return the integer value from a query in the results file. | |
std::string | getQuerySectionValueStr (const int query, const char *key) const |
Return the string value from a query in the results file. | |
int | getReferenceDatabaseNumberOfSL (const int idx) const override |
Return the database number of the reference database of a spectral library. | |
std::string | getRepeatSearchString (const int query, const bool fullQuery=false) const override |
Builds the string needed to perform a repeat search. | |
const ms_mascotresfile_dat * | getResfile (int id) const override |
Returns a pointer to the underlying ms_mascotresfile_dat object created by calling appendResfile. | |
virtual std::string | getSearchParameter (const std::string &key) const override |
Return the search parameter for the given key. | |
void | getSearchParametersKeyValues (std::vector< std::string > &keys, std::vector< std::string > &values) const override |
Return all the search parameters as key-value pairs. | |
int | getSectionValue (const section sec, const char *key, char *str, int maxLen) const |
Return the string value from any line in the results file. | |
double | getSectionValueDouble (const section sec, const char *key) const |
Return the floating point value from any line in the results file. | |
int | getSectionValueInt (const section sec, const char *key) const |
Return the integer value from any line in the results file. | |
std::string | getSectionValueStr (const section sec, const char *key) const |
Return the string value from any line in the results file. | |
std::vector< int > | getSLDatabaseNumbersOfReference (const int idx) const override |
Return the database numbers of the spectral libraries whose reference database is at the given index. | |
virtual std::string | getSLExecCommand (int idx=1) const override |
Returns the library search command line and parameters (sl_exec_command). | |
double | getSLFragmentTolerance (int idx=1) const override |
Returns the effective spectral library fragment tolerance. | |
std::string | getSLFragmentToleranceUnit (int idx=1) const override |
Returns the unit of the effective spectral library fragment tolerance. | |
bool | getSrcQueryAndFileIdForMultiFile (const int q, int &gsqNewQuery, int &gsqFileId) const override |
Return the query number and file ID in the source .dat file. | |
bool | getTaxonomy (ms_taxonomyfile *tfile) const override |
Returns an object that represents the embedded taxonomy file as a reduced taxonomy file. | |
bool | getUnimod (ms_umod_configfile *ufile, bool useSchemaFromResfile=false) const override |
Returns an object that represents the embedded unimod file as a reduced unimod_2.xml file. | |
bool | getUnimodXL (ms_umod_configfile *ufile, bool useSchemaFromResfile=false) const override |
Returns an object that represents the embedded unimod_xl file as a reduced unimod_xl.xml file. | |
std::string | getUniqueTaskID () const override |
Returns the unique task ID used by Mascot Daemon. | |
std::string | getXMLschemaFilePath (XML_SCHEMA XMLschema) const |
Gets the XML schema to be used by functions using quantitation or unimod. | |
bool | hasEnzyme () const override |
Return true if the results file contains information about the enzyme used. | |
bool | hasQuantitation () const override |
Return true if the results file contains quantitation data. | |
bool | hasRT () const override |
Return true if the results file contains retention time data. | |
bool | isDatabaseTypeAvailable () const override |
Check whether database types are available. | |
bool | isErrorTolerant () const override |
Returns true if the search was an error tolerant search. | |
bool | isMSMS () const override |
Returns true if the search was an MSMS search (SEARCH=MIS ). | |
bool | isPMF () const override |
Returns true if the search was a PMF search (SEARCH=PMF ). | |
bool | isSQ () const override |
Returns true if the search was a sequence query search (SEARCH=SQ ). | |
bool | isValid () const |
Call this function to determine if there have been any errors. | |
bool | outputKeepAlive () const |
Outputs the "keep-alive" string during time-consuming operations. | |
ms_searchparams & | params () const |
Returns a reference to the search parameters class. | |
void | resetKeepAlive (const int keepAliveInterval, const char *keepAliveText, const bool propagateToAppended=true, const bool resetStartTime=false) |
Replace the existing keepAlive values with new values. | |
void | setPercolatorFeatures (const char *percolatorFeatures, const char *additionalFeatures, const bool useRetentionTimes) |
Set Percolator features before creating an ms_peptidesummary with Percolator scoring (deprecated). | |
void | setPercolatorFeatures (const ms_mascotoptions &options, const char *additionalFeatures, const std::vector< std::string > &adapterParameters=std::vector< std::string >()) |
Set Percolator features before creating an ms_peptidesummary with Percolator scoring. | |
bool | setXMLschemaFilePath (XML_SCHEMA XMLschema, const char *path) |
Sets the XML schema to be used by functions using quantitation or unimod. | |
bool | versionGreaterOrEqual (int major, int minor, int revision) const |
Compare the value returned by getMascotVer() with the passed version number. | |
Static Public Member Functions | |
static std::unique_ptr< ms_mascotresfilebase > | createResfile (const char *szFileName, const int keepAliveInterval=0, const char *keepAliveText="<!-- %d seconds -->\n", const unsigned int flags=matrix_science::ms_mascotresfilebase::RESFILE_NOFLAG, const char *cacheDirectory="../data/cache/%Y/%m", const char *XMLschemaDirectory=0, matrix_science::ms_progress_info *progressMonitor=0) |
Return a new ms_mascotresfile_msr or ms_mascotresfile_dat based on the file contents. | |
static section | getPeptideSection (const ms_peptide::PSM_TYPE pepType) |
Return peptide section in the .dat file that contains matches of type pepType. | |
static const char * | getSectionName (const section sec) |
Return the section name as a string given the enum value. | |
static section | getSummarySection (const ms_peptide::PSM_TYPE pepType) |
Return the summary section in the .dat file that contains matches of type pepType. | |
static RESFILE_TYPE | resfileType (const std::string &fileName) |
Return the results format of the file provided as an argument. | |
static bool | staticGetPercolatorFileNames (const char *szFileName, const char *cacheDirectory, const char *percolatorFeatures, const char *additionalFeatures, const bool useRetentionTimes, std::vector< std::string > &filenames, std::vector< bool > &exists) |
Returns a list of the Percolator input and output files for the specified data file (deprecated). | |
static bool | staticGetPercolatorFileNames (const char *szFileName, const char *cacheDirectory, const ms_mascotoptions &options, const char *additionalFeatures, const std::vector< std::string > &adapterParameters, std::vector< std::string > &filenames, std::vector< bool > &exists) |
Returns a list of the Percolator input and output files for the specified data file. | |
static bool | willCreateCache (const char *szFileName, const ms_mascotoptions &opts, const char *applicationName, std::string &resfileCacheFileName, unsigned int &cacheStatus) |
Returns true if a cache file will be created when the ms_mascotresfile_dat constructor is called. | |
static bool | willCreateCache (const char *szFileName, const unsigned int flags, const char *cacheDirectory, std::string *cacheFileName) |
Returns true if a cache file will be created when the ms_mascotresfile_dat constructor is called. | |
Protected Member Functions | |
bool | getCrosslinking (ms_crosslinking_configfile *crosslinkingFile) const override |
std::string | getErrorInfoAsString (const int num) const |
bool | setErrorInfoFromString (const std::string &e) |
Class for parsing and reading files in dat28 format.
Until Mascot Server 2.8, there was only one Mascot results file format: plain text MIME format file with .dat extension. Mascot Server 3.0 introduced a new file format, Mascot Search Results (MSR), which is an SQLite database. The old .dat format is frozen and now referred to as dat28.
This class reads files in the dat28 format.
enum err |
Definitions for error numbers.
See Using enumerated values and static const ints in Perl, Java, Python and C#. Messages are classified as fatal errors [F] or warnings [W]. A warning will not cause ms_mascotresfile_dat::isValid() to return false.
Enumerator | |
---|---|
ERR_NO_ERROR | [W] Success |
ERR_NOMEM | [F] Failed to allocate memory to load the file |
ERR_NOSUCHFILE | [F] The file passed in the constructor does not exist |
ERR_READINGFILE | [F] Opened the file successfully, but failed to read from it |
ERR_QUERYOUTOFRANGE | [F] Set if query < 1 or query > getNumQueries |
ERR_MISSINGENTRY | [F] Set if there is no qexp value in the file |
ERR_PEPSUMMPEPGET | [F] Value of q, p or h out of range, so cannot get peptide info |
ERR_PEPTIDESTR | [F] The string in the peptides block is not valid |
ERR_ACCINPEPTIDESTR | [F] Could not parse an item for a given accession in the peptide section |
ERR_PROTSUMM | [F] Error parsing a line in the protein summary |
ERR_PROTSUMMPEP | [F] Couldn't parse peptide information from the protein summary section |
ERR_ADDPEPTIDES | [F] Failed to add peptides when creating the peptide summary |
ERR_MISSINGHIT | [F] Missing hit in the summary section |
ERR_MISSINGSECTION | [F] A complete section is missing from the file |
ERR_MISSINGSECTIONEND | [F] Missing end of section in the file |
ERR_MALFORMED_ERR_TOL | [W] Expecting a line of format: q1_p2_et_mods=0.984020,0.000000,Citrullination |
ERR_NO_ERR_TOL_PARENT | [F] No parent search file. See Error tolerant searches |
ERR_NULL_ACC_PEP_SUM | [W] An empty accession string has been found. Possible problem in database |
ERR_NULL_ACC_PROT_SUM | [W] An empty accession string has been found. Possible problem in database |
ERR_DUPE_ACCESSION | [W] A possible duplicate accession string has been found. Possible problem in database. |
ERR_UNASSIGNED_PROG | [F] Programming error! Calling getNumberOfUnassigned() or getUnassigned() before createUnassignedList() |
ERR_UNASSIGNED_RANGE | [F] Calling ms_mascotresults::getUnassigned() with out of range number |
ERR_UNASSIGNED_UNK | [F] Calling ms_mascotresults::getUnassigned() - unable to retrieve value |
ERR_NO_UNIGENE_FILE | [F] Failed to open the UniGene file specified |
ERR_DUPLICATE_KEY | [W] Duplicate entries with the same key in the named section. |
ERR_OLDRESULTSFILE | [F] Very old results file (last century!). Parser requires 1.02 or later |
ERR_MALFORMED_TAG | [W] Expecting a line in format: q1_p2_tag=1:3:5:6,2:4:12:6,... |
ERR_MALFORMED_DRANGE | [W] Expecting a line in format: q1_p2_drange=0,256 |
ERR_INVALID_NUMQUERIES | [W] Invalid number of queries in results file has been corrected. |
ERR_MALFORMED_TERMS | [W] Expecting a line in format: q1_p2_terms=A,B:-,I:... |
ERR_INVALID_RESFILE | [F] Invalid results file format - missing or corrupt headers |
ERR_INVALID_PROTDB | [W] Invalid h1_db-string format. Expecting an integer number. |
ERR_UNIGENE_MULTIDB | [W] UniGene index is not supported in multi-database search |
ERR_INVALID_CACHE_DIR | [F] Must specify a cache directory if using CDB cache files |
ERR_FAIL_OPEN_DAT_FILE | [F] Failed to open the results file for reading |
ERR_MISSING_CDB_FILE | [W] Cache file is missing or cannot be opened |
ERR_FAIL_MK_CACHE_DIR | [F] Failed to create cache directory for cache files |
ERR_FAIL_MK_CDB_FILE | [W] Failed to create a cache file |
ERR_FAIL_CLOSE_FILE | [W] Failed to close file |
ERR_FAIL_CDB_INIT | [W] Failed to initialise cache file (%s). Error code %d. |
ERR_INVALID_CDB_FILE | [W] Value in cdb cache file (%s) is corrupt: %s |
ERR_WRITE_CDB_FILE | [W] Failed to write to the cache file (%s). Error %d (%s) |
ERR_CDB_TOO_LARGE | [W] Cannot use cache file (%s) which exceeded max size of %s. Try deleting the cache file and retrying |
ERR_NEED_64_BIT | [F] This results file (%s) is too large for 32 bit Mascot Parser. Please upgrade to 64 bit. |
ERR_CDB_64_BIT_REMAKE | [W] Re-creating %s. Was too large for 32 bit, but may succeed with 64 bit |
ERR_CDB_OLD_VER_RETRY | [W] Cache file %s is an old version. Creating new cache file |
ERR_CDB_OLD_VER_NO_RETRY | [W] Cache file %s is an old version. Continuing without cache |
ERR_CDB_INCOMPLETE_RETRY | [W] Cache file %s was not complete. Re-creating the cache file |
ERR_CDB_INCOMPLETE_NO_RETRY | [W] Cache file %s was not complete. Continuing without cache |
ERR_CDB_BEING_CREATED | [W] Cache file %s being created by another task. Continuing without cache |
ERR_CDB_FAIL_REMOVE | [W] Failed to remove old cache file %s - error %s. Continuing without cache |
ERR_CDB_FAIL_LOCK | [W] Failed to lock cache file %s. Error code: %d |
ERR_CDB_FAIL_UNLOCK | [W] Failed to unlock cache file %s. Error code: %d |
ERR_CDB_SOURCE_CHANGE_RETRY | [W] %s changed. %s (was %s), %s bytes (was %s). Re-creating the cache file |
ERR_CDB_SOURCE_CHANGE_NO_RETRY | [W] %s changed. %s (was %s), %s bytes (was %s). Continuing without cache |
ERR_MISSING_PERCOLATOR_FILE | [F] Percolator file %s is missing. Cannot continue |
ERR_CANNOT_APPEND_RESFILE | [F] The file %s cannot be appended to %s because %s values are different |
ERR_CANNOT_APPEND_RESFILE_NO_FNAMES | [F] The file cannot be appended because %s values are different |
ERR_RESULTS_NOT_CREATED | [W] Attempting to call function %s before createSummary() has completed. |
|
inherited |
Flags for opening the results file.
See Using enumerated values and static const ints in Perl, Java, Python and C# and Caching Mascot Results.
Enumerator | |
---|---|
RESFILE_NOFLAG | Dat28 format: Read the whole file into memory. MSR format: Use standard SQLite methods to read the file with low memory overhead. |
RESFILE_USE_CACHE | Dat28 format: Create the resfile cache if it doesn't already exist. Use the cache rather than reading the whole .dat file into memory. MSR format: this flag is ignored. |
RESFILE_CACHE_IGNORE_ACC_DUPES | When creating a cache file, don't check for duplicate accessions in the SEC_PROTEINS and SEC_DECOYPROTEINS sections which can save some time. Strongly recommend that this flag is never used unless performance becomes a real issue and it is known that ms_mascotoptions::getIgnoreDupeAccession was not defined for the relevant database(s) when they were compressed. |
RESFILE_USE_PARENT_PARAMS | For use when Combining multiple results files. The flags and parameters are then inherited from the parent search. |
RESFILE_CACHE_IGNORE_DATE_CHANGE | Dat28 format: Opening the resfile cache CDB file should ignore the last modified timestamp on the .dat file. MSR format: this flag is ignored. |
|
inherited |
Processing some results files is computationally intensive. These are the tasks that can be performed.
See Using enumerated values and static const ints in Perl, Java, Python and C#.
Used with getKeepAlive(), but also see outputKeepAlive()
Enumerator | |
---|---|
KA_CREATEINDEX_CI | Creating a cache file when Using the resfile cache (dat28 format only) in dat28 format. |
KA_READFILE_RF | Reading the results file into memory when not using a cache. |
KA_ASSIGNPROTEINS_AP | Assigning peptides to proteins to get a list of all possible proteins. |
KA_GROUPPROTEINS_GP | Grouping proteins using ms_mascotresults::MSRES_GROUP_PROTEINS or ms_mascotresults::MSRES_CLUSTER_PROTEINS. |
KA_UNASSIGNEDLIST_UL | Creating the unassigned list - see ms_mascotresults::createUnassignedList. |
KA_QUANTITATION | Calculating quantitation values for reporter and multiplex protocols. |
KA_CREATECACHE_CC | Creating a cache file when Using the pepsum cache (MSR and dat28). |
KA_THRESHFORFDR_FDR | Calls to ms_mascotresults::getThresholdForFDRAboveHomology can be slow. |
KA_LAST | Placeholder that is equal to the number of possible tasks. |
|
inherited |
Offsets into a vector of Percolator filenames.
See Using enumerated values and static const ints in Perl, Java, Python and C#.
Used with getPercolatorFileNames().
|
inherited |
enum section |
Section names in the standard mascot results files.
See Using enumerated values and static const ints in Perl, Java, Python and C#.
Enumerator | |
---|---|
SEC_PARAMETERS | parameters section |
SEC_HEADER | header section |
SEC_MASSES | masses section |
SEC_SUMMARY | summary section |
SEC_MIXTURE | mixture section (pmf mixture) |
SEC_PEPTIDES | peptides section |
SEC_PROTEINS | proteins section |
SEC_QUERY1 | query1 section. Don't use, see getQuerySectionValueStr() etc. |
SEC_QUANTITATION | quantitation section |
SEC_UNIMOD | unimod section |
SEC_ENZYME | enzyme section |
SEC_TAXONOMY | taxonomy section |
SEC_DECOYSUMMARY | decoy_summary section. See also Target-decoy searches and false discovery rate. |
SEC_DECOYMIXTURE | decoy_mixture section. See also Target-decoy searches and false discovery rate. |
SEC_DECOYPEPTIDES | decoy_peptides section. See also Target-decoy searches and false discovery rate. |
SEC_DECOYPROTEINS | decoy_proteins section. See also Target-decoy searches and false discovery rate. |
SEC_ERRTOLSUMMARY | error tolerant summary section. See also Error tolerant searches. |
SEC_ERRTOLPEPTIDES | error tolerant peptides section. See also Error tolerant searches. |
SEC_SPECTRAL_LIBRARY | spectral library section. See also Spectral libraries. |
SEC_LIBRARYPEPTIDES | spectral library peptides section. See also Spectral libraries. |
SEC_LIBRARYSUMMARY | spectral library summary section. See also Spectral libraries. |
SEC_CROSSLINK_SUMMARY | crosslink_summary section. See also Crosslinked search results. |
SEC_CROSSLINK_PEPTIDES | crosslink_peptides section. See also Crosslinked search results. |
SEC_CROSSLINKING | crosslinking section |
SEC_UNIMOD_XL | unimod_xl section |
SEC_ERRTOLDECOYSUMMARY | error tolerant decoy summary section. See also Error tolerant searches. |
SEC_ERRTOLDECOYPEPTIDES | error tolerant decoy peptides section. See also Error tolerant searches. |
SEC_INDEX | index section |
SEC_NUMSECTIONS | !!don't use - place holder |
|
inherited |
The results file contains embedded files in XML format and these need to be validated against a schema.
This is the list of schema that can be set using setXMLschemaFilePath() and getXMLschemaFilePath()
Enumerator | |
---|---|
XML_SCHEMA_QUANTITATION | From the embedded quantitation file. Valid aliases are: "http://www.matrixscience.com/xmlns/schema/quantitation_1" and "http://www.matrixscience.com/xmlns/schema/quantitation_2". |
XML_SCHEMA_UNIMOD | From the embedded unimod file. Valid alias is: http://www.unimod.org/xmlns/schema/unimod_2. |
XML_SCHEMA_DIRECTORY | From the value of XMLschemaDirectory passed into the ms_mascotresfilebase constructor. |
XML_SCHEMA_CROSSLINKING | From the embedded crosslinking file. Valid alias is: http://www.matrixscience.com/xmlns/schema/crosslinking_1. |
XML_SCHEMA_LAST | Placeholder that is equal to the number of possible schema. |
ms_mascotresfile_dat | ( | const char * | szFileName, |
const int | keepAliveInterval = 0 , |
||
const char * | keepAliveText = "<!-- %d seconds -->\n" , |
||
const unsigned int | flags = RESFILE_NOFLAG , |
||
const char * | cacheDirectory = "../data/cache/%Y/%m" , |
||
const char * | XMLschemaDirectory = 0 , |
||
ms_progress_info * | progressMonitor = 0 |
||
) |
Constructor to open a Mascot results file in dat28 format.
If the ms_mascotresfilebase::RESFILE_NOFLAG flag is specified, then the constructor reads the whole file into memory, and makes an in memory index of all the keys for fast lookup later. This was the default (and only) behaviour for Mascot Parser versions 2.2 and earlier.
If the ms_mascotresfilebase::RESFILE_USE_CACHE flag is specified, then the file is not read into memory, but a separate cache file containing offsets is used to read each line in the results file when requested. This is considerably faster if just a few lines of the results file need to be accessed and also takes less memory. The filenames for the cache file will be created by extracting the filename part from the path supplied in szFileName and appending '.cdb'. So, typically the cache filenames will be of the form Fxxxxx.dat.cdb. See Caching Mascot Results.
For HTML reports with large result files it is sometimes necessary to output HTML fragments to keep the connection alive. This can be done by specifying the interval at which the text is output (keepAliveInterval) and the text that should be output (keepAliveText). See ms_mascotresfilebase::outputKeepAlive() for more details.
The functions isValid() or getLastError() should be called after creating the object to determine if the file was valid and loaded properly.
Possible error values:
szFileName | is the path to a valid Mascot results file |
keepAliveInterval | is the interval in seconds between each time the keepAliveText is output to stdout. If a value of zero is specified, then no keep alive text will be output. |
keepAliveText | is output every keepAliveInterval seconds while the file is being loaded. See outputKeepAlive() for further details. |
flags | are created by bitwise ORing the ms_mascotresfile_dat::FLAGS |
cacheDirectory | is the location where any cache files are stored. See Specifying cache file directory. If cacheDirectory is null or empty and RESFILE_USE_CACHE is specified, then ERR_INVALID_CACHE_DIR will be set and the object will be invalid. Most applications should get this value from ms_mascotoptions::getCacheDirectory(). |
XMLschemaDirectory | is the location where the xml schema files are located. Some sections of the results file are encoded in XML, and Mascot Parser needs to verify that these are in the correct format using 'xsd' schema files. The required files are supplied with Mascot Parser in the config directory. Either supply the path to that directory, or copy the .xsd files from it to another directory and supply the path to that (only the xsd files from that directory are required). If this parameter is 0 (the default) or empty string (""), then it is assumed that the software is running on the Mascot Server and that the files are located in separate directories under ../html/xmlns/schema as defined in setXMLschemaFilePath(). The constructor does not check that the schema files exist, but subsequent calls to functions such as getUnimod() and getQuantitationMethod() will fail if the schema is not available. The function setXMLschemaFilePath() may be called to override this value passed in the constructor for individual schema files. |
progressMonitor | is an optional parameter that can be used to track progress of the creation of this object. It can also be used (from another thread) to cancel the creation of the object by calling ms_progress_info::setBreak. If the passed progressMonitor has a subTask, created by calling ms_progress_info::addSubtask("Creating peptide summary", 100), then that subtask will be used when creating the ms_peptidesummary object. If the subtask doesn't exist, then one is created. Both pointers can be obtained by calling ms_mascotresfile_dat::getProgressInfo(). Make sure that the calling function does not delete the object pointed to by this parameter until after the ms_mascotresfile_dat object has been deleted. See: Maintaining object references: two rules of thumb |
|
overridevirtual |
Returns true if any of the queries in the search contain ions data.
See also the isMSMS() member, although this function is the preferred one.
Implements ms_mascotresfilebase.
bool anyPeptideSummaryMatches | ( | const section | sec = SEC_PEPTIDES | ) | const |
Returns true if there is a peptides section, and if there are any results in it.
If no parameter is passed to this function, information is returned about the standard peptides
section.
This function is useful if, for example, you want to know if there is a peptides
, decoy_peptides
or et_peptides
section in the results file. Very early versions of Mascot didn't have a peptides section for MS-MS searches, and the decoy and error tolerant peptides sections were added in Mascot 2.2, so it is safest to check that this function returns true before creating an ms_peptidesummary report. If this function returns false, consider creating an ms_proteinsummary instead.
This function will also return false if there is a peptides
section that doesn't contain any results.
Even if this function returns true, it is possible that an ms_peptidesummary object created from the file will be empty if, for example, all the matches have ions scores below the specified minimum ions score.
|
overridevirtual |
Returns true if any of the queries in the search just contain a single peptide mass.
See also the isPMF() member, although this function is the preferred one.
Implements ms_mascotresfilebase.
|
overridevirtual |
Returns true if any of the queries in the search contain seq
or comp
commands.
See also the isSQ() member, although this function is the preferred one.
seq
or comp
commands Implements ms_mascotresfilebase.
|
overridevirtual |
Returns true if any of the queries in the search contain tag
or etag
commands.
See also anySQ(), anyPMF() and anyMSMS()
tag
or etag
commands Implements ms_mascotresfilebase.
|
inherited |
Copies all errors from another instance and appends them at the end of own list.
src | The object to copy the errors across from. See Maintaining object references: two rules of thumb. |
|
overridevirtual |
Multiple results files can be summed together and treated as 'one'.
See Combining multiple results files.
Attempts to load the specified results file and append to the existing file.
Any warning or error messages in the file are also appended to the existing object. If isValid() for the new file returns false, it is not appended and this function returns 0.
A merged report requires the results to be in the same file format. That is, a dat28 (.dat) can be appended to another dat28 file, and an MSR file (.msr) can be appended to another MSR file, but you cannot append an MSR file to a dat28 file or vice versa.
ms_errs::ERR_INVALID_RESFILE, ms_errs::ERR_READINGFILE or ms_errs::ERR_MSP_MSR_READING_FILE may be set if the file formats are not compatible.
ms_errs::ERR_CANNOT_APPEND_RESFILE_NO_FNAMES and ms_errs::ERR_CANNOT_APPEND_RESFILE will be set if the file cannot be appended because of different parameters, such as a different enzyme.
filename | is the path to the results file to append. |
flags | are one of the ms_mascotresfilebase::FLAGS. If RESFILE_USE_PARENT_PARAMS is specified, then the flags, keepAlive and cache directory are copied from the parent object. |
cacheDirectory | is the directory for the cache files if RESFILE_USE_CACHE has been specified. If RESFILE_USE_PARENT_PARAMS is specified and cacheDirectory is null or an empty string, then the cache directory for the parent object is used. |
Implements ms_mascotresfilebase.
|
inherited |
Remove all errors from the current list of errors.
The list of 'errors' can include fatal errors, warning messages, information messages and different levels of debugging messages.
All messages are accumulated into a list in this object, until clearAllErrors() is called.
See Error Handling.
|
inherited |
Use this member to make a copy of another instance.
right | is the source to initialise from |
|
staticinherited |
Return a new ms_mascotresfile_msr or ms_mascotresfile_dat based on the file contents.
The function 'sniffs' and detects the file contents using ms_mascotresfilebase::resfileType. If it looks like an MSR file, the function returns a new ms_mascotresfile_msr. If it looks like a dat28 (.dat) file, the function returns ms_mascotresfile_dat. Otherwise, a 'nil' object is returned which contains no data and is invalid.
The arguments are the same between the classes, but the details differ a bit. For example, ms_mascotresfile_msr doesn't need caching for fast random access, so it ignores the flags parameter. However, it's always safe to use the same flags regardless of class.
Please see the detailed class documentation:
szFileName | is the path to a valid Mascot results file |
keepAliveInterval | is the interval in seconds between each time the keepAliveText is output to stdout. |
keepAliveText | is output every keepAliveInterval seconds while the file is being loaded. See outputKeepAlive() for further details. |
flags | are created by bitwise ORing the ms_mascotresfile_dat::FLAGS. |
cacheDirectory | is the location where any cache files are stored. |
XMLschemaDirectory | is the location where the xml schema files are located. |
progressMonitor | is an optional parameter that can be used to track progress of the creation of this object. |
bool doesSectionExist | ( | const section | sec | ) | const |
Returns true if there is an entry for the passed section.
This function is useful if, for example, you want to know if there is a peptides section in the results file. Very early versions of Mascot didn't have a peptides
section for MS-MS searches.
sec | is the section number |
std::string enumerateQuerySectionKeys | ( | const int | query, |
const int | num, | ||
int * | pPreviousNum = 0 , |
||
OFFSET64_T * | pPreviousOffset = 0 |
||
) | const |
Get the key name for each item in a query section.
Enumerate through all the entries in a section. It returns the key name (not value) for a single item.
In versions of Mascot Parser prior to version 2.3, the items are returned in alphabetical order. In Mascot Parser version 2.3 and later, the items are returned in the order in the results file.
See ms_searchparams::getAllUSERParams for an example of using this method.
query | should be in the range 1..getNumQueries(). |
num | is the line number (1..n) of the line in the required section. |
pPreviousNum | is an optional parameter to speed up iterating through a large section when using cached files. If this value is not null, then the pPreviousOffset should also be passed. |
pPreviousOffset | is an optional parameter to speed up iterating through a large section when using cached files. If this value is not null, then the pPreviousNum should also be passed. |
std::string enumerateSectionKeys | ( | const section | sec, |
const int | num, | ||
int * | pPreviousNum = 0 , |
||
OFFSET64_T * | pPreviousOffset = 0 |
||
) | const |
Get the key name for each item in a section.
Enumerate through all the entries in a section. It returns the key name (not value) for a single item.
In versions of Mascot Parser prior to version 2.3, the items are returned in alphabetical order. In Mascot Parser version 2.3 and later, the items are returned in the order in the results file.
See ms_searchparams::getAllUSERParams for an example of using this method.
sec | can be any section number that contains value= lines. This function will not work for sections that contain embedded XML such as the unimod section. |
num | is the line number (1..n) of the line in the required section. |
pPreviousNum | is an optional parameter to speed up iterating through a large section when using cached files. If this value is not null, then the pPreviousOffset should also be passed. |
pPreviousOffset | is an optional parameter to speed up iterating through a large section when using cached files. If this value is not null, then the pPreviousNum should also be passed. |
|
inherited |
Return default flags and parameters for creating an ms_peptidesummary or ms_proteinsummary object.
A number of optional flags and parameters can be passed to the ms_proteinsummary or ms_peptidesummary constructors. For an application or script running on the Mascot server, the default values for some of these parameters should normally be taken from the mascot.dat file. This function sets the values and flags required to pass to the constructor in the passed ms_mascotresults_params object.
[in] | opts | contains the options stored in mascot.dat. Call the ms_datfile constructor and then ms_datfile::getMascotOptions() to obtain this value. |
[out] | resParams | the values and flags required to pass to the peptide or protein summary object are set to this ms_mascotresults_params object, overwriting any values which were already set. |
|
inherited |
[Deprecated] Return default flags and parameters for creating an ms_peptidesummary or ms_proteinsummary object.
A number of optional flags and parameters can be passed to the ms_proteinsummary or ms_peptidesummary constructors. For an application or script running on the Mascot server, the default values for some of these parameters should normally be taken from the mascot.dat file. This function returns the values and flags required to pass to the constructor.
See Multiple return values in Perl, Java, Python and C#.
[in] | opts | contains the options stored in mascot.dat. Call the ms_datfile constructor and then ms_datfile::getMascotOptions() to obtain this value. |
[out] | gpFlags | will return the flags that are to be passed as the second parameter to the ms_proteinsummary or ms_peptidesummary object. |
[out] | gpMinProbability | is the third parameter to be passed to the ms_proteinsummary or ms_peptidesummary objects. This return value will normally be equal to the value returned from ms_mascotoptions::getSigThreshold(). |
[out] | gpMaxHitsToReport | this return value will normally be the one returned by ms_searchparams::getREPORT(). |
[out] | gpIgnoreIonsScoreBelow | this return value will be the one returned by ms_mascotoptions::getIgnoreIonsScoreBelow(). |
[out] | gpMinPepLenInPepSummary | this return value will be the one returned by ms_mascotoptions::getMinPepLenInPepSummary. |
[out] | gpUsePeptideSummary | will be false if the file doesn't contain any MS-MS data (as returned by the anyMSMS() function) or any sequence tags (anyTag()). In this case, you should create an ms_proteinsummary. If gpUsePeptideSummary is true, you should create an ms_peptidesummary object. |
[out] | gpFlags2 | is only required for an ms_peptidesummary. If gpUsePeptideSummary is true, gpFlags2 will have the following bits set.
|
|
overridevirtual |
Returns the directory being used for cache files (if any).
The cacheDirectory supplied to the constructor ms_mascotresfilebase::ms_mascotresfilebase may contain a number of '%' flags which get substituted by Mascot Parser.
This function returns either an absolute directory or a directory relative to the current working directory, depending on what was supplied and the parameter processed.
See Caching Mascot Results and ms_mascotoptions::getCacheDirectory
processed | if true (the default), then the returned directory is relative to the current directory and will have any '%' flags replaced with the relevant directory. If processed is false, then the directory returned will be identical to the one passed to the constructor. |
Implements ms_mascotresfilebase.
|
overridevirtual |
Returns the filename of the cache file.
If the cacheDirectory supplied to the ms_mascotresfile_dat::ms_mascotresfile_dat constructor is not empty, then a filename will be returned. This does not guarantee that the file exists, or is being used.
Implements ms_mascotresfilebase.
|
overrideprotectedvirtual |
The return value indicates that the embedded crosslinking method file exists in the results file. Call ms_crosslinking_configfile::isValid to determine whether the XML part has been parsed successfully.
The contents of the file are validated against a schema by default.
See Object initialising functions in Perl, Java, Python and C#.
crosslinkingFile | a pointer to crosslinking file object. This must be a valid pointer to a valid object, which should normally be created using the default constructor: ms_crosslinking_configfile::ms_crosslinking_configfile |
Implements ms_mascotresfilebase.
|
inherited |
Return the crosslinking method object from the embedded crosslinking file.
This method returns true if all of the following apply:
CROSSLINKING
parameter exists; Otherwise the method returns false.
If the CROSSLINKING
parameter is empty or equals "none", then the method simply returns false. Otherwise, on failure, the method sets the warning ms_errors::ERR_MSP_CROSSLINKING_FAILEDLOAD.
method | A pointer to crosslinking method object. This must be a valid pointer to a valid object, which should normally be created using the default constructor ms_quant_method::ms_quant_method. |
|
overridevirtual |
Return database type if available.
In dat28 format, Mascot 2.6 and later save the type of the searched database(s) in the results file, as db_typeX=
lines in the header section. These types are AA (amino acid), NA (nucleic acid) or SL (spectral library).
In MSR format, introduced in Mascot Server 3.0, the 'db_type' column is always present in the search__databases table.
The number of databases is ms_searchparams::getNumberOfDatabases(), so idx should be between 1 and getNumberOfDatabases()
.
Spectral libraries must have a reference database. If the reference database is not part of the actual search, protein accessions mapped to it have a database number above ms_searchparams::getNumberOfDatabases(). For example, if the search contains one AA database and one spectral library, getNumberOfDatabases() is 2 and the types returned by getDatabaseType() are AA (idx = 1) and SL (idx = 2). The reference database is at index 3 with type SLREF.
To find the number of the reference database of a spectral library, see ms_mascotresfilebase::getReferenceDatabaseNumberOfSL().
idx | index of the database; must normally be between 1 and ms_searchparams::getNumberOfDatabases(), or a valid database number returned by ms_mascotresfilebase::getReferenceDatabaseNumberOfSL() |
Implements ms_mascotresfilebase.
|
inlineoverridevirtual |
Returns the date and time of the search in seconds since midnight January 1st 1970.
In dat28 format, obtained from the date=
line in the header section of the file.
In MSR format, obtained from the 'date' row in the search__header table.
Can be converted to day, month, year etc. using gmtime or similar functions.
Implements ms_mascotresfilebase.
|
overridevirtual |
Returns the decoy algorithm type for a given database.
In dat28 format, the decoy algorithm type is saved as decoy_type=
or decoy_typeX=
in the header section, depending on Mascot version.
In MSR format, the algorithm type is saved as the 'decoy_type' column in the search__databases table.
If idx = 1, the method returns the value of decoy_type=
. If idx > 1, the method returns the corresponding decoy_typeX=
line, or if one doesn't exist, falls back on decoy_type=
.
If there is no suitable value in the file or idx is outside its range, the method returns ms_mascotoptions::DECOY_ALGORITHM_NONE.
idx | index of the database from 1 to ms_searchparams::getNumberOfDatabases(). |
Implements ms_mascotresfilebase.
|
overridevirtual |
Returns an object that represents the embedded enzyme file as a reduced enzymes
file.
In dat28 format, for data files created with Mascot 2.2 and later, the full definition of the enzyme used is included in the Mascot results file. For earlier versions of Mascot, just the name is recorded. This function attempts to read the definition from the results file. If the definition is not present in the results file and a path to the enzymes file has been passed, then this function reads the enzymes file and removes all entries from the list in memory apart from the one with the name specified in the results file.
In MSR format, the full definition of the enzyme used in the search is included in the Mascot results file.
To determine whether the content has been parsed successfully call ms_enzymefile::isValid.
See Object initialising functions in Perl, Java, Python and C#.
efile | a pointer to enzymes-file object that will accept the content from the embedded file or the external enzymes file if necessary. If successful, the enzyme itself can be retrieved by passing an index of zero to ms_enzymefile::getEnzymeByNumber() |
enzymeFileName | is only used for results files prior to Mascot 2.2 |
Implements ms_mascotresfilebase.
|
inherited |
Retrieve the error object using this function to get access to all errors and error parameters.
See Error Handling.
|
protectedinherited |
For saving any errors in the .cdb file
num | 1..getNumberOfErrors() |
|
inherited |
Return a specific error number - or ms_errs::ERR_NO_ERROR.
All errors are accumulated into a list in this object, until clearAllErrors() is called.
Errors in other classes are accumulated here. If, for example, there is an error when creating a peptide summary, the errors need to be accessed through this class.
See Error Handling.
In Mascot Parser 2.5 and later, this is implemented by calling: ms_errs::getErrorNumber()
num | is the error number in the range 1..getNumberOfErrors(). Passing a value of -1 will return the last error, or ERR_NO_ERROR. If an invalid number is passed, ERR_NO_ERROR will be returned (and no error will be added to the list of errors!). |
|
inherited |
Return a specific error as a string.
All errors are accumulated into a list in this object, until clearAllErrors() is called. To return a particular error, call this function with a number 1..getNumberOfErrors(). Passing a value of -1 will return the last error, or an empty string. If an invalid number is passed an empty string will be returned (and no error will be added to the list of errors!).
Errors in other classes are accumulated here. If, for example, there is an error when creating a peptide summary, the errors need to be accessed through this class.
In Mascot Parser 2.5 and later, this is implemented by calling ms_errs::getErrorString but functionality is identical to previous versions.
See Error Handling.
num | 1 to number of errors, or -1 |
|
inlineoverridevirtual |
Returns the time taken for the search.
In dat28 format, obtained from the exec_time=
line in the header section.
In MSR format, obtained from the 'exec_time' row in the search__header table.
This is the 'wall clock' time, not the CPU time.
Implements ms_mascotresfilebase.
|
overridevirtual |
Returns the path to the FASTA file used.
Available in Mascot 2.2 and later.
Mascot 2.6 and later support spectral libraries. Each spectral library must have a reference database into which found peptide sequences are mapped at the end of the search. The "effective" reference database could be one of the protein sequence databases searched, or it could be a separate database used only for lookup purposes. You can find the database number of the reference database with ms_mascotresfilebase::getReferenceDatabaseNumberOfSL(). If this is larger than getNumberOfDatabases(), the FASTA file path is obtained from the library_reference_fastafile
line in the results file header.
idx | index of the database from 1 to ms_searchparams::getNumberOfDatabases() for databases or libraries searched, or an index returned by ms_mascotresfilebase::getReferenceDatabaseNumberOfSL(). |
Implements ms_mascotresfilebase.
|
overridevirtual |
Returns the FASTA file version.
Mascot 2.6 and later support spectral libraries. Each spectral library must have a reference database into which found peptide sequences are mapped at the end of the search. The "effective" reference database could be one of the protein sequence databases searched, or it could be a separate database used only for lookup purposes. You can find the database number of the reference database with ms_mascotresfilebase::getReferenceDatabaseNumberOfSL(). If this is larger than getNumberOfDatabases(), the FASTA file version is obtained from the library_reference_release
line in the results file header.
idx | index of the database from 1 to ms_searchparams::getNumberOfDatabases() for databases or libraries searched, or an index returned by ms_mascotresfilebase::getReferenceDatabaseNumberOfSL(). |
Implements ms_mascotresfilebase.
|
overridevirtual |
Returns the name of the results file passed into the constructor.
id | a 1 based index. Unless appendResfile() has been called, this value must be '1'. |
Implements ms_mascotresfilebase.
|
overridevirtual |
Return the threshold value for the first pass of an automated error tolerant search.
Return the first pass threshold of an automated error tolerant search. This is the significance threshold used at the end of the first pass to select proteins for the second pass.
In Mascot Server 2.7 and earlier, the first pass threshold was always 0.05.
In Mascot Server 2.8 and later, you can specify a target FDR in the search form. If the search is an error tolerant target-decoy search with target FDR, the first pass threshold can differ from 0.05.
See Score thresholds and score filtering (Mascot Server 2.8 and later).
Implements ms_mascotresfilebase.
|
overridevirtual |
Return all the header key-value pairs.
Get all the key-value pairs of the results header. The header contains data such as: number of queries, exec_time (search duration), database types (AA, NA, SL) and task ID.
To get the value of an individual header key, use ms_mascotresfilebase::getHeaderValue().
[out] | keys | A vector of non-empty keys. |
[out] | values | A vector of strings (some may be empty) in the same order as keys. |
Implements ms_mascotresfilebase.
|
overridevirtual |
Return the header value for the given key.
Get the value associated with the input key in the results header. To get all the values, use ms_mascotresfilebase::getHeaderKeyValues().
[in] | key | A non-empty string. |
Implements ms_mascotresfilebase.
|
overridevirtual |
Return the ms_inputquery object for the query given as argument.
This method call is equivalent to creating an ms_inputquery object with the current results file and queryNum as parameter.
queryNum | is the query number |
Implements ms_mascotresfilebase.
|
overridevirtual |
Return the job number for this file - obtained from the file name.
The library can only 'guess' at this since the value is not in the results file. To perform this function, it retrieves the job number from the file name, so be warned about changing file names. The function returns 0 if it cannot determine the job number.
resfileID | is the 1 based id of the results file. If multiple files have been merged together with appendResfile(), use the file ID returned by appendResfile() or getSrcFileIdForMultiFile() to access the job number of the appended files. |
Implements ms_mascotresfilebase.
|
inherited |
Return the progress indicators used by the keepAlive functions.
See Multiple return values in Perl, Java, Python and C# although there may be issues with some languages and the kaTask parameter.
It is normally easier for client applications to call ms_mascotresults::getCreateSummaryProgress() or outputKeepAlive() than to call this function.
kaTask | is the task currently being performed by Mascot Parser. If Parser is doing nothing, then this will be the last task that was completed and kaPercentage will be 100 |
kaPercentage | is the percentage (0..100) complete for the current kaTask. |
kaAccession | is the current 'accession' being processed. See outputKeepAlive() for details of which tasks set this value |
kaHit | is the current hit being processed. See outputKeepAlive() for details of which tasks set this value |
kaQuery | is the current 'query' being processed. See outputKeepAlive() for details of which tasks set this value |
kaText | is the text that would be output by outputKeepAlive() |
|
inherited |
Return the last error number - or ms_errs::ERR_NO_ERROR.
Same as calling getErrorNumber() with -1 as a parameter.
|
inherited |
Return the last error string - or an empty string.
Same as calling getErrorString() with -1 as a parameter.
|
overridevirtual |
Return all the library mod names and deltas.
Get a list of all library modifications. If multiple spectral libraries were searched, this is the combined list of modifications.
[out] | modNames | Library modification names. |
[out] | modDeltas | Library modification deltas. |
Implements ms_mascotresfilebase.
|
inlineoverridevirtual |
Returns the version of Mascot used to perform the search.
In dat28 format, obtained from the version=
entry in the header section of the file.
In MSR format, obtained from the 'version' row in the search__header table.
Implements ms_mascotresfilebase.
|
inherited |
Returns an ms_masses object from the mass values in the results file.
For results files from Mascot 2.2 and later, this function simply calls
if (getUnimod(&umodConfigFile) && umodConfigFile.isValid()) { masses->copyFrom(&umodConfigFile); }
For earlier results files in dat28 format, it reads the residue mass values from the masses section of the file. Since this section either contains average or monoisotopic masses (but not both), the resulting ms_masses object will only have one set of masses.
See Object initialising functions in Perl, Java, Python and C#.
masses | a pointer to a valid masses object that will accept the content from the section. |
|
overridevirtual |
Return all the residue and modification masses as key-value pairs.
Get all the modification and residue masses as key-value pairs. This includes:
To get the value of an individual key, use ms_mascotresfilebase::getMassValue().
In general, it is easier to access these values through ms_searchparams.
[out] | keys | A vector of non-empty keys. |
[out] | values | A vector of doubles in the same order as keys. |
Implements ms_mascotresfilebase.
|
overridevirtual |
Return the residue or modification mass for the given key.
Get the value associated with the input key in the list of masses. To get all the values, use ms_mascotresfilebase::getMassKeyValues().
[in] | key | A non-empty string. |
Implements ms_mascotresfilebase.
|
inherited |
Returns an ms_modification object that represents a monolink variable modification.
The method performs the following steps:
The ms_modification object contains the following fields:
modNum | Variable mod index in range 1..32. Usually this comes from the peptide's variable mods string. |
monoLink | Index of the neutral loss element. Usually this comes from the peptide's monolink string. |
|
inherited |
Returns the version number of the Mascot Parser library.
This version information is also available:
perl -Mmsparser -e "print msparser->VERSION()"
JarFile jar = new JarFile(new File(jarName)); Manifest jarManifest = jar.getManifest(); Attributes mainAttributes = jarManifest.getMainAttributes(); String version = (String) mainAttributes.get(Attributes.Name.IMPLEMENTATION_VERSION);
|
overridevirtual |
Return the multi-file query number from the local query number in an appended file.
See Multiple return values in Perl, Java, Python and C#.
Needs to be called on the 'primary' file object rather than on a ms_mascotresfile_dat or ms_mascotresfile_msr object returned by the getResfile() function.
See Combining multiple results files.
Example: Assume that the primary results file has 10 queries, file 2 has 20 queries and file 3 has 30 queries. This function will return the following:
localQuery | fileId | returned query |
---|---|---|
6 | 1 | 6 |
11 | 2 | 21 |
1 | 3 | 31 |
localQuery | is the query number which should be a value between 1 and getNumQueries() for the file specified by fileId. |
fileId | is a 1 based index to the source file. |
Implements ms_mascotresfilebase.
|
inherited |
Return the number of errors since the last call to clearAllErrors.
This will be zero if there has been no error.
All errors are accumulated into a list in this object, until clearAllErrors() is called.
Errors in other classes are accumulated here. If, for example, there is an error when creating a peptide summary, the errors need to be accessed through this class.
From version 2.5, implemented by calling getErrorHandler()->getNumberOfErrors()
See Error Handling.
|
overridevirtual |
Multiple results files can be summed together and treated as 'one'.
See Combining multiple results files.
Implements ms_mascotresfilebase.
|
overridevirtual |
Returns the number of sequences searched in the second pass of an integrated error tolerant search.
In dat28 format, the value is obtained from the et_sequences=
or et_sequencesX=
line in the header section of the file.
In MSR format, the value is obtained from the 'et_sequences' column in the search__databases table.
See Integrated error tolerant search. This function will return -1 for the Original error tolerant search and for searches prior to Mascot 2.4.1
idx | index of the database. Specifying a value of 0 (the default) will return the sum of all sequences after taxonomy in all FASTA files searched. Otherwise, the value should be in the range from 1 to ms_searchparams::getNumberOfDatabases(). Specifying a value outside the range of 0..ms_searchparams::getNumberOfDatabases() will result in -1 being returned. If the database at index idx is a spectral library, the value returned is -1. |
Implements ms_mascotresfilebase.
int getNumHits | ( | const section | sec = SEC_SUMMARY | ) | const |
Returns the maximum number of hits possible for a protein summary.
If no parameter is passed to this function, information is returned about the standard 'summary' section.
This function returns the number of hits contained in the summary, decoy_summary, et_summary or et_decoy_summary section of the results file. The value returned by ms_proteinsummary::getNumberOfHits will generally be more useful. The number of hits recorded in the summary section sec could be zero even if there are peptide matches in the corresponding peptides section.
If appendResfile() has been called (see Combining multiple results files) then this function will return -1 because the protein summary is not supported for multiple dat files.
sec | Can be one of ms_mascotresfile_dat::SEC_SUMMARY, ms_mascotresfile_dat::SEC_DECOYSUMMARY, ms_mascotresfile_dat::SEC_ERRTOLSUMMARY, ms_mascotresfile_dat::SEC_ERRTOLDECOYSUMMARY. |
num_hits=
in the specified section or -1 if an invalid section is passed or if the object is a multifile results file.
|
overridevirtual |
Returns the number of entries in the spectral library searched.
In dat28 format, the value is obtained from the library_entriesX=
line in the header section of the file. Spectral library support was added in Mascot 2.6.
In MSR format, the value is obtained by summing up the values of the 'entries' column in the search__spectral_libraries table.
idx | index of the database. Specifying a value of 0 (the default) will return the sum of all library entries searched. Otherwise, the value should be in the range from 1 to ms_searchparams::getNumberOfDatabases(). Specifying a value outside the range of 0..ms_searchparams::getNumberOfDatabases() will result in -1 being returned. If the database at index idx is not a spectral library, the value returned is -1. |
Implements ms_mascotresfilebase.
|
overridevirtual |
Returns the number of queries (peptide masses or ms-ms spectra).
Obtained from the queries=
line in the header section of the file (dat28 format) or by getting the maximum value of the 'query_id' column in the query__data table (MSR format).
resfileID | is the 1 based id of the results file. When the default value of 0 is used for a single results file, this is the number of queries in the file. For Combining multiple results files, supplying a value of zero returns the total number of queries in all the results files that have been combined. Use a value of 1 to get the number of queries just in the first results file. |
Implements ms_mascotresfilebase.
|
overridevirtual |
Returns the number of residues in the FASTA file(s) searched.
In dat28 format, the value is obtained from the residues=
or residuesX=
line in the header section of the file. Multiple FASTA file support was added in Mascot 2.3.
In MSR format, the value is obtained from the 'residues' column of the search__databases table.
idx | index of the database. Specifying a value of 0 (the default) will return the sum of all residues in all FASTA files searched. Otherwise, the value should be in the range from 1 to ms_searchparams::getNumberOfDatabases(). Specifying a value outside the range of 0..ms_searchparams::getNumberOfDatabases() will result in -1 being returned. If the database at index idx is a spectral library, the value returned is 0. |
Implements ms_mascotresfilebase.
|
overridevirtual |
Returns the number of sequences in the FASTA file(s) searched.
In dat28 format, the value is obtained from the sequences=
or sequencesX=
line in the header section of the file. Multiple FASTA file support was added in Mascot 2.3.
In MSR format, the value is obtained from the 'sequences' column in the search__databases table.
idx | index of the database. Specifying a value of 0 (the default) will return the sum of all sequences in all FASTA files searched. Otherwise, the value should be in the range from 1 to ms_searchparams::getNumberOfDatabases(). Specifying a value outside the range of 0..ms_searchparams::getNumberOfDatabases() will result in -1 being returned. If the database at index idx is a spectral library, the value returned is 0. |
Implements ms_mascotresfilebase.
|
overridevirtual |
Returns the number of sequences that passed the taxonomy filter in the FASTA file(s) searched.
In dat28 format, the value is obtained from the sequences_after_tax=
or sequences_after_taxX=
line in the header section of the file. Multiple FASTA file support was added in Mascot 2.3.
In MSR format, the value is obtained from the 'sequences_after_tax' column of the search__databases table.
idx | index of the database. Specifying a value of 0 (the default) will return the sum of all sequences after taxonomy in all FASTA files searched. Otherwise, the value should be in the range from 1 to ms_searchparams::getNumberOfDatabases(). Specifying a value outside the range of 0..ms_searchparams::getNumberOfDatabases() will result in -1 being returned. If the database at index idx is a spectral library, the value returned is 0. |
Implements ms_mascotresfilebase.
|
overridevirtual |
The 'charge' returned will be 0 for Mr, otherwise it will be 1, -1, 2, -2, 3, -3 etc. and -100 for an error.
In dat28 format, this is obtained from the qexp[query] value. It will come from the SEC_SUMMARY section. If decoy is set to true, it is obtained from the SEC_DECOYSUMMARY section.
In MSR format, the observed charge is obtained from the query__summary table. If decoy is false, the value comes from the standard, target pass of the search; if decoy is true, from the standard, decoy pass.
The 'charge' returned will be 0 for Mr, otherwise it will be 1, -1, 2, -2, 3, -3 etc. and -100 for an error.
Possible error values:
If an 'ambiguous' charge state is specified for the whole search or for a specific query, then Mascot just records matches for the highest scoring charge state, and it is this charge state that is returned from this function. For example, the search may have been performed with "2+, 3+ or 4+" and ms_inputquery::getCharge() will return "2+, 3+ or 4+". If the highest scoring peptide match for a particular query was to charge state 4+, then all top 10 matches for that query will be for 4+ and this function will return '4'.
It is therefore possible to get different charge values for standard peptide matches from the decoy and target passes of the results files.
In dat28 format, this method can only read charge from the 'summary' and 'decoy_summary' sections. In MSR format, this method can only read charge for standard target or decoy peptide matches. If the peptide match type is error tolerant or crosslink, it's best to use ms_peptide::getCharge(), as these can have different charge state from the standard peptide matches in this query.
The functions getObservedIntensity() and getObservedMass() do not require the decoy parameter as the values will be identical from target and decoy passes.
query | is a number in the range 1..getNumQueries() |
decoy | should be false for target and true for decoy matches. |
Implements ms_mascotresfilebase.
|
overridevirtual |
Returns the experimental intensity for the peptide.
In dat28 format, this is obtained from SEC_SUMMARY - qintensity[query]. In MSR format, from the query__summary table.
The observed intensity is not always available and does not need to be supplied by the end user.
Returns 0 if the query cannot be found and sets the error ms_errs::ERR_QUERYOUTOFRANGE.
query | is a number in the range 1..getNumQueries() |
Implements ms_mascotresfilebase.
|
overridevirtual |
Returns the experimental mass value as entered by the user.
In dat28 format, this is obtained from SEC_SUMMARY - qexp[query].
In MSR format, this is obtained from the query__summary table.
Returns 0 if the value cannot be found and sets the error ms_errs::ERR_QUERYOUTOFRANGE.
query | query number |
Implements ms_mascotresfilebase.
|
overridevirtual |
Returns the experimental mass value (as a relative mass) as entered by the user.
In dat28 format, this is obtained from the qmass[query] value. It will come from the SEC_SUMMARY section unless decoy is set to true in which case it is obtained from the SEC_DECOYSUMMARY section.
In MSR format, the value is obtained from the query__summary table. If decoy is false, the value comes from the standard, target pass of the search; if decoy is true, from the standard, decoy pass.
Returns 0 if the value cannot be found and sets the error ms_errs::ERR_QUERYOUTOFRANGE.
query | is a number in the range 1..getNumQueries() |
decoy | should only be set to true if ms_searchparams::getDECOY returns true. |
Implements ms_mascotresfilebase.
|
static |
Return peptide section in the .dat file that contains matches of type pepType.
pepType | the ms_peptide::PSM_TYPE |
|
inherited |
Retrieve the filenames used for percolator input and output.
This function will return an empty vector unless setPercolatorFeatures() has been called beforehand.
The offsets into the return array are defined by ms_mascotresfilebase::PERCOLATOR_FILE_NAMES.
See Using STL vector classes vectori, vectord and VectorString in Perl, Java, Python and C#.
You can get the list of Percolator file names without creating an object by using matrix_science::ms_mascotresfilebase::staticGetPercolatorFileNames(const char*, const char*, const ms_mascotoptions&, const char *, const std::vector<std::string>&, std::vector<std::string>&, std::vector<bool>&).
|
inherited |
If a matrix_science::ms_progress_info object is passed to the constructor, this is returned here.
forPeptideSummary | should be set to true to get the progress object used when creating the ms_peptidesummary, or false to return the object used when creating the ms_mascotresfilebase object. |
|
overridevirtual |
Return the number of peptide masses within precursor tolerance of this query.
Return the 'qmatch' value of the query. This is the count of trials for the query, where a trial is a candidate peptide sequence + modifications whose mass is within precursor tolerance.
The count of trials could be different depending on the peptide type. For example, error tolerant peptide matches normally have much higher count of trials than peptides from the first pass of the search.
query | Query number, 1..getNumQueries(). |
pepType | Peptide match type. |
Implements ms_mascotresfilebase.
|
overridevirtual |
Return the threshold score for homologous peptide match (MIS only).
Return the 'qplughole' value of the query. This is the critical value for calculating the homology threshold. The value could be different depending on peptide type, as the empirical score distribution within a query can be different between (for example) the first and second pass of an error tolerant search.
query | Query number, 1..getNumQueries(). |
pepType | Peptide match type. |
Implements ms_mascotresfilebase.
|
overridevirtual |
Returns an object that represents the embedded quantitation file as a reduced quantitation.xml
file.
The return value indicates that the embedded quantitation file exists in the results file. Call ms_quant_configfile::isValid to determine whether the XML part has been parsed successfully.
For quantitation_2 and later, the contents of the file are validated against a schema by default. For quantitation_1, to explicitly validate against a schema, use ms_quant_configfile::setSchemaFileName() to choose a schema, and then use ms_quant_configfile::validateDocument() to validate.
See Object initialising functions in Perl, Java, Python and C#.
qfile | a pointer to quantitation file object. This must be a valid pointer to a valid object, which should normally be created using the default constructor: ms_quant_configfile::ms_quant_configfile |
Implements ms_mascotresfilebase.
|
inherited |
Return the quantitation method object from the embedded quantitation file.
This method returns true if all of the following apply:
QUANTITATION
parameter exists; Otherwise the method returns false.
If the QUANTITATION
parameter is empty or equals "none", then the method simply returns false. Otherwise, on failure, the method sets the warning ms_errors::ERR_MSP_QUANT_FAILEDLOAD.
qm | A pointer to quantitation method object. This must be a valid pointer to a valid object, which should normally be created using the default constructor ms_quant_method::ms_quant_method. |
int getQuerySectionValue | ( | const int | query, |
const char * | key, | ||
char * | str, | ||
int | maxLen | ||
) | const |
Return the string value from a query in the results file.
Returns a string from the query
section. It is generally easier to use the ms_inputquery class rather than use this lower level function.
query | is a number in the range 1..getNumQueries() |
key | is case insensitive. |
str | is a pointer to a buffer to receive the string. |
maxLen | is the length of the buffer (including a null terminator). |
double getQuerySectionValueDouble | ( | const int | query, |
const char * | key | ||
) | const |
Return the floating point value from a query in the results file.
Gets the number from the query section of the results file if it exists.
query | is a number in the range 1..getNumQueries() |
key | is case insensitive. |
int getQuerySectionValueInt | ( | const int | query, |
const char * | key | ||
) | const |
Return the integer value from a query in the results file.
Gets the number from the query section of the results file if it exists.
query | is a number in the range 1..getNumQueries() |
key | is case insensitive. |
std::string getQuerySectionValueStr | ( | const int | query, |
const char * | key | ||
) | const |
Return the string value from a query in the results file.
Gets the string from the query section of the results file if it exists. It is generally easier to use the ms_inputquery class rather than use this lower level function.
query | is a number in the range 1..getNumQueries() |
key | is case insensitive. |
|
overridevirtual |
Return the database number of the reference database of a spectral library.
The reference database of a spectral library is either one of the databases searched – if the reference database was part of the actual search – or a virtual database whose number is above ms_searchparams::getNumberOfDatabases(). In the first case, getReferenceDatabaseNumberOfSL() returns a database number between 1 and ms_searchparams::getNumberOfDatabases(). In the second case, the number is above getNumberOfDatabases()
.
If idx does not refer to a spectral library, the method returns -1.
idx | Database number of the spectral library of interest, between 1 and ms_searchparams::getNumberOfDatabases() and whose type is SL (see getDatabaseType()) |
Implements ms_mascotresfilebase.
|
overridevirtual |
Returns the query string that needs to be built up in order to perform a repeat search.
If the 'fullQuery' parameter is false (the default) then the format will be:
mr from(observed, charge) query(querynum) etc.
If the 'fullQuery' parameter is true then the format will be:
mr from(observed, charge1, charge2...) ions() etc.
where 'etc' will be one or more of the following (split onto several lines here for readability):
intensity(value) peptol(value, units) seq() comp() tag() etag() title() instrument() it_mods() index() rtinseconds() rtinseconds[x]() scans scans[x]() rawscans rawscans[x]()
mr
value is for the first charge. PepTol=
line in the queryx
section of the results file. In MSR format, from the 'pep_tol' column in the query__data table. seq1=
line in the queryx
section of the results file. If there were multiple seq()
commands in the original search, then these will be in the results file as seq1=
, seq2=
etc., and will be returned as seq(...) seq(...)
. In MSR format, the values are selected from the query__seq table. comp1=
line in the queryx
section of the results file. If there were multiple comp()
commands in the original search, then these will be in the results file as comp1=
, comp2=
etc., and will be returned as comp(...) comp(...)
. In MSR format, the values are selected from the query__comp table. tag1=t
line in the queryx section of the results file. If there were multiple tag()
commands in the original search, then these will be in the results file as tag1=
, tag2=
etc., and will be returned as tag(...) tag(...)
. In MSR format, the values are selected from the query__seq_tag table. tag1=e
line in the queryx
section of the results file. If there were multiple etag()
commands in the original search, then these will be in the results file as tag1=
, tag2=
etc., and will be returned as etag(...) etag(...)
. In MSR format, the values are selected from the query__seq_tag table. scans(29-34, 43)
or scans[0](29-34) scans[1](43)
. rtinseconds(10-20, 25)
or rtinseconds[0](10-20) rtinseconds[1](25)
. rawscans[0](pd0cy1ex1:pd0cy1ex3) rawscans[1](fn2ix1)
. rawfile(c:/data/rawfile.raw)
. locus(2.1.1.24.1)
. nph-mascot.exe
comes to the query(x)
command it gets the ions values from the original .dat file. Returns 0 if the value cannot be found and sets the error ms_errs::ERR_QUERYOUTOFRANGE.
See Automated repeating of searches.
query | is a number in the range 1..getNumQueries() |
fullQuery | If true, then a complete and self contained sequence query will be returned. See above for details |
Implements ms_mascotresfilebase.
|
overridevirtual |
Returns a pointer to the underlying ms_mascotresfile_dat object created by calling appendResfile.
See Combining multiple results files and Maintaining object references: two rules of thumb.
id | is the 1 based id of the results file and must be in the range 1..getNumberOfResfiles(). A value of 1 will (not particularly usefully!) return a pointer to the ms_mascotresfilebase that was originally created. A value of 2 will return ms_mascotresfilebase created in the first successful call to appendResfile(), and so on. |
Implements ms_mascotresfilebase.
|
overridevirtual |
Return the search parameter for the given key.
Get the value associated with the input key in the search parameters. To get all the values, use ms_mascotresfilebase::getSearchParameterKeyValues().
[in] | key | A non-empty string. |
Implements ms_mascotresfilebase.
|
overridevirtual |
Return all the search parameters as key-value pairs.
Get all the search parameters as key-value pairs. Search parameters are more easily accessible through ms_searchparams. This low-level method is useful for creating repeat searches, where the search parameters should just be copied as is.
To get the value of an individual header key, use ms_mascotresfilebase::getSearchParameter().
[out] | keys | A vector of non-empty keys. |
[out] | values | A vector of strings (some may be empty) in the same order as keys. |
Implements ms_mascotresfilebase.
|
static |
Return the section name as a string given the enum value.
The section name is intended for printing in error or log messages.
sec | One of the values in ms_mascotresfile_dat::section. |
int getSectionValue | ( | const section | sec, |
const char * | key, | ||
char * | str, | ||
int | maxLen | ||
) | const |
Return the string value from any line in the results file.
Function to return a string (into the passed str value) from the results file without using STL. It is generally easier to use getSectionValueStr() which returns a std::string.
Gets the requested string from the results file if it exists. Use the enumeration to choose which section. Returns the number of characters copied into 'str' which will not be greater than maxLen. E.g.
char s[1024]; f.getSectionValue(ms_mascotresfile_dat::SEC_PARAMETERS,"MODS",s,1024);
sec | can be any section number |
key | is case insensitive. |
str | is a pointer to a buffer to receive the string |
maxLen | is the length of the buffer (including a null terminator) |
double getSectionValueDouble | ( | const section | sec, |
const char * | key | ||
) | const |
Return the floating point value from any line in the results file.
sec | can be any section number |
key | is case insensitive. |
int getSectionValueInt | ( | const section | sec, |
const char * | key | ||
) | const |
Return the integer value from any line in the results file.
A value of -1 will be returned if there is no corresponding entry in the results file.
A value of zero will be returned if the entry contains a value that isn't a number.
If the correct value is out of the range of representable values, INT_MAX or INT_MIN is returned.
sec | can be any section number |
key | is case insensitive. |
std::string getSectionValueStr | ( | const section | sec, |
const char * | key | ||
) | const |
Return the string value from any line in the results file.
Gets the requested string from the results file if it exists. Use the enumeration to choose which section. E.g.
std::string s; s = file.getSectionValueStr(ms_mascotresfile_dat::SEC_PARAMETERS, "MODS");
sec | can be any section number |
key | is case insensitive. |
|
overridevirtual |
Return the database numbers of the spectral libraries whose reference database is at the given index.
The reference database of a spectral library is either one of the databases searched – if the reference database was part of the actual search – or a virtual database whose number is above ms_searchparams::getNumberOfDatabases(). The same database can be the reference database of multiple spectral libraries.
If idx does not refer to a reference database, the method returns the empty vector.
idx | Database number of the reference database of interest. |
Implements ms_mascotresfilebase.
|
overridevirtual |
Returns the library search command line and parameters (sl_exec_command).
The command line for the library search is typically like:
../bin/NIST/mspepsearch/MSPepSearch.exe m a P /Z 0.1 /M 0.602993 /LIB [PATH_TO_MSP_FILE] /INP [PATH_TO_MSP_FILE] /OUTTAB [PATH_TO_TSV_FILE] /HITS 10 /MinMF 0 /NumCompared /OutPrecursorMz /OutDeltaPrecursorMz /OutSpecNum
The exact contents depend on the file paths, fragment tolerance and library search options specified in mascot.dat.
In dat28 format, the command string is saved as sl_exec_commandX=
line in the header section.
In MSR format, the command string is saved in the search__spectral_libraries table as 'sl_exec_command'.
If idx is outside its range, the method returns the empty string.
idx | index of the database from 1 to ms_searchparams::getNumberOfDatabases(). |
Implements ms_mascotresfilebase.
|
overridevirtual |
Returns the effective spectral library fragment tolerance.
When a search is run against a spectral library, the effective fragment tolerance is calculated from the user-configured library fragment tolerance and the tolerance specified in the search parameters. This may be different from ms_searchparams::getITOL() and ms_searchparams::getITOLU().
The tolerance unit can be retrieved with getSLFragmentToleranceUnit().
In dat28 format, in Mascot 2.6.01 and later, the effective tolerance value and unit are saved in the header section of the results file as sl_itolX=
. For files created by Mascot 2.6.00, the value is parsed from the sl_exec_commandX=
line if present.
In MSR format, the effective tolerance value and unit are saved in the search__spectral_libraries table as 'itol'.
If idx is outside its range, the method returns 0.0.
idx | index of the database from 1 to ms_searchparams::getNumberOfDatabases(). |
Implements ms_mascotresfilebase.
|
overridevirtual |
Returns the unit of the effective spectral library fragment tolerance.
When a search is run against a spectral library, the effective fragment tolerance is calculated from the user-configured library fragment tolerance and the tolerance specified in the search parameters. This may be different from ms_searchparams::getITOL() and ms_searchparams::getITOLU().
The tolerance can be retrieved with getSLFragmentTolerance().
In dat28 format, in Mascot 2.6.01 and later, the effective tolerance value and unit are saved in the header section of the results file as sl_itolX=
. For files created by Mascot 2.6.00, the value is parsed from the sl_exec_commandX=
line if present.
In MSR format, the effective tolerance value and unit are saved in the search__spectral_libraries table as 'itol_units'.
If idx is outside its range, the method returns 0.0.
idx | index of the database from 1 to ms_searchparams::getNumberOfDatabases(). |
Implements ms_mascotresfilebase.
|
overridevirtual |
Return the query number and file ID in the source .dat file.
See Multiple return values in Perl, Java, Python and C#.
Useful for combining multiple results files (see Combining multiple results files) but also returns valid values for a single file.
Example: Assume that the primary results file has 10 queries, file 2 has 20 queries and file 3 has 30 queries. This function will return the following:
q | newQuery | fileId |
---|---|---|
6 | 6 | 1 |
21 | 11 | 2 |
31 | 1 | 3 |
q | is the query number which should be a value between 1 and getNumQueries(). |
gsqNewQuery | is used to return the query number in the specified source file. |
gsqFileId | is a 1 based index to the source file. |
Implements ms_mascotresfilebase.
|
static |
Return the summary section in the .dat file that contains matches of type pepType.
pepType | the ms_peptide::PSM_TYPE |
|
overridevirtual |
Returns an object that represents the embedded taxonomy file as a reduced taxonomy
file.
The return value only indicates that the embedded file exists. If you want to find out whether the content has been parsed successfully, call the methods of matrix_science::ms_taxonomyfile.
See Object initialising functions in Perl, Java, Python and C#.
tfile | a pointer to taxonomy file object that will accept the content from the embedded taxonomy file. |
Implements ms_mascotresfilebase.
|
overridevirtual |
Returns an object that represents the embedded unimod file as a reduced unimod_2.xml
file.
The return value only indicates that the embedded Unimod XML exists in the results file. If you want to find out whether the XML part has been parsed successfully, call ms_umod_configfile::isValid
See Object initialising functions in Perl, Java, Python and C#.
ufile | a pointer to unimod file object that will accept the XML content. |
useSchemaFromResfile | determines where the location of the XML schema is defined. If 'true', then the schema location should have been defined by specifying XMLschemaDirectory in the constructor. If 'false', then ms_umod_configfile::setSchemaFileName must have been called on ufile before calling this function. This parameter was added in Parser 2.5, and the default value is false to ensure that it is backward compatible with previous versions. |
Implements ms_mascotresfilebase.
|
overridevirtual |
Returns an object that represents the embedded unimod_xl file as a reduced unimod_xl.xml
file.
The return value only indicates that the embedded Unimod crosslinking file exists in the results file. If you want to find out whether the XML part has been parsed successfully, call ms_umod_configfile::isValid
See Object initialising functions in Perl, Java, Python and C#.
ufile | a pointer to unimod file object that will accept the XML content. |
useSchemaFromResfile | determines where the location of the XML schema is defined. If 'true', then the schema location should have been defined by specifying XMLschemaDirectory in the constructor. If 'false', then ms_umod_configfile::setSchemaFileName must have been called on ufile before calling this function. getUnimodXL uses the same schema as ms_mascotresfilebase::getUnimod. |
Implements ms_mascotresfilebase.
|
overridevirtual |
Returns the unique task ID used by Mascot Daemon.
Although this value is a number, it is a 64 bit integer. Some languages on some platforms cannot deal with 64 bit integers properly, so the value is returned as a string. For searches with no taskid, an empty string is returned.
Implements ms_mascotresfilebase.
|
inherited |
Gets the XML schema to be used by functions using quantitation or unimod.
XMLschema | The XML_SCHEMA enumeration value of the required xml schema file. |
|
overridevirtual |
Return true if the results file contains information about the enzyme used.
Check whether the results file contains enzyme information as an embedded 'enzymes' file.
Implements ms_mascotresfilebase.
|
overridevirtual |
Return true if the results file contains quantitation data.
Check whether the results file contains an embedded quantitation method.
Implements ms_mascotresfilebase.
|
overridevirtual |
Return true if the results file contains retention time data.
Check whether the first query has retention time stored in the RTINSECONDS field. If it does, return true. The implicit assumption is that all the other queries also have (or don't have) RTINSECONDS if the first query has (or doesn't have).
Implements ms_mascotresfilebase.
|
overridevirtual |
Check whether database types are available.
In dat28 format, Mascot 2.6 and later save the type of the database(s) in the results file, as db_typeX=
lines in the header section. If the types are not available, the database or databases searched could be AA or NA.
In MSR format, introduced in Mascot Server 3.0, the 'db_type' column is always present in the search__databases table.
Implements ms_mascotresfilebase.
|
overridevirtual |
Returns true if the search was an error tolerant search.
In dat28 format, obtained from the ERRORTOLERANT value in the parameters section. In MSR format, obtained from the ERRORTOLERANT value in the search__parameters table.
In Mascot versions 1.8 and later, an error tolerant search can be run as a repeat search. In this case, one or more ACCESSIONs (which may be retrieved using ms_searchparams::getACCESSION ) must have been specified, and the results file will just contain the error tolerant search results. In Mascot 2.2 and later, a single search can be performed which contains both the standard search results and the error tolerant search results of automatically selected proteins. In this case, ms_searchparams::getACCESSION will return an empty string.
Implements ms_mascotresfilebase.
|
overridevirtual |
Returns true if the search was an MSMS search (SEARCH=MIS
).
Since all types of search may be entered as a sequence query, it may be more useful to use the anyMSMS() member.
SEARCH=MIS
in the parameters section or search__parameters table of the file. Implements ms_mascotresfilebase.
|
overridevirtual |
Returns true if the search was a PMF search (SEARCH=PMF
).
Since all types of search may be entered as a sequence query, it may be more useful to use the anyPMF() member.
SEARCH=PMF
in the parameters section or search__parameters table of the file. Implements ms_mascotresfilebase.
|
overridevirtual |
Returns true if the search was a sequence query search (SEARCH=SQ
).
Since all types of search may be entered as a sequence query, it may be more useful to use the anySQ() member.
SEARCH=SQ
in the parameters section or search__parameters table of the file. Implements ms_mascotresfilebase.
|
inherited |
Call this function to determine if there have been any errors.
This will return true unless there have been any fatal errors.
See Error Handling.
|
inherited |
Outputs the "keep-alive" string during time-consuming operations.
For HTML reports with large result files it is sometimes necessary to output HTML comments to keep the connection alive. This can be done by specifying the interval at which the text is output (keepAliveInterval) and the text that should be output (keepAliveText) as parameters to the ms_mascotresfilebase::ms_mascotresfilebase() constructor.
If the value of keepAliveInterval passed to ms_mascotresfilebase::ms_mascotresfilebase is not 0, then the text specified by keepAliveText will be output approximately every keepAliveInterval seconds. A '%d' in the keepAliveText will be replaced by the number of seconds since the process started.
This functionality is implemented by calling this function 'often', rather than by using a separate thread. This means that the times between calls will not be accurate. A computationally intensive application that uses Mascot Parser can also call this function as required.
From version 2.3 onwards, the keepAliveText can contain tags that allow different text to be output for different lengthy tasks. The tags are:
The text can also include the following tags which are substituted by values:
The following table indicates which values are available for which tasks:
ci= | rf= | ap= | gp= | ul= | cc= | fd= | |
---|---|---|---|---|---|---|---|
%d | X | X | X | X | X | X | X |
%p | X | X | X | X | X | X | X |
%h | X | X | X | ||||
%q | X | X | X | X | |||
%a | X | X | X | ||||
%f | X | X | X | X | X | X |
The %a and %h values for cc= are not output for the second half of caching.
A 'complete' example string might be: ' ul=Creating unassigned list (%p% complete)\n qu=Calculating quantitation component intensities (%p% complete)\n ci=Creating cache file (%p% complete)\n rf=Reading results file (%p% complete)\n ap=Assigning peptides to proteins (%p% complete) hit=%h, time=%d\n gp=Found protein group: %a, hit=%h, %p% complete, %d seconds\n cc=Caching results (%p% complete)\n fd=Calculating false discovery rate (%p% complete)\n '
Any text before the first tag will be used as a default for cases where text isn't supplied for a particular task. For example:
Processing: %p% complete gp=Grouping %a
would output the text:
Processing: 23% complete
for all tasks except the protein grouping which would output:
Grouping gi|12345
|
inlineinherited |
Returns a reference to the search parameters class.
For C# only, params is a keyword, so this function is renamed to _params.
|
inherited |
Replace the existing keepAlive values with new values.
KeepAlive values are passed when creating the ms_mascotresfilebase object, but it can be useful to change these at a later time.
See outputKeepAlive() for further details.
keepAliveInterval | is the new interval in seconds. Specify a value of -1 to keep the old value, or 0 to stop outputting keepAliveText. |
keepAliveText | is the text to output every keepAliveInterval seconds while the file is being loaded, or while other tasks are in progress. |
propagateToAppended | only has meaning if additional files have been appended by calling appendResfile(). |
resetStartTime | can be set to true to reset the "%d" value to zero. See outputKeepAlive() for details. |
|
staticinherited |
Return the results format of the file provided as an argument.
This method tries to open the file and 'sniff' the first few bytes. If those bytes are the SQLite database header, then this is a Mascot Search Results (MSR) file and the method returns RESFILE_MSR. If the bytes look like a MIME format header, then this is a dat28 (.dat) file and the method returns RESFILE_DAT28. In any other case, the method returns RESFILE_UNKNOWN.
fileName | relative or absolute path to the file to 'sniff'. |
|
protectedinherited |
For restoring any errors in the .cdb file
e | is the stored string obtained from calling getErrorInfoAsString |
|
inherited |
Set Percolator features before creating an ms_peptidesummary with Percolator scoring (deprecated).
The names (and contents!) of the Percolator files depend on the features that have been enabled. When running from a report script on Mascot Server, turning on an additional feature in percolatorFeatures will cause Mascot to create a new file, but the old file will still be available if the feature is removed again from percolatorFeatures.
When running outside Mascot Server, the parameters passed to this method must match the parameters used by the report scripts, in order to get the same Percolator file names. If the pip/pop files are from Mascot Server 2.8 or later, please use matrix_science::ms_mascotresfilebase::setPercolatorFeatures(const ms_mascotoptions&, const char*, const std::vector<std::string>&) and the same mascot.dat options as were used on the server.
percolatorFeatures | is normally retrieved by calling ms_mascotoptions::getPercolatorFeatures(). |
additionalFeatures | is normally a string passed to ms-createpip.exe. For example, "-a numUniqPeps -r varmods" would add numUniqPeps and remove varmods from the default. An empty string means nothing is added or removed from ms_mascotoptions::getPercolatorFeatures(). |
useRetentionTimes | is a flag to indicate whether retention time information is used by percolator.exe. This value is normally retrieved by calling ms_mascotoptions::isPercolatorUseRT(). |
|
inherited |
Set Percolator features before creating an ms_peptidesummary with Percolator scoring.
The names (and contents!) of the Percolator files depend on the features that have been enabled. When running from a report script on Mascot Server, turning on an additional feature in PercolatorFeatures will cause Mascot to create a new file, but the old file will still be available if the feature is removed again from PercolatorFeatures.
The method uses the following fields from options:
When running outside Mascot Server, the parameters passed to this method must match the parameters used by the report scripts, in order to get the same Percolator file names. If the pip/pop files are from Mascot Server 2.8 or later, please use the same mascot.dat options as were used on the server. Make sure you also use the same adapterParameters.
options | contains the options stored in mascot.dat. |
additionalFeatures | is normally a string passed to ms-createpip.exe. For example, "-a numUniqPeps -r varmods" would add numUniqPeps and remove varmods from the default. |
adapterParameters | is a vector of parameters to ML adapters introduced in Mascot Server 3.0. If the vector is not empty, then the parameters are sorted and hashed into an additional MD5 checksum component of the pip/pop file names. |
|
inherited |
Sets the XML schema to be used by functions using quantitation or unimod.
It is generally easier to pass a directory as the XMLschemaDirectory parameter to the constructor rather than calling this function for each of the required schema.
Example:
std::string qs; qs = "http://www.matrixscience.com/xmlns/schema/quantitation_1 "; qs += "C:/myfiles/quant_schema_1.xsd "; qs += "http://www.matrixscience.com/xmlns/schema/quantitation_2 "; qs += "../schema%20files/quantitation_2.xsd"; setXMLschemaFilePath(XML_SCHEMA_QUANTITATION, qs.c_str());
The default values used in cases where this function has not been called and no parameter is passed to the constructor are the values suitable for scripts and programs running on the Mascot Server. These values are:
XML_SCHEMA_QUANTITATION : http://www.matrixscience.com/xmlns/schema/quantitation_1 ../html/xmlns/schema/quantitation_1/quantitation_1.xsd http://www.matrixscience.com/xmlns/schema/quantitation_2 ../html/xmlns/schema/quantitation_2/quantitation_2.xsd XML_SCHEMA_UNIMOD : http://www.unimod.org/xmlns/schema/unimod_2 ../html/xmlns/schema/unimod_2/unimod_2.xsd XML_SCHEMA_CROSSLINKING : http://www.matrixscience.com/xmlns/schema/crosslinking_1 ../html/xmlns/schema/crosslinking_1/crosslinking_1.xsd
XMLschema | must be one of the valid XML_SCHEMA values |
path | should be a list of pairs "_schema_alias_ SPACE _file_path_", where SPACE is the space character. See XML_SCHEMA for the supported _schema_alias_ values for each type of schema |
|
staticinherited |
Returns a list of the Percolator input and output files for the specified data file (deprecated).
This static function can be called without creating an ms_mascotresfilebase object, and can be used in advance of creating an object to see if the percolator files will need to be created. If an object has already been created, it is normally easier to call setPercolatorFeatures() and then getPercolatorFileNames().
See Using Percolator scores and Using STL vector classes vectori, vectord and VectorString in Perl, Java, Python and C#.
szFileName | is the absolute or relative path to the results file. |
cacheDirectory | will normally be the value returned from ms_mascotoptions::getCacheDirectory. If it's empty, then the default pattern is used (../data/cache/%Y/%m). |
percolatorFeatures | is normally retrieved by calling ms_mascotoptions::getPercolatorFeatures(). The filenames encode the features so that there is no conflict. |
additionalFeatures | is normally a string passed to ms-createpip.exe. For example, "-a numUniqPeps -r varmods" would add numUniqPeps and remove varmods from the default. Any other parameters except -a and -r are ignored. An empty string means nothing is added or removed from percolatorFeatures. |
useRetentionTimes | is a flag to indicate whether retention time information is used by percolator.exe. |
filenames | returns the list of files. The offsets are defined by ms_mascotresfilebase::PERCOLATOR_FILE_NAMES. |
exists | is a boolean vector which will return flags indicating if the Percolator files exist. The values correspond to the values in filenames vector. |
|
staticinherited |
Returns a list of the Percolator input and output files for the specified data file.
This static function can be called without creating an ms_mascotresfilebase object, and can be used in advance of creating an object to see if the percolator files will need to be created. If an object has already been created, it is normally easier to call setPercolatorFeatures() and then getPercolatorFileNames().
The function uses the following fields from options:
Make sure you set PercolatorExeFlags in options based on whether the results file has any queries with a retention time. Otherwise, this method may generate a filename different from setPercolatorFeatures().
bool anyRT = (whether any query has RTINSECONDS); std::string percolatorFlags = options.getPercolatorRtFlags(anyRT, options.isPercolatorUseRT()); options.setPercolatorExeFlags(percolatorFlags.c_str());
See Using Percolator scores and Using STL vector classes vectori, vectord and VectorString in Perl, Java, Python and C#.
szFileName | is the absolute or relative path to the results file. |
cacheDirectory | will normally be the value returned from ms_mascotoptions::getCacheDirectory. If it's empty, then the default pattern is used (../data/cache/%Y/%m). |
options | contains the options stored in mascot.dat. It is used to access the relevant options to generate the file names. |
additionalFeatures | is normally a string passed to ms-createpip.exe. For example, "-a numUniqPeps -r varmods" would add numUniqPeps and remove varmods from the default. Any other parameters except -a and -r are ignored. An empty string means nothing is added or removed from ms_mascotoptions::getPercolatorFeatures(). |
adapterParameters | is a vector of parameters to ML adapters introduced in Mascot Server 3.0. If the vector is not empty, then the parameters are sorted and hashed into an additional MD5 checksum component of the pip/pop file names. |
filenames | returns the list of files. The offsets are defined by ms_mascotresfilebase::PERCOLATOR_FILE_NAMES. |
exists | is a boolean vector which will return flags indicating if the Percolator files exist. The values correspond to the values in filenames vector. |
|
inherited |
Compare the value returned by getMascotVer() with the passed version number.
Utility function to perform easy comparison. For example, to test if a results file could have taxonomy information, use:
if (versionGreaterOrEqual(2, 4, 0)) then ...
major | is the major version to be compared with |
minor | is the minor version to be compared with |
revision | is the minor revision to be compared with |
|
staticinherited |
Returns true if a cache file will be created when the ms_mascotresfile_dat constructor is called.
This static function can be called without creating an ms_mascotresfile_msr or ms_mascotresfile_dat object. It can be used in advance of creating the object to see if there will be a delay while (re)creating the cache file(s).
The purpose of this method is to get the status of the cache in addition to whether the cache will be created or not.
See Multiple return values in Perl, Java, Python and C#
See Static functions in Perl, Java, Python and C#
[in] | szFileName | is the absolute or relative path to the Fxxxxx.dat file |
[in] | opts | normally loaded from the mascot.dat file using ms_datfile::getMascotOptions() |
[in] | applicationName | is the name of the application or script that is calling this function. The applicationName is searched for in the return value from ms_mascotoptions::getResultsCache and ms_mascotoptions::getResfileCache to determine if the application should be using cache files. If it is not found then the function returns false and sets the cacheStatus to ms_peptidesummary::RESFILE_CACHE_DISABLED_IN_OPTIONS. If null, or an empty string is passed, no check is made. |
[out] | resfileCacheFileName | returns the name of the ms_mascotresfilebase cache file if one exists or would be created |
[out] | cacheStatus | is the ms_peptidesummary::CACHE_STATUS enumeration which gives more details about why the cache file may or may not be created. Multiple values may be bitwise OR'd together. |
|
staticinherited |
Returns true if a cache file will be created when the ms_mascotresfile_dat constructor is called.
This static function can be called without creating an ms_mascotresfile_dat or ms_mascotresfile_msr object, and can be used in advance of creating an object to see if there will be a delay while (re)creating a cache file. The function has the same parameters as the ms_mascotresfilebase constructor.
The behaviour depends on the file type (ms_mascotresfilebase::resfileType()). If it is MSR, this method always returns false, because MSR files do not need a resfile cache. If it is dat28 format (.dat), a resfile cache may be needed.
See Static functions in Perl, Java, Python and C#
See Multiple return values in Perl, Java, Python and C#.
[in] | szFileName | - see ms_mascotresfilebase::ms_mascotresfilebase |
[in] | flags | - see ms_mascotresfilebase::ms_mascotresfilebase |
[in] | cacheDirectory | - see ms_mascotresfilebase::ms_mascotresfilebase |
[out] | cacheFileName | - the full path name of the cache file. For languages other than C++, this will be a reference rather than a pointer to a std::string. |