19#ifndef MS_MASCOTRESFILEBASE_HPP
20#define MS_MASCOTRESFILEBASE_HPP
32namespace msparser_internal {
33 class ms_peptidesummarybase;
34 class ms_proteinsummarybase;
35 class ms_inputquerybase;
36 class ms_searchparamsbase;
39namespace matrix_science {
40 class ms_searchparams;
41 class ms_umod_configfile;
44 class ms_taxonomyfile;
45 class ms_mascotresults_params;
46 class ms_proteinsummary;
77 friend class msparser_internal::ms_peptidesummarybase;
78 friend class msparser_internal::ms_proteinsummarybase;
90 KA_CREATEINDEX_CI = 0,
92 KA_ASSIGNPROTEINS_AP = 2,
93 KA_GROUPPROTEINS_GP = 3,
94 KA_UNASSIGNEDLIST_UL = 4,
96 KA_CREATECACHE_CC = 6,
97 KA_THRESHFORFDR_FDR = 7,
109 RESFILE_NOFLAG = 0x00000000,
110 RESFILE_USE_CACHE = 0x00000001,
111 RESFILE_CACHE_IGNORE_ACC_DUPES = 0x00000002,
112 RESFILE_USE_PARENT_PARAMS = 0x00000004,
113 RESFILE_CACHE_IGNORE_DATE_CHANGE = 0x00000008
124 PERCOLATOR_INPUT_FILE = 0,
125 PERCOLATOR_OUTPUT_TARGET = 1,
126 PERCOLATOR_OUTPUT_DECOY = 2
134 XML_SCHEMA_QUANTITATION = 0,
135 XML_SCHEMA_UNIMOD = 1,
136 XML_SCHEMA_DIRECTORY = 2,
137 XML_SCHEMA_CROSSLINKING = 3,
153 const char * keepAliveText =
"<!-- %d seconds -->\n",
154 const unsigned int flags = RESFILE_NOFLAG,
155 const char * cacheDirectory = 0,
const char * XMLschemaDirectory = 0,
ms_progress_info * progressMonitor = 0);
162 std::string getMSParserVersion()
const;
165 bool versionGreaterOrEqual(
int major,
int minor,
int revision)
const;
169 int flags=RESFILE_USE_PARENT_PARAMS,
170 const char * cacheDirectory = 0) = 0;
183 void setError(
int error, ...) ;
184 void vsetError(
int error, va_list arg);
188 int getNumberOfErrors()
const;
191 int getErrorNumber(
const int num = -1)
const;
194 int getLastError()
const;
197 std::string getErrorString(
const int num)
const;
200 std::string getLastErrorString()
const;
203 void appendErrors(
ms_errs & errs);
206 void resetKeepAlive(
const int keepAliveInterval,
const char * keepAliveText,
207 const bool propagateToAppended =
true,
const bool resetStartTime =
false);
364 bool setXMLschemaFilePath(
XML_SCHEMA XMLschema,
const char * path);
367 std::string getXMLschemaFilePath(
XML_SCHEMA XMLschema)
const;
382 const ms_modification* getMonoLinkModification(
const int modNum,
const int monoLink)
const;
385 bool getMasses(
ms_masses * masses)
const;
394 bool outputKeepAlive()
const;
398 void getKeepAlive(
KA_TASK & kaTask,
400 std::string & kaAccession,
403 std::string & kaText)
const;
405 void getKeepAlive(
KA_TASK & OUTPUT,
407 std::string & OUTPUT,
410 std::string & OUTPUT)
const;
413 bool outputKeepAlive(
KA_TASK kaTask,
int percentageComplete,
const char * accession,
int hit,
int query)
const;
429 void setPercolatorFeatures(
const char * percolatorFeatures,
430 const char * additionalFeatures,
431 const bool useRetentionTimes);
435 const char * additionalFeatures,
436 const std::vector<std::string> &adapterParameters = std::vector<std::string>());
439 std::vector<std::string> getPercolatorFileNames()
const;
/// Pure virtual. Returns the query number and file ID in the source results
/// file; both values are delivered through the two non-const int reference
/// parameters. What the bool return value signals is not shown in this listing.
/// NOTE(review): the parameters named OUTPUT look like the SWIG typemap form of
/// this declaration -- confirm against the full header.
445 virtual bool getSrcQueryAndFileIdForMultiFile(
const int q,
int & OUTPUT,
int & OUTPUT)
const = 0;
/// Returns a copy of the percolatorFeatures_ member string.
451 std::string getEncodedPercolatorFeatures()
const {
return percolatorFeatures_; }
454 static bool staticGetPercolatorFileNames(
const char * szFileName,
455 const char * cacheDirectory,
456 const char * percolatorFeatures,
457 const char * additionalFeatures,
458 const bool useRetentionTimes,
459 std::vector<std::string> & filenames,
460 std::vector<bool> & exists);
463 static bool staticGetPercolatorFileNames(
const char * szFileName,
464 const char * cacheDirectory,
466 const char * additionalFeatures,
467 const std::vector<std::string> &adapterParameters,
468 std::vector<std::string> & filenames,
469 std::vector<bool> & exists);
474 static bool willCreateCache(
const char * szFileName,
475 const unsigned int flags,
476 const char * cacheDirectory,
477 std::string * cacheFileName);
/// Convenience overload that takes cacheFileName by reference: it forwards
/// szFileName, flags and cacheDirectory unchanged to the pointer-taking
/// willCreateCache overload, passes the address of cacheFileName, and returns
/// that call's result.
479 static bool willCreateCache(
const char * szFileName,
480 const unsigned int flags,
481 const char * cacheDirectory,
482 std::string & cacheFileName) {
483 return (ms_mascotresfilebase::willCreateCache(szFileName, flags, cacheDirectory, &cacheFileName));
487 static bool willCreateCache(
const char * szFileName,
488 const ms_mascotoptions & opts,
489 const char * applicationName,
490 std::string & resfileCacheFileName,
491 unsigned int & cacheStatus);
494 std::string get_ms_mascotresults_params(
const ms_mascotoptions & opts,
495 unsigned int * gpFlags,
496 double * gpMinProbability,
497 int * gpMaxHitsToReport,
498 double * gpIgnoreIonsScoreBelow,
499 unsigned int * gpMinPepLenInPepSummary,
500 bool * gpUsePeptideSummary,
501 unsigned int * gpFlags2)
const;
504 static bool willCreateCache(
const char * szFileName,
505 const unsigned int flags,
506 const char * cacheDirectory,
507 std::string & OUTPUT);
509 static bool willCreateCache(
const char * szFileName,
510 const ms_mascotoptions & opts,
511 const char * applicationName,
512 std::string & OUTPUT,
513 unsigned int & OUTPUT);
515 std::string get_ms_mascotresults_params(
const ms_mascotoptions & opts,
516 unsigned int * OUTPUT,
520 unsigned int * OUTPUT,
522 unsigned int * OUTPUT)
const;
526 static RESFILE_TYPE resfileType(
const std::string& fileName);
529 static std::unique_ptr<ms_mascotresfilebase> createResfile(
const char * szFileName,
530 const int keepAliveInterval = 0,
531 const char * keepAliveText =
"<!-- %d seconds -->\n",
533 const char * cacheDirectory =
"../data/cache/%Y/%m",
534 const char * XMLschemaDirectory = 0,
/// Pure virtual. Returns all the header key-value pairs through the keys and
/// values vectors, which are passed by non-const reference.
553 virtual void getHeaderKeyValues(std::vector<std::string> & keys, std::vector<std::string> & values)
const = 0;
/// Pure virtual. Returns all the residue and modification masses as key-value
/// pairs: names go into keys and the masses into values (a vector of double),
/// both passed by non-const reference.
559 virtual void getMassesKeyValues(std::vector<std::string> & keys, std::vector<double> & values)
const = 0;
561 virtual void getHeaderKeyValues(std::vector<std::string> & OUTPUT, std::vector<std::string> & OUTPUT)
const = 0;
/// Pure virtual. Returns all the search parameters as key-value pairs through
/// the two string vectors passed by non-const reference.
/// NOTE(review): the parameters named OUTPUT look like the SWIG typemap form of
/// this declaration -- confirm against the full header.
563 virtual void getSearchParametersKeyValues(std::vector<std::string> & OUTPUT, std::vector<std::string> & OUTPUT)
const = 0;
565 virtual void getMassesKeyValues(std::vector<std::string> & OUTPUT, std::vector<double> & OUTPUT)
const = 0;
/// Pure virtual. Returns all the library mod names and deltas through the
/// modNames and modDeltas vectors, which are passed by non-const reference.
/// The meaning of the int return value is not shown in this listing.
577 virtual int getLibraryMods(std::vector<std::string> & modNames, std::vector<double> & modDeltas)
const = 0;
590 void setError(
int error, ...)
const;
591 bool setErrorInfoFromString(
const std::string & e);
592 std::string getErrorInfoAsString(
const int num)
const;
593 bool isErrorAlreadyPresent(
const int errNum)
const;
/// Returns the current value of the percolatorUseRetentionTimes_ member flag.
595 bool getPercolatorUseRetentionTimes()
const {
return percolatorUseRetentionTimes_; };
597 virtual void validateResfileVersion() = 0;
602 void prepareKeepAlive(
const char * keepAliveText,
const bool resetStartTime);
603 virtual void propagateKeepAlive(
const int keepAliveInterval,
604 const char * keepAliveText,
605 const bool propagateToAppended,
606 const bool resetStartTime) = 0;
608 virtual std::string getPepSumCacheFilename(
609 const unsigned int flags,
610 double minProbability,
612 const char * unigeneIndexFile,
613 double ignoreIonsScoreBelow,
614 int minPepLenInPepSummary,
615 const char * singleHit,
616 const unsigned int flags2)
const = 0;
624 virtual bool willCreatePepSumCache(
625 const unsigned int flags,
626 double minProbability,
628 const char * unigeneIndexFile,
629 double ignoreIonsScoreBelow,
630 int minPepLenInPepSummary,
631 const char * singleHit,
632 const unsigned int flags2)
const = 0;
640 std::string & peptideSummaryCacheFileName,
641 unsigned int & cacheStatus)
const = 0;
645 std::shared_ptr<msparser_internal::ms_mascotresultsbase> & results,
646 std::shared_ptr<msparser_internal::ms_peptidesummarybase> & iPepSum
651 std::shared_ptr<msparser_internal::ms_mascotresultsbase> & results,
652 std::shared_ptr<msparser_internal::ms_proteinsummarybase> & iProtSum
655 virtual std::shared_ptr<msparser_internal::ms_inputquerybase> buildInputQueryImpl(
const int q)
const = 0;
657 virtual std::shared_ptr<msparser_internal::ms_searchparamsbase> buildSearchParamsImpl()
const = 0;
661 std::string fileName_;
663 std::string cacheDirectory_;
664 std::string percolatorFeatures_;
665 bool percolatorUseRetentionTimes_;
666 std::vector<std::string> percolatorFileNames_;
667 std::string version_;
669 std::unique_ptr<ms_searchparams> params_;
670 std::vector<std::string> xmlSchemaPath_;
672 int keepAliveInterval_;
673 std::string keepAliveTextStr_;
674 std::string keepAliveText_[KA_LAST];
675 time_t keepAliveStartTime_;
677 mutable KA_TASK keepAliveTask_;
678 mutable time_t lastKeepAliveTime_;
679 mutable int keepAlivePercentage_;
680 mutable std::string keepAliveAccession_;
681 mutable int keepAliveHit_;
682 mutable int keepAliveQuery_;
691 static bool willCreateCacheDat(
const char * szFileName,
692 const unsigned int flags,
693 const char * cacheDirectory,
694 std::string * cacheFileName);
697 static bool willCreateCacheDat(
const char * szFileName,
699 const char * applicationName,
700 std::string & resfileCacheFileName,
701 unsigned int & cacheStatus);
703 std::string getKeepAliveString(
const double elapsedTime)
const;
705 std::vector<int> getPMFqueriesUsed()
const;
707 void cacheMonoLinkModifications()
const;
709 mutable std::map< std::pair<int, int>,
ms_modification > monoLinkCache_;
710 mutable bool monoLinksCached_;
This class represents the file crosslinking.xml.
Definition: ms_crosslinking_configfile.hpp:49
Represents a method object in crosslinking.xml
Definition: ms_crosslinking_method.hpp:50
Reads and parses the enzymes file that contains multiple enzyme definitions.
Definition: ms_enzyme.hpp:194
This class is used as a base class for several Mascot Parser classes.
Definition: ms_errors.hpp:696
All errors are collected in an instance of this class.
Definition: ms_errors.hpp:37
An instance of this class represents all the parameters specified in the Options section of mascot....
Definition: ms_mascotoptions.hpp:91
DECOY_ALGORITHM
Definitions for how the decoy sequences are generated.
Definition: ms_mascotoptions.hpp:156
Abstract base class of ms_mascotresfile_dat and ms_mascotresfile_msr.
Definition: ms_mascotresfilebase.hpp:72
virtual int getExecTime() const =0
Returns the time taken for the search.
PERCOLATOR_FILE_NAMES
Offsets into a vector of Percolator filenames.
Definition: ms_mascotresfilebase.hpp:123
virtual std::string getFastaPath(int idx=1) const =0
Returns the path to the FASTA file used.
virtual bool hasRT() const =0
Return true if the results file contains retention time data.
virtual std::vector< int > getSLDatabaseNumbersOfReference(const int idx) const =0
Return the database numbers of the spectral libraries whose reference database is at the given index.
virtual bool anyFastaMatches(const bool isDecoy=false) const =0
Returns true if there are any FASTA matches.
virtual int getDate() const =0
Returns the date and time of the search in seconds since midnight January 1st 1970.
virtual int getNumberOfResfiles() const =0
Multiple results files can be summed together and treated as 'one'.
virtual double getNumResidues(const int idx=0) const =0
Returns the number of residues in the FASTA file(s) searched.
virtual int getNumSeqs(const int idx=0) const =0
Returns the number of sequences in the FASTA file(s) searched.
virtual std::string getHeaderValue(const std::string &key) const =0
Return the header value for the given key.
KA_TASK
Processing some results files is computationally intensive. These are the tasks that can be performed...
Definition: ms_mascotresfilebase.hpp:89
virtual bool anySpectralLibraryMatches(const bool isDecoy=false) const =0
Returns true if there are any Spectral Library matches.
XML_SCHEMA
The results file contains embedded files in XML format and these need to be validated against a schem...
Definition: ms_mascotresfilebase.hpp:133
virtual bool anyErrorTolerantMatches(const bool isDecoy=false) const =0
Returns true if there are any Error Tolerant matches.
virtual int getJobNumber(const int resfileID=1) const =0
Return the job number for this file - obtained from the file name.
virtual int getNumLibraryEntries(const int idx=0) const =0
Returns the number of entries in the spectral library searched.
virtual double getObservedIntensity(const int query) const =0
Returns the experimental intensity for the peptide.
virtual double getObservedMrValue(const int query, const bool decoy=false) const =0
Returns the experimental mass value (as a relative mass) as entered by the user.
virtual int getObservedCharge(const int query, const bool decoy=false) const =0
The 'charge' returned will be 0 for Mr, otherwise it will be 1, -1, 2, -2, 3, -3 etc....
virtual DATABASE_TYPE getDatabaseType(const int idx) const =0
Return database type if available.
virtual bool getEnzyme(ms_enzymefile *efile, const char *enzymeFileName=0) const =0
Returns an object that represents the embedded enzyme file as a reduced enzymes file.
virtual std::string getUniqueTaskID() const =0
Returns the unique task ID used by Mascot Daemon.
virtual int getNumSeqsAfterTax(const int idx=0) const =0
Returns the number of sequences that passed the taxonomy filter in the FASTA file(s) searched.
virtual ms_inputquery getInputQuery(const int queryNum) const =0
Return the ms_inputquery object for the query given as argument.
virtual bool getUnimodXL(ms_umod_configfile *ufile, bool useSchemaFromResfile=false) const =0
Returns an object that represents the embedded unimod_xl file as a reduced unimod_xl....
virtual int getLibraryMods(std::vector< std::string > &modNames, std::vector< double > &modDeltas) const =0
Return all the library mod names and deltas.
virtual bool isDatabaseTypeAvailable() const =0
Check whether database types are available.
virtual std::string getSLFragmentToleranceUnit(int idx=1) const =0
Returns the unit of the effective spectral library fragment tolerance.
virtual bool getQuantitation(ms_quant_configfile *qfile) const =0
Returns an object that represents the embedded quantitation file as a reduced quantitation....
virtual bool anyTag() const =0
Returns true if any of the queries in the search contain tag or etag commands.
virtual bool getCrosslinking(ms_crosslinking_configfile *crosslinkingFile) const =0
RESFILE_TYPE
Supported results file formats.
Definition: ms_mascotresfilebase.hpp:145
virtual bool hasEnzyme() const =0
Return true if the results file contains information about the enzyme used.
virtual bool anyMSMS() const =0
Returns true if any of the queries in the search contain ions data.
virtual int getNumEtSeqsSearched(const int idx=0) const =0
Returns the number of sequences searched in the second pass of an integrated error tolerant search.
virtual bool hasQuantitation() const =0
Return true if the results file contains quantitation data.
virtual bool isPMF() const =0
Returns true if the search was a PMF search (SEARCH=PMF).
virtual std::string getMascotVer() const =0
Returns the version of Mascot used to perform the search.
virtual bool isMSMS() const =0
Returns true if the search was an MSMS search (SEARCH=MIS).
virtual std::string getCacheFileName() const =0
Returns the filename of the cache file; see ms_mascotresfile_msr::getCacheFileName() and ms_mascotres...
virtual bool getUnimod(ms_umod_configfile *ufile, bool useSchemaFromResfile=false) const =0
Returns an object that represents the embedded unimod file as a reduced unimod_2.xml file.
virtual bool anyCrosslinkedMatches(const bool isDecoy=false) const =0
Returns true if there are any Crosslinked matches.
virtual int appendResfile(const char *filename, int flags=RESFILE_USE_PARENT_PARAMS, const char *cacheDirectory=0)=0
Multiple results files can be summed together and treated as 'one'.
virtual bool getTaxonomy(ms_taxonomyfile *tfile) const =0
Returns an object that represents the embedded taxonomy file as a reduced taxonomy file.
virtual double getMassValue(const std::string &key) const =0
Return the residue or modification mass for the given key.
virtual int getMultiFileQueryNumber(const int localQuery, const int fileId) const =0
Return the multi-file query number from the local query number in an appended file.
virtual double getFirstPassThreshold() const =0
Return the threshold value for the first pass of an automated error tolerant search.
virtual std::string getSLExecCommand(int idx=1) const =0
Returns the library search command line and parameters (sl_exec_command).
virtual const ms_mascotresfilebase * getResfile(int id) const =0
virtual double getQplughole(const int query, const ms_peptide::PSM_TYPE pepType) const =0
Return the threshold score for homologous peptide match (MIS only).
virtual bool isErrorTolerant() const =0
Returns true if the search was an error tolerant search.
virtual void getHeaderKeyValues(std::vector< std::string > &keys, std::vector< std::string > &values) const =0
Return all the header key-value pairs.
virtual std::string getSearchParameter(const std::string &key) const =0
Return the search parameter for the given key.
virtual int64_t getQmatch(const int query, const ms_peptide::PSM_TYPE pepType) const =0
Return the number of peptide masses within precursor tolerance of this query.
virtual std::string getCacheDirectory(bool processed=true) const =0
Returns the directory being used for cache files (if any).
virtual double getObservedMass(const int query) const =0
Returns the experimental mass value as entered by the user.
virtual bool isSQ() const =0
Returns true if the search was a sequence query search (SEARCH=SQ).
virtual std::string getFastaVer(int idx=1) const =0
Returns the FASTA file version.
virtual bool anyPMF() const =0
Returns true if any of the queries in the search just contain a single peptide mass.
virtual int getReferenceDatabaseNumberOfSL(const int idx) const =0
Return the database number of the reference database of a spectral library.
virtual bool getSrcQueryAndFileIdForMultiFile(const int q, int &gsqNewQuery, int &gsqFileId) const =0
Return the query number and file ID in the source results file.
virtual void getMassesKeyValues(std::vector< std::string > &keys, std::vector< double > &values) const =0
Return all the residue and modification masses as key-value pairs.
virtual std::string getRepeatSearchString(const int query, const bool fullQuery=false) const =0
To perform a repeat search, an appropriate string needs to be built up.
virtual void getSearchParametersKeyValues(std::vector< std::string > &keys, std::vector< std::string > &values) const =0
Return all the search parameters as key-value pairs.
virtual ms_mascotoptions::DECOY_ALGORITHM getDecoyTypeForDB(const int idx=1) const =0
Returns the decoy algorithm type for a given database.
virtual double getSLFragmentTolerance(int idx=1) const =0
Returns the effective spectral library fragment tolerance.
virtual int getNumQueries(const int resfileID=0) const =0
Returns the number of queries (peptide masses or ms-ms spectra).
ms_searchparams & params() const
Returns a reference to the search parameters class.
Definition: ms_mascotresfilebase.hpp:358
virtual std::string getFileName(const int id=1) const =0
Returns the name of the results file passed into the constructor.
virtual bool anySQ() const =0
Returns true if any of the queries in the search contain seq or comp commands.
FLAGS
Flags for opening the results file.
Definition: ms_mascotresfilebase.hpp:107
@ RESFILE_NOFLAG
Dat28 format: Read the whole file into memory. MSR format: Use standard SQLite methods to read the fi...
Definition: ms_mascotresfilebase.hpp:109
Class which provides constructor parameters for either ms_peptidesummary or ms_proteinsummary.
Definition: ms_mascotresults_params.hpp:32
Reads and parses the masses file with residue and atom masses.
Definition: ms_masses.hpp:48
The class represents a single modification-entry in mod_file.
Definition: ms_modfile.hpp:134
PSM_TYPE
Specifies the search pass and origin of the peptide match.
Definition: ms_peptide.hpp:107
Use this class to get peptide summary results.
Definition: ms_peptidesummary.hpp:51
Contains information of the current progress of a task being performed.
Definition: ms_progress_info.hpp:40
Definition: ms_proteinsummary.hpp:45
Use this class in order to read/write quantitation.xml.
Definition: ms_quant_configfile.hpp:52
An object of this class represents a single quantitation method from quantitation.xml.
Definition: ms_quant_method.hpp:51
This class encapsulates the search parameters in the Mascot results file.
Definition: ms_searchparams.hpp:47
Use this class in order to read in a taxonomy file.
Definition: ms_taxonomyfile.hpp:145
This class represents the file unimod.xml.
Definition: ms_umod_configfile.hpp:54
DATABASE_TYPE
Definition: ms_databaseoptions.hpp:39