19#ifndef MS_MASCOTRESFILE_DAT_HPP
20#define MS_MASCOTRESFILE_DAT_HPP
32namespace msparser_internal {
34 class ms_peptidesumcdb;
36 class ms_peptidesummarybase;
37 class ms_proteinsummarybase;
41namespace matrix_science {
42 class ms_umod_configfile;
45 class ms_taxonomyfile;
46 class ms_mascotresults_params;
// Default constructor: sets the pMem_, len_ and pEnd_ members to zero.
// NOTE(review): the enclosing class header is not visible in this listing.
56 multiBuf() {pMem_ = 0; len_ = 0; pEnd_ = 0;}
// Convenience alias for a vector of multiBuf objects.
63 typedef std::vector<multiBuf> multiBuf_v;
// Position marker made of a buffer number (bufNum_) and a char pointer (pMem_).
// NOTE(review): presumably addresses a byte inside one element of a multiBuf_v - confirm.
65 class multiBufMemPtr {
// Special bufNum_ values. MBMP_INVALID is what the default constructor stores
// and what isValid() tests against.
// NOTE(review): MBMP_USING_CDB presumably means the data is served from the CDB index - confirm.
67 enum MBMP { MBMP_INVALID = -1, MBMP_USING_CDB = -2 };
// Constructs a marker from an explicit buffer number and memory pointer.
68 multiBufMemPtr(
int bufNum,
char *pMem)
69 : bufNum_(bufNum), pMem_(pMem) {};
// Default constructor: creates an invalid marker (bufNum_ = MBMP_INVALID, pMem_ = 0).
70 multiBufMemPtr() : bufNum_(MBMP_INVALID), pMem_(0) {};
// Moves the marker relative to the supplied buffer list (defined out of line).
// NOTE(review): presumably steps back by one character, crossing buffer boundaries - confirm.
71 void decrement(
const multiBuf_v & buffers);
// NOTE(review): presumably steps back until one of the characters in 'chars' is reached - confirm.
72 void decrementUntil(
const multiBuf_v & buffers,
const char * chars);
// NOTE(review): presumably steps forward by one character, crossing buffer boundaries - confirm.
73 void increment(
const multiBuf_v & buffers);
// Returns true unless the buffer number equals MBMP_INVALID.
74 bool isValid()
const {
return bufNum_ != MBMP_INVALID;}
// Ordering comparisons between two markers (defined out of line).
// NOTE(review): neither operator is const-qualified, so they cannot be called on a
// const multiBufMemPtr; adding const would also require updating the definitions.
75 bool operator<(
const multiBufMemPtr & rhs);
76 bool operator<=(
const multiBufMemPtr & rhs);
// Comparison functor over C strings; used as the ordering of the sortedKeys
// std::set declared further down in this file.
85 class ms_sortByKeyCriterion
// Comparison mode bits (0x01, 0x02, 0x04), stored in mode_.
// NOTE(review): QUOTED presumably means the keys are enclosed in quotes - confirm.
88 enum CMP_MODE {CASE_INSENSITIVE=0x01, CASE_SENSITIVE=0x02, QUOTED=0x04};
// Constructor; the mode defaults to CASE_INSENSITIVE.
89 ms_sortByKeyCriterion(
int m=(
int)CASE_INSENSITIVE) : mode_(m) {}
// Compares two keys p1 and p2 (defined out of line).
// NOTE(review): presumably a strict-weak "less than" as std::set requires - confirm.
90 bool operator() (
const char * p1,
const char * p2)
const;
// When the CASE_INSENSITIVE bit is set and ch lies in 'a'..'z', returns the
// corresponding upper-case letter. Only ASCII letters are handled.
// NOTE(review): the braces and the fall-through return are not visible in this listing.
99 inline char my_toupper(
char ch)
const
101 if ((mode_ & CASE_INSENSITIVE) && ch >=
'a' && ch <=
'z')
102 return static_cast<char>(
'A' + (ch-
'a'));
// Grants msparser_internal::ms_peptidesumcdb access to this class's non-public members.
122 friend class msparser_internal::ms_peptidesumcdb;
// Static helper that maps a section enum value to a C string.
// NOTE(review): presumably the section's name as written in the results file - confirm.
166 static const char *getSectionName(
const section sec);
// Enumerators of the 'err' enum (definitions for error numbers).
// Values run sequentially from 0x0000 to 0x003A.
// NOTE(review): the enum header and the 0x0001 entry are not visible in this listing.
185 ERR_NO_ERROR = 0x0000,
187 ERR_NOSUCHFILE = 0x0002,
188 ERR_READINGFILE = 0x0003,
189 ERR_QUERYOUTOFRANGE = 0x0004,
190 ERR_MISSINGENTRY = 0x0005,
191 ERR_PEPSUMMPEPGET = 0x0006,
192 ERR_PEPTIDESTR = 0x0007,
193 ERR_ACCINPEPTIDESTR = 0x0008,
194 ERR_PROTSUMM = 0x0009,
195 ERR_PROTSUMMPEP = 0x000A,
196 ERR_ADDPEPTIDES = 0x000B,
197 ERR_MISSINGHIT = 0x000C,
198 ERR_MISSINGSECTION = 0x000D,
199 ERR_MISSINGSECTIONEND = 0x000E,
200 ERR_MALFORMED_ERR_TOL = 0x000F,
201 ERR_NO_ERR_TOL_PARENT = 0x0010,
202 ERR_NULL_ACC_PEP_SUM = 0x0011,
203 ERR_NULL_ACC_PROT_SUM = 0x0012,
204 ERR_DUPE_ACCESSION = 0x0013,
205 ERR_UNASSIGNED_PROG = 0x0014,
206 ERR_UNASSIGNED_RANGE = 0x0015,
207 ERR_UNASSIGNED_UNK = 0x0016,
208 ERR_NO_UNIGENE_FILE = 0x0017,
209 ERR_DUPLICATE_KEY = 0x0018,
210 ERR_OLDRESULTSFILE = 0x0019,
211 ERR_MALFORMED_TAG = 0x001A,
212 ERR_MALFORMED_DRANGE = 0x001B,
213 ERR_INVALID_NUMQUERIES = 0x001C,
214 ERR_MALFORMED_TERMS = 0x001D,
215 ERR_INVALID_RESFILE = 0x001E,
216 ERR_INVALID_PROTDB = 0x001F,
217 ERR_UNIGENE_MULTIDB = 0x0020,
// Cache-directory and CDB (index/cache file) related errors.
218 ERR_INVALID_CACHE_DIR = 0x0021,
219 ERR_FAIL_OPEN_DAT_FILE = 0x0022,
220 ERR_MISSING_CDB_FILE = 0x0023,
221 ERR_FAIL_MK_CACHE_DIR = 0x0024,
222 ERR_FAIL_MK_CDB_FILE = 0x0025,
223 ERR_FAIL_CLOSE_FILE = 0x0026,
224 ERR_FAIL_CDB_INIT = 0x0027,
225 ERR_INVALID_CDB_FILE = 0x0028,
226 ERR_WRITE_CDB_FILE = 0x0029,
227 ERR_CDB_TOO_LARGE = 0x002A,
228 ERR_NEED_64_BIT = 0x002B,
229 ERR_CDB_64_BIT_REMAKE = 0x002C,
230 ERR_CDB_OLD_VER_RETRY = 0x002D,
231 ERR_CDB_OLD_VER_NO_RETRY = 0x002E,
232 ERR_CDB_INCOMPLETE_RETRY = 0x002F,
233 ERR_CDB_INCOMPLETE_NO_RETRY = 0x0030,
234 ERR_CDB_BEING_CREATED = 0x0031,
235 ERR_CDB_FAIL_REMOVE = 0x0032,
236 ERR_CDB_FAIL_LOCK = 0x0033,
237 ERR_CDB_FAIL_UNLOCK = 0x0034,
238 ERR_CDB_SOURCE_CHANGE_RETRY = 0x0035,
239 ERR_CDB_SOURCE_CHANGE_NO_RETRY = 0x0036,
// Percolator, append and results-creation errors.
240 ERR_MISSING_PERCOLATOR_FILE = 0x0037,
241 ERR_CANNOT_APPEND_RESFILE = 0x0038,
242 ERR_CANNOT_APPEND_RESFILE_NO_FNAMES = 0x0039,
243 ERR_RESULTS_NOT_CREATED = 0x003A,
// Tail of a parameter list whose beginning is not visible in this listing
// (NOTE(review): presumably the class constructor - confirm).
// Defaults shown: keep-alive disabled (interval 0), keep-alive text
// "<!-- %d seconds -->\n", no flags, cache directory pattern "../data/cache/%Y/%m",
// and a null XML schema directory.
// NOTE(review): %Y/%m look like strftime-style year/month placeholders - confirm.
251 const int keepAliveInterval = 0,
252 const char * keepAliveText =
"<!-- %d seconds -->\n",
253 const unsigned int flags = RESFILE_NOFLAG,
254 const char * cacheDirectory =
"../data/cache/%Y/%m",
255 const char * XMLschemaDirectory = 0,
// Appends another results file, identified by filename, to this object.
// flags defaults to RESFILE_USE_PARENT_PARAMS; cacheDirectory defaults to null.
// Overrides a base-class virtual.
// NOTE(review): the meaning of the int return value is not visible here - confirm.
264 int appendResfile(
const char * filename,
265 int flags=RESFILE_USE_PARENT_PARAMS,
266 const char * cacheDirectory = 0)
override;
// Returns a count of results files.
// NOTE(review): presumably this file plus any appended ones - confirm.
272 int getNumberOfResfiles()
const override;
// Section-presence and match-presence queries. All are const.
// NOTE(review): presumably true when the named section is present in the file - confirm.
275 bool doesSectionExist(
const section sec)
const;
// Checks for peptide-summary matches; the section defaults to SEC_PEPTIDES.
278 bool anyPeptideSummaryMatches(
const section sec=SEC_PEPTIDES)
const;
// The four overrides below each take an isDecoy flag that defaults to false.
// NOTE(review): presumably isDecoy selects the decoy rather than the target results - confirm.
280 bool anyFastaMatches(
const bool isDecoy=
false)
const override;
282 bool anySpectralLibraryMatches(
const bool isDecoy=
false)
const override;
284 bool anyErrorTolerantMatches(
const bool isDecoy=
false)
const override;
286 bool anyCrosslinkedMatches(
const bool isDecoy=
false)
const override;
// Accessors for the value stored under 'key' within a section, in four forms:
// caller-supplied buffer, int, double and std::string.
// NOTE(review): for the buffer form, presumably at most maxLen characters are
// written to str and the return value is a length - confirm.
294 int getSectionValue(
const section sec,
const char * key,
char * str,
int maxLen)
const;
297 int getSectionValueInt(
const section sec,
const char * key)
const;
300 double getSectionValueDouble(
const section sec,
const char * key)
const;
303 std::string getSectionValueStr(
const section sec,
const char * key)
const;
// The same four forms for per-query sections, addressed by query number instead
// of a section enum value. (SEC_QUERY1 is marked "Don't use" in favour of these.)
306 int getQuerySectionValue(
const int query,
const char * key,
char * str,
int maxLen)
const;
309 int getQuerySectionValueInt(
const int query,
const char * key)
const;
312 double getQuerySectionValueDouble(
const int query,
const char * key)
const;
315 std::string getQuerySectionValueStr(
const int query,
const char * key)
const;
// Returns the job number; resfileID defaults to 1.
// NOTE(review): presumably resfileID selects among appended results files - confirm.
318 int getJobNumber(
const int resfileID = 1)
const override;
// Enumerate keys of a section / query section, returning a std::string.
// Both optional pointer parameters default to null.
// NOTE(review): presumably the pointers carry iteration state between successive
// calls - confirm against the implementation.
321 std::string enumerateSectionKeys(
const section sec,
323 int * pPreviousNum = 0,
324 OFFSET64_T * pPreviousOffset = 0)
const;
327 std::string enumerateQuerySectionKeys(
const int query,
329 int * pPreviousNum = 0,
330 OFFSET64_T * pPreviousOffset = 0)
const;
// Count and database-information accessors. All are const.
// Returns the number of queries; resfileID defaults to 0.
334 int getNumQueries(
const int resfileID = 0)
const override;
// Returns the number of hits for a section; the section defaults to SEC_SUMMARY.
337 int getNumHits(
const section sec=SEC_SUMMARY)
const;
// The accessors below take an index that defaults to 0.
// NOTE(review): presumably idx is a database number, with 0 meaning all databases - confirm.
340 int getNumSeqs(
const int idx = 0)
const override;
343 int getNumSeqsAfterTax(
const int idx = 0)
const override;
346 int getNumEtSeqsSearched(
const int idx = 0)
const override;
349 int getNumLibraryEntries(
const int idx = 0)
const override;
// Note the double return type, unlike the int counts above.
352 double getNumResidues(
const int idx = 0)
const override;
355 bool isDatabaseTypeAvailable()
const override;
// Mapping between spectral-library (SL) databases and reference databases;
// idx has no default here.
// NOTE(review): exact index semantics are not visible in this listing - confirm.
359 int getReferenceDatabaseNumberOfSL(
const int idx)
const override;
362 std::vector<int> getSLDatabaseNumbersOfReference(
const int idx)
const override;
// Returns the date and time of the search in seconds since midnight
// January 1st 1970, as held in searchDate_.
372 int getDate()
const override {
return searchDate_; }
// Database and spectral-library (SL) accessors; every idx parameter defaults to 1.
// NOTE(review): presumably idx is a 1-based database number - confirm.
380 std::string getFastaVer(
int idx = 1)
const override;
383 std::string getFastaPath(
int idx = 1)
const override;
386 std::string getUniqueTaskID()
const override;
392 double getSLFragmentTolerance(
int idx = 1)
const override;
395 std::string getSLFragmentToleranceUnit(
int idx = 1)
const override;
// NOTE(review): 'virtual' is redundant alongside 'override'; the sibling
// declarations in this group omit it.
398 virtual std::string getSLExecCommand(
int idx = 1)
const override;
// Search-type predicates. All take no arguments and are const overrides.
// NOTE(review): presumably the is*() forms describe the search as a whole and the
// any*() forms report whether at least one query is of that type - confirm.
401 bool isPMF()
const override;
404 bool isMSMS()
const override;
407 bool isSQ()
const override;
410 bool isErrorTolerant()
const override;
413 bool anyPMF()
const override;
416 bool anyMSMS()
const override;
419 bool anySQ()
const override;
422 bool anyTag()
const override;
// Per-query observed values, addressed by query number. All are const overrides.
425 double getObservedMass(
const int query)
const override;
// The decoy flag defaults to false for the next two accessors.
428 int getObservedCharge(
const int query,
const bool decoy=
false)
const override;
431 double getObservedMrValue(
const int query,
const bool decoy=
false)
const override;
434 double getObservedIntensity(
const int query)
const override;
// Returns a string for repeating the search of a query; fullQuery defaults to false.
// NOTE(review): the format of the returned string is not visible here - confirm.
437 std::string getRepeatSearchString(
const int query,
const bool fullQuery =
false)
const override;
// Returns a file name; id defaults to 1.
440 std::string getFileName(
const int id = 1)
const override;
// Configuration loaders that take a caller-supplied object pointer and return bool.
// NOTE(review): presumably they fill the object from the matching section of the
// results file and return true on success - confirm.
// useSchemaFromResfile defaults to false.
446 bool getUnimod(
ms_umod_configfile *ufile,
bool useSchemaFromResfile =
false)
const override;
449 bool getUnimodXL(
ms_umod_configfile *ufile,
bool useSchemaFromResfile =
false)
const override;
// enzymeFileName defaults to null.
452 bool getEnzyme(
ms_enzymefile *efile,
const char * enzymeFileName = 0)
const override;
// Cache location accessors; 'processed' defaults to true.
// NOTE(review): presumably 'processed' means placeholders such as %Y/%m in the
// cache directory pattern have been expanded - confirm.
458 std::string getCacheDirectory(
bool processed =
true)
const override;
461 std::string getCacheFileName()
const override;
// Translate between a query number q and a (query, file id) pair for
// multi-file results; the two outputs are returned through the reference parameters.
465 bool getSrcQueryAndFileIdForMultiFile(
const int q,
int & gsqNewQuery,
int & gsqFileId)
const override;
// NOTE(review): this repeats the declaration above with both reference parameters
// named OUTPUT, which is not valid C++ on its own. Presumably it is the SWIG-only
// variant and the preprocessor lines separating the two are not visible - confirm.
467 bool getSrcQueryAndFileIdForMultiFile(
const int q,
int & OUTPUT,
int & OUTPUT)
const override;
// Takes a local query number and a file id and returns a query number.
470 int getMultiFileQueryNumber(
const int localQuery,
const int fileId)
const override;
// Feature-presence predicates (const overrides).
// NOTE(review): presumably report whether the results file carries enzyme,
// retention-time and quantitation information respectively - confirm.
473 bool hasEnzyme()
const override;
476 bool hasRT()
const override;
479 bool hasQuantitation()
const override;
// Returns an ms_inputquery object, by value, for the given query number.
482 ms_inputquery getInputQuery(
const int queryNum)
const override;
// Bulk key/value accessors: results are returned through the two vector
// reference parameters. The masses variant yields double values.
486 void getHeaderKeyValues(std::vector<std::string> & keys, std::vector<std::string> & values)
const override;
489 void getSearchParametersKeyValues(std::vector<std::string> & keys, std::vector<std::string> & values)
const override;
492 void getMassesKeyValues(std::vector<std::string> & keys, std::vector<double> & values)
const override;
// NOTE(review): the three declarations below repeat the ones above with both
// parameters named OUTPUT. Presumably they are SWIG-only variants and the
// preprocessor lines separating the two sets are not visible - confirm.
494 void getHeaderKeyValues(std::vector<std::string> & OUTPUT, std::vector<std::string> & OUTPUT)
const override;
496 void getSearchParametersKeyValues(std::vector<std::string> & OUTPUT, std::vector<std::string> & OUTPUT)
const override;
498 void getMassesKeyValues(std::vector<std::string> & OUTPUT, std::vector<double> & OUTPUT)
const override;
// Single-key lookups.
// NOTE(review): 'virtual' is redundant alongside 'override' on these three.
502 virtual std::string getHeaderValue(
const std::string& key)
const override;
505 virtual std::string getSearchParameter(
const std::string& key)
const override;
508 virtual double getMassValue(
const std::string& key)
const override;
// Returns modification names and deltas through the two vectors.
// NOTE(review): the meaning of the int return value is not visible here - confirm.
511 int getLibraryMods(std::vector<std::string> & modNames, std::vector<double> & modDeltas)
const override;
// Returns a threshold as a double (const override).
// NOTE(review): units and meaning of the threshold are not visible here - confirm.
520 double getFirstPassThreshold()
const override;
// Non-const override.
// NOTE(review): presumably checks the version recorded in the results file - confirm.
533 void validateResfileVersion()
override;
// Peptide-summary cache queries. Both take the same parameter list.
// NOTE(review): presumably these mirror the peptide summary constructor arguments;
// original lines 538 and 552 are missing from this listing, so one parameter
// is not visible - confirm.
535 std::string getPepSumCacheFilename(
536 const unsigned int flags,
537 double minProbability,
539 const char * unigeneIndexFile,
540 double ignoreIonsScoreBelow,
541 int minPepLenInPepSummary,
542 const char * singleHit,
543 const unsigned int flags2)
const override;
549 bool willCreatePepSumCache(
550 const unsigned int flags,
551 double minProbability,
553 const char * unigeneIndexFile,
554 double ignoreIonsScoreBelow,
555 int minPepLenInPepSummary,
556 const char * singleHit,
557 const unsigned int flags2)
const override;
// The next three fragments are tails of declarations whose opening lines are not
// visible in this listing. Each returns its results through reference parameters.
563 std::string & peptideSummaryCacheFileName,
564 unsigned int & cacheStatus)
const override;
568 std::shared_ptr<msparser_internal::ms_mascotresultsbase> & results,
569 std::shared_ptr<msparser_internal::ms_peptidesummarybase> & iPepSum)
const override;
573 std::shared_ptr<msparser_internal::ms_mascotresultsbase> & results,
574 std::shared_ptr<msparser_internal::ms_proteinsummarybase> & iProtSum)
const override;
// Build hooks returning shared ownership of internal implementation objects.
576 std::shared_ptr<msparser_internal::ms_inputquerybase> buildInputQueryImpl(
const int q)
const override;
578 std::shared_ptr<msparser_internal::ms_searchparamsbase> buildSearchParamsImpl()
const override;
// Data members. Several original lines are missing from this listing, so the
// member list shown here is incomplete.
// Multi-file state: resfileV_t holds raw pointers to ms_mascotresfile_dat objects.
// NOTE(review): ownership of those pointers is not visible here - confirm.
580 std::vector<int> multifileNumQueries_;
581 typedef std::vector<ms_mascotresfile_dat *> resfileV_t;
582 resfileV_t multifileResfiles_;
583 int protSummaryHits_;
585 int numSequencesAfterTax_;
587 int numLibraryEntries_;
592 std::vector<DATABASE_TYPE> dbType_;
593 std::vector<int> slReferenceMap_;
// 'mutable' lets const member functions update these two.
// NOTE(review): presumably a lazily computed cache for isErrorTolerant() - confirm.
603 mutable bool isErrorTolerant_;
604 mutable bool cachedIsErrorTolerant_;
606 msparser_internal::ms_cdb * pIndexFile_;
// Line-reading buffer and its size; mutable so const readers can use them.
609 mutable char * readlnBuf_;
610 mutable unsigned int readlnBufSize_;
611 msparser_internal::ms_mutex * pMutex_;
// Per-section tables with one slot per section (SEC_NUMSECTIONS entries each):
// title string, start marker and end marker.
614 const char * sectionTitles_[SEC_NUMSECTIONS];
616 multiBufMemPtr sectionStart_[SEC_NUMSECTIONS];
617 multiBufMemPtr sectionEnd_ [SEC_NUMSECTIONS];
621 std::string endSectionKey_;
622 std::string lineBasedEndSectionKey_;
623 std::string genericQuerySectionKey_;
624 bool isWinIniFormat_;
// NOTE(review): presumably per-query caches backing getObservedMrValue(),
// getObservedMass() and getObservedCharge() - confirm.
625 std::vector<double> cachedMrValues_;
626 std::vector<double> cachedExpValues_;
627 std::vector<short> cachedCharges_;
// Private helpers and remaining state.
// Section location: both return multiBufMemPtr markers; startLookingAt defaults to null.
629 multiBufMemPtr findSectionStart(
const char * szSectionName,
630 const multiBufMemPtr * startLookingAt = 0)
const;
631 multiBufMemPtr findSectionEnd(
const multiBufMemPtr sectionStart)
const;
// Key index: a std::set of C-string keys ordered by ms_sortByKeyCriterion,
// with one set per section.
634 typedef std::set<const char *,ms_sortByKeyCriterion> sortedKeys;
635 sortedKeys sorted_[SEC_NUMSECTIONS];
// NOTE(review): presumably populates sorted_keys from the text between the
// sectionStart and sectionEnd markers - confirm.
636 bool fillUpSortedList(
const int section,
637 const multiBufMemPtr sectionStart,
638 const multiBufMemPtr sectionEnd,
639 sortedKeys & sorted_keys)
const;
// Per-query equivalents of the per-section tables above.
// NOTE(review): hasQuerySectionBeenIndexed lacks the trailing underscore used
// by the other data members.
642 bool hasQuerySectionBeenIndexed;
643 std::vector<multiBufMemPtr> querySectionStart_;
644 std::vector<multiBufMemPtr> querySectionEnd_;
645 std::vector<sortedKeys> sortedQueries_;
// Internal value lookup; the result is returned through 'result'.
// NOTE(review): original line 653 is missing from this listing, so one
// parameter (presumably the key) is not visible - confirm.
648 bool inDLLgetSectVal(
const section sec,
649 const int queryNumber,
650 const multiBufMemPtr sectionStart,
651 const multiBufMemPtr sectionEnd,
652 sortedKeys & sorted_keys,
654 std::string & result)
const;
657 bool inDLLgetSectionAsString(
const section sec,
658 std::string & result)
const;
// File loading and indexing.
660 bool readFile(
const char * szFileName);
// Takes the buffer pointer and its size by reference.
// NOTE(review): presumably so the buffer can be reallocated and grown - confirm.
661 bool readLine(
char * & buf,
unsigned int & bufSize)
const;
662 void getSectionTitles();
663 void debugCheckReadFileOK();
664 bool createCDBFile();
// NOTE(review): 'str' is taken by value (copied on every call) and szKey is a
// non-const pointer; whether either is modified is not visible here.
666 int extractObservedCharge(
char *szKey, std::string str)
const;
// NOTE(review): original lines 668-669 are missing from this listing, so some
// parameters are not visible.
667 std::string enumerateQuerySection(
const int query,
670 OFFSET64_T * pPreviousOffset,
671 OFFSET64_T * pSectionEnd,
672 std::string & value)
const;
// Returns both the tolerance and its unit through reference parameters.
// NOTE(review): presumably the shared backend of getSLFragmentTolerance() and
// getSLFragmentToleranceUnit() - confirm.
674 void getSLFragmentToleranceAndUnit_Value(
int idx,
double & tolerance, std::string & toleranceUnit)
const;
// Collects key/value pairs of a section; 'startswith' defaults to an empty string.
// NOTE(review): presumably 'startswith' is a key-prefix filter, with empty
// meaning no filtering - confirm.
676 void getSectionKeyValues(std::vector<std::string> & keys, std::vector<std::string> & values, section sec,
const std::string &startswith=
"")
const;
// Passes keep-alive settings on (override).
// NOTE(review): presumably to appended results files when propagateToAppended is true - confirm.
678 void propagateKeepAlive(
const int keepAliveInterval,
679 const char * keepAliveText,
680 const bool propagateToAppended,
681 const bool resetStartTime)
override;
// Map from section to a bool.
// NOTE(review): presumably a per-section cache for the any*Matches() queries - confirm.
683 std::map<section, bool> mapSectionAnyMatch_;
This class represents the file crosslinking.xml.
Definition: ms_crosslinking_configfile.hpp:49
Reads and parses the enzymes file that contains multiple enzyme definitions.
Definition: ms_enzyme.hpp:194
An instance of this class represents all the parameters specified in the Options section of mascot.dat.
Definition: ms_mascotoptions.hpp:91
DECOY_ALGORITHM
Definitions for how the decoy sequences are generated.
Definition: ms_mascotoptions.hpp:156
Class for parsing and reading files in dat28 format.
Definition: ms_mascotresfile_dat.hpp:121
section
Section names in the standard mascot results files.
Definition: ms_mascotresfile_dat.hpp:132
@ SEC_TAXONOMY
taxonomy section
Definition: ms_mascotresfile_dat.hpp:144
@ SEC_SPECTRAL_LIBRARY
spectral library section. See also Spectral libraries.
Definition: ms_mascotresfile_dat.hpp:151
@ SEC_ERRTOLDECOYPEPTIDES
error tolerant decoy peptides section. See also Error tolerant searches.
Definition: ms_mascotresfile_dat.hpp:159
@ SEC_ERRTOLDECOYSUMMARY
error tolerant decoy summary section. See also Error tolerant searches.
Definition: ms_mascotresfile_dat.hpp:158
@ SEC_MASSES
masses section
Definition: ms_mascotresfile_dat.hpp:135
@ SEC_DECOYPEPTIDES
decoy_peptides section. See also Target-decoy searches and false discovery rate.
Definition: ms_mascotresfile_dat.hpp:147
@ SEC_DECOYSUMMARY
decoy_summary section. See also Target-decoy searches and false discovery rate.
Definition: ms_mascotresfile_dat.hpp:145
@ SEC_ENZYME
enzyme section
Definition: ms_mascotresfile_dat.hpp:143
@ SEC_DECOYPROTEINS
decoy_proteins section. See also Target-decoy searches and false discovery rate.
Definition: ms_mascotresfile_dat.hpp:148
@ SEC_QUERY1
query1 section. Don't use, see getQuerySectionValueStr() etc.
Definition: ms_mascotresfile_dat.hpp:140
@ SEC_INDEX
index section
Definition: ms_mascotresfile_dat.hpp:160
@ SEC_PARAMETERS
parameters section
Definition: ms_mascotresfile_dat.hpp:133
@ SEC_LIBRARYPEPTIDES
spectral library peptides section. See also Spectral libraries.
Definition: ms_mascotresfile_dat.hpp:152
@ SEC_HEADER
header section
Definition: ms_mascotresfile_dat.hpp:134
@ SEC_SUMMARY
summary section
Definition: ms_mascotresfile_dat.hpp:136
@ SEC_PEPTIDES
peptides section
Definition: ms_mascotresfile_dat.hpp:138
@ SEC_UNIMOD
unimod section
Definition: ms_mascotresfile_dat.hpp:142
@ SEC_CROSSLINK_PEPTIDES
crosslink_peptides section. See also Crosslinked search results.
Definition: ms_mascotresfile_dat.hpp:155
@ SEC_MIXTURE
mixture section (pmf mixture)
Definition: ms_mascotresfile_dat.hpp:137
@ SEC_PROTEINS
proteins section
Definition: ms_mascotresfile_dat.hpp:139
@ SEC_LIBRARYSUMMARY
spectral library summary section. See also Spectral libraries.
Definition: ms_mascotresfile_dat.hpp:153
@ SEC_QUANTITATION
quantitation section
Definition: ms_mascotresfile_dat.hpp:141
@ SEC_CROSSLINKING
crosslinking section
Definition: ms_mascotresfile_dat.hpp:156
@ SEC_ERRTOLSUMMARY
error tolerant summary section. See also Error tolerant searches.
Definition: ms_mascotresfile_dat.hpp:149
@ SEC_CROSSLINK_SUMMARY
crosslink_summary section. See also Crosslinked search results.
Definition: ms_mascotresfile_dat.hpp:154
@ SEC_UNIMOD_XL
unimod_xl section
Definition: ms_mascotresfile_dat.hpp:157
@ SEC_ERRTOLPEPTIDES
error tolerant peptides section. See also Error tolerant searches.
Definition: ms_mascotresfile_dat.hpp:150
@ SEC_DECOYMIXTURE
decoy_mixture section. See also Target-decoy searches and false discovery rate.
Definition: ms_mascotresfile_dat.hpp:146
err
Definitions for error numbers.
Definition: ms_mascotresfile_dat.hpp:184
int getDate() const override
Returns the date and time of the search in seconds since midnight January 1st 1970.
Definition: ms_mascotresfile_dat.hpp:372
int getExecTime() const override
Returns the time taken for the search.
Definition: ms_mascotresfile_dat.hpp:367
std::string getMascotVer() const override
Returns the version of Mascot used to perform the search.
Definition: ms_mascotresfile_dat.hpp:377
Abstract base class of ms_mascotresfile_dat and ms_mascotresfile_msr.
Definition: ms_mascotresfilebase.hpp:72
Class which provides constructor parameters for either ms_peptidesummary or ms_proteinsummary.
Definition: ms_mascotresults_params.hpp:32
PSM_TYPE
Specifies the search pass and origin of the peptide match.
Definition: ms_peptide.hpp:107
Use this class to get peptide summary results.
Definition: ms_peptidesummary.hpp:51
Contains information of the current progress of a task being performed.
Definition: ms_progress_info.hpp:40
Definition: ms_proteinsummary.hpp:45
Use this class in order to read/write quantitation.xml.
Definition: ms_quant_configfile.hpp:52
Use this class in order to read in a taxonomy file.
Definition: ms_taxonomyfile.hpp:145
This class represents the file unimod.xml.
Definition: ms_umod_configfile.hpp:54
DATABASE_TYPE
Definition: ms_databaseoptions.hpp:39