Matrix Science Mascot Parser toolkit
 
Loading...
Searching...
No Matches
ms_peptidesummary.hpp
1/*
2##############################################################################
3# file: ms_peptidesummary.hpp #
4# 'msparser' toolkit #
5# Encapsulates the peptide summary report from the mascot results file #
6##############################################################################
7# COPYRIGHT NOTICE #
8# Copyright 1998-2016 Matrix Science Limited All Rights Reserved. #
9# #
10##############################################################################
11# $Archive:: /MowseBranches/ms_mascotresfile_1.2/include/ms_mascotrespe $ #
12# $Author: robertog@matrixscience.com $ #
13# $Date: 2024-08-29 17:08:49 +0100 $ #
14# $Revision: 5342f33ad8c8306230f6edda6c723ef3d3ff158b | MSPARSER_REL_3_0_0-2024-09-24-0-g93ebaeb4f4 $ #
15# $NoKeywords:: $ #
16##############################################################################
17*/
18
19#ifndef MS_MASCOTRESPEPTIDESUM_HPP
20#define MS_MASCOTRESPEPTIDESUM_HPP
21
22
23// Includes from the standard template library
24#include <list>
25#include <map>
26#include <set>
27#include <string>
28#include <vector>
29
// Forward declarations of internal classes, so this header can name them
// without their full definitions. ms_peptidesummarybase is the pointee type
// of ms_peptidesummary's private iPepSum_ member; ms_peptidesummary_dat is
// not referenced anywhere else in this listing.
30namespace msparser_internal {
31 class ms_peptidesummary_dat;
32 class ms_peptidesummarybase;
33
34}
35
36namespace matrix_science {
37
44
// Use this class to get peptide summary results. It derives publicly from
// matrix_science::ms_mascotresults.
//
// NOTE(review): this listing has gaps in its line numbering (for example
// 54, 56-69, 95, 97-101, 177-178, 188-189). The enum introducer lines and
// the first lines of both constructors are among the lines not shown. The
// comments below that name those missing pieces rely on the definition
// index that accompanies the listing.
// NOTE(review): the base class and most parameter types used here are
// neither declared nor included by this header as shown - presumably it is
// meant to be included through an umbrella header; confirm.
50 class MS_MASCOTRESFILE_API ms_peptidesummary : public matrix_science::ms_mascotresults
51 {
52 public:
53
55
// BUGFIX_NUM (introducer line not shown): bugNum values for isDataCached().
// Each enumerator's value is the number embedded in its name. The entries
// are not in ascending order (11481 follows 12317, 12447 follows 13324).
70 {
71 BUGFIX_10780 = 10780,
72 BUGFIX_10995 = 10995,
73 BUGFIX_11002 = 11002,
74 BUGFIX_11018 = 11018,
75 BUGFIX_11235 = 11235,
76 BUGFIX_11254 = 11254,
77 BUGFIX_11344 = 11344,
78 BUGFIX_11411 = 11411,
79 BUGFIX_11425 = 11425,
80 BUGFIX_11438 = 11438,
81 BUGFIX_11483 = 11483,
82 BUGFIX_11499 = 11499,
83 BUGFIX_11856 = 11856,
84 BUGFIX_12123 = 12123,
85 BUGFIX_12317 = 12317,
86 BUGFIX_11481 = 11481,
87 BUGFIX_12538 = 12538,
88 BUGFIX_12729 = 12729,
89 BUGFIX_12740 = 12740,
90 BUGFIX_13324 = 13324,
91 BUGFIX_12447 = 12447,
92 BUGFIX_13759 = 13759
93 };
94
96
// CACHE_STATUS (introducer line and opening brace not shown): returned by
// the willCreateCache function. Every enumerator is a distinct single bit:
// RESFILE_* use 0x00001-0x00010, PEPSUMMARY_* use 0x00100-0x04000 and
// CACHE_MISSING_RESFILE is 0x10000.
// NOTE(review): presumably several of these are OR-ed together in the
// cacheStatus output argument of willCreateCache - confirm.
102 CACHE_MISSING_RESFILE = 0x10000,
103
104 RESFILE_CACHE_FILE_NOT_PRESENT = 0x00001,
105 RESFILE_CACHE_BEING_CREATED = 0x00002,
106 RESFILE_CACHE_DISABLED_IN_OPTIONS = 0x00004,
107 RESFILE_CACHE_VALID = 0x00008,
108 RESFILE_CACHE_CAN_CREATE = 0x00010,
109
110 PEPSUMMARY_CACHE_FILE_NOT_PRESENT = 0x00100,
111 PEPSUMMARY_CACHE_BEING_CREATED = 0x00200,
112 PEPSUMMARY_CACHE_DISABLED_IN_OPTIONS = 0x00400,
113 PEPSUMMARY_CACHE_VALID = 0x00800,
114 PEPSUMMARY_CACHE_CAN_CREATE = 0x01000,
115 PEPSUMMARY_CACHE_STATUS_NOT_AVAILABLE= 0x02000,
116 PEPSUMMARY_CACHE_NOT_FOR_PMF = 0x04000
117 };
118
120
// QL_FLAG (introducer line not shown): flags for getQueryList(). The values
// are the consecutive integers 0-4 rather than single-bit masks. QL_FIRST
// shares the value of QL_ALL, and QL_LAST shares the value of
// QL_IGNORE_IONS_SCORE_BELOW.
124 {
125 QL_FIRST = 0x0000,
126 QL_ALL = 0x0000,
127 QL_UNASSIGNED = 0x0001,
128 QL_BELOW_IDENTITY = 0x0002,
129 QL_BELOW_HOMOLOGY = 0x0003,
130 QL_IGNORE_IONS_SCORE_BELOW = 0x0004,
131
132 QL_LAST = 0x0004
133 };
134
136
// MSPEPSUM (introducer line not shown): flags2 for ms_peptidesummary,
// introduced in Mascot Parser 2.3. Every non-zero enumerator is a distinct
// single bit. MSPEPSUM_NONE (0) is the default value of the flags2
// parameters declared further down.
140 {
141 MSPEPSUM_NONE = 0x0000,
142 MSPEPSUM_PERCOLATOR = 0x0001,
143 MSPEPSUM_USE_CACHE = 0x0002,
144 MSPEPSUM_SINGLE_HIT_DBIDX = 0x0004,
145 MSPEPSUM_USE_HOMOLOGY_THRESH= 0x0008,
146 MSPEPSUM_NO_PROTEIN_GROUPING= 0x0010,
147 MSPEPSUM_DISCARD_RELOADABLE = 0x0020,
148 MSPEPSUM_DEFERRED_CREATE = 0x0040,
149 MSPEPSUM_CACHE_IGNORE_DATE_CHANGE = 0x0080,
150 MSPEPSUM_REMOVE_CHIMERIC_DUPES = 0x0100,
151 MSPEPSUM_SL_INTEGRATED = 0x0200,
152 MSPEPSUM_SL_ONLY = 0x0400,
153 MSPEPSUM_CROSSLINK_INTEGRATED = 0x0800,
154 MSPEPSUM_CROSSLINK_ONLY = 0x1000,
155 };
156
157
158 public:
159
// Static overloads that return a cache file name as a std::string. The
// first takes the individual settings; the second takes them bundled in an
// ms_mascotresults_params object (the class that provides constructor
// parameters for either ms_peptidesummary or ms_proteinsummary).
161 static std::string getCacheFilename(
162 const ms_mascotresfilebase & resfile,
163 const unsigned int flags,
164 double minProbability = 0.0,
165 int maxHitsToReport = 50,
166 const char * unigeneIndexFile = 0,
167 double ignoreIonsScoreBelow = 0.0,
168 int minPepLenInPepSummary = 0,
169 const char * singleHit = 0,
170 const unsigned int flags2 = MSPEPSUM_NONE);
171
173 static std::string getCacheFilename(
174 const ms_mascotresfilebase & resfile,
175 const ms_mascotresults_params & parameters);
176
// NOTE(review): lines 177-178 are not shown. The parameter list below has
// the same names and defaults as the first willCreateCache overload after
// its resfile argument, so it is presumably the tail of the constructor
// that takes a results file followed by individual settings - confirm.
179 const unsigned int flags = ms_mascotresults::MSRES_GROUP_PROTEINS,
180 double minProbability = 0.0,
181 int maxHits = 50,
182 const char * unigeneIndexFile = 0,
183 double ignoreIonsScoreBelow = 0.0,
184 int minPepLenInPepSummary = 0,
185 const char * singleHit = 0,
186 const unsigned int flags2 = MSPEPSUM_NONE);
187
// NOTE(review): lines 188-189 are not shown. Presumably this is the tail of
// the constructor that takes a results file and an ms_mascotresults_params
// object - confirm.
190 const ms_mascotresfilebase & resfile,
191 const ms_mascotresults_params & parameters);
192
// Virtual destructor.
193 virtual ~ms_peptidesummary();
194
// Returns a const reference to an ms_mascotresfilebase (the abstract base
// class of ms_mascotresfile_dat and ms_mascotresfile_msr).
196 const ms_mascotresfilebase& mascotresfile() const;
197
// The declarations that follow are mostly virtual member functions.
// NOTE(review): in the signatures below, q and p presumably denote a query
// number and a peptide position within that query - confirm against the
// library documentation.
199 virtual bool createSummary();
200
201 virtual bool anyEmPAI() const;
202
204 virtual double getProteinEmPAI(const char *accession, const int dbIdx = 1, const int length = -1) const;
205
// ms_protein encapsulates a protein in the mascot results file.
207 virtual ms_protein * getHit(const int hit, const int memberNumber = 0) const;
208
210 virtual void freeHit(const int hit);
211
212 // This is 'hard-coded' to 10 in Mascot
213 enum { PEPS_PER_QUERY = 10 };
214
// Two getPeptide overloads: one returns an ms_peptide by value, the other
// returns bool and takes a reference to an ms_peptide pointer.
216 virtual ms_peptide getPeptide(const int q, const int p) const;
217
219 virtual bool getPeptide(const int q, const int p, ms_peptide * & pep) const;
220
222 virtual double getIonsScore(const int q, const int p, const bool decoy) const;
223
225 virtual bool isPeptideUnique(const int q, const int p, const UNIQUE_PEP_RULES rules = UPR_DEFAULT) const;
226
227 virtual bool getHitAndFamilyMember(const ms_protein * prot,
228 ms_mascotresults::hitAndFamily_t & hitAndFamily,
229 const UNIQUE_PEP_RULES rules) const;
230
232 virtual std::string getProteinsWithThisPepMatch(const int q, const int p, const bool quotes=false);
233
// Two overloads that return a vector of strings and take several vectors by
// non-const reference; the second overload adds the psmComponent vector.
235 virtual std::vector<std::string> getAllProteinsWithThisPepMatch(const int q, const int p,
236 std::vector<int> & start,
237 std::vector<int> & end,
238 std::vector<std::string> &pre,
239 std::vector<std::string> &post,
240 std::vector<int> & frame,
241 std::vector<int> & multiplicity,
242 std::vector<int> & db) const;
243
245 virtual std::vector<std::string> getAllProteinsWithThisPepMatch(const int q, const int p,
246 std::vector<int> & start,
247 std::vector<int> & end,
248 std::vector<std::string> &pre,
249 std::vector<std::string> &post,
250 std::vector<int> & frame,
251 std::vector<int> & multiplicity,
252 std::vector<int> & db,
253 std::vector<int> & psmComponent) const;
254
256 std::vector<int> getNumberOfAccessionDBTypes(int q, int p) const;
257
259 virtual int getAllFamilyMembersWithThisPepMatch(const int hit,
260 const int q,
261 const int p,
262 std::vector< int >& db,
263 std::vector< std::string >& acc,
264 std::vector< int >& dupe_status) const;
265
267 std::vector<int> getNumberOfFamilyAccessionDBTypes(int hit, int q, int p, bool includeSamesets = true) const;
268
// String-returning accessors, each taking q and p.
270 virtual std::string getErrTolModString(const int q, const int p) const;
271
273 virtual std::string getLibraryModString(const int q, const int p) const;
274
276 virtual std::string getErrTolModMasterString(const int q, const int p) const;
277
279 virtual std::string getErrTolModSlaveString(const int q, const int p) const;
280
282 virtual std::string getErrTolModPepString(const int q, const int p) const;
283
285 virtual std::string getErrTolModReqPepString(const int q, const int p) const;
286
288 virtual std::string getTagString(const int q, const int p) const;
289
291 virtual int getTagDeltaRangeStart(const int q, const int p) const;
292
294 virtual int getTagDeltaRangeEnd(const int q, const int p) const;
295
// The next two accessors default psmComponent to ms_peptide::PSM_COMPLETE.
297 virtual std::string getTerminalResiduesString(const int q, const int p, const ms_peptide::PSM psmComponent = ms_peptide::PSM_COMPLETE) const;
298
300 virtual std::string getComponentString(const int q, const int p, const ms_peptide::PSM psmComponent = ms_peptide::PSM_COMPLETE) const;
301
303 virtual int getProteinScoreCutoff(double OneInXprobRnd) const;
304
306 int getSrcRank(int q, int p) const;
307
309 ms_mascotresfile_dat::section getSrcSection(int q, int p)const;
310
// Takes one of the QL_FLAG values declared above.
312 std::string getQueryList(QL_FLAG flag, bool outputListOfQueries = true);
313
315 virtual std::vector<int> getPepsWithSameScore(const int q, const int p) const;
// Outside SWIG the last two arguments are the named references value and
// rawValue. In the SWIG branch both are named OUTPUT instead, which the
// adjacent comment ties to SWIG's handling of multiple return values.
316#ifndef SWIG
318 virtual bool getComponentIntensity(const int q, const int p, const std::string & componentName, double & value, double & rawValue) const;
319#else // SWIG Multiple return values
320 virtual bool getComponentIntensity(const int q, const int p, const std::string & componentName, double & OUTPUT, double & OUTPUT) const;
321#endif
// The find* functions return int and take vectors by non-const reference:
// accessions/dbIndexes for the protein searches, q/p for findPeptides.
323 virtual int findProteins(const int startHit,
324 const std::string & str,
325 const int dbIdx,
326 FIND_FLAGS item,
327 FIND_COMPARE_FLAGS compareFlags,
328 std::vector<std::string> & accessions,
329 std::vector<int> & dbIndexes) const;
330
332 virtual int findProteinsByAccession(const int startHit,
333 const std::string & str,
334 const int dbIdx,
335 FIND_COMPARE_FLAGS compareFlags,
336 std::vector<std::string> & accessions,
337 std::vector<int> & dbIndexes) const;
338
340 virtual int findProteinsByDescription(const int startHit,
341 const std::string & str,
342 FIND_COMPARE_FLAGS compareFlags,
343 std::vector<std::string> & accessions,
344 std::vector<int> & dbIndexes) const;
345
347 virtual int findPeptides(const int startHit,
348 const std::string & str,
349 FIND_FLAGS item,
350 FIND_COMPARE_FLAGS compareFlags,
351 std::vector<int> & q,
352 std::vector<int> & p) const;
353
// Takes a non-const ms_quant_component reference; the method pointer
// defaults to NULL.
355 virtual QUANT_COMPONENT_STATUS getQuantitationComponentForPeptide(
356 const ms_peptide & peptide,
357 ms_quant_component & component,
358 const ms_quant_method * method = NULL) const;
359
// Both lookups return a pointer to const ms_protein; dbIdx defaults to 1.
361 virtual const ms_protein * getProtein(const char * accession, const int dbIdx = 1) const;
362
364 virtual const ms_protein * getComponentProtein(const char * accession, const int dbIdx = 1) const;
365
// tcm defaults to ms_mascotresults::TCM_PAIRWISE_MAXIMUM; the two trailing
// parameters are named reserved1/reserved2 and default to 0.
367 virtual bool getTreeClusterNodes(const int hit,
368 std::vector<int> &left,
369 std::vector<int> &right,
370 std::vector<double> &distance,
371 ms_mascotresults::TREE_CLUSTER_METHOD tcm = ms_mascotresults::TCM_PAIRWISE_MAXIMUM,
372 double *** reserved1 = 0,
373 unsigned int * reserved2 = 0) const;
374
375 //virtual bool queryHasSignificantRank1Match(ms_mascotresfile_dat::section secSummary,
376 //int query) const;
377
// Four static willCreateCache overloads. The first two mirror the
// getCacheFilename overloads above (individual settings, or an
// ms_mascotresults_params object). The other two also take an
// ms_mascotoptions object and non-const reference arguments for cache file
// name(s) and cacheStatus (see the CACHE_STATUS values above).
379 static bool willCreateCache(const ms_mascotresfilebase & resfile,
380 const unsigned int flags = ms_mascotresults::MSRES_GROUP_PROTEINS,
381 double minProbability = 0.0,
382 int maxHits = 50,
383 const char * unigeneIndexFile = 0,
384 double ignoreIonsScoreBelow = 0.0,
385 int minPepLenInPepSummary = 0,
386 const char * singleHit = 0,
387 const unsigned int flags2 = MSPEPSUM_NONE);
388
390 static bool willCreateCache(const ms_mascotresfilebase & resfile,
391 const ms_mascotresults_params & parameters);
392
// As with getComponentIntensity, the SWIG branch repeats the same two
// signatures with the reference arguments renamed to OUTPUT.
393#ifndef SWIG
395 static bool willCreateCache(const char * resultsFileName,
396 const ms_mascotoptions & opts,
397 const char * unigeneIndexFile,
398 const char * singleHit,
399 const char * applicationName,
400 std::string & resfileCacheFileName,
401 std::string & peptideSummaryCacheFileName,
402 unsigned int & cacheStatus);
403
405 static bool willCreateCache(const ms_mascotresfilebase & resfile,
406 const ms_mascotresults_params & parameters,
407 const ms_mascotoptions & opts,
408 std::string & peptideSummaryCacheFileName,
409 unsigned int & cacheStatus);
410#else // SWIG Multiple return values
411 static bool willCreateCache(const char * resultsFileName,
412 const ms_mascotoptions & opts,
413 const char * unigeneIndexFile,
414 const char * singleHit,
415 const char * applicationName,
416 std::string & OUTPUT,
417 std::string & OUTPUT,
418 unsigned int & OUTPUT);
419
420 static bool willCreateCache(const ms_mascotresfilebase & resfile,
421 const ms_mascotresults_params & parameters,
422 const ms_mascotoptions & opts,
423 std::string & OUTPUT,
424 unsigned int & OUTPUT);
425#endif
426
// Non-virtual const predicates that take a quantitation object and a peptide.
428 bool isPeptideModificationMatch(
429 const ms_quant_modgroup & group,
430 const ms_peptide & peptide) const;
431
433 bool isPeptideComponentMatch(
434 const ms_quant_component & component,
435 const ms_peptide & peptide) const;
436
// has*/get* pairs: a bool query next to a getter returning a pointer to a
// const method object.
438 bool hasQuantMethod() const;
440 const ms_quant_method * getQuantMethod() const;
441
443 bool hasCrosslinkingMethod() const;
445 const ms_crosslinking_method * getCrosslinkingMethod() const;
446
// Instance accessor; note the capital N, unlike the static getCacheFilename
// overloads above.
448 std::string getCacheFileName() const;
449
451 virtual bool loadPepMatchesForProteinFromCache(ms_protein * prot, const bool loadRelated = true);
452
454 virtual bool isValidQandP(const int q, const int p) const;
455
// Non-const member; dumpFileName is taken by value.
457 bool dumpCDB(const std::string dumpFileName);
458
// Returns void; takes four vectors by non-const reference.
460 void getLibraryEntryId(const int q, const int p, std::vector<int> &dbIdx, std::vector<int> &offset, std::vector<std::string> &checksum, std::vector<std::string> &mods) const;
461
// Two static helpers that each take and return a double.
463 static double getMinProbabilityForSLScore(double score);
464
466 static double getSLThresholdFromMinProbability(double minProbability);
467
469 std::vector<int> getPeptideAmbiguityRanks(const int q, const int p);
470
// Takes one of the BUGFIX_NUM values declared at the top of the class.
471 bool isDataCached(ms_peptidesummary::BUGFIX_NUM bugNum) const;
472
473 protected:
474 // Not safe to copy or assign this object.
// The copy constructor and copy assignment operator are declared in the
// protected section and hidden from SWIG; no definitions appear in this
// listing.
475#ifndef SWIG
476 ms_peptidesummary(const ms_peptidesummary & rhs);
477 ms_peptidesummary & operator=(const ms_peptidesummary & rhs);
478#endif
479
480 private:
481
482 bool rerankingCouldHappen() const;
483
// Shared pointer to the forward-declared internal class
// msparser_internal::ms_peptidesummarybase.
// NOTE(review): std::shared_ptr is declared in <memory>, which is not among
// the standard includes visible at the top of this listing - presumably it
// arrives through another header; confirm.
484 std::shared_ptr<msparser_internal::ms_peptidesummarybase> iPepSum_;
485
// The brace below closes class ms_peptidesummary.
486 }; // end of resfile_group
488} // matrix_science namespace
489
490#endif // MS_MASCOTRESPEPTIDESUM_HPP
491
492/*------------------------------- End of File -------------------------------*/
Abstract base class of ms_mascotresfile_dat and ms_mascotresfile_msr.
Definition: ms_mascotresfilebase.hpp:72
Class which provides constructor parameters for either ms_peptidesummary or ms_proteinsummary.
Definition: ms_mascotresults_params.hpp:32
Abstract class for either ms_peptidesummary or ms_proteinsummary.
Definition: ms_mascotresults.hpp:83
Use this class to get peptide summary results.
Definition: ms_peptidesummary.hpp:51
BUGFIX_NUM
bugNum values for isDataCached().
Definition: ms_peptidesummary.hpp:70
QL_FLAG
Flags for getQueryList().
Definition: ms_peptidesummary.hpp:124
MSPEPSUM
flags2 for ms_peptidesummary introduced in Mascot Parser 2.3.
Definition: ms_peptidesummary.hpp:140
CACHE_STATUS
Returned by the willCreateCache function.
Definition: ms_peptidesummary.hpp:101
This class encapsulates a protein in the mascot results file.
Definition: ms_protein.hpp:57