19#ifndef MS_MASCOTRESPROTEIN_HPP
20#define MS_MASCOTRESPROTEIN_HPP
31namespace msparser_internal {
32 class ms_protein_match_data;
34 class ms_mascotresultsbase;
35 class ms_peptidesumsql;
38namespace matrix_science {
39 class ms_mascotresults;
40 class ms_proteinsummary;
41 class ms_pepinfoSortByScore;
118 MASS_NON_SELECT_NON_MATCH = 0x0001,
119 MASS_SELECT_NON_MATCH = 0x0010,
120 MASS_NON_SELECT_MATCH = 0x0100,
121 MASS_SELECT_MATCH = 0x1000
188 DPF_SEQUENCE = 0x0001,
192 DPF_NODUPSAMEQUERY = 0x0010
// Pair of an int and a std::string.
// NOTE(review): from the alias name, presumably (database index, accession) - confirm.
196 typedef std::pair<int, std::string> dbIdxPlusAcc_t;
// Sequence of dbIdxPlusAcc_t entries; this is the type of supersetProteinsUnsorted_ and components_.
197 typedef std::vector<dbIdxPlusAcc_t> dbIdxPlusAccVect_t;
// std::set of dbIdxPlusAcc_t entries (ordered, no duplicates); this is the type of supersetProteins_.
198 typedef std::set<dbIdxPlusAcc_t> dbIdxPlusAccSet_t;
203 const std::string accession,
204 const bool updateScoreFromPepScores,
205 const int proteinSummaryHit = 0);
// Returns the protein id as a 64-bit integer; const, so the object is not modified.
222 int64_t getProteinId()
const;
// Setter counterpart of getProteinId(); takes the new id by value.
225 void setProteinId(int64_t proteinId);
// Returns the accession by value as a std::string.
228 std::string getAccession()
const;
// Takes a database index for this protein.
// NOTE(review): presumably stores it in the dbIdx_ member that operator< compares - confirm in the .cpp.
234 void setDB(
int dbIdx);
// Returns the protein score as a double.
237 double getScore()
const;
// Returns the non-MudPIT score as a double.
// NOTE(review): presumably backed by nonMudPITScore_ - confirm.
240 double getNonMudpitScore()
const;
// Returns a score variant "with ET".
// NOTE(review): "ET" presumably stands for error tolerant - confirm.
243 double getScoreWithET()
const;
// Returns the number of peptides as an int.
246 int getNumPeptides()
const;
// Returns the number of display peptides; aboveThreshold defaults to false.
// NOTE(review): presumably served from numDisplayPeptides_ / numDisplayPeptidesAboveThresh_ - confirm.
249 int getNumDisplayPeptides(
bool aboveThreshold =
false)
const;
// Returns the GROUP value of this protein (GROUP says whether the protein is similar to
// another, higher scoring protein).
252 GROUP getGrouping()
const;
254#ifndef DOXYGEN_SHOULD_SKIP_THIS
// Overwrites the stored grouping state (group_) with the supplied GROUP value.
256 void setGrouping(GROUP g)
{
    group_ = g;
}
// Returns a std::string representation of this protein for the cache.
// Both vectors are taken by non-const reference.
// NOTE(review): presumably they are filled as outputs - confirm in the .cpp.
259 std::string getForCache(dbIdxPlusAccVect_t & supersetProteinsUnsorted,
260 dbIdxPlusAccVect_t & components)
const;
// Counterpart of getForCache(): takes the cached string, the owning results object,
// the superset/component lists (read-only here) and a cdbFeatures string.
// NOTE(review): the bool result presumably signals success - confirm in the .cpp.
263 bool setFromCache(
const std::string & str, msparser_internal::ms_mascotresultsbase & results,
264 const dbIdxPlusAccVect_t & supersetProteinsUnsorted,
265 const dbIdxPlusAccVect_t & components,
266 const std::string & cdbFeatures);
// Returns the ignored matches as a vector of (int, int) pairs.
// NOTE(review): presumably (query, p/rank) pairs taken from ignoredPeptides_ - confirm.
269 std::vector<std::pair<int, int> > getIgnoredQPs()
const;
// Returns a bool for the given q and p.
// NOTE(review): presumably true when that match is in the ignored list - confirm.
272 bool isIgnoredQP(
const int q,
const int p)
const;
// Overload that reports through two caller-supplied vectors instead of a return value.
// NOTE(review): presumably q[i] / p[i] form parallel arrays - confirm.
276 void getIgnoredQPs(std::vector<int> &q, std::vector<int> &p)
const;
// Returns an int for the peptide identified by pepNumber.
// NOTE(review): presumably the query number of that peptide - confirm.
279 int getPeptideQuery (
const int pepNumber)
const;
// Returns an int for the peptide identified by pepNumber.
// NOTE(review): presumably the 'p' (rank) of that peptide match - confirm.
282 int getPeptideP (
const int pepNumber)
const;
// Takes a (q, p) pair and returns a peptide number.
// NOTE(review): the value returned for an unknown (q, p) is not visible here - check the .cpp.
285 int getPepNumber(
const int q,
const int p)
const;
// The four accessors below share one shape: they take a peptide number plus a
// ms_peptide::PSM selector (ms_peptide::PSM is the "type of data to return from
// accessor methods"), which defaults to ms_peptide::PSM_COMPLETE.

// Returns the frame for the given peptide as an int.
288 int getPeptideFrame (
const int pepNumber,
const ms_peptide::PSM psmComponent = ms_peptide::PSM_COMPLETE)
const;
// Returns the start position for the given peptide as a long.
// NOTE(review): whether positions are 0- or 1-based is not visible here.
291 long getPeptideStart (
const int pepNumber,
const ms_peptide::PSM psmComponent = ms_peptide::PSM_COMPLETE)
const;
// Returns the end position for the given peptide as a long.
294 long getPeptideEnd (
const int pepNumber,
const ms_peptide::PSM psmComponent = ms_peptide::PSM_COMPLETE)
const;
// Returns the multiplicity for the given peptide as a long.
297 long getPeptideMultiplicity (
const int pepNumber,
const ms_peptide::PSM psmComponent = ms_peptide::PSM_COMPLETE)
const;
// Returns the DUPLICATE classification of the given peptide (DUPLICATE indicates, for
// each peptide in the protein, whether it is a duplicate). allowErrTolDuplicate defaults to true.
300 DUPLICATE getPeptideDuplicate (
const int pepNumber,
const bool allowErrTolDuplicate =
true)
const;
// Returns the ions score of the given peptide as a double.
303 double getPeptideIonsScore (
const int pepNumber)
const;
// Returns the "bold" state of the given peptide.
306 bool getPeptideIsBold (
const int pepNumber)
const;
// Takes only a peptide number, no value argument.
// NOTE(review): presumably marks the peptide as bold - confirm.
309 void setPeptideIsBold (
const int pepNumber);
// Returns the "show checkbox" state of the given peptide.
312 bool getPeptideShowCheckbox (
const int pepNumber)
const;
// Takes only a peptide number, no value argument.
// NOTE(review): presumably turns the checkbox state on - confirm.
315 void setPeptideShowCheckbox (
const int pepNumber);
// Returns the component id of the given peptide as an int.
318 int getPeptideComponentID (
const int pepNumber)
const;
// Returns a single char for the residue before the given peptide; the PSM selector
// defaults to ms_peptide::PSM_COMPLETE.
// NOTE(review): the value used when there is no such residue is not visible here.
321 char getPeptideResidueBefore (
const int pepNumber,
const ms_peptide::PSM psmComponent = ms_peptide::PSM_COMPLETE)
const;
// Same as getPeptideResidueBefore() but for the residue after the peptide.
324 char getPeptideResidueAfter (
const int pepNumber,
const ms_peptide::PSM psmComponent = ms_peptide::PSM_COMPLETE)
const;
327 bool isASimilarProtein(
const ms_protein * prot,
329 const bool groupByQueryNumber =
false);
// Returns the name of the similar protein as a std::string.
// NOTE(review): presumably its accession - confirm.
332 std::string getSimilarProteinName()
const;
// Returns an int for the similar protein.
// NOTE(review): presumably its database index - confirm.
335 int getSimilarProteinDB()
const;
// Returns a bool for the protein identified by (acc, dbIdx).
// NOTE(review): presumably true when it is recorded as a similar (superset) protein - confirm.
338 bool isSimilarProtein(
const std::string & acc,
const int dbIdx)
const;
// Reports similar proteins through the two caller-supplied vectors and returns an int.
// NOTE(review): the return value is presumably the number of entries written - confirm.
341 int getSimilarProteins(std::vector<std::string> & accessions, std::vector<int> & dbIdxs)
const;
// Takes a pointer to another protein (read-only) to record as similar to this one.
// NOTE(review): handling of a null pointer is not visible here.
344 void setSimilarProtein(
const ms_protein * prot);
349 const int q,
const int p,
350 const msparser_internal::ms_protein_match_data &proteinMatchData,
351 const double correctedScore,
352 const double uncorrectedScore,
354 const ms_peptide::SEARCH_PHASE searchPhase,
355 const bool isIgnored);
// Returns the coverage as a long.
// NOTE(review): coverage_ is a mutable member, so this is presumably computed lazily - confirm.
359 long getCoverage()
const;
// Returns a bool for the given query number.
// NOTE(review): presumably true if any peptide of this protein comes from that query - confirm.
362 bool anyMatchToQuery(
const int query)
const;
// Same as anyMatchToQuery() but additionally constrained by P.
365 bool anyMatchToQueryAndP(
const int query,
const int P)
const;
369 const int numDecimalPlaces = 2)
const;
374 const unsigned int flags = MASS_SELECT_MATCH,
375 const int numDecimalPlaces = 2)
const;
// Returns the frame number for the protein.
378 int getFrame()
const;
// Returns a bool for the "unigene" state.
// NOTE(review): presumably tests the FL_UNIGENE bit in flags_ - confirm.
384 bool isUnigene()
const;
// Takes no argument.
// NOTE(review): presumably switches the unigene state on - confirm.
387 void setIsUnigeneEntry();
// Returns a bool for the "PMF mixture" state.
// NOTE(review): presumably tests the FL_PMF_MIXTURE bit in flags_ - confirm.
390 bool isPMFMixture()
const;
// Takes no argument.
// NOTE(review): presumably switches the PMF mixture state on - confirm.
393 void setIsPMFMixture();
// Returns a bool corresponding to the updateScoreFromPepScores constructor argument.
// NOTE(review): presumably tests FL_UPDATE_SCORE_FROM_PEP_SCORES - confirm.
396 bool isUpdateScoreFromPepScores()
const;
// Sorts the peptides of this protein using the supplied results object.
// The keepAlive* arguments are all optional (defaults: false, 0, "", 0).
// NOTE(review): presumably used for progress / keep-alive reporting during long sorts - confirm.
// NOTE(review): the sort order is presumably defined by ms_pepinfoSortByScore (a friend class) - confirm.
399 void sortPeptides(
const ms_mascotresults & results,
bool keepAlive =
false,
int keepAlivePercent = 0,
const char * keepAliveAccession =
"",
int keepAliveCount = 0);
// Returns the number of components as an int.
402 int getNumComponents()
const;
// Returns a read-only pointer to the component with the given number.
// NOTE(review): index base and the result for an out-of-range number are not visible here.
405 const ms_protein * getComponent(
const int componentNumber)
const;
// Returns the protein summary hit as an int (the constructor's proteinSummaryHit argument defaults to 0).
408 int getProteinSummaryHit()
const;
// Returns the hit number as an int; see setHitNumber(), which assigns hitNum_.
414 int getHitNumber()
const;
// Stores the supplied hit number in hitNum_, replacing the previous value.
422 void setHitNumber(const int hit)
{
    hitNum_ = hit;
}
// Returns the member number as an int.
// NOTE(review): presumably backed by the mutable memberNum_ - confirm.
425 int getMemberNumber()
const;
// Returns the length of the longest peptide as an int.
428 int getLongestPeptideLen()
const;
// Returns the number of distinct peptides. aboveThreshold defaults to false and
// flags defaults to DPF_SEQUENCE (DISTINCT_PEPTIDE_FLAGS is the enum for this function).
431 int getNumDistinctPeptides(
bool aboveThreshold =
false,
432 DISTINCT_PEPTIDE_FLAGS flags = DPF_SEQUENCE)
const;
435 int getNumDistinctPeptideRepeats(
437 bool aboveThreshold =
false,
438 DISTINCT_PEPTIDE_FLAGS flags = DPF_SEQUENCE)
const;
441 ms_peptide getDistinctPeptide(
444 bool aboveThreshold =
false,
445 DISTINCT_PEPTIDE_FLAGS flags = DPF_SEQUENCE)
const;
// Returns the length of the longest "sig" peptide as an int.
// NOTE(review): presumably "significant", i.e. above threshold (cf. lenLongestPeptideAboveThresh_) - confirm.
448 int getLongestSigPeptideLen()
const;
// Returns the "observed" count used for emPAI as an int.
// NOTE(review): presumably cached in the mutable numObservedForEmPAI_ - confirm.
451 int getNumObservedForEmPAI()
const;
464 if (lhs.dbIdx_ == rhs.dbIdx_) {
465 if ( lhs.proteinSummaryHit_ == 0 ) {
466 return lhs.accession_ < rhs.accession_;
468 if ( lhs.accession_ == rhs.accession_) {
471 return lhs.accession_ < rhs.accession_;
475 return lhs.dbIdx_ < rhs.dbIdx_;
// Exposes the accession as a C string. The pointer is accession_.c_str(), so it
// refers to storage owned by accession_ rather than to a copy.
480 const char * getAccessionStr() const
{
    const char * const rawAccession = accession_.c_str();
    return rawAccession;
}
// Read-only view of supersetProteinIdsUnsorted_; returned by reference, no copy is made.
482 const std::vector<std::pair<int,int64_t>>& getSupersetProteinIds() const
{
    const std::vector<std::pair<int,int64_t>>& supersetIds = supersetProteinIdsUnsorted_;
    return supersetIds;
}
// Read-only view of componentProteinIds_; returned by reference, no copy is made.
483 const std::vector<std::pair<int,int64_t>>& getComponentProteinIds() const
{
    const std::vector<std::pair<int,int64_t>>& componentIds = componentProteinIds_;
    return componentIds;
}
// Returns a std::string rendering of this protein; the stream operator<< for
// ms_protein writes exactly this string to the output stream.
485 std::string stringify()
const;
// Bit masks for the flags_ byte, tested with isFlagSet() and changed with setFlag().
// Bits 0x04 and 0x08 are not assigned by any constant visible here.
491 static constexpr unsigned char FL_LOADED_BASE_INFO_FROM_CACHE = 0x01;
492 static constexpr unsigned char FL_LOADED_QP_FROM_CACHE = 0x02;
// NOTE(review): same value as FL_LOADED_QP_FROM_CACHE (0x02), so the two states cannot be
// told apart through isFlagSet(). Confirm whether this is an intentional alias or whether a
// distinct bit / combined mask was intended.
493 static constexpr unsigned char FL_LOADED_ALL_FROM_CACHE = 0x02;
495 static constexpr unsigned char FL_SORTED = 0x10;
496 static constexpr unsigned char FL_UNIGENE = 0x20;
497 static constexpr unsigned char FL_UPDATE_SCORE_FROM_PEP_SCORES = 0x40;
502 static constexpr unsigned char FL_PMF_MIXTURE = 0x80;
504 void initialiseDistinctPeptideTree(
506 DISTINCT_PEPTIDE_FLAGS flags)
const;
// Returns a pointer to an ms_errs error handler.
// NOTE(review): presumably the handler of the results object held in results_; whether it can be null is not visible here.
508 ms_errs* getErrorHandler()
const;
// Data members. Those marked 'mutable' may be modified from const member functions.

// Pointer lists over PEPINFO records (regular and ignored peptides).
// NOTE(review): presumably these point into allPeptides_ - confirm ownership in the .cpp.
511 mutable std::vector<msparser_internal::PEPINFO *> peptides_;
512 mutable std::vector<msparser_internal::PEPINFO *> ignoredPeptides_;
// PEPINFO records held by value.
513 mutable std::vector<msparser_internal::PEPINFO> allPeptides_;
// Raw pointer to the results object.
// NOTE(review): presumably non-owning - confirm.
515 msparser_internal::ms_mascotresultsbase * results_;
// Bit field combining the FL_* masks; read by isFlagSet() and written by setFlag().
520 unsigned char flags_;
// Counters, all mutable.
// NOTE(review): presumably lazily computed caches for the getNum...() accessors - confirm.
523 mutable int numDisplayPeptides_;
524 mutable int numDisplayPeptidesAboveThresh_;
525 mutable int numDistinctPeptides_;
526 mutable int numDistinctPeptidesAboveThresh_;
527 mutable int numDistinctUniquePeptides_;
528 mutable int numDistinctUniqPepAboveThresh_;
529 mutable int lenLongestPeptideAboveThresh_;
530 mutable int numObservedForEmPAI_;
// State for the distinct-peptide tree.
// NOTE(review): presumably the settings the tree was last built with - confirm.
532 mutable bool distinctPeptideAboveThreshold_;
533 mutable DISTINCT_PEPTIDE_FLAGS distinctPeptideFlags_;
// List of lists of ms_peptide pointers.
// NOTE(review): ownership of the pointed-to peptides is not visible here.
534 mutable std::list<std::list<ms_peptide*> > distinctPeptideTree_;
// Superset proteins, kept both as a std::set and as an unsorted vector.
535 dbIdxPlusAccSet_t supersetProteins_;
536 dbIdxPlusAccVect_t supersetProteinsUnsorted_;
// (int, 64-bit id) pairs exposed by getSupersetProteinIds().
538 std::vector<std::pair<int,int64_t>> supersetProteinIdsUnsorted_;
// Component proteins as dbIdxPlusAcc_t entries.
542 dbIdxPlusAccVect_t components_;
// (int, 64-bit id) pairs exposed by getComponentProteinIds().
544 std::vector<std::pair<int,int64_t>> componentProteinIds_;
// Accession string; getAccessionStr() returns accession_.c_str().
547 std::string accession_;
550 double nonMudPITScore_;
553 int proteinSummaryHit_;
555 mutable int memberNum_;
556 int longestPeptideLen_;
557 mutable long coverage_;
// Fills pointersTo (non-const reference) from pointersFrom (read-only), given the source protein src.
// NOTE(review): presumably rebases pointers from src's storage onto this object's storage when copying - confirm.
567 void copyPeptidePointers(std::vector<msparser_internal::PEPINFO *> &pointersTo,
const std::vector<msparser_internal::PEPINFO *> &pointersFrom,
const ms_protein *src);
// Const check taking the caller's name as a C string.
// NOTE(review): presumably reports an error when base info was loaded from cache (FL_LOADED_BASE_INFO_FROM_CACHE) - confirm.
568 void checkFromCache(
const char * calledBy)
const;
// Same shape as checkFromCache().
// NOTE(review): presumably concerns the (q, p) data loaded from cache (FL_LOADED_QP_FROM_CACHE) - confirm.
569 void checkQPFromCache(
const char * calledBy)
const;
// Reports whether flags_ shares at least one set bit with the mask fl.
// For a multi-bit mask this is an "any bit" test, not an "all bits" test.
570 bool isFlagSet(unsigned char fl) const
{
    return (flags_ & fl) != 0;
}
571 void setFlag(
unsigned char fl,
bool val) {
576 flags_ =
static_cast<unsigned char>(flags_ & ~fl);
// Static helper: takes a string by const reference and returns a bool.
// NOTE(review): what counts as an "empty" variable-modification string is defined in the .cpp - confirm.
580 static bool isVarModStrEmpty(
const std::string &str);
// Classes granted access to the private members of this class.
582 friend class prot_sort;
583 friend class ms_pepinfoSortByScore;
584 friend class msparser_internal::ms_peptidesumsql;
588 class ms_proteinPtrSortByAccession
591 bool operator() (
const ms_protein * p1,
const ms_protein * p2)
const {
596 class ms_proteinPtrSortByScore
599 bool operator() (
const ms_protein * p1,
const ms_protein * p2)
const {
600 if (p1->getScore() != p2->getScore()) {
601 return (p1->getScore() > p2->getScore());
609 class ms_pepinfoSortByScore
612 ms_pepinfoSortByScore(std::pair<bool, bool> pairParam): removeDiffPos_(pairParam.first), anyLibraryMatches_(pairParam.second) { }
613 bool operator() (
const msparser_internal::PEPINFO * p1,
const msparser_internal::PEPINFO * p2)
const;
614 ms_pepinfoSortByScore(
const ms_pepinfoSortByScore& other): removeDiffPos_(other.removeDiffPos_), anyLibraryMatches_(other.anyLibraryMatches_){}
615 ms_pepinfoSortByScore& operator=(
const ms_pepinfoSortByScore& other)
617 if (&other !=
this) {
618 removeDiffPos_ = other.removeDiffPos_;
619 anyLibraryMatches_ = other.anyLibraryMatches_;
626 bool anyLibraryMatches_;
629 inline std::ostream& operator << (std::ostream& out,
const ms_protein& prot)
631 out << prot.stringify();
Abstract base class of ms_mascotresfile_dat and ms_mascotresfile_msr.
Definition: ms_mascotresfilebase.hpp:72
Abstract class for either ms_peptidesummary or ms_proteinsummary.
Definition: ms_mascotresults.hpp:83
PSM
Type of data to return from accessor methods.
Definition: ms_peptide.hpp:98
This class encapsulates a protein in the mascot results file.
Definition: ms_protein.hpp:57
DISTINCT_PEPTIDE_FLAGS
Enum for getNumDistinctPeptides().
Definition: ms_protein.hpp:187
GROUP
Enum to say if a protein is similar to another higher scoring protein.
Definition: ms_protein.hpp:74
@ GROUP_NO
Does not contain the same set (or a subset) of the peptides of another protein. A 'lead' protein.
Definition: ms_protein.hpp:76
@ GROUP_UNKNOWN
No information about grouping.
Definition: ms_protein.hpp:75
@ GROUP_COMPLETE
Contains an identical set of peptides to one or more other proteins.
Definition: ms_protein.hpp:78
@ GROUP_SUBSET
Contains a subset of the peptides in one or more other proteins.
Definition: ms_protein.hpp:77
ms_protein(const double score, const std::string accession, const bool updateScoreFromPepScores, const int proteinSummaryHit=0)
Constructors - used from ms_proteinsummary and ms_peptidesummary.
MASS_FLAGS
Enum for each protein to specify which masses to select.
Definition: ms_protein.hpp:117
int getFrame() const
Returns the frame number for the protein.
Definition: ms_protein.cpp:1403
friend bool operator<(const ms_protein &lhs, const ms_protein &rhs)
Protein objects perform a simple sort of themselves by database ID and then accession.
Definition: ms_protein.hpp:463
DUPLICATE
Enum for each peptide in the protein to indicate whether it is a duplicate.
Definition: ms_protein.hpp:98
@ DUPE_DuplicateSameQuery
Another match for the same query with the same peptide string got a higher score (different mods).
Definition: ms_protein.hpp:101
@ DUPE_Duplicate
Another peptide from a different query with the same sequence as this got a higher score.
Definition: ms_protein.hpp:100
@ DUPE_NotDuplicate
There are no other peptides with the same sequence in this protein - from this query or other queries...
Definition: ms_protein.hpp:99
@ DUPE_HighestScoringDuplicate
There is at least one other peptide the same as this with a lower score.
Definition: ms_protein.hpp:102
Definition: ms_proteinsummary.hpp:45