19#ifndef MS_MASCOTRESPROTEIN_HPP
20#define MS_MASCOTRESPROTEIN_HPP
31namespace msparser_internal {
32 class ms_protein_match_data;
34 class ms_mascotresultsbase;
35 class ms_peptidesumsql;
38namespace matrix_science {
39 class ms_mascotresults;
40 class ms_proteinsummary;
41 class ms_pepinfoSortByScore;
118 MASS_NON_SELECT_NON_MATCH = 0x0001,
119 MASS_SELECT_NON_MATCH = 0x0010,
120 MASS_NON_SELECT_MATCH = 0x0100,
121 MASS_SELECT_MATCH = 0x1000
188 DPF_SEQUENCE = 0x0001,
192 DPF_NODUPSAMEQUERY = 0x0010
196 typedef std::pair<int, std::string> dbIdxPlusAcc_t;
197 typedef std::vector<dbIdxPlusAcc_t> dbIdxPlusAccVect_t;
198 typedef std::set<dbIdxPlusAcc_t> dbIdxPlusAccSet_t;
202 const std::string accession,
203 const bool updateScoreFromPepScores,
204 const int proteinSummaryHit = 0);
229 int64_t getProteinId()
const;
232 void setProteinId(int64_t proteinId);
235 std::string getAccession()
const;
241 void setDB(
int dbIdx);
244 double getScore()
const;
247 double getNonMudpitScore()
const;
250 double getScoreWithET()
const;
253 int getNumPeptides()
const;
256 int getNumDisplayPeptides(
bool aboveThreshold =
false)
const;
259 GROUP getGrouping()
const;
261#ifndef DOXYGEN_SHOULD_SKIP_THIS
// Sets this protein's grouping state by storing g directly in group_.
// No validation or other bookkeeping is performed here.
263 void setGrouping(
GROUP g) { group_ = g; }
266 std::string getForCache(dbIdxPlusAccVect_t & supersetProteinsUnsorted,
267 dbIdxPlusAccVect_t & components)
const;
270 bool setFromCache(
const std::string & str, msparser_internal::ms_mascotresultsbase & results,
271 const dbIdxPlusAccVect_t & supersetProteinsUnsorted,
272 const dbIdxPlusAccVect_t & components,
273 const std::string & cdbFeatures);
276 std::vector<std::pair<int, int> > getIgnoredQPs()
const;
279 bool isIgnoredQP(
const int q,
const int p)
const;
283 void getIgnoredQPs(std::vector<int> &q, std::vector<int> &p)
const;
286 int getPeptideQuery (
const int pepNumber)
const;
289 int getPeptideP (
const int pepNumber)
const;
292 int getPepNumber(
const int q,
const int p)
const;
295 int getPeptideFrame (
const int pepNumber,
const ms_peptide::PSM psmComponent = ms_peptide::PSM_COMPLETE)
const;
298 long getPeptideStart (
const int pepNumber,
const ms_peptide::PSM psmComponent = ms_peptide::PSM_COMPLETE)
const;
301 long getPeptideEnd (
const int pepNumber,
const ms_peptide::PSM psmComponent = ms_peptide::PSM_COMPLETE)
const;
304 long getPeptideMultiplicity (
const int pepNumber,
const ms_peptide::PSM psmComponent = ms_peptide::PSM_COMPLETE)
const;
307 DUPLICATE getPeptideDuplicate (
const int pepNumber,
const bool allowErrTolDuplicate =
true)
const;
310 double getPeptideIonsScore (
const int pepNumber)
const;
313 bool getPeptideIsBold (
const int pepNumber)
const;
316 void setPeptideIsBold (
const int pepNumber);
319 bool getPeptideShowCheckbox (
const int pepNumber)
const;
322 void setPeptideShowCheckbox (
const int pepNumber);
325 int getPeptideComponentID (
const int pepNumber)
const;
328 char getPeptideResidueBefore (
const int pepNumber,
const ms_peptide::PSM psmComponent = ms_peptide::PSM_COMPLETE)
const;
331 char getPeptideResidueAfter (
const int pepNumber,
const ms_peptide::PSM psmComponent = ms_peptide::PSM_COMPLETE)
const;
334 bool isASimilarProtein(
const ms_protein * prot,
336 const bool groupByQueryNumber =
false);
339 std::string getSimilarProteinName()
const;
342 int getSimilarProteinDB()
const;
345 bool isSimilarProtein(
const std::string & acc,
const int dbIdx)
const;
348 int getSimilarProteins(std::vector<std::string> & accessions, std::vector<int> & dbIdxs)
const;
351 void setSimilarProtein(
const ms_protein * prot);
356 const int q,
const int p,
357 const msparser_internal::ms_protein_match_data &proteinMatchData,
358 const double correctedScore,
359 const double uncorrectedScore,
361 const ms_peptide::SEARCH_PHASE searchPhase,
362 const bool isIgnored);
366 long getCoverage()
const;
369 bool anyMatchToQuery(
const int query)
const;
372 bool anyMatchToQueryAndP(
const int query,
const int P)
const;
376 const int numDecimalPlaces = 2)
const;
381 const unsigned int flags = MASS_SELECT_MATCH,
382 const int numDecimalPlaces = 2)
const;
// Returns the frame number for the protein.
385 int getFrame()
const;
391 bool isUnigene()
const;
394 void setIsUnigeneEntry();
397 bool isPMFMixture()
const;
400 void setIsPMFMixture();
403 bool isUpdateScoreFromPepScores()
const;
406 void sortPeptides(
const ms_mascotresults & results,
bool keepAlive =
false,
int keepAlivePercent = 0,
const char * keepAliveAccession =
"",
int keepAliveCount = 0);
409 int getNumComponents()
const;
412 const ms_protein * getComponent(
const int componentNumber)
const;
415 int getProteinSummaryHit()
const;
421 int getHitNumber()
const;
// Records the hit number for this protein by storing hit in hitNum_.
// The value is stored as given; no range check is done here.
429 void setHitNumber(
const int hit) { hitNum_ = hit;}
432 int getMemberNumber()
const;
435 int getLongestPeptideLen()
const;
438 int getNumDistinctPeptides(
bool aboveThreshold =
false,
439 DISTINCT_PEPTIDE_FLAGS flags = DPF_SEQUENCE)
const;
442 int getNumDistinctPeptideRepeats(
444 bool aboveThreshold =
false,
445 DISTINCT_PEPTIDE_FLAGS flags = DPF_SEQUENCE)
const;
448 ms_peptide getDistinctPeptide(
451 bool aboveThreshold =
false,
452 DISTINCT_PEPTIDE_FLAGS flags = DPF_SEQUENCE)
const;
455 int getLongestSigPeptideLen()
const;
458 int getNumObservedForEmPAI()
const;
471 if (lhs.dbIdx_ == rhs.dbIdx_) {
472 if ( lhs.proteinSummaryHit_ == 0 ) {
473 return lhs.accession_ < rhs.accession_;
475 if ( lhs.accession_ == rhs.accession_) {
478 return lhs.accession_ < rhs.accession_;
482 return lhs.dbIdx_ < rhs.dbIdx_;
// Returns the accession as a NUL-terminated C string without copying.
// The pointer comes from accession_.c_str(), so it is only valid while this
// object is alive and accession_ has not been modified.
487 const char * getAccessionStr()
const {
return accession_.c_str(); }
// Read-only access to supersetProteinIdsUnsorted_ (a vector of (int, int64_t) pairs).
// Returns a reference to the member itself, so it must not outlive this object.
// NOTE(review): the pair presumably holds (database index, protein id) - confirm.
489 const std::vector<std::pair<int,int64_t>>& getSupersetProteinIds()
const {
return supersetProteinIdsUnsorted_; }
// Read-only access to componentProteinIds_ (a vector of (int, int64_t) pairs).
// Returns a reference to the member itself, so it must not outlive this object.
// NOTE(review): the pair presumably holds (database index, protein id) - confirm.
490 const std::vector<std::pair<int,int64_t>>& getComponentProteinIds()
const {
return componentProteinIds_; }
492 std::string stringify()
const;
// Single-byte bit masks; presumably the values combined in the unsigned char
// flags_ member and passed to isFlagSet()/setFlag() - confirm against the .cpp.
498 static constexpr unsigned char FL_LOADED_BASE_INFO_FROM_CACHE = 0x01;
499 static constexpr unsigned char FL_LOADED_QP_FROM_CACHE = 0x02;
// NOTE(review): FL_LOADED_ALL_FROM_CACHE has the same value (0x02) as
// FL_LOADED_QP_FROM_CACHE, so a bit test cannot tell the two states apart.
// Confirm whether this aliasing is intentional or should be a distinct bit.
500 static constexpr unsigned char FL_LOADED_ALL_FROM_CACHE = 0x02;
502 static constexpr unsigned char FL_SORTED = 0x10;
503 static constexpr unsigned char FL_UNIGENE = 0x20;
504 static constexpr unsigned char FL_UPDATE_SCORE_FROM_PEP_SCORES = 0x40;
509 static constexpr unsigned char FL_PMF_MIXTURE = 0x80;
511 void initialiseDistinctPeptideTree(
513 DISTINCT_PEPTIDE_FLAGS flags)
const;
515 ms_errs* getErrorHandler()
const;
518 mutable std::vector<msparser_internal::PEPINFO *> peptides_;
519 mutable std::vector<msparser_internal::PEPINFO *> ignoredPeptides_;
520 mutable std::vector<msparser_internal::PEPINFO> allPeptides_;
522 msparser_internal::ms_mascotresultsbase * results_;
527 unsigned char flags_;
530 mutable int numDisplayPeptides_;
531 mutable int numDisplayPeptidesAboveThresh_;
532 mutable int numDistinctPeptides_;
533 mutable int numDistinctPeptidesAboveThresh_;
534 mutable int numDistinctUniquePeptides_;
535 mutable int numDistinctUniqPepAboveThresh_;
536 mutable int lenLongestPeptideAboveThresh_;
537 mutable int numObservedForEmPAI_;
539 mutable bool distinctPeptideAboveThreshold_;
540 mutable DISTINCT_PEPTIDE_FLAGS distinctPeptideFlags_;
541 mutable std::list<std::list<ms_peptide*> > distinctPeptideTree_;
542 dbIdxPlusAccSet_t supersetProteins_;
543 dbIdxPlusAccVect_t supersetProteinsUnsorted_;
545 std::vector<std::pair<int,int64_t>> supersetProteinIdsUnsorted_;
549 dbIdxPlusAccVect_t components_;
551 std::vector<std::pair<int,int64_t>> componentProteinIds_;
554 std::string accession_;
557 double nonMudPITScore_;
560 int proteinSummaryHit_;
562 mutable int memberNum_;
563 int longestPeptideLen_;
564 mutable long coverage_;
574 void copyPeptidePointers(std::vector<msparser_internal::PEPINFO *> &pointersTo,
const std::vector<msparser_internal::PEPINFO *> &pointersFrom,
const ms_protein *src);
575 void checkFromCache(
const char * calledBy)
const;
576 void checkQPFromCache(
const char * calledBy)
const;
// Tests flags_ against the mask fl.
// Returns true when at least one bit of fl is also set in flags_, false otherwise;
// for a multi-bit mask this is an "any bit" test, not an "all bits" test.
577 bool isFlagSet(
unsigned char fl)
const {
return (flags_ & fl)?
true:
false; }
578 void setFlag(
unsigned char fl,
bool val) {
583 flags_ =
static_cast<unsigned char>(flags_ & ~fl);
587 static bool isVarModStrEmpty(
const std::string &str);
589 friend class prot_sort;
590 friend class ms_pepinfoSortByScore;
591 friend class msparser_internal::ms_peptidesumsql;
595 class ms_proteinPtrSortByAccession
598 bool operator() (
const ms_protein * p1,
const ms_protein * p2)
const {
603 class ms_proteinPtrSortByScore
606 bool operator() (
const ms_protein * p1,
const ms_protein * p2)
const {
607 if (p1->getScore() != p2->getScore()) {
608 return (p1->getScore() > p2->getScore());
616 class ms_pepinfoSortByScore
// Builds the comparator from a pair of settings:
// pairParam.first initialises removeDiffPos_, pairParam.second initialises anyLibraryMatches_.
619 ms_pepinfoSortByScore(std::pair<bool, bool> pairParam): removeDiffPos_(pairParam.first), anyLibraryMatches_(pairParam.second) { }
620 bool operator() (
const msparser_internal::PEPINFO * p1,
const msparser_internal::PEPINFO * p2)
const;
// Copy constructor: copies removeDiffPos_ and anyLibraryMatches_ from other.
621 ms_pepinfoSortByScore(
const ms_pepinfoSortByScore& other): removeDiffPos_(other.removeDiffPos_), anyLibraryMatches_(other.anyLibraryMatches_){}
622 ms_pepinfoSortByScore& operator=(
const ms_pepinfoSortByScore& other)
624 if (&other !=
this) {
625 removeDiffPos_ = other.removeDiffPos_;
626 anyLibraryMatches_ = other.anyLibraryMatches_;
633 bool anyLibraryMatches_;
636 inline std::ostream& operator << (std::ostream& out,
const ms_protein& prot)
638 out << prot.stringify();
Abstract base class of ms_mascotresfile_dat and ms_mascotresfile_msr.
Definition: ms_mascotresfilebase.hpp:72
Abstract class for either ms_peptidesummary or ms_proteinsummary.
Definition: ms_mascotresults.hpp:83
PSM
Type of data to return from accessor methods.
Definition: ms_peptide.hpp:98
This class encapsulates a protein in the mascot results file.
Definition: ms_protein.hpp:57
DISTINCT_PEPTIDE_FLAGS
Enum for getNumDistinctPeptides().
Definition: ms_protein.hpp:187
GROUP
Enum to say if a protein is similar to another higher scoring protein.
Definition: ms_protein.hpp:74
@ GROUP_NO
Does not contain the same set (or subset) of peptides as another protein. A 'lead' protein.
Definition: ms_protein.hpp:76
@ GROUP_UNKNOWN
No information about grouping.
Definition: ms_protein.hpp:75
@ GROUP_COMPLETE
Contains an identical set of peptides to one or more other proteins.
Definition: ms_protein.hpp:78
@ GROUP_SUBSET
Contains a subset of peptides in one or more other proteins.
Definition: ms_protein.hpp:77
ms_protein(const double score, const std::string accession, const bool updateScoreFromPepScores, const int proteinSummaryHit=0)
Constructors - used from ms_proteinsummary and ms_peptidesummary.
MASS_FLAGS
enum for each protein to specify what masses to select.
Definition: ms_protein.hpp:117
int getFrame() const
Returns the frame number for the protein.
Definition: ms_protein.cpp:1454
friend bool operator<(const ms_protein &lhs, const ms_protein &rhs)
Protein objects perform a simple sort of themselves by database ID and then accession.
Definition: ms_protein.hpp:470
DUPLICATE
Enum for each peptide in the protein to indicate if it is a duplicate.
Definition: ms_protein.hpp:98
@ DUPE_DuplicateSameQuery
Another match for the same query with the same peptide string got a higher score (different mods).
Definition: ms_protein.hpp:101
@ DUPE_Duplicate
Another peptide from a different query with the same sequence as this got a higher score.
Definition: ms_protein.hpp:100
@ DUPE_NotDuplicate
There are no other peptides with the same sequence in this protein - from this query or other queries...
Definition: ms_protein.hpp:99
@ DUPE_HighestScoringDuplicate
There is at least one other peptide the same as this with a lower score.
Definition: ms_protein.hpp:102
Definition: ms_proteinsummary.hpp:45