Abstract class for either ms_peptidesummary or ms_proteinsummary. More...

#include <ms_mascotresults.hpp>

Inheritance diagram for ms_mascotresults:

Public Types
enum	DB_MATCH_TYPE { DM_FASTA = 0 , DM_SPECTRAL_LIBRARY = 1 , DM_BOTH = 2 , DM_FASTA_FIRST_PASS = 3 , DM_FASTA_SECOND_PASS = 4 }
	Type of matches counted by getNumHitsAboveIdentity() and friends, and type of score for getIonsScoreHistogram(). More...

enum	DECOY_STATS_COUNT_TYPE { DS_COUNT_PSM = 0 , DS_COUNT_SEQUENCE = 1 }
	Type of object counted by getNumHitsAboveIdentity() and friends. More...

enum	FIND_COMPARE_FLAGS { FC_COMPLETESTR = 0x00000001 , FC_SUBSTR = 0x00000002 , FC_STARTSTR = 0x00000003 , FC_STRTOK = 0x00000004 , FC_MASK_STR_PART = 0x0000000F , FC_CASE_INSENSITIVE = 0x00000000 , FC_CASE_SENSITIVE = 0x00000010 , FC_MASK_CASE = 0x000000F0 , FC_FORWARD = 0x00000000 , FC_REVERSE = 0x00000100 , FC_MASK_DIRECTION = 0x00000F00 , FC_RESTRICT_TO_HIT = 0x00001000 , FC_LOOP_INTO_UNASSIGNED = 0x00002000 , FC_LOOP_FROM_UNASSIGNED = 0x00004000 , FC_UNASSIGNED_MASK = 0x00006000 , FC_SEARCH_ALL_RANKS = 0x00008000 , FC_ALL_PEPTIDES = 0x00000000 , FC_ALL_PEPTITDES = 0x00000000 , FC_SIGNIFICANT_PEPTIDES = 0x00010000 , FC_SEARCH_IGNORED_RANKS = 0x00020000 , FC_SCORING_MASK = 0x000F0000 , FC_PROTEIN_IGN_SAMESETS = 0x00100000 , FC_PROTEIN_IGN_SUBSETS = 0x00200000 , FC_PROTEIN_IGN_FAMILY = 0x00400000 , FC_PROTEIN_IGN_MASK = 0x00F00000 , FC_PROTEIN_INC_ALT_ACC = 0x01000000 }
	Flags to specify how comparisons are performed in the find functions. More...

enum	FIND_FLAGS { FT_PEPTIDE_EXP_MZ = 0x00000001 , FT_PEPTIDE_EXP_MR = 0x00000002 , FT_PEPTIDE_CALC_MZ = 0x00000004 , FT_PEPTIDE_CALC_MR = 0x00000008 , FT_PEPTIDE_STRING = 0x00000010 , FT_PEPTIDE_QUERY = 0x00000020 , FT_PEPTIDE_VARMOD = 0x00000040 , FT_PEPTIDE_FIXMOD = 0x00000080 , FT_PEPTIDE_ETMOD = 0x00000100 , FT_PEPTIDE_SLMOD = 0x00000200 , FT_PEPTIDE_VARMOD_BYNAME = 0x00000400 , FT_PEPTIDE_FIND_MASK = 0x00000FFF , FT_PROTEIN_ACCESSION = 0x00001000 , FT_PROTEIN_DESCRIPTION = 0x00002000 }
	Flags for findPeptides() and findProteins(). More...

enum	FLAGS { MSRES_NOFLAG = 0x00000000 , MSRES_GROUP_PROTEINS = 0x00000001 , MSRES_SHOW_SUBSETS = 0x00000002 , MSRES_SUBSETS_DIFF_PROT = 0x00000004 , MSRES_REQUIRE_BOLD_RED = 0x00000008 , MSRES_SHOW_ALL_FROM_ERR_TOL = 0x00000010 , MSRES_IGNORE_PMF_MIXTURE = 0x00000020 , MSRES_MUDPIT_PROTEIN_SCORE = 0x00000040 , MSRES_DECOY = 0x00000080 , MSRES_INTEGRATED_ERR_TOL = 0x00000100 , MSRES_ERR_TOL = 0x00000200 , MSRES_MAXHITS_OVERRIDES_MINPROB = 0x00000400 , MSRES_CLUSTER_PROTEINS = 0x00000800 , MSRES_DUPE_INCL_IN_SCORE_NONE = 0x00000000 , MSRES_DUPE_INCL_IN_SCORE_A = 0x00002000 , MSRES_DUPE_INCL_IN_SCORE_B = 0x00004000 , MSRES_DUPE_INCL_IN_SCORE_C = 0x00008000 , MSRES_DUPE_INCL_IN_SCORE_D = 0x00010000 , MSRES_DUPE_INCL_IN_SCORE_E = 0x00020000 , MSRES_DUPE_INCL_IN_SCORE_F = 0x00040000 , MSRES_DUPE_INCL_IN_SCORE_G = 0x00080000 , MSRES_DUPE_INCL_IN_SCORE_H = 0x00100000 , MSRES_DUPE_INCL_IN_SCORE_I = 0x00200000 , MSRES_DUPE_REMOVE_NONE = 0x00400000 , MSRES_DUPE_REMOVE_A = 0x00800000 , MSRES_DUPE_REMOVE_B = 0x01000000 , MSRES_DUPE_REMOVE_C = 0x02000000 , MSRES_DUPE_REMOVE_D = 0x04000000 , MSRES_DUPE_REMOVE_E = 0x08000000 , MSRES_DUPE_REMOVE_F = 0x10000000 , MSRES_DUPE_REMOVE_G = 0x20000000 , MSRES_DUPE_REMOVE_H = 0x40000000 , MSRES_DUPE_REMOVE_I = 0x80000000 , MSRES_DUPE_DEFAULT = 0x04800000 }
	Flags for the type of results. More...

enum	HOMOLOGY_THRESHOLD_SOURCE { }
	Flags for getHomologyThreshold() More...

enum	IONS_HISTOGRAM { IH_INCLUDE_TOP_MATCHES = 0x0000 , IH_INCLUDE_TOP_10_MATCHES = 0x0001 }
	Flags for getIonsScoreHistogram(). More...

enum	QUANT_COMPONENT_STATUS { QCS_OK , QCS_OK_NO_MATCH , QCS_ERROR_NO_METHOD , QCS_ERROR_NO_COMPONENTS , QCS_ERROR_BAD_COMPONENT_NAME , QCS_OK_MULTIPLE_MATCHES }
	Return codes for getQuantitationComponentForPeptide(). More...

enum	sortBy { QUERY , SCORE , INTENSITY }
	Flags for createUnassignedList(). More...

enum	THRESHOLD_TYPE { TT_HOMOLOGY = 0x0000 , TT_IDENTITY = 0x0001 , TT_PEPSUM_DEFAULT = 0x0002 }
	Flags for getPeptideThreshold() More...

enum	TREE_CLUSTER_METHOD { }
	Flags for getTreeClusterNodes(). More...

enum	UNIQUE_PEP_RULES { UPR_WITHIN_FAMILY = 0x0001 , UPR_WITHIN_FAMILY_MEMBER = 0x0002 , UPR_IGNORE_SUBSET_PROTEINS = 0x0004 , UPR_DEFAULT = (UPR_WITHIN_FAMILY_MEMBER + UPR_IGNORE_SUBSET_PROTEINS) }
	Flags for isPeptideUnique(). More...

Public Member Functions
	ms_mascotresults ()
	ms_mascotresults is an abstract class; use ms_peptidesummary::ms_peptidesummary or ms_proteinsummary::ms_proteinsummary.

virtual bool	anyEmPAI () const
	Return true if any emPAI values are available.

virtual bool	anyNumDiscoveredMods () const
	Return true if modification discovery statistics are available.

void	cancelCreateSummary (bool newValue=true)
	Cancel the call to createSummary()

virtual bool	createSummary ()
	Create the summary after the ms_mascotresults object has been created.

bool	createUnassignedList (sortBy s=QUERY)
	Must be called before the list of unassigned peptides can be accessed.

virtual int	findPeptides (const int startHit, const std::string &str, FIND_FLAGS item, FIND_COMPARE_FLAGS compareFlags, std::vector< int > &q, std::vector< int > &p) const =0
	Find the next hit that contains peptides with the specified attribute.

virtual int	findProteins (const int startHit, const std::string &str, const int dbIdx, FIND_FLAGS item, FIND_COMPARE_FLAGS compareFlags, std::vector< std::string > &accessions, std::vector< int > &dbIndexes) const =0
	Find the next hit that contains proteins with the specified attributes.

virtual int	findProteinsByAccession (const int startHit, const std::string &str, const int dbIdx, FIND_COMPARE_FLAGS compareFlags, std::vector< std::string > &accessions, std::vector< int > &dbIndexes) const =0
	Find the next hit that contains proteins with the specified accession.

virtual int	findProteinsByDescription (const int startHit, const std::string &str, FIND_COMPARE_FLAGS compareFlags, std::vector< std::string > &accessions, std::vector< int > &dbIndexes) const =0
	Find the next hit that contains proteins with the specified description.

virtual void	freeHit (const int hit)
	Frees any memory associated with the passed hit number.

virtual int	getAllFamilyMembersWithThisPepMatch (const int hit, const int q, const int p, std::vector< int > &db, std::vector< std::string > &acc, std::vector< int > &dupe_status) const =0
	Return a list of (top level) family proteins that have a match to the specified q and p.

virtual std::vector< std::string >	getAllProteinsWithThisPepMatch (const int q, const int p, std::vector< int > &start, std::vector< int > &end, std::vector< std::string > &pre, std::vector< std::string > &post, std::vector< int > &frame, std::vector< int > &multiplicity, std::vector< int > &db) const =0
	Return a complete list of proteins that contain this same peptide match.

virtual std::vector< std::string >	getAllProteinsWithThisPepMatch (const int q, const int p, std::vector< int > &start, std::vector< int > &end, std::vector< std::string > &pre, std::vector< std::string > &post, std::vector< int > &frame, std::vector< int > &multiplicity, std::vector< int > &db, std::vector< int > &psmComponent) const =0
	Return a complete list of proteins that contain this same peptide match.

virtual int	getAvePeptideIdentityThreshold (double OneInXprobRnd, DB_MATCH_TYPE dbType=DM_FASTA) const
	Return the average threshold value for all MS-MS data sets.

virtual bool	getComponentIntensity (const int q, const int p, const std::string &componentName, double &value, double &rawValue) const =0
	Returns the component intensity for reporter or multiplex methods.

virtual const ms_protein *	getComponentProtein (const char *accession, const int dbIdx=1) const
	Return a pointer to the protein entry given an accession.

virtual std::string	getComponentString (const int q, const int p, const ms_peptide::PSM psmComponent=ms_peptide::PSM_COMPLETE) const =0
	Return `q1_p2_comp` string value; for `h1_q2` this string is always empty.

bool	getCreateSummaryProgress (int *cspTotalPercentComplete, unsigned int *cspCurrTask, int *cspCurrTaskPercentageComplete, std::string *cspAccession, int *cspHit, int *cspQuery, std::string *cspKeepAliveText) const
	Return progress for the createSummary() call.

virtual std::vector< double >	getDiscoveredErrTolModDeltas (std::vector< std::string > *vecDeltaStrings=NULL) const
	Return the deltas of all error tolerant modifications discovered in this search.

virtual std::vector< std::string >	getDiscoveredErrTolModNames () const
	Return the names of all error tolerant modifications discovered in this search.

virtual std::vector< double >	getDiscoveredLocalModDeltas (std::vector< std::string > *vecDeltaStrings=NULL) const
	Return the deltas of all query-level modifications discovered in this search.

virtual std::vector< std::string >	getDiscoveredLocalModNames () const
	Return the names of all query-level modifications discovered in this search.

virtual double	getErrTolModDelta (const int q, const int p, std::string *modString=NULL, std::string *deltaAsString=NULL) const
	Return the error tolerant mod delta from `h1_q2_et_mods` or `q1_p1_et_mods`.

virtual std::string	getErrTolModifiedNaSeq (const int q, const int p) const
	Return the modified sequence of nucleic acids after a single base modification in an error tolerant search.

virtual std::vector< double >	getErrTolModMasterNeutralLoss (const int q, const int p) const
	Return the error tolerant mod additional primary neutral losses from `h1_q2_et_mods_master` or `q1_p1_et_mods_master`.

virtual std::string	getErrTolModMasterString (const int q, const int p) const =0
	Return the error tolerant mod primary neutral loss string from `h1_q2_et_mods_master` or `q1_p1_et_mods_master`.

virtual std::string	getErrTolModName (const int q, const int p, std::string *modString=NULL) const
	Return the error tolerant mod name from `h1_q2_et_mods` or `q1_p1_et_mods`.

virtual double	getErrTolModNeutralLoss (const int q, const int p) const
	Return the error tolerant mod neutral loss from `h1_q2_et_mods` or `q1_p1_et_mods`.

virtual std::vector< double >	getErrTolModPepNeutralLoss (const int q, const int p) const
	Return the error tolerant mod peptide neutral losses from `h1_q2_et_mods_pep` or `q1_p1_et_mods_pep`.

virtual std::string	getErrTolModPepString (const int q, const int p) const =0
	Return the error tolerant mod peptide neutral loss string from `h1_q2_et_mods_pep` or `q1_p1_et_mods_pep`.

virtual std::vector< double >	getErrTolModReqPepNeutralLoss (const int q, const int p) const
	Return the error tolerant mod peptide neutral losses from `h1_q2_et_mods_reqpep` or `q1_p1_et_mods_reqpep`.

virtual std::string	getErrTolModReqPepString (const int q, const int p) const =0
	Return the error tolerant mod required peptide neutral loss string from `h1_q2_et_mods_reqpep` or `q1_p1_et_mods_reqpep`.

virtual std::vector< double >	getErrTolModSlaveNeutralLoss (const int q, const int p) const
	Return the error tolerant mod slave neutral losses from `h1_q2_et_mods_slave` or `q1_p1_et_mods_slave`.

virtual std::string	getErrTolModSlaveString (const int q, const int p) const =0
	Return the error tolerant mod slave neutral loss string from `h1_q2_et_mods_slave` or `q1_p1_et_mods_slave`.

virtual std::string	getErrTolModString (const int q, const int p) const =0
	Return the complete error tolerant mod string from `h1_q2_et_mods` or `q1_p1_et_mods`.

virtual std::string	getErrTolOriginalNaSeq (const int q, const int p) const
	Return the original sequence of nucleic acids before a single base modification in an error tolerant search.

virtual double	getErrTolProbabilityThreshold () const

unsigned int	getFlags () const
	Returns the flags value passed to the constructor.

unsigned int	getFlags2 () const
	Return the flags2 value passed to the ms_peptidesummary constructor.

virtual ms_protein *	getHit (const int hit, const int memberNumber=0) const
	Return the ms_protein hit - returns null if `hit` > number of hits.

virtual int	getHomologyThreshold (const int query, double OneInXprobRnd, const int rank=1) const
	Returns the 'homology' threshold.

virtual int	getHomologyThresholdForHistogram (double OneInXprobRnd, DB_MATCH_TYPE dbType=DM_FASTA) const
	Returns the value for the 'yellow section' in the histogram.

virtual double	getIonsScore (const int q, const int p, const bool decoy) const =0
	Returns an ions score quickly without having to load an ms_peptide object.

virtual std::vector< int >	getIonsScoreHistogram (IONS_HISTOGRAM flags=IH_INCLUDE_TOP_MATCHES, DB_MATCH_TYPE dbType=DM_FASTA) const
	Returns a list of counts for binned ions scores.

virtual std::string	getLibraryModString (const int q, const int p) const =0
	Return the modification string of the spectral library match from `q1_p1_SLmod`.

virtual int	getMaxPeptideIdentityThreshold (double OneInXprobRnd, DB_MATCH_TYPE dbType=DM_FASTA) const
	Return the max threshold value for all MS-MS data sets.

virtual int	getMaxRankValue () const
	Returns the maximum 'rank' or 'hit' or 'p' value.

int	getMinPepLenInPepSummary () const
	Peptides shorter than this are ignored when putting proteins into groups.

virtual ms_protein *	getNextFamilyProtein (const int masterHit, const int id) const
	Find the next protein in the family `masterHit`.

virtual ms_protein *	getNextSimilarProtein (const int masterHit, const int id) const
	Return the next protein that contains all the peptides in the 'master' protein.

virtual ms_protein *	getNextSimilarProteinOf (const char *masterAccession, const int masterDB, const int id) const
	Return the next protein that contains all the peptides in the 'master' protein.

virtual ms_protein *	getNextSubsetProtein (const int masterHit, const int id, const bool searchWholeFamily=true) const
	Return the next protein that contains some of the peptides in the 'master' protein.

virtual ms_protein *	getNextSubsetProteinOf (const char *masterAccession, const int masterDB, const int id) const
	Return the next protein that contains some of the peptides in the 'master' protein.

virtual int	getNumberOfFamilyMembers () const
	Return the total number of family members.

virtual int	getNumberOfHits () const
	Returns the number of hits in the results.

int	getNumberOfUnassigned () const
	Return the number of peptides in the unassigned list.

virtual long	getNumDecoyHitsAboveHomology (double OneInXprobRnd, DECOY_STATS_COUNT_TYPE countType=DS_COUNT_PSM, DB_MATCH_TYPE dbType=DM_FASTA)
	Return the number of hits from the decoy search with a score at or above the homology threshold.

virtual long	getNumDecoyHitsAboveIdentity (double OneInXprobRnd, DECOY_STATS_COUNT_TYPE countType=DS_COUNT_PSM, DB_MATCH_TYPE dbType=DM_FASTA)
	Return the number of hits from the decoy search with a score at or above the identity threshold.

virtual std::vector< int >	getNumDiscoveredErrTolMods (const std::string modName, std::vector< std::string > &positions, std::vector< std::string > &sites) const
	Return the count of error tolerant modifications discovered in this search.

virtual std::vector< int >	getNumDiscoveredFixedMods (const int num, std::vector< std::string > &positions, std::vector< std::string > &sites) const
	Return the count of fixed modifications discovered in this search.

std::vector< int >	getNumDiscoveredIntactLinks (std::vector< int > &varModNum1, std::vector< std::string > &position1, std::vector< std::string > &site1, std::vector< int > &varModNum2, std::vector< std::string > &position2, std::vector< std::string > &site2, std::vector< int > &numLinkedPeptides) const
	Return the count of intact crosslinks in this search.

std::vector< int >	getNumDiscoveredLibraryMods (const int modId, std::vector< std::string > &positions, std::vector< std::string > &sites) const
	Return the count of library modifications discovered in this search.

virtual std::vector< int >	getNumDiscoveredLocalMods (const std::string modName, std::vector< std::string > &positions, std::vector< std::string > &sites) const
	Return the count of query-level modifications discovered in this search.

virtual int	getNumDiscoveredNonSpecCleavage () const
	Return the count of non-specific cleavages in an error tolerant search.

virtual std::vector< int >	getNumDiscoveredVariableMods (const int num, std::vector< std::string > &positions, std::vector< std::string > &sites) const
	Return the count of variable modifications discovered in this search.

virtual std::vector< int >	getNumDiscoveredVariableMods (const int num, std::vector< std::string > &positions, std::vector< std::string > &sites, std::vector< double > &deltas, std::vector< std::string > &names) const
	Return the count of variable modifications discovered in this search.

virtual long	getNumHitsAboveHomology (double OneInXprobRnd, DECOY_STATS_COUNT_TYPE countType=DS_COUNT_PSM, DB_MATCH_TYPE dbType=DM_FASTA)
	Return the number of hits with a score at or above the homology threshold.

virtual long	getNumHitsAboveIdentity (double OneInXprobRnd, DECOY_STATS_COUNT_TYPE countType=DS_COUNT_PSM, DB_MATCH_TYPE dbType=DM_FASTA)
	Return the number of hits with a score at or above the identity threshold.

virtual std::vector< int >	getPepsWithSameScore (const int q, const int p) const =0
	Returns a list of 'p' values for peptides with the same score.

virtual ms_peptide	getPeptide (const int q, const int p) const =0
	Return the ms_peptide object given the query and either the rank (ms_peptidesummary) or the hit (ms_proteinsummary).

virtual bool	getPeptide (const int q, const int p, ms_peptide *&pep) const =0
	Return the ms_peptide object given the query and either the rank (ms_peptidesummary) or the hit (ms_proteinsummary).

virtual double	getPeptideExpectationValue (const double score, const int query, const int rank=0) const
	Returns the expectation value for the given peptide score and query.

virtual int	getPeptideIdentityThreshold (const int query, double OneInXprobRnd, DB_MATCH_TYPE dbType=DM_FASTA) const
	Return the threshold value for this ms-ms data being a random match.

double	getPeptideThreshold (const int query, double OneInXprobRnd, const int rank=1, const THRESHOLD_TYPE thresholdType=TT_PEPSUM_DEFAULT) const
	Return either the identity or the homology threshold.

virtual double	getProbabilityThreshold () const
	Return the minProbability value passed to the ms_mascotresults::ms_mascotresults constructor.

virtual int	getProbFromScore (const double score) const
	Returns a probability value given a score.

virtual double	getProbOfPepBeingRandomMatch (const double score, const int query) const

virtual double	getProbOfProteinBeingRandomMatch (const double score) const

virtual const ms_protein *	getProtein (const char *accession, const int dbIdx=1) const
	Return a pointer to the protein entry given an accession.

std::string	getProteinDescription (const char *accession, const int dbIdx=1) const
	Return protein description if available.

virtual double	getProteinEmPAI (const char *accession, const int dbIdx=1, const int length=-1) const
	Return protein emPAI if available.

virtual double	getProteinExpectationValue (const double score) const
	Returns the expectation value for the given protein score.

double	getProteinMass (const char *accession, const int dbIdx=1) const
	Return protein mass if available.

virtual int	getProteinScoreCutoff (double OneInXprobRnd) const =0
	Return the 'protein' score value for cutting off results (different for peptide and protein summary).

virtual double	getProteinScoreForHistogram (const int num) const
	Returns scores for the top 50 proteins, even if there are fewer in the peptidesummary or proteinsummary.

virtual std::string	getProteinsWithThisPepMatch (const int q, const int p, const bool quotes=false)=0
	Return a partial list of proteins that matched the same peptide.

void	getProteinTaxonomyIDs (const char *accession, const int dbIdx, std::vector< int > &gpt_ids, std::vector< std::string > &gpt_accessions) const
	Return the taxonomy ID(s), if any, from the results file.

virtual int	getProteinThreshold (double OneInXprobRnd) const
	Return a threshold value for the protein summary report.

virtual int	getQmatch (const int query) const
	[Deprecated] Return the number of peptides with masses that matched this query.

virtual int	getQmatch (const int query, const ms_mascotresfile_dat::section sec) const
	[Deprecated] Use ms_mascotresfilebase::getQmatch().

virtual QUANT_COMPONENT_STATUS	getQuantitationComponentForPeptide (const matrix_science::ms_peptide &peptide, matrix_science::ms_quant_component &component, const matrix_science::ms_quant_method *method=NULL) const =0
	Get the component in the quantitation method that matches the peptide.

virtual std::string	getReadableVarMods (const int q, const int p, const int numDecimalPlaces=2, const ms_peptide::PSM psmComponent=ms_peptide::PSM_COMPLETE) const
	Return a 'human readable' string with the variable, summed and error tolerant mods.

const ms_mascotresfilebase &	getResfile () const
	Return a reference to the resfile.

ms_mascotresults_params	getResultsParameters () const
	Returns an ms_mascotresults_params object containing the parameter and flag settings used to generate the report.

double	getSequenceMass (const char *seq) const
	Return the mass of a sequence (protein or peptide).

virtual int	getTagDeltaRangeEnd (const int q, const int p) const =0
	Return the second number from `h1_q2_drange=0,256`.

virtual int	getTagDeltaRangeStart (const int q, const int p) const =0
	Return the first number from `h1_q2_drange=0,256`.

virtual int	getTagEnd (const int q, const int p, const int tagNumber) const
	Return the end position for the tag-match from `h1_q2_tag` or `q1_p1_tag`.

virtual int	getTagSeries (const int q, const int p, const int tagNumber) const
	Return the series ID for the tag-match from `h1_q2_tag` or `q1_p1_tag`.

virtual int	getTagStart (const int q, const int p, const int tagNumber) const
	Return the start position for the tag-match from `h1_q2_tag` or `q1_p1_tag`.

virtual std::string	getTagString (const int q, const int p) const =0
	Return the complete tag string from `h1_q2_tag` or `q1_p1_tag`.

virtual std::string	getTerminalResiduesString (const int q, const int p, const ms_peptide::PSM psmComponent=ms_peptide::PSM_COMPLETE) const =0
	Return the complete terminal residue string from `h1_q1_terms` or `q1_p1_terms`.

bool	getThresholdForFDRAboveHomology (double targetFDR, DECOY_STATS_COUNT_TYPE countType, DB_MATCH_TYPE dbType, double *closestFDR, double *minProbability, int *numTargetMatches=0, int *numDecoyMatches=0)
	Given a target FDR for PSMs or sequences, return the probability threshold that gives the desired FDR using the homology threshold.

bool	getThresholdForFDRAboveHomology (double targetFDR, double *closestFDR, double *minProbability, int *numTargetMatches=0, int *numDecoyMatches=0)
	Given a target FDR, return the probability threshold that gives the desired FDR using the homology threshold.

bool	getThresholdForFDRAboveIdentity (double targetFDR, DECOY_STATS_COUNT_TYPE countType, DB_MATCH_TYPE dbType, double *closestFDR, double *minProbability, int *numTargetMatches=0, int *numDecoyMatches=0)
	Given a target FDR for PSMs or sequences, return the probability threshold that gives the desired FDR using the identity threshold.

bool	getThresholdForFDRAboveIdentity (double targetFDR, double *closestFDR, double *minProbability, int *numTargetMatches=0, int *numDecoyMatches=0)
	Given a target FDR, return the probability threshold that gives the desired FDR using the identity threshold.

double	getToleranceInDalton (bool &needMass, const double *const pMass=NULL) const
	Returns the tolerance in dalton, and whether a mass is needed if the unit is in % or ppm.

virtual bool	getTreeClusterNodes (const int hit, std::vector< int > &left, std::vector< int > &right, std::vector< double > &distance, TREE_CLUSTER_METHOD tcm=TCM_PAIRWISE_MAXIMUM, double **reserved1=0, unsigned int reserved2=0) const
	Return distances and structure suitable for a dendrogram plot.

ms_peptide	getUnassigned (const int num) const
	Need to call createUnassignedList() before calling this.

bool	getUnassignedIsBold (const int num) const
	Returns true if the item indexed by num in the unassigned list should be bold.

bool	getUnassignedShowCheckbox (const int num) const
	Returns true if the item indexed by num in the unassigned list should have a check box next to it.

bool	isEmPAIallowed () const
	Return true if emPAI could be calculated using data in this results file.

bool	isNA () const
	Returns TRUE for a search against a nucleic acid database.

virtual bool	isPeptideUnique (const int q, const int p, const UNIQUE_PEP_RULES rules=UPR_DEFAULT) const =0
	Returns true if this peptide match is unique to one protein or one protein family.

virtual void	setSubsetsThreshold (const double scoreFraction)
	Specifies which subset proteins should be reported.

Protected Member Functions
double	getIonsScoreCorrected (const double ionsScore, const long multiplicity) const

Detailed Description

Abstract class for either ms_peptidesummary or ms_proteinsummary.

The following functions provide threshold values:

Examples: peptide_list.cpp and resfile_summary.cpp.

Member Enumeration Documentation

◆ DB_MATCH_TYPE

enum DB_MATCH_TYPE

Type of matches counted by getNumHitsAboveIdentity() and friends, and type of score for getIonsScoreHistogram().

Enumerator
DM_FASTA	Mascot matches to FASTA sequences. This is the default.
DM_SPECTRAL_LIBRARY	Matches to spectral library entries. This was introduced in Parser 2.6.
DM_BOTH	Only valid for count of matches (getNumHitsAboveIdentity() and friends): the combined count for FASTA and spectral library entries.
DM_FASTA_FIRST_PASS	Mascot matches to FASTA sequences, first pass only.
DM_FASTA_SECOND_PASS	Mascot matches to FASTA sequences, second pass only (e.g. error tolerant).

◆ DECOY_STATS_COUNT_TYPE

enum DECOY_STATS_COUNT_TYPE

Type of object counted by getNumHitsAboveIdentity() and friends.

Enumerator
DS_COUNT_PSM	Count peptide-spectrum matches (PSMs). This is the default.
DS_COUNT_SEQUENCE	Count distinct peptide sequences. This was introduced in Parser 2.6.

◆ FIND_COMPARE_FLAGS

enum FIND_COMPARE_FLAGS

Flags to specify how comparisons are performed in the find functions.

See Using enumerated values and static const ints in Perl, Java, Python and C#.

These flags are used in findProteins(), findPeptides() and the deprecated functions: findProteinsByAccession() and findProteinsByDescription(). The flags are used to specify how the comparison is performed and whether it should be a forward or reverse search. Typically, three values will be combined together, using an OR operator; however, default values do not need to be explicitly specified.

Choose 1 of: FC_COMPLETESTR, FC_SUBSTR, FC_STARTSTR, FC_STRTOK
Choose 1 of: FC_CASE_INSENSITIVE, FC_CASE_SENSITIVE
Choose 1 of: FC_FORWARD, FC_REVERSE
Optionally choose FC_RESTRICT_TO_HIT or (FC_LOOP_INTO_UNASSIGNED and/or FC_LOOP_FROM_UNASSIGNED)
Optionally choose FC_SEARCH_ALL_RANKS
Optionally choose FC_ALL_PEPTIDES or FC_SIGNIFICANT_PEPTIDES
Optionally choose one or more of the FC_PROTEIN_IGN_ flags when calling findProteins()

Enumerator
FC_COMPLETESTR	Search for the complete string. If search item is a decimal number then match the number exactly.
FC_SUBSTR	Search for any substring. Not valid if search item is a decimal number.
FC_STARTSTR	String must match to start of target string. If search item is a decimal number then match to the same decimal precision of the filter value but rounded. For example a filter value of 1234.56 will return items with values in the range (1234.555 <= value < 1234.565).
FC_STRTOK	Supplied string is a set of tokens, for example "STY" could be used to search for S or T or Y in a peptide sequence.
FC_MASK_STR_PART	Bit mask to extract which one of FC_COMPLETESTR, FC_SUBSTR, FC_STARTSTR, FC_STRTOK has been specified.
FC_CASE_INSENSITIVE	Case insensitive search (default).
FC_CASE_SENSITIVE	Case sensitive search.
FC_MASK_CASE	Bit mask to extract which one of FC_CASE_INSENSITIVE, FC_CASE_SENSITIVE has been specified.
FC_FORWARD	Forward search. The returned hit number will be the same as or higher than the start hit number. (default).
FC_REVERSE	Reverse search. The returned hit number will be the same as or lower than the start hit number.
FC_MASK_DIRECTION	Bit mask to extract which one of FC_FORWARD, FC_REVERSE has been specified.
FC_RESTRICT_TO_HIT	Don't search beyond the specified hit number. Cannot be used with FC_LOOP_INTO_UNASSIGNED or FC_LOOP_FROM_UNASSIGNED.
FC_LOOP_INTO_UNASSIGNED	If no matches are found in the passed hit, or any subsequent hit, then search the unassigned list.
FC_LOOP_FROM_UNASSIGNED	If the passed hit number is 0, and no match is found in the unassigned list, then start searching at 1 if FC_FORWARD is specified or start searching at 'numHits' if FC_REVERSE is specified.
FC_UNASSIGNED_MASK	Bit mask to extract FC_LOOP_INTO_UNASSIGNED or FC_LOOP_FROM_UNASSIGNED.
FC_SEARCH_ALL_RANKS	For use with findPeptides() only. Ordinarily only those queries and ranks are searched that are assigned to a protein hit. Use this flag to search all ranks in such queries instead.
FC_ALL_PEPTIDES	Search all peptides regardless of score (default).
FC_ALL_PEPTITDES	Deprecated: Unfortunate spelling error, please use FC_ALL_PEPTIDES which has an identical value.
FC_SIGNIFICANT_PEPTIDES	Only search peptides above identity or homology threshold.
FC_SEARCH_IGNORED_RANKS	In integrated library mode, also search peptides that are part of a family member's list of ignored peptides.
FC_SCORING_MASK	Bit mask to extract FC_ALL_PEPTIDES or FC_SIGNIFICANT_PEPTIDES.
FC_PROTEIN_IGN_SAMESETS	Ignore sameset proteins - only used for findProteins()
FC_PROTEIN_IGN_SUBSETS	Ignore subset proteins - only used for findProteins()
FC_PROTEIN_IGN_FAMILY	Ignore family member proteins - only used for findProteins()
FC_PROTEIN_IGN_MASK	Ignore proteins flags.
FC_PROTEIN_INC_ALT_ACC	For use with findProteins() only. If protein entries in the database are representative (i.e. contain multiple accessions) and those accessions are saved in the results file (Mascot 2.4 or later), setting this flag will search all accessions of each protein entry, not just the first accession.

◆ FIND_FLAGS

enum FIND_FLAGS

Flags for findPeptides() and findProteins().

Details what needs to be searched for. Any of the FT_PEPTIDE_ flags may be used for either findPeptides() or findProteins(), but the FT_PROTEIN flags may only be used for findProteins()

See Using enumerated values and static const ints in Perl, Java, Python and C#.

Enumerator
FT_PEPTIDE_EXP_MZ	Find an experimental m/z value. Refer to FIND_COMPARE_FLAGS for comparison behaviour of decimal numbers.
FT_PEPTIDE_EXP_MR	Find an experimental relative mass. Refer to FIND_COMPARE_FLAGS for comparison behaviour of decimal numbers.
FT_PEPTIDE_CALC_MZ	Find a calculated m/z value. Refer to FIND_COMPARE_FLAGS for comparison behaviour of decimal numbers.
FT_PEPTIDE_CALC_MR	Find a calculated relative mass. Refer to FIND_COMPARE_FLAGS for comparison behaviour of decimal numbers.
FT_PEPTIDE_STRING	Find a peptide string.
FT_PEPTIDE_QUERY	Find a query number.
FT_PEPTIDE_VARMOD	Find a variable modification. Specify the modification 'number' as the string.
FT_PEPTIDE_FIXMOD	Find a fixed modification. Specify the modification 'number' as the string.
FT_PEPTIDE_ETMOD	Find an error tolerant modification. Specify a delta mass as the string.
FT_PEPTIDE_SLMOD	Find a spectral library modification. Specify any substring of the mod name as the string.
FT_PEPTIDE_VARMOD_BYNAME	Find a variable or query level modification. Specify any substring of the mod name as the string.
FT_PEPTIDE_FIND_MASK	Bit mask for any of the peptide values to be found.
FT_PROTEIN_ACCESSION	Find an accession - findProteins() only.
FT_PROTEIN_DESCRIPTION	Find a description - findProteins() only.

◆ FLAGS

enum FLAGS

Flags for the type of results.

See Using enumerated values and static const ints in Perl, Java, Python and C#.

Not all of the flags are applicable to a protein summary (e.g. MSRES_REQUIRE_BOLD_RED); see ms_proteinsummary.

Enumerator
MSRES_NOFLAG	Does nothing.
MSRES_GROUP_PROTEINS	Group proteins with same peptide matches. See Grouping proteins together.
MSRES_SHOW_SUBSETS	Show proteins that only match a subset of peptides. See Grouping proteins together.
MSRES_SUBSETS_DIFF_PROT	Proteins that contain a subset of peptides are treated as a unique protein. See Grouping proteins together.
MSRES_REQUIRE_BOLD_RED	Only proteins that have a top scoring peptide not seen before will be returned.
MSRES_SHOW_ALL_FROM_ERR_TOL	If this flag is set, then all hits from error tolerant search are shown. See Error tolerant searches.
MSRES_IGNORE_PMF_MIXTURE	If this flag is set, then PMF mixtures are ignored. See Peptide mass fingerprint mixtures.
MSRES_MUDPIT_PROTEIN_SCORE	Protein scoring for the peptide summary was changed at Mascot 2.0 for large (MudPIT) searches. See ms_protein::getScore().
MSRES_DECOY	If this flag is set, then use the results from searching against the decoy database. See Target-decoy searches and false discovery rate.
MSRES_INTEGRATED_ERR_TOL	If this flag is set, then create a ms_peptidesummary object that contains results from the summary and et_summary section. See Error tolerant searches.
MSRES_ERR_TOL	If this flag is set, then create a ms_peptidesummary object that contains results from the et_summary section. See Error tolerant searches.
MSRES_MAXHITS_OVERRIDES_MINPROB	If minProbability and maxHitsToReport are both non zero, then minProbability is ignored when determining the number of proteins to be displayed. See ms_mascotresults::ms_mascotresults.
MSRES_CLUSTER_PROTEINS	Protein clustering introduced in Mascot 2.3. See Using MSRES_CLUSTER_PROTEINS.
MSRES_DUPE_INCL_IN_SCORE_NONE	See Treatment of duplicate peptides.
MSRES_DUPE_INCL_IN_SCORE_A	See Treatment of duplicate peptides.
MSRES_DUPE_INCL_IN_SCORE_B	See Treatment of duplicate peptides.
MSRES_DUPE_INCL_IN_SCORE_C	See Treatment of duplicate peptides.
MSRES_DUPE_INCL_IN_SCORE_D	See Treatment of duplicate peptides.
MSRES_DUPE_INCL_IN_SCORE_E	See Treatment of duplicate peptides.
MSRES_DUPE_INCL_IN_SCORE_F	See Treatment of duplicate peptides.
MSRES_DUPE_INCL_IN_SCORE_G	See Treatment of duplicate peptides.
MSRES_DUPE_INCL_IN_SCORE_H	See Treatment of duplicate peptides.
MSRES_DUPE_INCL_IN_SCORE_I	See Treatment of duplicate peptides.
MSRES_DUPE_REMOVE_NONE	See Treatment of duplicate peptides.
MSRES_DUPE_REMOVE_A	See Treatment of duplicate peptides.
MSRES_DUPE_REMOVE_B	See Treatment of duplicate peptides.
MSRES_DUPE_REMOVE_C	See Treatment of duplicate peptides.
MSRES_DUPE_REMOVE_D	See Treatment of duplicate peptides.
MSRES_DUPE_REMOVE_E	See Treatment of duplicate peptides.
MSRES_DUPE_REMOVE_F	See Treatment of duplicate peptides.
MSRES_DUPE_REMOVE_G	See Treatment of duplicate peptides.
MSRES_DUPE_REMOVE_H	See Treatment of duplicate peptides.
MSRES_DUPE_REMOVE_I	See Treatment of duplicate peptides.
MSRES_DUPE_REMOVE_I	See Treatment of duplicate peptides.
MSRES_DUPE_DEFAULT	Default parameter for treatment of duplicates. See Treatment of duplicate peptides.

◆ HOMOLOGY_THRESHOLD_SOURCE

enum HOMOLOGY_THRESHOLD_SOURCE

Flags for getHomologyThreshold()

The method getHomologyThreshold() accepts either a rank argument or one of the special enumerated values listed here. For backwards compatibility, the integers 1-20 have their own enumerated values.

Enumerator
HOMTHR_FASTA	Retrieve the homology threshold for FASTA matches.
HOMTHR_SL	Retrieve the homology threshold for spectral library matches.

◆ IONS_HISTOGRAM

enum IONS_HISTOGRAM

Flags for getIonsScoreHistogram().

See Using enumerated values and static const ints in Perl, Java, Python and C#.

Enumerator
IH_INCLUDE_TOP_MATCHES	The default. Just include the top match to each spectrum.
IH_INCLUDE_TOP_10_MATCHES	Instead of just the top match, use the top 10 matches to each spectrum.

◆ QUANT_COMPONENT_STATUS

enum QUANT_COMPONENT_STATUS

Return codes for getQuantitationComponentForPeptide().

See Using enumerated values and static const ints in Perl, Java, Python and C#.

Enumerator
QCS_OK	The component was retrieved successfully.
QCS_OK_NO_MATCH	The quantitation method's components were searched successfully, no matching component was found.
QCS_ERROR_NO_METHOD	There is no quantitation method available (e.g. for protein summary).
QCS_ERROR_NO_COMPONENTS	There are no components specified in the quantitation method (e.g. for non-precursor method).
QCS_ERROR_BAD_COMPONENT_NAME	The peptide's component name in the Mascot results does not match any component in the quantitation method.
QCS_OK_MULTIPLE_MATCHES	The quantitation method's components were searched successfully, multiple matching components were found.

◆ sortBy

enum sortBy

Flags for createUnassignedList().

See Using enumerated values and static const ints in Perl, Java, Python and C#.

Enumerator
QUERY	Sort the unassigned list by ascending query number - this is the same as ascending relative mass order.
SCORE	Sort the unassigned list by descending score.
INTENSITY	Sort the unassigned list by descending intensity. Intensity values are taken from the `qintensity` value in the results file if they are available (from PKL files, or some MGF files). If these values are not available, then the intensity is calculated from the sum of all the ions values. For a very large MS-MS file, this option can take some time to process unless there are `qintensity` values in the results file.

◆ THRESHOLD_TYPE

enum THRESHOLD_TYPE

Flags for getPeptideThreshold()

See Using enumerated values and static const ints in Perl, Java, Python and C#.

Enumerator
TT_HOMOLOGY	Homology threshold.
TT_IDENTITY	Identity threshold.
TT_PEPSUM_DEFAULT	If ms_peptidesummary::MSPEPSUM_USE_HOMOLOGY_THRESH is specified in the constructor, then this will resolve to TT_HOMOLOGY, otherwise it will resolve to TT_IDENTITY.

◆ TREE_CLUSTER_METHOD

enum TREE_CLUSTER_METHOD

Flags for getTreeClusterNodes().

See Using enumerated values and static const ints in Perl, Java, Python and C#.

Enumerator
TCM_PAIRWISE_SINGLE	's': pairwise single-linkage clustering.
TCM_PAIRWISE_MAXIMUM	'm': pairwise maximum- (or complete-) linkage clustering.
TCM_PAIRWISE_AVERAGE	'a': pairwise average-linkage clustering.

◆ UNIQUE_PEP_RULES

enum UNIQUE_PEP_RULES

Flags for isPeptideUnique().

See Using enumerated values and static const ints in Perl, Java, Python and C#.

Choose UPR_WITHIN_FAMILY or UPR_WITHIN_FAMILY_MEMBER and then optionally 'or' UPR_IGNORE_SUBSET_PROTEINS

Enumerator
UPR_WITHIN_FAMILY	The peptide is unique if it occurs in proteins that are part of a single family.
UPR_WITHIN_FAMILY_MEMBER	The peptide is unique if it occurs in proteins that just belong to a single family member.
UPR_IGNORE_SUBSET_PROTEINS	Ignore any subset proteins that contain the match when deciding if a peptide is unique. However, if the peptide just belongs to subset proteins for the same hit, then it is still considered to be unique.
UPR_DEFAULT	Set to UPR_WITHIN_FAMILY_MEMBER | UPR_IGNORE_SUBSET_PROTEINS.

Member Function Documentation

◆ anyEmPAI()

bool anyEmPAI ( ) const

virtual

Return true if any emPAI values are available.

emPAI is not available unless all of the following are true:

Results file has MS/MS data.
Results file is not an old-style error tolerant search.
Results file has not been opened in decoy mode (ms_mascotresults::MSRES_DECOY).
There are at least 100 queries.

Returns: true if above conditions are true and at least one protein has a non-zero emPAI value.

Reimplemented in ms_peptidesummary.

◆ anyNumDiscoveredMods()

bool anyNumDiscoveredMods ( ) const

virtual

Return true if modification discovery statistics are available.

Modification discovery counts for fixed and variable modifications (and error tolerant modifications in an error tolerant search) are only available if the results file contains a Unimod section. Since only significant rank 1 peptide matches assigned to a protein hit are inspected for modification counting purposes, it is possible for the counts to be zero even when a Unimod section exists.

Returns: True if number of modifications could be counted and there is at least one modification with a non-zero count; false otherwise.

◆ cancelCreateSummary()

void cancelCreateSummary ( bool newValue = true )

Cancel the call to createSummary()

It is normally recommended that an application should delete the ms_peptidesummary object after calling this function because some objects will still be in memory (no cleanup is performed).

Cancelling may not occur immediately, and the calling application needs to wait for the createSummary() call to return.

Parameters

newValue should be set to true to cancel the createSummary call

◆ createSummary()

bool createSummary ( )

virtual

Create the summary after the ms_mascotresults object has been created.

Creating an ms_peptidesummary object for a large results file can take a considerable amount of time. The standard Mascot reports do not have an option to 'cancel' the creation of the report and rely on the ms_mascotresfilebase::outputKeepAlive() function to provide progress reports and keep the connection alive. This is not so desirable for a client application that doesn't want to redirect stdout, and wants to allow the user to cancel a long operation. Specify the ms_peptidesummary::MSPEPSUM_DEFERRED_CREATE flag when creating the ms_peptidesummary object, and then use one thread to call this function and a separate thread to call getCreateSummaryProgress() and report progress to the end user.

This function is called directly by the ms_mascotresults constructor unless the ms_peptidesummary::MSPEPSUM_DEFERRED_CREATE flag is specified. If that flag is specified, then the constructor returns 'immediately' and this function must be called by the client code. Until this function returns, the client code in the separate thread can only reliably call the getCreateSummaryProgress(), ms_mascotresfilebase::outputKeepAlive() or cancelCreateSummary() functions. Calling any other function for the ms_mascotresults object may result in undefined behaviour and cause the ms_mascotresfilebase::ERR_RESULTS_NOT_CREATED error to be set.

This function should not be called for a ms_proteinsummary.

Returns: true if successful

Reimplemented in ms_peptidesummary.

◆ createUnassignedList()

bool createUnassignedList ( sortBy s = QUERY )

To have a list of unassigned peptides, this function must be called first.

See Unassigned peptides list for details of creating an unassigned list.

Thread safe: This method is safe to use from multiple threads. See also Using Parser in multithreaded applications.

Parameters

s	order to sort the unassigned list

Returns: Currently always returns true unless createSummary() has not completed

Examples: resfile_summary.cpp.

◆ findPeptides()

int findPeptides	(	const int	startHit,
		const std::string &	str,
		FIND_FLAGS	item,
		FIND_COMPARE_FLAGS	compareFlags,
		std::vector< int > &	q,
		std::vector< int > &	p
	)		const

pure virtual

Find the next hit that contains peptides with the specified attribute.

Note: For a protein summary, this function always returns -1.

Can be used to find peptide-spectrum matches with a specified query number, sequence, mass, or fixed or variable modification. Returns the protein hit number and a list of q,p values that contains one or more string matches to the input string.

In crosslinked searches, both alpha and beta peptides are compared to the search criteria. Only one needs to match the input string.

All subset, sameset and family member proteins are searched, and it is possible that the returned q and p values only correspond to peptides in a single subset protein. The FC_PROTEIN_IGN_SAMESETS, FC_PROTEIN_IGN_SUBSETS and FC_PROTEIN_IGN_FAMILY flags cannot be used with this function. To find proteins which contain particular peptide matches, or to control which proteins to search, see findProteins().

For FT_PEPTIDE_VARMOD, str should contain the modification 'number' as passed to ms_searchparams::getVarModsName(). Modification numbers between 1 and 9 correspond to strings "1" through "9", and numbers between 10 and 32 correspond to "A" through "W". To find all error tolerant modifications, specify "X" and then use ms_peptidesummary::getErrTolModName() to retrieve the name of the modification.

For FT_PEPTIDE_FIXMOD, str should contain the modification 'number' as passed to ms_searchparams::getFixedModsName(). This will be in the range 1-20.

For both types of modification searches, str should only contain a single character. The value for compareFlags should include FC_SUBSTR, because this function is implemented by searching the return value from ms_peptide::getVarModsStr(). Specifying incorrect flags does not generate an error and results in undefined behaviour.

There is an alternative way to search for fixed mods: specify FT_PEPTIDE_STRING and FC_STRTOK as item and compareFlags, respectively, and pass the return value of ms_searchparams::getFixedModsResidues() as the string to search for, str. Although this may seem equivalent to the combination FT_PEPTIDE_FIXMOD and FC_SUBSTR, there are two corner cases where FT_PEPTIDE_FIXMOD with FC_SUBSTR works while the FT_PEPTIDE_STRING with FC_STRTOK alternative doesn't:

ms_searchparams::getFixedModsResidues() can return "N_term" or "C_term", which are clearly not strings of amino acids.
The case where a fixed mod is 'replaced' with a variable mod. For example, if Carbamidomethyl (C) is specified as a fixed mod and Propionamide (C) as variable mod, then it is not sufficient to just check if a peptide contains a cysteine.

Parameters

startHit	is the hit number at which to start searching. To start searching at the first hit, pass a value of 1. To search the unassigned list, pass a value of 0.
str	is the string to be matched.
item	is the peptide item to be found (for example, the peptide sequence or mass). Any of the `FT_PEPTIDE_` values may be used.
compareFlags	specifies whether the str value is required to match all or just part of the target string. It can also be used to specify a reverse search.
q	is a vector of query values for peptides that match. See Using STL vector classes vectori, vectord and VectorString in Perl, Java, Python and C#.
p	is a vector of corresponding rank values for peptides that match. See Using STL vector classes vectori, vectord and VectorString in Perl, Java, Python and C#.

Returns: the single hit number that contains all the returned matched items, or 0 if the match is in the unassigned list, or -1 if there is no match found.

Implemented in ms_peptidesummary, and ms_proteinsummary.

◆ findProteins()

int findProteins	(	const int	startHit,
		const std::string &	str,
		const int	dbIdx,
		FIND_FLAGS	item,
		FIND_COMPARE_FLAGS	compareFlags,
		std::vector< std::string > &	accessions,
		std::vector< int > &	dbIndexes
	)		const

pure virtual

Find the next hit that contains proteins with the specified attributes.

Note: For a protein summary, this function always returns -1.

Used to find accessions, protein descriptions or proteins that have peptide matches with a specified query number, sequence, mass, or fixed or variable modification. Any of the FIND_FLAGS may be used.

The findPeptides() function does not return any information about which subset/sameset/family proteins in a hit contain the found matches. findProteins(), however, returns a list of the protein accessions that contained a peptide match that fits the search criteria, and also allows the searched proteins to exclude samesets (FC_PROTEIN_IGN_SAMESETS), subsets (FC_PROTEIN_IGN_SUBSETS) or family members (FC_PROTEIN_IGN_FAMILY).

If protein entries in the database are representative (i.e. contain multiple accessions), by default only the first accession will be considered when FT_PROTEIN_ACCESSION is specified as a search item. If alternative accessions are saved in the results file (Mascot 2.4 or later), setting FC_PROTEIN_INC_ALT_ACC will search all accessions of each protein entry. Note that this will make the search slower.

Parameters

startHit	is the hit number at which to start searching. To start searching at the first hit, pass a value of 1.
str	is the string to be matched and depends on the value of item. If item is FT_PROTEIN_ACCESSION, then str will be the accession (or part of one) to be matched. If item is one of the `FT_PEPTIDE_` values, then see FIND_FLAGS and findPeptides() for operational details.
dbIdx	is the database ID to be matched. To search all databases, pass a value of -1. Note that the value 0 corresponds to a UniGene database.
item	is the accession, description or peptide item to be found. For example, return all proteins with a given peptide sequence or mass.
compareFlags	specifies whether the str value is required to match all or just part of the accession. It can also be used to specify a reverse search.
accessions	is a vector of protein accessions that match. See Using STL vector classes vectori, vectord and VectorString in Perl, Java, Python and C#.
dbIndexes	is a symmetric vector of database indexes for the protein accessions that match. See Using STL vector classes vectori, vectord and VectorString in Perl, Java, Python and C#.

Returns: the single hit number that contains the matches, or -1 if there is no match found.

Implemented in ms_peptidesummary, and ms_proteinsummary.

◆ findProteinsByAccession()

int findProteinsByAccession	(	const int	startHit,
		const std::string &	str,
		const int	dbIdx,
		FIND_COMPARE_FLAGS	compareFlags,
		std::vector< std::string > &	accessions,
		std::vector< int > &	dbIndexes
	)		const

pure virtual

Find the next hit that contains proteins with the specified accession.

Deprecated: See findProteins() with the ms_mascotresults::FT_PROTEIN_ACCESSION flag.

Note: For a protein summary, this function always returns -1.

Return the hit number and a list of proteins with accessions that match the passed string. If protein grouping has been specified, then multiple proteins within the same hit may match, and these are all returned.

Parameters

startHit	is the number at which to start searching. To start searching at the first hit, pass a value of 1.
str	is the accession (or part of accession) to be matched.
dbIdx	is the database ID to be matched. To search all databases, pass a value of -1. Note that the value 0 corresponds to a UniGene database.
compareFlags	specifies whether the str value is required to match all or just part of the accession. It can also be used to specify a reverse search.
accessions	is a vector of protein accessions that match. See Using STL vector classes vectori, vectord and VectorString in Perl, Java, Python and C#.
dbIndexes	is a symmetric vector of database indexes for the protein accessions that match. See Using STL vector classes vectori, vectord and VectorString in Perl, Java, Python and C#.

Returns: the single hit number that contains all the returned proteins, or -1 if there is no match found.

Implemented in ms_peptidesummary, and ms_proteinsummary.

◆ findProteinsByDescription()

int findProteinsByDescription	(	const int	startHit,
		const std::string &	str,
		FIND_COMPARE_FLAGS	compareFlags,
		std::vector< std::string > &	accessions,
		std::vector< int > &	dbIndexes
	)		const

pure virtual

Find the next hit that contains proteins with the specified description.

Deprecated: See findProteins() with the ms_mascotresults::FT_PROTEIN_DESCRIPTION flag.

Note: For a protein summary, this function always returns -1.

Return the hit number and a list of proteins with descriptions that match the passed string. If protein grouping has been specified, then multiple proteins within the same hit may match, and these are all returned.

Descriptions are just those included in the results file. See getProteinDescription() for details of what is loaded into the results.

Parameters

startHit	is the number at which to start searching. To start searching at the first hit, pass a value of 1.
str	is the description (or part of the description) to be matched.
compareFlags	specifies whether the str value is required to match all or just part of the description. It can also be used to specify a reverse search.
accessions	is a vector of protein accessions that match. See Using STL vector classes vectori, vectord and VectorString in Perl, Java, Python and C#.
dbIndexes	is a symmetric vector of database indexes for the protein accessions that match. See Using STL vector classes vectori, vectord and VectorString in Perl, Java, Python and C#.

Returns: the single hit number that contains all the returned proteins, or -1 if there is no match found.

Implemented in ms_peptidesummary, and ms_proteinsummary.

◆ freeHit()

void freeHit ( const int hit )

virtual

Frees any memory associated with the passed hit number.

Has no effect except when Using the pepsum cache (MSR and dat28). This function frees the ms_protein objects for the specified hit. The return values from getHit(), getNextFamilyProtein(), getNextSimilarProtein(), getNextSimilarProteinOf(), getNextSubsetProtein() etc. for this hit will then point to deleted objects and hence should not be used.

In addition, any ms_peptide objects loaded into memory using the getPeptide(const int, const int, ms_peptide *&)const function will be 'unloaded' if the ms_peptidesummary::MSPEPSUM_DISCARD_RELOADABLE flag has been specified. This function has no effect on the ms_peptide objects returned from the alternative getPeptide(const int, const int)const function.

Parameters

hit	should be in the range 1.. getNumberOfHits().

Reimplemented in ms_peptidesummary.

◆ getAllFamilyMembersWithThisPepMatch()

int getAllFamilyMembersWithThisPepMatch	(	const int	hit,
		const int	q,
		const int	p,
		std::vector< int > &	db,
		std::vector< std::string > &	acc,
		std::vector< int > &	dupe_status
	)		const

pure virtual

Return a list of (top level) family proteins that have a match to the specified q and p.

Only the top level proteins are returned by this function, that is, the proteins that would be returned by getHit() and getNextFamilyProtein(). It will be considerably faster than calling ms_protein::getPeptideQuery() and related functions, because that involves loading more information from the cache and from the .dat file.

This function will return the values rapidly when caching is enabled; see Using the pepsum cache (MSR and dat28). It also functions correctly when not using the cache.

The returned vectors are all guaranteed to have the same number of elements. See Using STL vector classes vectori, vectord and VectorString in Perl, Java, Python and C# for how to use them from programming languages other than C++.

See also: getAllProteinsWithThisPepMatch() and getProteinsWithThisPepMatch()

Parameters

hit	Should be in the range 1.. getNumberOfHits().
q	is the query number in the range 1 to ms_mascotresfilebase::getNumQueries().
p	is the 'hit' or 'rank' number. For a peptide summary, the top 10 matches are saved and hence p would normally be in the range 1 to 10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20. For a protein summary, this will be in the range 1 to 50, so it is safest to call getMaxRankValue() to find the maximum value.
db	is a vector of database IDs.
acc	is the corresponding vector of accessions.
dupe_status	is the corresponding vector of ms_protein::DUPLICATE values for the returned proteins. Note that ms_protein::DUPE_Ignored will only appear in integrated library searches where the peptide match was removed due to IgnoreIonsScoreBelow.

Returns: The number of proteins in the db/acc/dupe_status vectors.

Implemented in ms_peptidesummary, and ms_proteinsummary.

◆ getAllProteinsWithThisPepMatch() [1/2]

std::vector< std::string > getAllProteinsWithThisPepMatch	(	const int	q,
		const int	p,
		std::vector< int > &	start,
		std::vector< int > &	end,
		std::vector< std::string > &	pre,
		std::vector< std::string > &	post,
		std::vector< int > &	frame,
		std::vector< int > &	multiplicity,
		std::vector< int > &	db
	)		const

pure virtual

Return a complete list of proteins that contain this same peptide match.

In a peptide summary, this function returns the complete list of proteins in the q1_p1= line of the peptides, et_peptides, decoy_peptides or crosslink_peptides section, as appropriate.

In a protein summary, the return value contains the complete list of proteins that had the same peptide match to the one specified in the h1_q1= line.

See Using STL vector classes vectori, vectord and VectorString in Perl, Java, Python and C# for how to use the parameters start, end, pre, post, frame, multiplicity and db.

Crosslink searches only: If you need to know whether the accessions come from the alpha or beta peptide, use the 10-parameter method instead. In either case, the accession list may contain duplicates if the alpha and beta are assigned to the same protein hit.

See also: getProteinsWithThisPepMatch()

Parameters

q	Specifies the query number. This should be in the range 1 .. ms_mascotresfilebase::getNumQueries().
p	is the 'hit' or 'rank' number. For a peptide summary, the top 10 matches are saved and hence p would normally be in the range 1 to 10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20. For a protein summary, this will be in the range 1 to 50, so it is safest to call getMaxRankValue() to find the maximum value.
start	Returns a vector of the start residues – one for each accession
end	Returns a vector of the end residues – one for each accession
pre	Returns a vector of the residue before the peptide – one for each accession
post	Returns a vector of the residue after the peptide – one for each accession
frame	Returns a vector of the frames – one for each accession. For a protein sequence, the frame will always be 0
multiplicity	Returns a vector of the multiplicities – one for each accession. This is the number of precursor matches for each accession
db	Returns a vector of the database index numbers for searches against multiple databases. For searches against a single database, these values will always be 1.

Returns: A list of all the accessions that contained the peptide matched by this result.

Implemented in ms_peptidesummary, and ms_proteinsummary.

◆ getAllProteinsWithThisPepMatch() [2/2]

std::vector< std::string > getAllProteinsWithThisPepMatch	(	const int	q,
		const int	p,
		std::vector< int > &	start,
		std::vector< int > &	end,
		std::vector< std::string > &	pre,
		std::vector< std::string > &	post,
		std::vector< int > &	frame,
		std::vector< int > &	multiplicity,
		std::vector< int > &	db,
		std::vector< int > &	psmComponent
	)		const

pure virtual

Return a complete list of proteins that contain this same peptide match.

In a peptide summary, this function returns the complete list of proteins in the q1_p1= line of the peptides, et_peptides, decoy_peptides or crosslink_peptides section, as appropriate.

In a protein summary, the return value contains the complete list of proteins that had the same peptide match to the one specified in the h1_q1= line.

See Using STL vector classes vectori, vectord and VectorString in Perl, Java, Python and C# for how to use the parameters start, end, pre, post, frame, multiplicity, db and psmComponent.

This method was added in Parser 2.7 for crosslinking support. If you don't need to know whether the accessions come from the alpha or beta peptide, you can use the 9-parameter method instead. In either case, the accession list may contain duplicates if the alpha and beta are assigned to the same protein hit.

See also: getProteinsWithThisPepMatch()

Parameters

q	Specifies the query number. This should be in the range 1 .. ms_mascotresfilebase::getNumQueries().
p	is the 'hit' or 'rank' number. For a peptide summary, the top 10 matches are saved and hence p would normally be in the range 1 to 10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20. For a protein summary, this will be in the range 1 to 50, so it is safest to call getMaxRankValue() to find the maximum value.
start	Returns a vector of the start residues – one for each accession
end	Returns a vector of the end residues – one for each accession
pre	Returns a vector of the residue before the peptide – one for each accession
post	Returns a vector of the residue after the peptide – one for each accession
frame	Returns a vector of the frames – one for each accession. For a protein sequence, the frame will always be 0
multiplicity	Returns a vector of the multiplicities – one for each accession. This is the number of precursor matches for each accession
db	Returns a vector of the database index numbers for searches against multiple databases. For searches against a single database, these values will always be 1.
psmComponent	Returns a vector of psmComponent markers (ms_peptide::PSM) – one for each accession. For a crosslinked match, this will be one of ms_peptide::PSM_CROSSLINK_ALPHA or ms_peptide::PSM_CROSSLINK_BETA. In all other types of match, it will always be ms_peptide::PSM_COMPLETE.

Returns: A list of all the accessions that contained the peptide matched by this result.

Implemented in ms_peptidesummary, and ms_proteinsummary.

◆ getAvePeptideIdentityThreshold()

int getAvePeptideIdentityThreshold	(	double	OneInXprobRnd,
		DB_MATCH_TYPE	dbType = `DM_FASTA`
	)		const

virtual

Return the average threshold value for all MS-MS data sets.

The average value is calculated by taking the mean value from calling getPeptideIdentityThreshold() for each query. This is the value that used to be shown at the top of the standard Mascot reports in versions before Mascot 2.0. For example, the threshold in the following string could be calculated by calling getAvePeptideIdentityThreshold(20):

Individual ions scores > 47 indicate identity or extensive homology (p<0.05).

Parameters

OneInXprobRnd

For a 1 in 20 probability, this should be 20. This value must be greater than 1, or it will be set to the default of 20. It's easiest to always pass 1 / getProbabilityThreshold() as OneInXprobRnd.

This parameter is ignored in error tolerant search results. If the query was searched in the error tolerant pass, its significance is determined using 1 / getErrTolProbabilityThreshold(); otherwise using 1 / getProbabilityThreshold().

dbType If given, should be DM_FASTA for Mascot score threshold and DM_SPECTRAL_LIBRARY for spectral library threshold. DM_FASTA_FIRST_PASS and DM_FASTA_SECOND_PASS return the same threshold as DM_FASTA. (DM_BOTH is nonsensical; if given, -1 is returned.)

Returns: The average score threshold. If there was no MS-MS data then -1 will be returned.

◆ getComponentIntensity()

bool getComponentIntensity	(	const int	q,
		const int	p,
		const std::string &	componentName,
		double &	value,
		double &	rawValue
	)		const

pure virtual

Returns the component intensity for reporter or multiplex methods.

This function will only succeed for MS2 based quantitation methods such as "reporter" and "multiplex" where the required data is in the .dat file. It is up to the calling application to check that this is the case using ms_mascotresfilebase::getQuantitationMethod and ms_quant_method::getProtocol

This function was added in Mascot Parser 2.5. If the calling application is Using the pepsum cache (MSR and dat28) and the cache files were created with Mascot Parser 2.4 or earlier, then this function will calculate the intensities on demand. The first call to this function causes values for the whole file to be calculated, which may be slow. With new cache files, all the values are saved in the cache file.

This function will only calculate values for cases where ms_quant_helper::isPeptideQuantifiable returns ms_quant_helper::PEPTIDE_IS_QUANTIFIABLE

For a particular q,p pair there will always be values for all components or for no components. There can never be a case where there is just a value available for a subset of the components.

Parameters

	q	is the query number.
	p	is the 'hit' or 'rank' number in the range 1.. getMaxRankValue(). For "reporter" protocols, this must be 1.
	componentName	is the name of the component as defined in the method. For example, for iTRAQ this might be "114", and for one of the SILAC methods this might be "Heavy".
[out]	value	is the corrected intensity value. Corrections are applied as specified in the method included in the results file.
[out]	rawValue	is the uncorrected intensity value.

Returns: false if the method is unable to calculate or determine an intensity value for the specified q and p values

Implemented in ms_peptidesummary, and ms_proteinsummary.

◆ getComponentProtein()

const ms_protein * getComponentProtein	(	const char *	accession,
		const int	dbIdx = `1`
	)		const

virtual

Return a pointer to the protein entry given an accession.

Only use this function for the "components" of a UniGene search. It will generally be more convenient for client applications to call ms_protein::getComponent() to get an ms_protein object, both in UniGene searches and PMF mixture searches.

Parameters

accession	is the accession string.
dbIdx	a database index (1..ms_searchparams::getNumberOfDatabases()).

Returns: If the protein cannot be found in the results, then a null value is returned. See Maintaining object references: two rules of thumb.

Reimplemented in ms_peptidesummary.

◆ getComponentString()

std::string getComponentString	(	const int	q,
		const int	p,
		const ms_peptide::PSM	psmComponent = `ms_peptide::PSM_COMPLETE`
	)		const

pure virtual

Return q1_p2_comp string value; for h1_q2 this string is always empty.

Note: This function will always return the empty string for a protein summary.

The entry in the results file might be, for example:

   q1_p2_comp=light

For a crosslinked match, the results file line is q1_p1_comp_1= or q1_p1_comp_2=, depending on the psmComponent parameter. If the parameter is ms_peptide::PSM_COMPLETE, the method returns the empty string. Use ms_peptide::getNumberOfLinkedPeptides() to decide which value is needed.

◆ getCreateSummaryProgress()

bool getCreateSummaryProgress	(	int *	cspTotalPercentComplete,
		unsigned int *	cspCurrTask,
		int *	cspCurrTaskPercentageComplete,
		std::string *	cspAccession,
		int *	cspHit,
		int *	cspQuery,
		std::string *	cspKeepAliveText
	)		const

Return progress for the createSummary() call.

See Multiple return values in Perl, Java, Python and C#.

This function is most useful when using the ms_peptidesummary::MSPEPSUM_DEFERRED_CREATE flag and calling createSummary() from a separate thread. The ms_mascotresfilebase::outputKeepAlive() outputs progress text to stdout, and this may not be convenient for some applications.

Any of the passed parameters may be null.

Parameters

[out]	cspTotalPercentComplete	is simply calculated from: 100 * cspCurrTask / ms_mascotresfilebase::KA_LAST + cspCurrTaskPercentageComplete / ms_mascotresfilebase::KA_LAST
[out]	cspCurrTask	is the ms_mascotresfilebase::KA_TASK currently being performed by Mascot Parser. If Parser is doing nothing, then this will be the last task that was completed and cspCurrTaskPercentageComplete will be 100
[out]	cspCurrTaskPercentageComplete	is the percentage (0..100) complete for the current cspCurrTask.
[out]	cspAccession	is the current 'accession' being processed. See ms_mascotresfilebase::outputKeepAlive() for details of which tasks set this value. For languages other than C++, this will be a reference rather than a pointer to a std::string.
[out]	cspHit	is the current hit being processed. See ms_mascotresfilebase::outputKeepAlive() for details of which tasks set this value
[out]	cspQuery	is the current 'query' being processed. See ms_mascotresfilebase::outputKeepAlive() for details of which tasks set this value
[out]	cspKeepAliveText	is the text that would be output by ms_mascotresfilebase::outputKeepAlive(). To use this, specify a suitable keepAliveText to the ms_mascotresfilebase constructor but set keepAliveInterval to zero to prevent any output to stdout. For languages other than C++, this will be a reference rather than a pointer to a std::string.

Returns: true if the createSummary() call has completed.

◆ getDiscoveredErrTolModDeltas()

std::vector< double > getDiscoveredErrTolModDeltas ( std::vector< std::string > * vecDeltaStrings = NULL ) const

virtual

Return the deltas of all error tolerant modifications discovered in this search.

Only significant rank 1 matches assigned to a protein hit contribute towards the list of error tolerant modification deltas. This means that if the error tolerant match is not the rank 1 match (e.g. the same query had a higher scoring regular peptide match), then the error tolerant modification of that match is not counted.

The list of error tolerant modification deltas is most useful when used in combination with getNumDiscoveredErrTolMods().

The order of the elements in the return or parameter vector is the same as in the vector returned by getDiscoveredErrTolModNames()

Nucleic acid insertion, deletion and substitution whose special names are NA_INSERTION, NA_DELETION and NA_SUBSTITUTION have a delta of 0.0.

Note that this is not available if the cache file was created with Parser 2.5 or 2.6. Recreating the cache file makes this available.

See Using STL vector classes vectori, vectord and VectorString in Perl, Java, Python and C#.

Parameters

vecDeltaStrings Pointer to a vector of strings to retrieve the deltas as strings instead of as doubles. If this pointer is valid, then the returned vector is empty.

Returns: A vector of error tolerant modification deltas discovered in significant rank 1 matches assigned to a protein hit in this search. The vector could be empty.

◆ getDiscoveredErrTolModNames()

std::vector< std::string > getDiscoveredErrTolModNames ( ) const

virtual

Return the names of all error tolerant modifications discovered in this search.

Only significant rank 1 matches assigned to a protein hit contribute towards the list of error tolerant modification names. This means that if the error tolerant match is not the rank 1 match (e.g. the same query had a higher scoring regular peptide match), then the error tolerant modification of that match is not counted. Nucleic acid insertion, deletion and substitution have the usual special names NA_INSERTION, NA_DELETION and NA_SUBSTITUTION. Otherwise error tolerant modification names follow Unimod names.

The list of error tolerant modification names is most useful when used in combination with getNumDiscoveredErrTolMods().

See Using STL vector classes vectori, vectord and VectorString in Perl, Java, Python and C#.

Returns: A vector of error tolerant modification names discovered in significant rank 1 matches assigned to a protein hit in this search. The vector could be empty.

◆ getDiscoveredLocalModDeltas()

std::vector< double > getDiscoveredLocalModDeltas ( std::vector< std::string > * vecDeltaStrings = NULL ) const

virtual

Return the deltas of all query-level modifications discovered in this search.

The list of query-level modification deltas is most useful when used in combination with getNumDiscoveredLocalMods().

The order of the elements in the return or parameter vector is the same as in the vector returned by getDiscoveredLocalModNames()

Note that this is not available if the cache file was created with Parser 2.5 or 2.6. Recreating the cache file makes this available.

See Using STL vector classes vectori, vectord and VectorString in Perl, Java, Python and C#.

Parameters

vecDeltaStrings Pointer to a vector of strings to retrieve the deltas as strings instead of as doubles. If this pointer is valid, then the returned vector is empty.

Returns: A vector of query-level modification deltas discovered in significant rank 1 matches assigned to a protein hit in this search. The vector could be empty.

◆ getDiscoveredLocalModNames()

std::vector< std::string > getDiscoveredLocalModNames ( ) const

virtual

Return the names of all query-level modifications discovered in this search.

The list of query-level modification names is most useful when used in combination with getNumDiscoveredLocalMods().

See Using STL vector classes vectori, vectord and VectorString in Perl, Java, Python and C#.

Returns: A vector of query-level modification names discovered in significant rank 1 matches assigned to a protein hit in this search. The vector could be empty.

◆ getErrTolModDelta()

double getErrTolModDelta	(	const int	q,
		const int	p,
		std::string *	modString = `NULL`,
		std::string *	deltaAsString = `NULL`
	)		const

virtual

Return the error tolerant mod delta from h1_q2_et_mods or q1_p1_et_mods.

The entry in, for example, q1_p2_et_mods might be

   q1_p2_et_mods=0.984020,0.000000,Citrullination

where the modification delta will be 0.984020. In general, modification delta could be any valid floating point number, including 0.0.

See also: getErrTolModName(), getErrTolModNeutralLoss() and getReadableVarMods()

Parameters

q	Specifies the query number. This should be in the range 1 .. ms_mascotresfilebase::getNumQueries().
p	is the 'hit' or 'rank' number. For a peptide summary, p should be in the range 1..10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20. For a protein summary, this will be in the range 1 to 50, so it is safest to call getMaxRankValue() to find the maximum value.
modString	Optional pointer to a modString. If the pointer is NULL or the pointed-to string is empty, the modString is looked up in the result file; in the latter case the pointed-to string is also set to the value from the result file. If the string is already set to a value, that value is used to retrieve the delta directly.
deltaAsString

Returns: The modification delta.

◆ getErrTolModifiedNaSeq()

std::string getErrTolModifiedNaSeq	(	const int	q,
		const int	p
	)		const

virtual

Return the modified sequence of nucleic acids after a single base modification in an error tolerant search.

If an error tolerant search was made against a nucleic acid database, and if the NA sequence had an unsuspected insertion, deletion or substitution, then this method returns the modified NA sequence before translation.

Parameters

q	Specifies the query number. This should be in the range 1 .. ms_mascotresfilebase::getNumQueries().
p	is the 'hit' or 'rank' number.

Returns: the modified sequence of nucleic acids after a single base modification in an error tolerant search

◆ getErrTolModMasterNeutralLoss()

std::vector< double > getErrTolModMasterNeutralLoss	(	const int	q,
		const int	p
	)		const

virtual

Return the error tolerant mod additional primary neutral losses from h1_q2_et_mods_master or q1_p1_et_mods_master.

Some modifications specify one or more neutral losses. These can be specified as 'master' or 'slave' neutral losses. If there are multiple neutral losses, then at least one and less than ten of the definitions must be masters.

During a search, Mascot iterates through the master neutral losses. The one that gives the highest score is chosen, and all the other neutral losses are treated as slaves. If a slave neutral loss gives a match to a peak, that peak is removed from the list of noise peaks, which further enhances the score. The ability to specify multiple neutral losses was introduced in Mascot 2.1.

The entry in, for example, q1_p1_et_mods_master might be

   q1_p1_et_mods_master=1.0,2.0

The returned list will not contain the dominant neutral loss – that can be retrieved using getErrTolModNeutralLoss().

See also: getErrTolModSlaveNeutralLoss() and getErrTolModNeutralLoss()

Parameters

q	Specifies the query number. This should be in the range 1 .. ms_mascotresfilebase::getNumQueries().
p	is the 'hit' or 'rank' number. For a peptide summary, p should be in the range 1..10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20. For a protein summary, this will be in the range 1 to 50, so it is safest to call getMaxRankValue() to find the maximum value.

Returns: The modification additional primary neutral loss values as a vector of doubles. See Using STL vector classes vectori, vectord and VectorString in Perl, Java, Python and C#.

◆ getErrTolModMasterString()

std::string getErrTolModMasterString	(	const int	q,
		const int	p
	)		const

pure virtual

Return the error tolerant mod primary neutral loss string from h1_q2_et_mods_master or q1_p1_et_mods_master.

Consider the following two examples for peptide summary and protein summary, respectively:

The entry in q1_p2_et_mods_master might be
```
   q1_p2_et_mods_master=1.0,2.0 
```
The entry in h1_q2_et_mods_master might be
```
   h1_q2_et_mods_master=1.0,2.0 
```

This function returns the complete string value. A more useful function is ms_mascotresults::getErrTolModMasterNeutralLoss().

Parameters

q	Specifies the query number. This should be in the range 1 .. ms_mascotresfilebase::getNumQueries().
p	is the 'hit' or 'rank' number. For a peptide summary, p should be in the range 1..10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20. For a protein summary, this will be in the range 1 to 50, so it is safest to call getMaxRankValue() to find the maximum value.

Returns: complete error tolerant master neutral loss string.

Implemented in ms_peptidesummary, and ms_proteinsummary.

◆ getErrTolModName()

std::string getErrTolModName	(	const int	q,
		const int	p,
		std::string *	modString = `NULL`
	)		const

virtual

Return the error tolerant mod name from h1_q2_et_mods or q1_p1_et_mods.

The entry in, for example, q1_p2_et_mods might be

   q1_p2_et_mods=0.984020,0.000000,Citrullination

where the modification name will be Citrullination.

See also: getErrTolModDelta(), getErrTolModNeutralLoss() and getReadableVarMods()

Parameters

q	Specifies the query number. This should be in the range 1 .. ms_mascotresfilebase::getNumQueries().
p	is the 'hit' or 'rank' number. For a peptide summary, p should be in the range 1..10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20. For a protein summary, this will be in the range 1 to 50, so it is safest to call getMaxRankValue() to find the maximum value.
modString	Optional pointer to a modString. If the pointer is NULL or the pointed-to string is empty, the modString is looked up in the result file; in the latter case the pointed-to string is also set to the value from the result file. If the string is already set to a value, that value is used to retrieve the name directly.

Returns: The modification name, or empty string if this match has no ET modification.

◆ getErrTolModNeutralLoss()

double getErrTolModNeutralLoss	(	const int	q,
		const int	p
	)		const

virtual

Return the error tolerant mod neutral loss from h1_q2_et_mods or q1_p1_et_mods.

Some modifications specify one or more neutral losses. These can be specified as 'master' or 'slave' neutral losses. If there are multiple neutral losses, then at least one and less than 10 of the definitions must be masters.

During a search, Mascot iterates through the master neutral losses. The one that gives the highest score is chosen, and all the other neutral losses are treated as slaves. If a slave neutral loss gives a match to a peak, that peak is removed from the list of noise peaks, which further enhances the score. The ability to specify multiple neutral losses was introduced in Mascot 2.1.

The entry in, for example, q1_p1_et_mods might be

   q1_p1_et_mods=79.966330,97.976898,Phospho (STY)

where the highest scoring modification neutral loss, returned by this function, is 97.976898.

See also: getErrTolModName(), getErrTolModDelta(), getReadableVarMods(), getErrTolModMasterNeutralLoss()

Parameters

q	Specifies the query number. This should be in the range 1 .. ms_mascotresfilebase::getNumQueries().
p	is the 'hit' or 'rank' number. For a peptide summary, p should be in the range 1..10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20. For a protein summary, this will be in the range 1 to 50, so it is safest to call getMaxRankValue() to find the maximum value.

Returns: The modification neutral loss value.

◆ getErrTolModPepNeutralLoss()

std::vector< double > getErrTolModPepNeutralLoss	(	const int	q,
		const int	p
	)		const

virtual

Return the error tolerant mod peptide neutral losses from h1_q2_et_mods_pep or q1_p1_et_mods_pep.

Some modifications specify one or more peptide neutral losses. PepNeutralLoss allows a neutral loss from the precursor to be specified so that this peak is not treated as a noise peak when being scored by Mascot. The ability to specify peptide neutral losses was introduced in Mascot 2.1.

The entry in, for example, q1_p1_et_mods_pep might be

   q1_p1_et_mods_pep=97.976896, 79.966330

See also: getErrTolModReqPepNeutralLoss()

Parameters

q	Specifies the query number. This should be in the range 1 .. ms_mascotresfilebase::getNumQueries().
p	is the 'hit' or 'rank' number. For a peptide summary, p should be in the range 1..10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20. For a protein summary, this will be in the range 1 to 50, so it is safest to call getMaxRankValue() to find the maximum value.

Returns: The modification peptide neutral loss values as a vector of doubles. See Using STL vector classes vectori, vectord and VectorString in Perl, Java, Python and C#.

◆ getErrTolModPepString()

std::string getErrTolModPepString	(	const int	q,
		const int	p
	)		const

pure virtual

Return the error tolerant mod peptide neutral loss string from h1_q2_et_mods_pep or q1_p1_et_mods_pep.

Consider the following two examples for peptide summary and protein summary, respectively:

The entry in q1_p2_et_mods_pep might be
```
   q1_p2_et_mods_pep=1.0,2.0 
```
The entry in h1_q2_et_mods_pep might be
```
   h1_q2_et_mods_pep=1.0,2.0 
```

This function returns the complete string value. A more useful function is ms_mascotresults::getErrTolModPepNeutralLoss().

Parameters

q	Specifies the query number. This should be in the range 1 .. ms_mascotresfilebase::getNumQueries().
p	is the 'hit' or 'rank' number. For a peptide summary, p should be in the range 1..10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20. For a protein summary, this will be in the range 1 to 50, so it is safest to call getMaxRankValue() to find the maximum value.

Returns: complete error tolerant peptide neutral loss string.

Implemented in ms_peptidesummary, and ms_proteinsummary.

◆ getErrTolModReqPepNeutralLoss()

std::vector< double > getErrTolModReqPepNeutralLoss	(	const int	q,
		const int	p
	)		const

virtual

Return the error tolerant mod peptide neutral losses from h1_q2_et_mods_reqpep or q1_p1_et_mods_reqpep.

Some modifications specify one or more required peptide neutral losses. ReqPepNeutralLoss performs the same function as PepNeutralLoss but with the additional condition that the peak must be present in the spectrum. The ability to specify peptide neutral losses was introduced in Mascot 2.1.

The entry in, for example, q1_p1_et_mods_reqpep might be

   q1_p1_et_mods_reqpep=97.976896, 79.966330

See also: getErrTolModPepNeutralLoss()

Parameters

q	Specifies the query number. This should be in the range 1 .. ms_mascotresfilebase::getNumQueries().
p	is the 'hit' or 'rank' number. For a peptide summary, p should be in the range 1..10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20. For a protein summary, this will be in the range 1 to 50, so it is safest to call getMaxRankValue() to find the maximum value.

Returns: The modification required peptide neutral loss values as a vector of doubles. See Using STL vector classes vectori, vectord and VectorString in Perl, Java, Python and C#.

◆ getErrTolModReqPepString()

std::string getErrTolModReqPepString	(	const int	q,
		const int	p
	)		const

pure virtual

Return the error tolerant mod required peptide neutral loss string from h1_q2_et_mods_reqpep or q1_p1_et_mods_reqpep.

Consider the following two examples for peptide summary and protein summary, respectively:

The entry in q1_p2_et_mods_reqpep might be
```
   q1_p2_et_mods_reqpep=1.0,2.0 
```
The entry in h1_q2_et_mods_reqpep might be
```
   h1_q2_et_mods_reqpep=1.0,2.0 
```

This function returns the complete string value. A more useful function is ms_mascotresults::getErrTolModReqPepNeutralLoss().

Parameters

q	Specifies the query number. This should be in the range 1 .. ms_mascotresfilebase::getNumQueries().
p	is the 'hit' or 'rank' number. For a peptide summary, p should be in the range 1..10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20. For a protein summary, this will be in the range 1 to 50, so it is safest to call getMaxRankValue() to find the maximum value.

Returns: complete error tolerant required peptide neutral loss string.

Implemented in ms_peptidesummary, and ms_proteinsummary.

◆ getErrTolModSlaveNeutralLoss()

std::vector< double > getErrTolModSlaveNeutralLoss	(	const int	q,
		const int	p
	)		const

virtual

Return the error tolerant mod slave neutral losses from h1_q2_et_mods_slave or q1_p1_et_mods_slave.

Some modifications specify one or more neutral losses. These can be specified as 'master' or 'slave' neutral losses. If there are multiple neutral losses, then at least one and less than ten of the definitions must be masters.

During a search, Mascot iterates through the master neutral losses. The one that gives the highest score is chosen, and all the other neutral losses are treated as slaves. If a slave neutral loss gives a match to a peak, that peak is removed from the list of noise peaks, which further enhances the score. The ability to specify multiple neutral losses was introduced in Mascot 2.1.

The entry in, for example, q1_p2_et_mods_slave might be

   q1_p2_et_mods_slave=1.0,2.0

See also: getErrTolModMasterNeutralLoss(), getErrTolModNeutralLoss()

Parameters

q	Specifies the query number. This should be in the range 1 .. ms_mascotresfilebase::getNumQueries().
p	is the 'hit' or 'rank' number. For a peptide summary, p should be in the range 1..10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20. For a protein summary, this will be in the range 1 to 50, so it is safest to call getMaxRankValue() to find the maximum value.

Returns: The modification slave neutral loss values as a vector of doubles. See Using STL vector classes vectori, vectord and VectorString in Perl, Java, Python and C#.

◆ getErrTolModSlaveString()

std::string getErrTolModSlaveString	(	const int	q,
		const int	p
	)		const

pure virtual

Return the error tolerant mod slave neutral loss string from h1_q2_et_mods_slave or q1_p1_et_mods_slave.

Consider the following two examples for peptide summary and protein summary, respectively:

The entry in q1_p2_et_mods_slave might be
```
   q1_p2_et_mods_slave=1.0,2.0
```
The entry in h1_q2_et_mods_slave might be
```
   h1_q2_et_mods_slave=1.0,2.0 
```

This function returns the complete string value. A more useful function is ms_mascotresults::getErrTolModSlaveNeutralLoss().

Parameters

q	Specifies the query number. This should be in the range 1 .. ms_mascotresfilebase::getNumQueries().
p	is the 'hit' or 'rank' number. For a peptide summary, p should be in the range 1..10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20. For a protein summary, this will be in the range 1 to 50, so it is safest to call getMaxRankValue() to find the maximum value.

Returns: complete error tolerant slave neutral loss string.

Implemented in ms_peptidesummary, and ms_proteinsummary.

◆ getErrTolModString()

std::string getErrTolModString	(	const int	q,
		const int	p
	)		const

pure virtual

Return the complete error tolerant mod string from h1_q2_et_mods or q1_p1_et_mods.

Consider the following two examples for peptide summary and protein summary, respectively:

The entry in q1_p2_et_mods might be

   q1_p2_et_mods=0.984020,0.000000,Citrullination

The entry in h1_q2_et_mods might be

   h1_q2_et_mods=0.984020,0.000000,Citrullination

This function returns the complete string value. More useful functions are getErrTolModDelta(), getErrTolModNeutralLoss() and getErrTolModName(), all of which will work for both the protein summary and the peptide summary.

Parameters

q	Specifies the query number. This should be in the range 1 .. ms_mascotresfilebase::getNumQueries().
p	is the 'hit' or 'rank' number. For a peptide summary, p should be in the range 1..10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20. For a protein summary, this will be in the range 1 to 50, so it is safest to call getMaxRankValue() to find the maximum value.

Returns: complete error tolerant modification string.

Implemented in ms_peptidesummary, and ms_proteinsummary.

◆ getErrTolOriginalNaSeq()

std::string getErrTolOriginalNaSeq	(	const int	q,
		const int	p
	)		const

virtual

Return the original sequence of nucleic acids before a single base modification in an error tolerant search.

If an error tolerant search was made against a nucleic acid database, and if the NA sequence had an unsuspected insertion, deletion or substitution, then this method returns the original NA sequence before translation.

Parameters

q	Specifies the query number. This should be in the range 1 .. ms_mascotresfilebase::getNumQueries().
p	is the 'hit' or 'rank' number.

Returns: the original sequence of nucleic acids before a single base modification in an error tolerant search

◆ getErrTolProbabilityThreshold()

double getErrTolProbabilityThreshold ( ) const

virtual

For a protein summary, this method always returns the same value as getProbabilityThreshold().

Error tolerant matches in an integrated error tolerant search can have a different probability threshold (significance threshold) from first pass peptide matches. The first pass matches always use getProbabilityThreshold() as the significance threshold, and second pass matches always use getErrTolProbabilityThreshold().

The error tolerant significance threshold is determined using ms_mascotresults_params::getTargetFDR(). If a target FDR is specified and the search is an error tolerant target-decoy search, the probability threshold is chosen such that error tolerant matches have false discovery rate (FDR) equal to the target or as close as feasible. If the search is not a target-decoy search or target FDR is not specified, the error tolerant significance threshold is the same as the first pass significance threshold.

See also: getProbabilityThreshold(), Error tolerant searches

Returns: probability threshold for error tolerant matches.

◆ getFlags()

unsigned int getFlags ( ) const

Returns the flags value passed to the constructor.

Returns: the ms_mascotresults::FLAGS value.

◆ getFlags2()

unsigned int getFlags2 ( ) const

Return the flags2 value passed to the ms_peptidesummary constructor.

For an ms_proteinsummary, will always return 0 as there is no option to set the flags2 value.

Returns: the ms_peptidesummary::MSPEPSUM value.

◆ getHit()

ms_protein * getHit	(	const int	hit,
		const int	memberNumber = `0`
	)		const

virtual

Return the ms_protein hit - returns null if hit > number of hits.

Parameters

hit	should be in the range 1.. getNumberOfHits().
memberNumber	Should be 1 or less for a main protein or one not in a family, or 2 or more for later sibling proteins in the hit family. If this number is greater than the number of proteins in the hit family then the function returns a null value.

Returns: If the hit number is outside the range above, then a null value is returned. See Maintaining object references: two rules of thumb.

Reimplemented in ms_peptidesummary.

Examples: peptide_list.cpp, and resfile_summary.cpp.

◆ getHomologyThreshold()

int getHomologyThreshold	(	const int	query,
		double	OneInXprobRnd,
		const int	rank = `1`
	)		const

virtual

Returns the 'homology' threshold.

For MS-MS data only, this returns a threshold value for significant homology rather than identity. This value appears in the yellow popup box for an MS-MS result: Score greater than XX indicates homology.

The return value will be zero if the threshold is not available. The value cannot be determined if the query contains error tolerant matches, or if the query contains a tag or etag, or if the value of qmatch is less than or equal to 100.

The value returned will not be higher than the identity threshold.

In versions prior to 2.2, the homology threshold was not affected by OneInXprobRnd – this was corrected in version 2.2.

The rank parameter is mandatory in an integrated error tolerant (ET) search and integrated spectral library (SL) search. You can either pass ms_peptide::getRank() as the rank parameter, or use one of the enumerated values for HOMOLOGY_THRESHOLD_SOURCE. The rank determines the data source.

In an integrated spectral library search:

If rank = HOMTHR_FASTA, the method returns the FASTA homology threshold.
If the peptide at the given rank is from a FASTA database and is not a library match, the method returns the FASTA homology threshold.
If rank = HOMTHR_SL, the method returns zero (since library searches don't have a homology threshold).
If the peptide at the given rank is from a spectral library, the method returns zero.

In an integrated error tolerant search, behaviour depends on Mascot version. If the results file is from Mascot 2.8 or later:

If the query was selected for the second pass search, the method returns zero regardless of the rank argument.
If the query was only searched in the first pass, the method returns the first pass homology threshold. The OneInXprobRnd argument is ignored; the homology threshold is calculated using 1 / getProbabilityThreshold().

If the integrated error tolerant search is from Mascot 2.7 or earlier:

If rank = HOMTHR_FASTA, the method returns the first pass homology threshold. The OneInXprobRnd argument is ignored; the homology threshold is calculated using 1 / getProbabilityThreshold().
If the peptide at the given rank is a first pass match, the method returns the first pass homology threshold.
If the peptide at the given rank is an ET match, the method returns 0.

Parameters

query	Specifies the query number. This should be in the range 1 .. ms_mascotresfilebase::getNumQueries().
OneInXprobRnd	For a 1 in 20 probability, this should be 20. This value must be greater than 1, or it will be set to the default of 20. It's easiest to always pass 1 / getProbabilityThreshold() as OneInXprobRnd. This parameter is ignored in error tolerant search results. If the query was searched in the error tolerant pass, its significance is determined using 1 / getErrTolProbabilityThreshold(); otherwise using 1 / getProbabilityThreshold().
rank	Specifies the rank of the match of interest.

Returns: The homology threshold. If there was no MS-MS data for this query then 0 will be returned.

Examples: resfile_summary.cpp.

◆ getHomologyThresholdForHistogram()

int getHomologyThresholdForHistogram	(	double	OneInXprobRnd,
		DB_MATCH_TYPE	dbType = `DM_FASTA`
	)		const

virtual

Returns the value for the 'yellow section' in the histogram.

For MS-MS data only, this returns a homology (rather than identity) threshold value that is shown at the top of the report: Individual ions scores > 19 indicate peptides with significant homology (p<0.05).

The yellow area on the histogram also corresponds to this value.

Parameters

OneInXprobRnd

For a 1 in 20 probability, this should be 20. This value must be greater than 1, or it will be set to the default of 20. It's easiest to always pass 1 / getProbabilityThreshold() as OneInXprobRnd.

This parameter is ignored in error tolerant search results. If the query was searched in the error tolerant pass, its significance is determined using 1 / getErrTolProbabilityThreshold(); otherwise using 1 / getProbabilityThreshold().

dbType If given, should be DM_FASTA for Mascot homology threshold and DM_SPECTRAL_LIBRARY for spectral library threshold. DM_FASTA_FIRST_PASS and DM_FASTA_SECOND_PASS return the same threshold as DM_FASTA. (DM_BOTH is nonsensical; if given, -1 is returned.)

Returns: The homology threshold. Note that this value is only available for an MS-MS search with just one MS-MS data set, otherwise the return value will be zero.

◆ getIonsScore()

double getIonsScore	(	const int	q,
		const int	p,
		const bool	decoy
	)		const

pure virtual

Returns an ions score quickly without having to load an ms_peptide object.

If other values are required from the ms_peptide object, then it is normally faster to call getPeptide() and then ms_peptide::getIonsScore().

To get the q and p values for a peptide in an ms_protein object, call ms_protein::getPeptideQuery() and ms_protein::getPeptideP().

Parameters

q is the query number in the range 1 to ms_mascotresfilebase::getNumQueries().

p

is the 'hit' or 'rank' number.

For a peptide summary, the top 10 matches are saved and hence p would normally be in the range 1 to 10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20.
For a protein summary, a maximum of 50 protein hits are saved and hence p must be in the range 1..50. It is safest to call getMaxRankValue() to find the maximum value.

Thread safe: This method is safe to use from multiple threads. See also Using Parser in multithreaded applications.

Parameters

decoy is a flag to indicate if the score should be taken from the decoy section. This should only be set to true for searches with decoy matches.

Returns: the score, or 0 if there was no score for these q / p values.

Implemented in ms_peptidesummary, and ms_proteinsummary.

◆ getIonsScoreCorrected()

double getIonsScoreCorrected	(	const double	ionsScore,
		const long	multiplicity
	)		const

protected

Note: It shouldn't be necessary to call this function from outside the library.

This function is required here rather than as part of the ms_peptide object because one ms_peptide object can be used from many different proteins – each with a different multiplicity factor.

This function subtracts the factor:

-10 * log(multiplicity) * tolFact / log(10); where
tolFact = sqrt(ITOL * ITOL + 0.0625);

Parameters

ionsScore	Is the uncorrected ions score.
multiplicity	can be obtained from ms_protein::getPeptideMultiplicity.

Returns: The corrected value.

◆ getIonsScoreHistogram()

std::vector< int > getIonsScoreHistogram	(	IONS_HISTOGRAM	flags = `IH_INCLUDE_TOP_MATCHES`,
		DB_MATCH_TYPE	dbType = `DM_FASTA`
	)		const

virtual

Returns a list of counts for binned ions scores.

The first element of the array returns the count of matches with a score between 0 and 1.
The second element of the array returns the count of matches with a score between 1 and 2.
The third element of the array returns the count of matches with a score between 2 and 3. etc. etc.

Parameters

flags	Currently only IH_INCLUDE_TOP_MATCHES and IH_INCLUDE_TOP_10_MATCHES are supported.
dbType	If given, should be DM_FASTA for Mascot scores and DM_SPECTRAL_LIBRARY for spectral library scores. `DM_FASTA_FIRST_PASS` and `DM_FASTA_SECOND_PASS` return the same histogram as `DM_FASTA`. (DM_BOTH is nonsensical; if given, an empty vector is returned.)

Returns: a vector of integers. See Using STL vector classes vectori, vectord and VectorString in Perl, Java, Python and C#.

◆ getLibraryModString()

std::string getLibraryModString	(	const int	q,
		const int	p
	)		const

pure virtual

Return the modification string of the spectral library match from q1_p1_SLmod.

The modification string has the format

 q1_p1_SLmod=position:mod,position:mod,...

where position is a 1-based index into the peptide sequence and mod is a numeric index into the spectral_library section of the results file. The modification name and delta can be found with ms_searchparams::getLibraryModName() and ms_searchparams::getLibraryModDelta().

Note that position 1 is the first residue of the peptide sequence, which is the same convention as with ms_peptide::getVarModsStr(). However, MSPepSearch reports terminal modifications as modifications on the first and last residue. Positions 0 and N+1 (if sequence length is N) are reserved for future use.

For Mascot matches, the library modification string is empty.

In protein summary, this method always returns the empty string.

See also: Spectral libraries.

Parameters

q	Specifies the query number. This should be in the range 1 .. ms_mascotresfilebase::getNumQueries().
p	is the rank number.

Returns: modification string of the match if it is a spectral library match and has modifications, and empty string otherwise.

Implemented in ms_peptidesummary, and ms_proteinsummary.

◆ getMaxPeptideIdentityThreshold()

int getMaxPeptideIdentityThreshold	(	double	OneInXprobRnd,
		DB_MATCH_TYPE	dbType = `DM_FASTA`
	)		const

virtual

Return the max threshold value for all MS-MS data sets.

The maximum value is calculated by taking the maximum value returned by calling getPeptideIdentityThreshold() for each query. This is the value that is shown at the top of the standard Mascot peptide summary report (version 2.0 and later). For example, threshold in the following string could be calculated by calling getMaxPeptideIdentityThreshold(20):

Individual ions scores > 47 indicate identity or extensive homology (p<0.05).

Versions of Mascot prior to 2.0 displayed the equivalent of getAvePeptideIdentityThreshold().

Parameters

OneInXprobRnd

For a 1 in 20 probability, this should be 20. This value must be greater than 1, or it will be set to the default of 20. It's easiest to always pass 1 / getProbabilityThreshold() as OneInXprobRnd.

This parameter is ignored in error tolerant search results. If the query was searched in the error tolerant pass, its significance is determined using 1 / getErrTolProbabilityThreshold(); otherwise using 1 / getProbabilityThreshold().

dbType If given, should be DM_FASTA for Mascot identity threshold and DM_SPECTRAL_LIBRARY for spectral library threshold. DM_FASTA_FIRST_PASS and DM_FASTA_SECOND_PASS return the same threshold as DM_FASTA. (DM_BOTH is nonsensical; if given, -1 is returned.)

Returns: The maximum score threshold. If there was no MS-MS data then -1 will be returned.

◆ getMaxRankValue()

int getMaxRankValue ( ) const

virtual

Returns the maximum 'rank' or 'hit' or 'p' value.

For functions such as getPeptide(), a 'rank' or 'hit' value is required. The allowable range for this value depends on whether it is a protein summary or peptide summary.

For a peptide summary the value returned will normally be 10, but for an integrated error tolerant search, integrated spectral library search and integrated crosslinked search, this can be in the range 1..20.

For a protein summary the return value will be ms_mascotresfilebase::getNumHits() – with the appropriate parameter passed for the case of a decoy or error tolerant search.

Returns: max rank value

◆ getMinPepLenInPepSummary()

int getMinPepLenInPepSummary ( ) const

Peptides shorter than this are ignored when putting proteins into groups.

minPepLenInPepSummary is an optional flag for the peptide summary only. Any peptides shorter than this will be ignored when grouping proteins together.

Specify this value when creating an ms_peptidesummary::ms_peptidesummary object. This value will always be zero for ms_proteinsummary.

Returns: The minimum peptide length considered when creating the peptide summary.

◆ getNextFamilyProtein()

ms_protein * getNextFamilyProtein	(	const int	masterHit,
		const int	id
	)		const

virtual

Find the next protein in the family masterHit.

Call this function multiple times for each masterHit, incrementing id each time. When there are no more protein families this function will return a null value.

The main protein for the hit should be obtained by calling ms_mascotresults::getHit()

See Using MSRES_CLUSTER_PROTEINS for further information.

See also: ms_protein::GROUP

Parameters

masterHit	is the id for the main hit (1..n).
id	is a 'one based' number.

Returns: The next protein family or a null value if there is no such group. See Maintaining object references: two rules of thumb.

Examples: resfile_summary.cpp.

◆ getNextSimilarProtein()

ms_protein * getNextSimilarProtein	(	const int	masterHit,
		const int	id
	)		const

virtual

Return the next protein that contains all the peptides in the 'master' protein.

Find the next protein that contains exactly the same set of peptides as the 'master' one. See Grouping proteins together for further information. Call this function multiple times for each masterHit, incrementing id each time. When there are no more proteins containing the same set of peptides, this function will return a null value.

See also: ms_protein::GROUP

Parameters

masterHit	is a 'one based' number for the main hit.
id	is a 'one based' number.

Returns: The next protein with the same set of peptides as the master hit or a null value if there are no such proteins. See Maintaining object references: two rules of thumb.

◆ getNextSimilarProteinOf()

ms_protein * getNextSimilarProteinOf	(	const char *	masterAccession,
		const int	masterDB,
		const int	id
	)		const

virtual

Return the next protein that contains all the peptides in the 'master' protein.

Find the next protein that contains exactly the same set of peptides as the 'master' one. See Grouping proteins together for further information. Call this function multiple times for each masterAccession, incrementing id each time. When there are no more proteins containing the same set of peptides, this function will return a null value.

See also: ms_protein::GROUP

Parameters

masterAccession	is the accession for the main hit.
masterDB	is the database (FASTA) ID for the main hit.
id	is a 'one based' number.

Returns: The next protein with the same set of peptides as the masterAccession or a null value if there are no such proteins. See Maintaining object references: two rules of thumb.

Examples: resfile_summary.cpp.

◆ getNextSubsetProtein()

ms_protein * getNextSubsetProtein	(	const int	masterHit,
		const int	id,
		const bool	searchWholeFamily = `true`
	)		const

virtual

Return the next protein that contains some of the peptides in the 'master' protein.

Find the next protein that contains a subset of the same peptides as the 'master' one. See Grouping proteins together and setSubsetsThreshold() for further information. Call this function multiple times for each masterHit, incrementing id each time. When there are no more proteins containing a subset of the peptides, this function will return a null value.

See also: ms_protein::GROUP, getNextSubsetProteinOf()

Parameters

masterHit	is a 'one based' number for the main hit.
id	is a 'one based' number.
searchWholeFamily	only has meaning when MSRES_CLUSTER_PROTEINS is specified. See Using MSRES_CLUSTER_PROTEINS.

Returns: A protein that contains a subset of peptides in the master protein, or a null value if there is no such protein. See Maintaining object references: two rules of thumb.

◆ getNextSubsetProteinOf()

ms_protein * getNextSubsetProteinOf	(	const char *	masterAccession,
		const int	masterDB,
		const int	id
	)		const

virtual

Return the next protein that contains some of the peptides in the 'master' protein.

Find the next protein that contains a subset of the same peptides as the 'master' one. See Grouping proteins together and setSubsetsThreshold() for further information.

Call this function multiple times for each masterAccession, incrementing id each time. When there are no more proteins containing a subset of the peptides, this function will return a null value.

See also: ms_protein::GROUP, getNextSubsetProtein()

Parameters

masterAccession	is the accession for the master hit or a family hit.
masterDB	is the database (fasta) ID for the main hit.
id	is a 'one based' number.

Returns: A protein that contains a subset of peptides in the master protein, or a null value if there is no such protein. See Maintaining object references: two rules of thumb.

Examples: resfile_summary.cpp.

◆ getNumberOfFamilyMembers()

int getNumberOfFamilyMembers ( ) const

virtual

Return the total number of family members.

Returns: This function returns the number of 'top level' hits and family members, not taking into account any proteins that are identical or subsets of these hits. The value will be identical to that returned by getNumberOfHits() unless the ms_mascotresults::MSRES_CLUSTER_PROTEINS flag has been specified. In that case the number of values returned by getNextFamilyProtein() will be included.

◆ getNumberOfHits()

int getNumberOfHits ( ) const

virtual

Returns the number of hits in the results.

Thread safe: This method is safe to use from multiple threads. See also Using Parser in multithreaded applications.

Returns: This function returns the number of 'top level' hits – not taking into account any proteins that are identical or subsets of the 'top level' hits. ms_mascotresults::getHit() can be called with any number from 1 to the value returned by this function.

◆ getNumberOfUnassigned()

int getNumberOfUnassigned ( ) const

Return the number of peptides in the unassigned list.

See Unassigned peptides list for details of creating and using an unassigned list.

If there is no cache file for the results, then createUnassignedList() must be called before calling this function or the ms_errs::ERR_UNASSIGNED_PROG error will be set, and this function will return 0.

If a cache file has been created, then there is no requirement to call createUnassignedList() before calling this function.

Returns: the number of peptides in the unassigned list, or 0 in the case of an error.

Examples: resfile_summary.cpp.

◆ getNumDecoyHitsAboveHomology()

long getNumDecoyHitsAboveHomology	(	double	OneInXprobRnd,
		DECOY_STATS_COUNT_TYPE	countType = `DS_COUNT_PSM`,
		DB_MATCH_TYPE	dbType = `DM_FASTA`
	)

virtual

Return the number of hits from the decoy search with a score at or above the homology threshold.

For a peptide summary, the number of hits is the number of peptide matches. The value is not affected by ignoreIonsScoreBelow and the count only includes rank 1 peptides. The returned value will be affected by the minProbability value passed to the ms_peptidesummary constructor. The homology threshold is retrieved using getPeptideHomologyThreshold(). If there is no homology threshold, then the identity threshold is used, which is retrieved using getPeptideIdentityThreshold(). These values may be slightly different from the threshold for the non-decoy matches.

For a protein summary, this function will always return -1 because there is never a homology threshold for the protein summary.

The first call to any of the following functions may take some time as the values are not calculated when the ms_peptidesummary or ms_proteinsummary object is created. All four values are cached when the first call is made.

However, calling a function with a different probability threshold will cause the cached values to be re-calculated.

The false discovery rate (FDR) can be calculated from

   getNumDecoyHitsAboveIdentity() / getNumHitsAboveIdentity()

or

   getNumDecoyHitsAboveHomology() / getNumHitsAboveHomology()

Parameters

OneInXprobRnd	For a 1 in 20 probability, this should be 20. This value must be greater than 1, or it will be set to the default of 20. It's easiest to always pass 1 / getProbabilityThreshold() as OneInXprobRnd. This parameter is ignored in error tolerant search results. If the query was searched in the error tolerant pass, its significance is determined using 1 / getErrTolProbabilityThreshold(); otherwise using 1 / getProbabilityThreshold().
countType	In peptide summary, type of object to count, either peptide-spectrum matches (PSMs) or distinct peptide sequences. The default, DS_COUNT_PSM, is the only mode available in Parser 2.5 and earlier. In protein summary, the argument is ignored.
dbType	In peptide summary, source of peptides being counted, either Mascot matches in FASTA sequences or spectral library matches. The default is DM_FASTA, which is the only mode available in Parser 2.5 and earlier. In an integrated ET search, DM_FASTA_FIRST_PASS and DM_FASTA_SECOND_PASS return counts for first pass and second pass only, while DM_FASTA returns counts for both passes. In protein summary, the argument is ignored.

Returns: The number of decoy hits with a score at or above the homology threshold.

◆ getNumDecoyHitsAboveIdentity()

long getNumDecoyHitsAboveIdentity	(	double	OneInXprobRnd,
		DECOY_STATS_COUNT_TYPE	countType = `DS_COUNT_PSM`,
		DB_MATCH_TYPE	dbType = `DM_FASTA`
	)

virtual

Return the number of hits from the decoy search with a score at or above the identity threshold.

For a peptide summary, the number of hits is the number of peptide matches. The value is not affected by ignoreIonsScoreBelow and the count only includes rank 1 peptides. The returned value will be affected by the minProbability value passed to the ms_peptidesummary constructor. The identity threshold is retrieved using getPeptideIdentityThreshold(). This value may be slightly different from the threshold for the non-decoy matches.

For a protein summary, the number of hits is the number of protein matches. This number includes the number of protein mixture matches plus the number of single protein matches. The returned value will be affected by the minProbability value passed to the ms_proteinsummary constructor. A value of -1 will be returned if the search contained any MS-MS, sequence query or tag data. The identity threshold is retrieved using getProteinThreshold().

The first call to any of the following functions may take some time as the values are not calculated when the ms_peptidesummary or ms_proteinsummary object is created. All four values are cached when the first call is made.

However, calling a function with a different probability threshold will cause the cached values to be re-calculated.

The false discovery rate (FDR) can be calculated from

   getNumDecoyHitsAboveIdentity() / getNumHitsAboveIdentity()

or

   getNumDecoyHitsAboveHomology() / getNumHitsAboveHomology()

Parameters

OneInXprobRnd	For a 1 in 20 probability, this should be 20. This value must be greater than 1, or it will be set to the default of 20. It's easiest to always pass 1 / getProbabilityThreshold() as OneInXprobRnd. This parameter is ignored in error tolerant search results. If the query was searched in the error tolerant pass, its significance is determined using 1 / getErrTolProbabilityThreshold(); otherwise using 1 / getProbabilityThreshold().
countType	In peptide summary, type of object to count, either peptide-spectrum matches (PSMs) or distinct peptide sequences. The default, DS_COUNT_PSM, is the only mode available in Parser 2.5 and earlier. In protein summary, the argument is ignored.
dbType	In peptide summary, source of peptides being counted, either Mascot matches in FASTA sequences or spectral library matches. The default is DM_FASTA, which is the only mode available in Parser 2.5 and earlier. In an integrated ET search, DM_FASTA_FIRST_PASS and DM_FASTA_SECOND_PASS return counts for first pass and second pass only, while DM_FASTA returns counts for both passes. In protein summary, the argument is ignored.

Returns: The number of decoy hits with a score at or above the identity threshold.

◆ getNumDiscoveredErrTolMods()

std::vector< int > getNumDiscoveredErrTolMods	(	const std::string	modName,
		std::vector< std::string > &	positions,
		std::vector< std::string > &	sites
	)		const

virtual

Return the count of error tolerant modifications discovered in this search.

The number of times an error tolerant modification has been discovered in the search is the number of times it appears in rank 1 matches in the search results. Since the number and kind of error tolerant modifications is (practically) unlimited, you should call getDiscoveredErrTolModNames() first to find out which error tolerant modifications appear in the search. (The count of error tolerant modifications that do not appear in the search is, of course, zero.) Amino acid insertion, deletion and substitution have the usual special names NA_INSERTION, NA_DELETION and NA_SUBSTITUTION. Otherwise error tolerant modification names follow Unimod names.

Only significant rank 1 matches assigned to a protein hit are counted. This means that if the error tolerant match is not the rank 1 match (e.g. the same query had a higher scoring regular peptide match), then the error tolerant modification of that match is not counted.

Count data is returned in three vectors of equal length. The ith element in the returned vector of integers is the number of times the error tolerant modification occurred at specificity i. The ith element in the positions and sites vectors defines the position and site of the ith specificity. For example, if the ith specificity is N-terminal K, the ith element of positions would be "Any N-term" and the ith element of sites would be "K".

If caching is in use and the cache file contains modification counts, then the count data is loaded straight from the cache. Otherwise the first call to anyNumDiscoveredMods(), getNumDiscoveredFixedMods(), getNumDiscoveredVariableMods(), getNumDiscoveredLocalMods(), getNumDiscoveredLibraryMods(), getNumDiscoveredIntactLinks(), getNumDiscoveredErrTolMods() or getNumDiscoveredNonSpecCleavage() causes a loop over all queries to be run and modifications to be counted, which may be slow if the search results are very large.

See Using STL vector classes vectori, vectord and VectorString in Perl, Java, Python and C#.

Parameters

modName	Error tolerant modification name (as returned by getDiscoveredErrTolModNames()).
positions	A vector of positions ("Anywhere", "Any N-term", "Any C-term", "Protein N-term", "Protein C-term") giving the position part of the count specificity.
sites	A vector of sites (amino acids A-W, "N-term", "C-term") giving the site part of the count specificity.

Returns: The number of times the error tolerant modification was observed in significant rank 1 matches assigned to a protein hit in this search at each specificity, returned in positions and sites.

◆ getNumDiscoveredFixedMods()

std::vector< int > getNumDiscoveredFixedMods	(	const int	num,
		std::vector< std::string > &	positions,
		std::vector< std::string > &	sites
	)		const

virtual

Return the count of fixed modifications discovered in this search.

The number of times a fixed modification has been discovered in the search is the number of times the terminus or residue matching its specificity appears in significant rank 1 matches assigned to a protein hit in the search results. For example, if Carbamidomethyl (C) is specified as a fixed modification, then the number of times it has been discovered in the search is the number of C residues in peptide sequences in significant rank 1 matches assigned to a protein hit.

Count data is returned in three vectors of equal length. The ith element in the returned vector of integers is the number of times the fixed modification occurred at specificity i. The ith element in the positions and sites vectors defines the position and site of the ith specificity. For example, if the ith specificity is N-terminal K, the ith element of positions would be "Any N-term" and the ith element of sites would be "K".

If caching is in use and the cache file contains modification counts, then the count data is loaded straight from the cache. Otherwise the first call to anyNumDiscoveredMods(), getNumDiscoveredFixedMods(), getNumDiscoveredVariableMods(), getNumDiscoveredLocalMods(), getNumDiscoveredLibraryMods(), getNumDiscoveredIntactLinks(), getNumDiscoveredErrTolMods() or getNumDiscoveredNonSpecCleavage() causes a loop over all queries to be run and modifications to be counted, which may be slow if the search results are very large.

Note that if the same modification is specified as a fixed modification with two different specificities in the search, discovery counts will be returned separately. For example, if fixed modification 1 is Carbamidomethyl (C) and 2 is Carbamidomethyl (N-term), then calling getNumDiscoveredFixedMods() with num = 1 returns the Carbamidomethyl count for specificity "Anywhere" and "C", while num = 2 returns the count for specificity "Any N-term" and "N-term". In both cases each vector contains a single item.

See Using STL vector classes vectori, vectord and VectorString in Perl, Java, Python and C#.

Parameters

num	Fixed modification number (same value as passed to ms_searchparams::getFixedModsName()).
positions	A vector of positions ("Anywhere", "Any N-term", "Any C-term", "Protein N-term", "Protein C-term") giving the position part of the count specificity.
sites	A vector of sites (amino acids A-W, "N-term", "C-term") giving the site part of the count specificity.

Returns: The number of times the fixed modification was observed in significant rank 1 matches assigned to a protein hit in this search at each specificity, returned in positions and sites.

◆ getNumDiscoveredIntactLinks()

std::vector< int > getNumDiscoveredIntactLinks	(	std::vector< int > &	varModNum1,
		std::vector< std::string > &	position1,
		std::vector< std::string > &	site1,
		std::vector< int > &	varModNum2,
		std::vector< std::string > &	position2,
		std::vector< std::string > &	site2,
		std::vector< int > &	numLinkedPeptides
	)		const

Return the count of intact crosslinks in this search.

The number of times an intact crosslink has been discovered in the search is the number of times it appears as the intact link in significant rank 1 matches assigned to a protein hit.

Count data is returned in eight vectors of equal length. The ith element in the returned vector of integers is the number of times the link occurred, while varModNum1, position1 and site1 have the variable mod number, position and site of the first end of the link and varModNum2, position2 and site2 have the second end. The ith element of numLinkedPeptides gives the number of linked peptides for that count.

For example, if the first end of the ith link is N-terminal K, the ith element of position1 would be "Any N-term" and the ith element of site1 would be "K".

In the simplest case, there is one linker with one specificity, say Xlink:DSS (K). All the vectors have just one element. If the linker has two specificities, like Xlink:DSS (K) and Xlink:DSS (Protein N-term), there will be three site pairs: (K, K), (K, Protein N-term) and (Protein N-term, Protein N-term). The pair (Protein N-term, K) is collated with (K, Protein N-term).

If caching is in use and the cache file contains modification counts, then the count data is loaded straight from the cache. Otherwise the first call to anyNumDiscoveredMods(), getNumDiscoveredFixedMods(), getNumDiscoveredVariableMods(), getNumDiscoveredLocalMods(), getNumDiscoveredLibraryMods(), getNumDiscoveredIntactLinks(), getNumDiscoveredErrTolMods() or getNumDiscoveredNonSpecCleavage() causes a loop over all queries to be run and modifications to be counted, which may be slow if the search results are very large.

See Using STL vector classes vectori, vectord and VectorString in Perl, Java, Python and C#.

Parameters

varModNum1	Variable modification number for the first end of link.
position1	A vector of positions ("Anywhere", "Any N-term", "Any C-term", "Protein N-term", "Protein C-term") giving the position part of the count specificity.
site1	A vector of sites (amino acids A-W, "N-term", "C-term") giving the site part of the count specificity.
varModNum2	Variable modification number for the second end of link.
position2	A vector of positions ("Anywhere", "Any N-term", "Any C-term", "Protein N-term", "Protein C-term") giving the position part of the count specificity.
site2	A vector of sites (amino acids A-W, "N-term", "C-term") giving the site part of the count specificity.
numLinkedPeptides	A vector containing the number of linked peptides corresponding to each count. Looplinked counts have 1 peptide, while crosslinked counts have 2 peptides.

Returns: The number of times the intact link was observed in significant rank 1 matches assigned to a protein hit.

◆ getNumDiscoveredLibraryMods()

std::vector< int > getNumDiscoveredLibraryMods	(	const int	modId,
		std::vector< std::string > &	positions,
		std::vector< std::string > &	sites
	)		const

Return the count of library modifications discovered in this search.

The number of times a library modification has been discovered in the search is the number of times it appears in the library mods string in significant rank 1 spectral library matches assigned to a protein hit in the search results.

Count data is returned in three vectors of equal length. The ith element in the returned vector of integers is the number of times the library modification occurred at specificity i. The ith element in the positions and sites vectors defines the position and site of the ith specificity.

If caching is in use and the cache file contains modification counts, then the count data is loaded straight from the cache. Otherwise the first call to anyNumDiscoveredMods(), getNumDiscoveredFixedMods(), getNumDiscoveredVariableMods(), getNumDiscoveredLocalMods(), getNumDiscoveredLibraryMods(), getNumDiscoveredIntactLinks(), getNumDiscoveredErrTolMods() or getNumDiscoveredNonSpecCleavage() causes a loop over all queries to be run and modifications to be counted, which may be slow if the search results are very large.

Note that modifications are part of the metadata of spectral library entries. Unlike in database searches, they are not specified as part of the search parameters. Some caveats apply:

A library modification may have the same name as a variable or fixed modification in the database search, and they may well be the same modification – but you need to know how the library was constructed to conclude either way.
Library modification name is usually a free-text string, which may or may not be a Unimod modification name.
The MSP format for spectral libraries does not support specificities in the same way as Mascot. The "position" of the modification is not encoded in the library and is thus unknown; it will always be Anywhere.

See Using STL vector classes vectori, vectord and VectorString in Perl, Java, Python and C#.

Parameters

modId	modification ID, same as passed to ms_searchparams::getLibraryModName(). Must be in range 1..ms_searchparams::getNumberOfLibraryMods().
positions	A vector of positions, always "Anywhere".
sites	A vector of sites (amino acids A-W, "N-term", "C-term") giving the site part of the count specificity.

Returns: The number of times the library modification was observed in significant rank 1 spectral library matches assigned to a protein hit in this search at each specificity, returned in positions and sites.

◆ getNumDiscoveredLocalMods()

std::vector< int > getNumDiscoveredLocalMods	(	const std::string	modName,
		std::vector< std::string > &	positions,
		std::vector< std::string > &	sites
	)		const

virtual

Return the count of query-level modifications discovered in this search.

The number of times a query-level modification has been discovered in the search is the number of times it appears in significant rank 1 matches assigned to a protein hit in the search results.

Query-level modifications are counted separately from variable modifications (specified at search form level), because each individual query may contain up to 32 query-level variable modifications. This means the search as a whole may contain more than 32 different query-level modifications. You should call getDiscoveredLocalModNames() first to find out which query-level modifications appear in the search.

Count data is returned in three vectors of equal length. The ith element in the returned vector of integers is the number of times the query-level modification occurred at specificity i. The ith element in the positions and sites vectors defines the position and site of the ith specificity. For example, if the ith specificity is N-terminal K, the ith element of positions would be "Any N-term" and the ith element of sites would be "K".

If caching is in use and the cache file contains modification counts, then the count data is loaded straight from the cache. Otherwise the first call to anyNumDiscoveredMods(), getNumDiscoveredFixedMods(), getNumDiscoveredVariableMods(), getNumDiscoveredLocalMods(), getNumDiscoveredLibraryMods(), getNumDiscoveredIntactLinks(), getNumDiscoveredErrTolMods() or getNumDiscoveredNonSpecCleavage() causes a loop over all queries to be run and modifications to be counted, which may be slow if the search results are very large.

Note that only significant rank 1 matches assigned to a protein hit are counted.

See Using STL vector classes vectori, vectord and VectorString in Perl, Java, Python and C#.

Parameters

modName	Query-level modification name (as returned by getDiscoveredLocalModNames()).
positions	A vector of positions ("Anywhere", "Any N-term", "Any C-term", "Protein N-term", "Protein C-term") giving the position part of the count specificity.
sites	A vector of sites (amino acids A-W, "N-term", "C-term") giving the site part of the count specificity.

Returns: The number of times the query-level modification was observed in significant rank 1 matches assigned to a protein hit in this search at each specificity, returned in positions and sites.

◆ getNumDiscoveredNonSpecCleavage()

int getNumDiscoveredNonSpecCleavage ( ) const

virtual

Return the count of non-specific cleavages in an error tolerant search.

An error tolerant match is the product of non-specific cleavage when the residue before or after the peptide terminal residue does not match the cleavage specificity of the enzyme. For example, trypsin cleaves at C-term K or R (unless followed by P), so all tryptic peptide sequences have K or R as the (N-term) residue before the sequence. An error tolerant match with non-specific cleavage would have a residue other than K or R at N-terminus.

Only significant rank 1 matches assigned to a protein hit contribute towards the count of non-specific cleavages. This means that if the error tolerant match is not the rank 1 match (e.g. the same query had a higher scoring regular peptide match), then the match is not counted.

Returns: The number of times non-specific cleavage occurred in significant rank 1 matches assigned to a protein hit, or -1 for non-error tolerant search.

◆ getNumDiscoveredVariableMods() [1/2]

std::vector< int > getNumDiscoveredVariableMods	(	const int	num,
		std::vector< std::string > &	positions,
		std::vector< std::string > &	sites
	)		const

virtual

Return the count of variable modifications discovered in this search.

The number of times a variable modification has been discovered in the search is the number of times it appears in the variable mods string in significant rank 1 matches assigned to a protein hit in the search results.

Note that error tolerant matches are not included in these counts; see getNumDiscoveredErrTolMods(). Query-level variable modifications are counted separately as well; see getNumDiscoveredLocalMods().

Count data is returned in three vectors of equal length. The ith element in the returned vector of integers is the number of times the variable modification occurred at specificity i.

The ith element in the positions and sites vectors defines the position and site of the ith specificity. For example, if the ith specificity is N-terminal K, the ith element of positions would be "Any N-term" and the ith element of sites would be "K".

If caching is in use and the cache file contains modification counts, then the count data is loaded straight from the cache. Otherwise the first call to anyNumDiscoveredMods(), getNumDiscoveredFixedMods(), getNumDiscoveredVariableMods(), getNumDiscoveredLocalMods(), getNumDiscoveredLibraryMods(), getNumDiscoveredIntactLinks(), getNumDiscoveredErrTolMods() or getNumDiscoveredNonSpecCleavage() causes a loop over all queries to be run and modifications to be counted, which may be slow if the search results are very large.

Note that if the same modification is specified as a variable modification with two different specificities in the search, discovery counts will be returned separately. For example, if variable modification 1 is Phospho (ST) and 2 is Phospho (Y), then calling getNumDiscoveredVariableMods() with num = 1 returns the Phospho counts for specificities "Anywhere" "S" and "Anywhere" "T", while num = 2 returns the count for specificity "Anywhere" "Y".

Note also that since error tolerant modifications are not included in counts of variable modifications, it is possible (though unlikely) for a modification to appear both as variable (specified explicitly in the search) and as error tolerant, especially if the two instances have different specificities.

See Using STL vector classes vectori, vectord and VectorString in Perl, Java, Python and C#.

See also: getNumDiscoveredVariableMods(const int, std::vector<std::string>&, std::vector<std::string>&, std::vector<double>&, std::vector<std::string>&) const

Parameters

num	Variable modification number (same value as passed to ms_searchparams::getVarModsName()).
positions	A vector of positions ("Anywhere", "Any N-term", "Any C-term", "Protein N-term", "Protein C-term") giving the position part of the count specificity.
sites	A vector of sites (amino acids A-W, "N-term", "C-term") giving the site part of the count specificity.

Returns: The number of times the variable modification was observed in significant rank 1 matches assigned to a protein hit in this search at each specificity, returned in positions and sites.

◆ getNumDiscoveredVariableMods() [2/2]

std::vector< int > getNumDiscoveredVariableMods	(	const int	num,
		std::vector< std::string > &	positions,
		std::vector< std::string > &	sites,
		std::vector< double > &	deltas,
		std::vector< std::string > &	names
	)		const

virtual

Return the count of variable modifications discovered in this search.

The number of times a variable modification has been discovered in the search is the number of times it appears in the variable mods string in significant rank 1 matches assigned to a protein hit in the search results.

Note that error tolerant matches are not included in these counts; see getNumDiscoveredErrTolMods(). Query-level variable modifications are counted separately as well; see getNumDiscoveredLocalMods().

Count data is returned in five vectors of equal length. The ith element in the returned vector of integers is the number of times the variable modification occurred at specificity i.

The ith element in the positions and sites vectors defines the position and site of the ith specificity. For example, if the ith specificity is N-terminal K, the ith element of positions would be "Any N-term" and the ith element of sites would be "K".

The ith element in the deltas and names vectors defines the delta and name of the modification, respectively. If the search has no crosslinking method, these two parameters can be ignored. When the modification number num refers to a crosslinker, the deltas and names vectors contain values for all monolinks represented by the entry.

If caching is in use and the cache file contains modification counts, then the count data is loaded straight from the cache. Otherwise the first call to anyNumDiscoveredMods(), getNumDiscoveredFixedMods(), getNumDiscoveredVariableMods(), getNumDiscoveredLocalMods(), getNumDiscoveredLibraryMods(), getNumDiscoveredIntactLinks(), getNumDiscoveredErrTolMods() or getNumDiscoveredNonSpecCleavage() causes a loop over all queries to be run and modifications to be counted, which may be slow if the search results are very large.

Note that if the same modification is specified as a variable modification with two different specificities in the search, discovery counts will be returned separately. For example, if variable modification 1 is Phospho (ST) and 2 is Phospho (Y), then calling getNumDiscoveredVariableMods() with num = 1 returns the Phospho counts for specificities "Anywhere" "S" and "Anywhere" "T", while num = 2 returns the count for specificity "Anywhere" "Y".

Note also that since error tolerant modifications are not included in counts of variable modifications, it is possible (though unlikely) for a modification to appear both as variable (specified explicitly in the search) and as error tolerant, especially if the two instances have different specificities.

See Using STL vector classes vectori, vectord and VectorString in Perl, Java, Python and C#.

Parameters

num	Variable modification number (same value as passed to ms_searchparams::getVarModsName()).
positions	A vector of positions ("Anywhere", "Any N-term", "Any C-term", "Protein N-term", "Protein C-term") giving the position part of the count specificity.
sites	A vector of sites (amino acids A-W, "N-term", "C-term") giving the site part of the count specificity.
deltas	A vector of deltas.
names	A vector of names.

Returns: The number of times the variable modification was observed in significant rank 1 matches assigned to a protein hit in this search at each specificity, returned in positions and sites.

◆ getNumHitsAboveHomology()

long getNumHitsAboveHomology	(	double	OneInXprobRnd,
		DECOY_STATS_COUNT_TYPE	countType = `DS_COUNT_PSM`,
		DB_MATCH_TYPE	dbType = `DM_FASTA`
	)

virtual

Return the number of hits with a score at or above the homology threshold.

For a peptide summary, the number of hits is the number of peptide matches. The value is not affected by ignoreIonsScoreBelow and the count only includes rank 1 peptides. The returned value will be affected by the minProbability value passed to the ms_peptidesummary constructor. The homology threshold is retrieved using getPeptideHomologyThreshold(). If there is no homology threshold, then the identity threshold is used, which is retrieved using getPeptideIdentityThreshold().

For a protein summary, this function will always return -1 because there is never a homology threshold for the protein summary.

The first call to any of the following functions may take some time as the values are not calculated when the ms_peptidesummary or ms_proteinsummary object is created. All four values are cached when the first call is made.

However, calling a function with a different probability threshold will cause the cached values to be re-calculated.

The false discovery rate (FDR) can be calculated from

   getNumDecoyHitsAboveIdentity() / getNumHitsAboveIdentity()

or

   getNumDecoyHitsAboveHomology() / getNumHitsAboveHomology()

Parameters

OneInXprobRnd	For a 1 in 20 probability, this should be 20. This value must be greater than 1, or it will be set to the default of 20. It's easiest to always pass 1 / getProbabilityThreshold() as OneInXprobRnd. This parameter is ignored in error tolerant search results. If the query was searched in the error tolerant pass, its significance is determined using 1 / getErrTolProbabilityThreshold(); otherwise using 1 / getProbabilityThreshold().
countType	In peptide summary, type of object to count, either peptide-spectrum matches (PSMs) or distinct peptide sequences. The default, DS_COUNT_PSM, is the only mode available in Parser 2.5 and earlier. In protein summary, the argument is ignored.
dbType	In peptide summary, source of peptides being counted, either Mascot matches in FASTA sequences or spectral library matches. The default is DM_FASTA, which is the only mode available in Parser 2.5 and earlier. In an integrated ET search, DM_FASTA_FIRST_PASS and DM_FASTA_SECOND_PASS return counts for first pass and second pass only, while DM_FASTA returns counts for both passes. In protein summary, the argument is ignored.

Returns: The number of hits with a score at or above the homology threshold.

◆ getNumHitsAboveIdentity()

long getNumHitsAboveIdentity	(	double	OneInXprobRnd,
		DECOY_STATS_COUNT_TYPE	countType = `DS_COUNT_PSM`,
		DB_MATCH_TYPE	dbType = `DM_FASTA`
	)

virtual

Return the number of hits with a score at or above the identity threshold.

For a peptide summary, the number of hits is the number of peptide matches. The value is not affected by ignoreIonsScoreBelow and the count only includes rank 1 peptides. The returned value will be affected by the minProbability value passed to the ms_peptidesummary constructor. The identity threshold is retrieved using getPeptideIdentityThreshold().

For a protein summary, the number of hits is the number of protein matches. The number includes the number of protein mixture matches plus the number of single protein matches. The returned value will be affected by the minProbability value passed to the ms_proteinsummary constructor. A value of -1 will be returned if the search contained any MS-MS, sequence query or tag data. The identity threshold is retrieved using getProteinThreshold().

The first call to any of the following functions may take some time as the values are not calculated when the ms_peptidesummary or ms_proteinsummary object is created. All four values are cached when the first call is made.

However, calling a function with a different probability threshold will cause the cached values to be re-calculated.

The false discovery rate (FDR) can be calculated from

   getNumDecoyHitsAboveIdentity() / getNumHitsAboveIdentity()

or

   getNumDecoyHitsAboveHomology() / getNumHitsAboveHomology()

Parameters

OneInXprobRnd	For a 1 in 20 probability, this should be 20. This value must be greater than 1, or it will be set to the default of 20. It's easiest to always pass 1 / getProbabilityThreshold() as OneInXprobRnd. This parameter is ignored in error tolerant search results. If the query was searched in the error tolerant pass, its significance is determined using 1 / getErrTolProbabilityThreshold(); otherwise using 1 / getProbabilityThreshold().
countType	In peptide summary, type of object to count, either peptide-spectrum matches (PSMs) or distinct peptide sequences. The default, DS_COUNT_PSM, is the only mode available in Parser 2.5 and earlier. In protein summary, the argument is ignored.
dbType	In peptide summary, source of peptides being counted, either Mascot matches in FASTA sequences or spectral library matches. The default is DM_FASTA, which is the only mode available in Parser 2.5 and earlier. In an integrated ET search, DM_FASTA_FIRST_PASS and DM_FASTA_SECOND_PASS return counts for first pass and second pass only, while DM_FASTA returns counts for both passes. In protein summary, the argument is ignored.

Returns: The number of hits with a score at or above the identity threshold.

◆ getPepsWithSameScore()

std::vector< int > getPepsWithSameScore	(	const int	q,
		const int	p
	)		const

pure virtual

Returns a list of 'p' values for peptides with the same score.

Peptides with the same score will have the same pretty rank. See ms_peptide::getPrettyRank() for details.

It is preferable to call this function rather than to loop through all the ranks calling getPeptide() for each one because the cached index stores the pretty ranks and this should therefore be faster.

For a protein summary, the returned list always contains a single value that is the same as the passed p value.

Parameters

q	is the query number.
p	is the 'hit' or 'rank' number in the range 1.. getMaxRankValue().

Returns: a list of the ranks that have the same pretty value. See Using STL vector classes vectori, vectord and VectorString in Perl, Java, Python and C#.

Implemented in ms_peptidesummary, and ms_proteinsummary.

◆ getPeptide() [1/2]

ms_peptide getPeptide	(	const int	q,
		const int	p
	)		const

pure virtual

Return the ms_peptide object given the query and either the rank (ms_peptidesummary) or the hit (ms_proteinsummary).

To get the q and p values for a peptide in an ms_protein object, call ms_protein::getPeptideQuery() and ms_protein::getPeptideP().

An 'empty' peptide object will be returned if

q or p are invalid (the error ms_mascotresfilebase::ERR_PEPSUMMPEPGET will also be set) or
there was no match to this peptide.

To test for an 'empty' peptide, use ms_peptide::getAnyMatch().

Parameters

q is the query number in the range 1 to ms_mascotresfilebase::getNumQueries().

p

is the 'hit' or 'rank' number.

For a peptide summary, the top 10 matches are saved and hence p would normally be in the range 1 to 10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20.
For a protein summary, a maximum of 50 protein hits are saved and hence p must be in the range 1..50. It is safest to call getMaxRankValue() to find the maximum value.

Thread safe: This method is safe to use from multiple threads. See also Using Parser in multithreaded applications.

Returns: an ms_peptide object.

Implemented in ms_peptidesummary, and ms_proteinsummary.

Examples: peptide_list.cpp, and resfile_summary.cpp.

◆ getPeptide() [2/2]

bool getPeptide	(	const int	q,
		const int	p,
		ms_peptide *&	pep
	)		const

pure virtual

Return the ms_peptide object given the query and either the rank (ms_peptidesummary) or the hit (ms_proteinsummary).

Note: This function can not be used in programming languages other than C++.

To get the q and p values for a peptide in an ms_protein, call ms_protein::getPeptideQuery() and ms_protein::getPeptideP().

An 'empty' peptide object will be returned if

q or p are invalid (the error ms_mascotresfilebase::ERR_PEPSUMMPEPGET will also be set) or
there was no match to this peptide.

To test for an 'empty' peptide, use ms_peptide::getAnyMatch().

Parameters

q is the query number in the range 1 to ms_mascotresfilebase::getNumQueries()

p

is the 'hit' or 'rank' number.

For a peptide summary, the top 10 matches are saved and hence p would normally be in the range 1 to 10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20.
For a protein summary, a maximum of 50 protein hits are saved and hence p must be in the range 1..50. It is safest to call getMaxRankValue() to find the maximum value.

Thread safe: This method is safe to use from multiple threads. See also Using Parser in multithreaded applications.

Parameters

pep	is a reference to a pointer to an ms_peptide object that will be returned.

Returns: true if the peptide exists.

Implemented in ms_peptidesummary, and ms_proteinsummary.

◆ getPeptideExpectationValue()

double getPeptideExpectationValue	(	const double	score,
		const int	query,
		const int	rank = `0`
	)		const

virtual

Returns the expectation value for the given peptide score and query.

Returns the number of peptides that you would expect to get this score or higher from this database. This value is displayed as the expectation value in Mascot 2.0 and later reports. When the value is below 1, it approximates the p-value corrected for multiple testing.

For Mascot matches, the expectation value is calculated from

    E = Pthreshold * (10 ** ((Sthreshold - score) / 10))

For a score that is exactly on the default significance threshold, (p<0.05), the expectation value is also 0.05.

The Sthreshold value is the value returned by getHomologyThreshold() if ms_peptidesummary::MSPEPSUM_USE_HOMOLOGY_THRESH was specified in the ms_peptidesummary constructor, otherwise the value used is the one returned from getPeptideIdentityThreshold().

Parser 2.8 and later can calculate expectation values for Mascot error tolerant matches. The value is based on the combined first and second pass identity threshold. If rank is zero, getPeptideExpectationValue() assumes the score is a first pass match score and calculates the value from the first pass identity or homology threshold. If rank is non-zero, the correct threshold is chosen based on the match at rank rank in this query.

ET expectation value is only available if the results file is from Mascot 2.8 and later. When you open results from Mascot 2.7 and earlier, expectation value for ET matches is -1. Note that Parser 2.7 and earlier returned a non-negative but incorrect number for ET matches.

For spectral library matches, the expectation value formula depends on report mode.

In integrated mode (ms_peptidesummary::MSPEPSUM_SL_INTEGRATED), library scores are scaled internally so that their mean and standard deviation match the mean and standard deviation of significant rank 1 Mascot matches with the same sequence in the same query. See Library scores and thresholds.
In SL-only mode (ms_peptidesummary::MSPEPSUM_SL_ONLY), the expect value formula is
```
 E = 0.05 * 10 ** (-(s - 300)/100).
```
For example, E = 0.05 for score 300.

Spectral library support was added in Mascot Parser 2.6. If rank is zero, which is the default in Parser 2.5 and earlier, the score is assumed to be a Mascot ions score. If rank is non-zero, the correct formula is chosen based on the type of the match at rank rank in this query.

See also: getProteinExpectationValue()

Parameters

score	is the Mascot score to be converted.
query	specifies the query number. This should be in the range 1 .. ms_mascotresfilebase::getNumQueries().
rank	is the rank of the match of interest in this query.

Returns: The number of peptides that you would expect to get this score or higher from this database, or -1 if it cannot be calculated.

◆ getPeptideIdentityThreshold()

int getPeptideIdentityThreshold	(	const int	query,
		double	OneInXprobRnd,
		DB_MATCH_TYPE	dbType = `DM_FASTA`
	)		const

virtual

Return the threshold value for this ms-ms data being a random match.

See also: getAvePeptideIdentityThreshold()

Parameters

query	Specifies the query number. This should be in the range 1 .. ms_mascotresfilebase::getNumQueries().
OneInXprobRnd	For a 1 in 20 probability, this should be 20. This value must be greater than 1, or it will be set to the default of 20. It's easiest to always pass 1 / getProbabilityThreshold() as OneInXprobRnd. This parameter is ignored in error tolerant search results. If the query was searched in the error tolerant pass, its significance is determined using 1 / getErrTolProbabilityThreshold(); otherwise using 1 / getProbabilityThreshold().
dbType	If given, should be DM_FASTA for Mascot identity threshold and DM_SPECTRAL_LIBRARY for spectral library threshold. `DM_FASTA_FIRST_PASS` and `DM_FASTA_SECOND_PASS` return the same threshold as `DM_FASTA`. (DM_BOTH is nonsensical; if given, -1 is returned.)

Returns: A score threshold. If query is < 1 or > ms_mascotresfilebase::getNumQueries() then -1 will be returned. If there was no MS-MS data or no match for this query, then -1 will be returned.

Examples: peptide_list.cpp, and resfile_summary.cpp.

◆ getPeptideThreshold()

double getPeptideThreshold	(	const int	query,
		double	OneInXprobRnd,
		const int	rank = `1`,
		const THRESHOLD_TYPE	thresholdType = `TT_PEPSUM_DEFAULT`
	)		const

Return either the identity or the homology threshold.

The default is to use identity thresholds for the original Peptide Summary and Select Summary reports. For the Family Summary grouping, homology thresholds are used, and this needs to be specified by using the ms_peptidesummary::MSPEPSUM_USE_HOMOLOGY_THRESH flag. (This flag is automatically set when using the ms_mascotresfilebase::get_ms_mascotresults_params helper function).

This function calls getPeptideIdentityThreshold() or getHomologyThreshold() as appropriate. If it calls getHomologyThreshold() and this returns 0, then the identity threshold is returned.

The rank parameter is mandatory in an integrated error tolerant (ET) search and integrated spectral library (SL) search. See getHomologyThreshold().

If the peptide match at rank rank is an ET match, and the results file is from Mascot Server 2.7 or earlier, then getPeptideIdentityThreshold() returns -1 and getPeptideThreshold() returns -1.

Parameters

query	Specifies the query number. This should be in the range 1 .. ms_mascotresfilebase::getNumQueries()
OneInXprobRnd	For a 1 in 20 probability, this should be 20. This value must be greater than 1, or it will be set to the default of 20. It's easiest to always pass 1 / getProbabilityThreshold() as OneInXprobRnd. This parameter is ignored in error tolerant search results. If the query was searched in the error tolerant pass, its significance is determined using 1 / getErrTolProbabilityThreshold(); otherwise using 1 / getProbabilityThreshold().
rank	Specifies the rank of the match of interest.
thresholdType	can be any one of the ms_mascotresults::THRESHOLD_TYPE values

Returns: the threshold.

◆ getProbabilityThreshold()

double getProbabilityThreshold ( ) const

virtual

Return the minProbability value passed to the ms_mascotresults::ms_mascotresults constructor.

In a protein summary, the probability threshold specifies a cutoff point for protein scores, a cutoff for an Integrated error tolerant search and a threshold for calculating MudPIT scores.

In a peptide summary, the identity and homology thresholds of a peptide match are calculated from the probability threshold (significance threshold). This is the value of "Significance threshold p<" shown in Mascot search reports.

getProbabilityThreshold() returns the currently active first pass significance threshold. The value may differ from the minProbability constructor argument if a target FDR has been specified. See ms_mascotresults_params::getTargetFDR().

If target FDR has not been specified, and if a value of <= 1e-18 or >= 1 is passed to the constructor, then a value of 1/20 (0.05) will be returned by this function. Otherwise it will just return the value passed to the ms_mascotresults::ms_mascotresults constructor.

In an error tolerant search, getProbabilityThreshold() may differ from the significance threshold used during the search, which was the basis for protein selection for the second pass. This is recorded as the first_pass_threshold= line in the results file header.

Error tolerant matches use an independent significance threshold, getErrTolProbabilityThreshold().

Returns: probability threshold for peptide matches.

◆ getProbFromScore()

int getProbFromScore ( const double score ) const

virtual

Returns a probability value given a score.

Warning: Do not use this without fully understanding what the function returns. It is more likely that you will want to use getPeptideExpectationValue() or getProteinExpectationValue().

Parameters

score is the Mascot score.

Returns: probability calculated from p = pow(10.0, (score/10.0)).

◆ getProbOfPepBeingRandomMatch()

double getProbOfPepBeingRandomMatch	(	const double	score,
		const int	query
	)		const

virtual

Deprecated: Use getPeptideExpectationValue().

Returns the inverse of getPeptideExpectationValue(). If, for example, the identity threshold (p<0.05) for a particular query is 30, and a value of 30 is passed to this function, then the return value will be 1/0.05. If, for the same search, a peptide of interest gets a score of 50, then this function will return a value of 1 / 0.00067 which is the inverse of the p value corresponding to that score.

This value is displayed as the expectation value in Mascot 2.0 reports.

See also: getProbOfProteinBeingRandomMatch()

Parameters

score	is the Mascot score
query	specifies the query number. This should be in the range 1 .. ms_mascotresfilebase::getNumQueries().

Returns: The probability.

Examples: resfile_summary.cpp.

◆ getProbOfProteinBeingRandomMatch()

double getProbOfProteinBeingRandomMatch ( const double score ) const

virtual

Deprecated: Use getProteinExpectationValue().

Returns the inverse of getProteinExpectationValue(). If, for example, the identity threshold (p<0.05) for a PMF search is 56, and a value of 56 is passed to this function, then the return value will be 1/0.05. If, for the same search, a protein of interest gets a score of 90, then this function will return a value of 1 / 0.000025 which is the inverse of the p value corresponding to that score.

This value is displayed as the expectation value in Mascot 2.0 reports.

See also: getProbOfPepBeingRandomMatch()

Parameters

score is the Mascot score to be converted.

Returns: The inverse of getProteinExpectationValue().

◆ getProtein()

const ms_protein * getProtein	(	const char *	accession,
		const int	dbIdx = `1`
	)		const

virtual

Return a pointer to the protein entry given an accession.

Parameters

accession	is the accession string.
dbIdx	is the database index (1..ms_searchparams::getNumberOfDatabases()). For a PMF mixture in protein summary (see Peptide mass fingerprint mixtures), use the default value of 1 even if the component proteins are all from other databases.

Thread safe: This method is safe to use from multiple threads. See also Using Parser in multithreaded applications.

Returns: If the protein cannot be found in the results, then a null value is returned. See Maintaining object references: two rules of thumb.

Reimplemented in ms_peptidesummary.

◆ getProteinDescription()

std::string getProteinDescription	(	const char *	accession,
		const int	dbIdx = `1`
	)		const

Return protein description if available.

This method attempts to return the 'description line' for the given accession. The description line originated from the FASTA file that was searched. An empty string will be returned if the protein is not listed in the proteins section or the summary section of the results file.

Format in the proteins section is, for example:

   "gi|4838561"=79480.13,"(AF144646) heat shock protein 70 [Crassostrea gigas]"

When multiple databases are used for the search, all protein entries, except those from the first database, will have a database index as a prefix:

   "02::gi|483561"=...

Format in the summary section is, for example:

   h1=gi|4838561,1.89e+002,1.00,79480.13
   h1_db=01
   h1_text=(AF144646) heat shock protein 70 [Crassostrea gigas]

Again the protein entry will include the database index if there is more than one database.

With the default settings, Mascot only saves description lines in the proteins section for accessions that are most likely to be used in a report. If description lines for all proteins with a match were saved, then the results files could potentially be huge. You can control which description lines are put into the proteins section by changing the entry for

ProteinsInResultsFile

in the options section of mascot.dat (see chapter 6 of the Mascot installation and setup manual).

The alternative way to get the description line is to call

     ms-getseq.exe [db] [accession] title

when nothing is returned by getProteinDescription(). See chapter 7 of the Mascot installation and setup manual for details of calling ms-getseq.exe.

If the 'protein' is actually a UniGene entry, then the description line is taken by concatenating the Gene and the Title line for the relevant entry.

If the protein is a PMF mixture, then the description line will be of the format:

 * Mixture from proteins:"gi|1234","gi|5432","gi|9999"

If the protein comes from the reference database of a spectral library, then its description is not saved in the results file and this method returns the empty string. If the protein comes from the spectral library, it may or may not have a description, depending on how the library was created.

For a search against a decoy database, if the result is from the decoy section, then the description is prefixed with "Random sequence, was ", or if there is no description in the file, "Random sequence.". Depending on the configuration of the search, Random may also be Reversed or Shuffled.

Parameters

accession	is the accession string.
dbIdx	database index used for the search.

Returns: The description line or an empty string.

Examples: resfile_summary.cpp.

◆ getProteinEmPAI()

double getProteinEmPAI	(	const char *	accession,
		const int	dbIdx = `1`,
		const int	length = `-1`
	)		const

virtual

Return protein emPAI if available.

The Exponentially Modified Protein Abundance Index (emPAI) offers approximate, label-free, relative quantitation of the proteins in a mixture based on protein coverage by the peptide matches in a database search result. The key publication is

Ishihama, Y., et al.: Exponentially modified protein abundance index (emPAI) for estimation of absolute protein amount in proteomics by the number of sequenced peptides per protein (2005). Molecular & Cellular Proteomics 4:1265-1272.

The number of observed peptides is determined using the function ms_protein::getNumObservedForEmPAI().

Protein sequence length is needed for emPAI calculation. You have two options:

Give the sequence length as an argument to this method. You will need to fetch it with e.g. ms-getseq.exe on the Mascot Server machine.
Give no argument to the method. If the protein has a mass in the results file, the mass will be used to calculate an approximate sequence length.

If no sequence length is given and the protein has no mass in the results file, the method returns -1. If you know the mass of the protein but not its sequence length, you can use the following approximate formula:

length = floor(0.5 + mass / 111)

Here 111 Da is the average mass of an amino acid residue.

If ms_mascotresults::isEmPAIallowed() is false, this method returns -1.

Parameters

accession	Protein accession.
dbIdx	Protein database index number.
length	Protein sequence length (optional). If the length is not given, protein mass from the results file will be used to approximate it. If protein mass is not available, the method returns -1.

Returns: emPAI or -1, as described above.

Reimplemented in ms_peptidesummary.

◆ getProteinExpectationValue()

double getProteinExpectationValue ( const double score ) const

virtual

Returns the expectation value for the given protein score.

Returns the number of proteins that you would expect to get this score or higher from random matches for this search. For a score that is exactly on the default significance threshold (p<0.05), the expectation value is also 0.05.

This value is displayed as the expectation value in Mascot 2.0 reports.

See also: getPeptideExpectationValue()

Parameters

score is the Mascot protein score to be converted.

Returns: The expectation value.

◆ getProteinMass()

double getProteinMass	(	const char *	accession,
		const int	dbIdx = `1`
	)		const

Return protein mass if available.

This method attempts to return the mass for the given accession. The protein mass is calculated during the Mascot search, and some, but not all, of these masses are saved in the results file. A value of zero will be returned if the protein is not listed in the proteins section or the summary section of the results file.

The format in the proteins section is, for example, for a protein of mass 79480.13 daltons:

   "gi|4838561"=79480.13,"(AF144646) heat shock protein 70 [Crassostrea gigas]"

When multiple databases are used for the search, all protein entries, except those from the first database, will have a database index as a prefix:

   "02::gi|483561"=...

Format in the summary section is, for example:

   h1=gi|4838561,1.89e+002,1.00,79480.13 
   h1_db=01
   h1_text=(AF144646) heat shock protein 70 [Crassostrea gigas]

Again, the protein entry will include the database index if there is more than one database.

With the default settings, Mascot only saves description lines and masses in the proteins section for accessions that are most likely to be used in a report. If description lines for all proteins with a match were saved, then the results files could potentially be huge. You can control which description lines and masses are put into the proteins section by changing the entry for

ProteinsInResultsFile

in the options section of mascot.dat (see chapter 6 of the Mascot installation and setup manual).

If a mass of zero is returned by this function, then the alternative way to get the mass is to call

     ms-getseq.exe [db] [accession] seq

to get the sequence, and then call getSequenceMass(). See chapter 7 of the Mascot installation and setup manual for details of calling ms-getseq.exe.

Parameters

accession	is the accession string.
dbIdx	database number (1..ms_searchparams::getNumberOfDatabases()).

Returns: The protein mass or 0.0.

Examples: resfile_summary.cpp.

◆ getProteinScoreCutoff()

int getProteinScoreCutoff ( double OneInXprobRnd ) const

pure virtual

Return the 'protein' score value for cutting off results (different for peptide and protein summary).

The protein probability cutoff is calculated differently for a protein summary and a peptide summary.

For a peptide summary, we want to display any proteins that contain even one significant peptide, which means the threshold is the same as getAvePeptideIdentityThreshold().

For MudPIT scoring, any protein with a score greater than zero must have one or more peptides above the homology threshold, so this function returns a value of 1.
For a protein summary, we simply use the protein score that corresponds to the threshold.

Parameters

OneInXprobRnd For a 1 in 20 probability, this should be 20. This value must be greater than 1, or it will be set to the default of 20.

Returns: The protein score cutoff threshold.

Implemented in ms_peptidesummary, and ms_proteinsummary.

◆ getProteinScoreForHistogram()

double getProteinScoreForHistogram ( const int num ) const

virtual

Returns scores for the top 50 proteins, even if there are fewer hits in the peptide summary or protein summary.

The 'well known' green and red histogram at the top of the Mascot reports is created from the top 50 protein scores in the results. However, if the number of hits requested for a peptide or protein summary is 'AUTO' or is less than 50, then there won't be 50 hits in the ms_mascotresults object so it won't be possible to create the histogram.

This simple method can reliably be used to return the score for a hit number between 1 and 50, regardless of the number of entries in the protein or peptide summary. To create the histogram, it will be necessary to call this method 50 times - with num being 1..50.

In the rare case of there being fewer than 50 proteins in the results file, a value of -1 will be returned for values of num that have no protein hit. If a protein is rejected because it only has peptides shorter than minPepLenInPepSummary or because the MSRES_REQUIRE_BOLD_RED flag is set, then this protein will not be included in the list.

Parameters

num	Is a value in the range 1..50.

Returns: The score.

◆ getProteinsWithThisPepMatch()

std::string getProteinsWithThisPepMatch	(	const int	q,
		const int	p,
		const bool	quotes = `false`
	)

pure virtual

Return a partial list of proteins that matched the same peptide.

The returned string is of the following form:

    1:Q9XZJ2  2:100K_RAT  3:AAF55150

The list is the hit number / accession of all proteins that matched the same peptide. There is no indication of which database the hit is from, but this can of course be determined by calling getHit().

In ms_peptidesummary, this is taken from the list of accessions on the q1_p1= line. The returned string only includes proteins that are in the current peptide summary. This means for example that if you choose to only see the top 5 hits, and this query/rank is for a peptide that occurs in protein 6, then protein number 6 will not appear in the resulting string.

In ms_proteinsummary, the string is also constructed from the list of accessions on the q1_h1= line. There's an additional check that the query number is the same, and that the peptide is the same string.

If you want a list of accession strings rather than a single concatenated string, use the function getAllProteinsWithThisPepMatch().

Parameters

q	is the query number in the range 1 to ms_mascotresfilebase::getNumQueries().
p	is the 'hit' number (in ms_proteinsummary) or 'rank' number (in ms_peptidesummary). For a protein summary, a maximum of 50 protein hits are saved and hence p must be in the range 1..50. It is safest to call getMaxRankValue() to find the maximum value. For a peptide summary, the top 10 matches are saved and hence p would normally be in the range 1 to 10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20.
quotes	specifies whether the accessions will be quoted, which should allow accessions with spaces and colons. If true, the string returned would then be 1:"Q9XZJ2" 2:"100K_RAT" 3:"AAF55150"

Returns: list of proteins matching the peptide, as a string

Implemented in ms_peptidesummary, and ms_proteinsummary.

Examples: peptide_list.cpp, and resfile_summary.cpp.

◆ getProteinTaxonomyIDs()

void getProteinTaxonomyIDs	(	const char *	accession,
		const int	dbIdx,
		std::vector< int > &	gpt_ids,
		std::vector< std::string > &	gpt_accessions
	)		const

Return the taxonomy ID(s), if any, from the results file.

In Mascot 2.4.0 and later, taxonomy IDs are saved in the proteins section of the results files in the form:

   [DB::]"accession"_tax=taxId[:"accession",taxID[:"accession",taxID[:...]]]

In Mascot 2.4.1 and later, taxonomy IDs are also saved in the summary section of the results files in the form:

   hN_tax=taxId[:"accession",taxID[:"accession",taxID[:...]]]

For example, for a single database, with a single accession/taxonomy:

   "gi|209490801"_tax=87229

   h1=gi|209490801, ...
   h1_tax=87229

and for a multi-database search, with a single accession/taxonomy:

   02::"gi|209490801"_tax=87229

   h1_db=2
   h1=gi|209490801, ...
   h1_tax=87229

and for a single database, with multiple accession/taxonomy:

   "gi|182438931"_tax=455632:"gi|282872348",649189:gi|178467447,455632:gi|282574201,649189

   h1=gi|182438931, ...
   h1_tax=455632:"gi|282872348",649189:gi|178467447,455632:gi|282574201,649189

If none of the databases used for the search contain taxonomy information, then no _tax lines will be output.

If any of the databases used for the search contain taxonomy information, then a _tax line will be output for all relevant proteins. This is governed by the same rules that determine for which proteins to output the mass and description lines. If a protein has no taxonomy information (for example, the database has no taxonomy), -1 will be used as the taxonomy ID.

Therefore, to determine if taxonomy information is available in the results file, simply take any protein in the proteins section, and see if there is a corresponding _tax entry.

For most FASTA files, there is a single accession and taxonomy for each sequence entry. For the NCBInr database, identical sequences are only stored once, but there are multiple accession/description lines separated by a "Control-A" character. Each accession may have a different taxonomy. This function returns the list of taxonomy IDs and corresponding accessions. For the last example shown above, this function will return the arrays of IDs

  [455632, 649189, 455632, 649189]

and accessions

  ["gi|182438931", "gi|282872348", "gi|178467447", "gi|282574201"]

See Using STL vector classes vectori, vectord and VectorString in Perl, Java, Python and C#.

Parameters

accession	is the primary accession
dbIdx	is the 1 based database ID
gpt_ids	the taxonomy IDs
gpt_accessions	the list of accessions

◆ getProteinThreshold()

int getProteinThreshold ( double OneInXprobRnd ) const

virtual

Return a threshold value for the protein summary report.

This is simply calculated by using:

   10.0 * log10(seqsAfterTax * OneInXprobRnd)

where seqsAfterTax is obtained from ms_mascotresfilebase::getNumSeqsAfterTax().

The threshold value is displayed at the top of the protein summary report: Protein scores greater than 72 are significant (p<0.05).

Parameters

OneInXprobRnd For a 1 in 20 probability, this should be 20. This value must be greater than 1, or it will be set to the default of 20.

Returns: The protein threshold.

◆ getQmatch() [1/2]

int getQmatch ( const int query ) const

virtual

[Deprecated] Return the number of peptides with masses that matched this query.

Deprecated: This method is deprecated in favour of ms_mascotresfilebase::getQmatch().

See also: getPeptideIdentityThreshold()

Note: This method only works with dat28 (.dat) results files. The value returned with MSR files is undefined.

If the value is greater than INT_MAX, then INT_MAX will be returned.

This function calls the getQmatch(query, section) function with the 'relevant' section which will normally be SEC_SUMMARY.

If the MSRES_DECOY flag was specified in the constructor, then the 'relevant' section will be SEC_DECOYSUMMARY. If the search is an error tolerant search, then the 'relevant' section will be SEC_ERRTOLSUMMARY, and analogously for crosslinked searches (SEC_CROSSLINK_SUMMARY).

However, to get the spectral library qmatch values, it is necessary to explicitly call: getQmatch(query, ms_mascotresfile_dat::SEC_LIBRARYSUMMARY)

Parameters

query Specifies the query number. This should be in the range 1 .. ms_mascotresfile_dat::getNumQueries().

Returns: the number of peptides with masses that matched this query

◆ getQmatch() [2/2]

int getQmatch	(	const int	query,
		const ms_mascotresfile_dat::section	sec
	)		const

virtual

[Deprecated] Use ms_mascotresfilebase::getQmatch().

Deprecated: This method is deprecated in favour of ms_mascotresfilebase::getQmatch().

If the value is greater than INT_MAX, then INT_MAX will be returned.

Note: This method only works with dat28 (.dat) results files. The value returned with MSR files is undefined.

Parameters

query	Specifies the query number. This should be in the range 1 .. ms_mascotresfilebase::getNumQueries().
sec	is the section number and can be one of: ms_mascotresfile_dat::SEC_SUMMARY ms_mascotresfile_dat::SEC_DECOYSUMMARY ms_mascotresfile_dat::SEC_ERRTOLSUMMARY ms_mascotresfile_dat::SEC_ERRTOLDECOYSUMMARY ms_mascotresfile_dat::SEC_LIBRARYSUMMARY ms_mascotresfile_dat::SEC_CROSSLINK_SUMMARY

Returns: the number of peptides with masses that matched this query. If the specified sec doesn't exist, then 0 will be returned.

◆ getQuantitationComponentForPeptide()

int getQuantitationComponentForPeptide	(	const matrix_science::ms_peptide &	peptide,
		matrix_science::ms_quant_component &	component,
		const matrix_science::ms_quant_method *	method = `NULL`
	)		const

pure virtual

Get the component in the quantitation method that matches the peptide.

If the component name has already been determined (see ms_peptide::getComponentStr()) then that component is retrieved from the method.

If the component name has not yet been determined for the peptide, each component in the method is checked in turn to determine if it is the correct component for the peptide (see ms_peptidesummary::isPeptideComponentMatch()). A component only matches the peptide if every modification group of the component matches the peptide.

This is only applicable if there is a quantitation method, the method uses the Precursor protocol and the method has components defined. If an error occurs then an error message is appended to the matrix_science::ms_mascotresfilebase and a matching status code is returned.

Parameters

peptide	The peptide to find the component for.
component	This is set to the first matching component found, if any.
method	An optional parameter that overrides the quantitation method used to look for matching components. If no method is specified here then the method from the ms_mascotresfilebase is used.

Returns: One of the return status codes in ms_mascotresults::QUANT_COMPONENT_STATUS.

Implemented in ms_proteinsummary, and ms_peptidesummary.

◆ getReadableVarMods()

std::string getReadableVarMods	(	const int	q,
		const int	p,
		const int	numDecimalPlaces = `2`,
		const ms_peptide::PSM	psmComponent = `ms_peptide::PSM_COMPLETE`
	)		const

virtual

Return a 'human readable' string with the variable, summed and error tolerant mods.

This function creates a string of the form

2 Oxidation (M); Phosphorylation (ST)

It works by getting the variable mods string, e.g. 0110030, using ms_peptide::getVarModsStr(). It then converts the string to a human readable form. Each variable modification is assigned a number 1 to 32 (up to 32 variable modifications are allowed). In the results, a string is assigned for each matched peptide, and each character in the string will be 0..9A..W.

This function will work whether or not there are variable and/or error tolerant mods, and can be called for either the peptide or the protein summary. Variable modification names are obtained using ms_searchparams::getVarModsName() and error tolerant modification names are found using ms_mascotresults::getErrTolModName(). Library modification names, in peptide summary, are found using ms_searchparams::getLibraryModName(). See the Mascot installation and setup manual (configuration and log files chapter) for more information about variable modifications.

Error tolerant mods are shown with the delta in square brackets after the modification or substitution. For example:

   R->G [-99.08]

The delta is shown to numDecimalPlaces.

If the match is an intact crosslinked match, the returned string contains a short description of the linked sites and what they are linked with. For example:

   Xlink:DSS (K) in alpha crosslinked with Xlink:DSS (K) in beta

Use ms_peptide::PSM_CROSSLINK_ALPHA or ms_peptide::PSM_CROSSLINK_BETA as the psmComponent argument to inspect the alpha and beta mods separately. In this case, the intact linker is described like "intact Xlink:DSS (K)".

Summed mods, from ms_peptide::getSummedModsStr(), and query-level mods, from ms_peptide::getLocalModsStr(), are also included in the returned string.

Parameters

q	Specifies the query number. This should be in the range 1 .. ms_mascotresfilebase::getNumQueries().
p	Should be in the range 1..10.
numDecimalPlaces	(optional) Used for the display of modification deltas.
psmComponent	(optional) Either ms_peptide::PSM_COMPLETE for the match as a whole, or ms_peptide::PSM_CROSSLINK_ALPHA or ms_peptide::PSM_CROSSLINK_BETA to get components of a crosslinked match.

Returns: The formatted string.

Examples: resfile_summary.cpp.

◆ getResultsParameters()

ms_mascotresults_params getResultsParameters ( ) const

Returns an ms_mascotresults_params object containing the parameter and flag settings used to generate the report.

Returns: An ms_mascotresults_params object containing the parameter and flag values used to generate the report.

◆ getSequenceMass()

double getSequenceMass ( const char * seq ) const

Return the mass of a sequence (protein or peptide).

The sequence mass is calculated by summing the mass of each residue, using ms_searchparams::getResidueMass, and adding terminus masses using ms_searchparams::getCTermMass and ms_searchparams::getNTermMass.

The mass is therefore the 'Mr' (uncharged) mass, and is calculated using any static modifications.

Parameters

seq	is the protein sequence.

Returns: The relative mass of the sequence.

◆ getTagDeltaRangeEnd()

int getTagDeltaRangeEnd	(	const int	q,
		const int	p
	)		const

pure virtual

Return the second number from h1_q2_drange=0,256.

Consider the following two examples for peptide summary and protein summary, respectively:

The entry in q1_p2_drange might be
```
   q1_p2_drange=0,256 
```
The entry in h1_q2_drange might be
```
   h1_q2_drange=0,256 
```

Parameters

q	Specifies the query number. This should be in the range 1 .. ms_mascotresfilebase::getNumQueries().
p	is the 'hit' or 'rank' number. For a peptide summary, p should be in the range 1..10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20. For a protein summary, this will be in the range 1 to 50, so it is safest to call getMaxRankValue() to find the maximum value.

Returns: the second number from the string or -1 if there is no tag match.

Implemented in ms_peptidesummary, and ms_proteinsummary.

Examples: resfile_summary.cpp.

◆ getTagDeltaRangeStart()

int getTagDeltaRangeStart	(	const int	q,
		const int	p
	)		const

pure virtual

Return the first number from h1_q2_drange=0,256.

Consider the following two examples for peptide summary and protein summary, respectively:

The entry in q1_p2_drange might be
```
   q1_p2_drange=0,256 
```
The entry in h1_q2_drange might be
```
   h1_q2_drange=0,256 
```

Parameters

q	Specifies the query number. This should be in the range 1 .. ms_mascotresfilebase::getNumQueries().
p	is the 'hit' or 'rank' number. For a peptide summary, p should be in the range 1..10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20. For a protein summary, this will be in the range 1 to 50, so it is safest to call getMaxRankValue() to find the maximum value.

Returns: the first number from the string or -1 if there is no tag match.

Implemented in ms_peptidesummary, and ms_proteinsummary.

Examples: resfile_summary.cpp.

◆ getTagEnd()

int getTagEnd	(	const int	q,
		const int	p,
		const int	tagNumber
	)		const

virtual

Return the end position for the tag-match from h1_q2_tag or q1_p1_tag.

The entry in, for example, h1_q2_tag might be

   h1_q2_tag=1:10:12:6,3:1:12:1,...

Every group of four numbers between commas is tagNumber:startPos:endPos:series.

See also: getTagStart(), getTagSeries()

Parameters

q	Specifies the query number. This should be in the range 1 .. ms_mascotresfilebase::getNumQueries().
p	Should be in the range 1..10 for a peptide summary.
tagNumber	Is a one based value.

Returns: The residue end position of the tag.

Examples: resfile_summary.cpp.

◆ getTagSeries()

int getTagSeries	(	const int	q,
		const int	p,
		const int	tagNumber
	)		const

virtual

Return the series ID for the tag-match from h1_q2_tag or q1_p1_tag.

The entry in, for example, h1_q2_tag might be

   h1_q2_tag=1:10:12:6,3:1:12:1,...

Every group of four numbers between commas is tagNumber:startPos:endPos:series.

See also: getTagStart(), getTagEnd()

Parameters

q	Specifies the query number. This should be in the range 1 .. ms_mascotresfilebase::getNumQueries().
p	Should be in the range 1..10 for a peptide summary.
tagNumber	Is a one based value.

Returns

The series that contained a match to the tag. Possible ion series numbers are

-1 means no matches for the tag
0 "a" series (single charge)
1 "a-NH3" series (single charge)
2 "a" series (double charge)
3 "b" series (single charge)
4 "b-NH3" series (single charge)
5 "b" series (double charge)
6 "y" series (single charge)
7 "y-NH3" series (single charge)
8 "y" series (double charge)
9 "c" series (single charge)
10 "c" series (double charge)
11 "x" series (single charge)
12 "x" series (double charge)
13 "z" series (single charge)
14 "z" series (double charge)
15 "a-H2O" series (single charge)
16 "a-H2O" series (double charge)
17 "b-H2O" series (single charge)
18 "b-H2O" series (double charge)
19 "y-H2O" series (single charge)
20 "y-H2O" series (double charge)
21 "a-NH3" series (double charge)
22 "b-NH3" series (double charge)
23 "y-NH3" series (double charge)
25 "internal yb" series (single charge)
26 "internal ya" series (single charge)
27 "z+1" series (single charge)
28 "z+1" series (double charge)
29 high-energy "d" and "d'" series (single charge)
31 high-energy "v" series (single charge)
32 high-energy "w" and "w'" series (single charge)
33 "z+2" series (single charge)
34 "z+2" series (double charge)

Examples: resfile_summary.cpp.

◆ getTagStart()

int getTagStart	(	const int	q,
		const int	p,
		const int	tagNumber
	)		const

virtual

Return the start position for the tag-match from h1_q2_tag or q1_p1_tag.

The entry in, for example, h1_q2_tag might be

   h1_q2_tag=1:10:12:6,3:1:12:1,...

Every group of four numbers between commas is tagNumber:startPos:endPos:series.

See also: getTagEnd(), getTagSeries()

Parameters

q	Specifies the query number. This should be in the range 1 .. ms_mascotresfilebase::getNumQueries().
p	Should be in the range 1..10 for a peptide summary.
tagNumber	Is a one based value.

Returns: The residue start position of the tag.

Examples: resfile_summary.cpp.

◆ getTagString()

std::string getTagString	(	const int	q,
		const int	p
	)		const

pure virtual

Return the complete tag string from h1_q2_tag or q1_p1_tag.

Consider the following two examples for peptide summary and protein summary, respectively:

The entry in q1_p2_tag might be
```
   q1_p2_tag=1:10:12:6,3:1:12:1,... 
```
The entry in h1_q2_tag might be
```
   h1_q2_tag=1:10:12:6,3:1:12:1,... 
```

This function returns the complete string value. Every group of four numbers between commas is tagNumber:startPos:endPos:series.

More useful functions are getTagStart(), getTagEnd() and getTagSeries(), all of which will work for both the protein summary and the peptide summary.

Parameters

q	Specifies the query number. This should be in the range 1 .. ms_mascotresfilebase::getNumQueries().
p	is the 'hit' or 'rank' number. For a peptide summary, p should be in the range 1..10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20. For a protein summary, this will be in the range 1 to 50, so it is safest to call getMaxRankValue() to find the maximum value.

Returns: complete tag string.

Implemented in ms_peptidesummary, and ms_proteinsummary.

Examples: resfile_summary.cpp.

◆ getTerminalResiduesString()

std::string getTerminalResiduesString	(	const int	q,
		const int	p,
		const ms_peptide::PSM	psmComponent = `ms_peptide::PSM_COMPLETE`
	)		const

pure virtual

Return the complete terminal residue string from h1_q1_terms or q1_p1_terms.

Consider the following two examples for peptide summary and protein summary, respectively:

For a peptide summary, the entry in the results file might be

   q1_p2_terms=-,A:K,I:-,-:Q,-:@,R:...

For a protein summary, the entry in the results file might be
```
   h1_q2_terms=A,Q 
```

This function returns the complete string value. More useful functions are ms_protein::getPeptideResidueBefore() and ms_protein::getPeptideResidueAfter(), which will work for both the protein summary and the peptide summary.

ms_peptidesummary only: For a crosslinked match, the results file line is q1_p1_terms_1= or q1_p1_terms_2=, depending on the psmComponent parameter. If the parameter is ms_peptide::PSM_COMPLETE, the method returns the empty string. Use ms_peptide::getNumberOfLinkedPeptides() to decide which value is needed.

Parameters

q	Specifies the query number. This should be in the range 1 .. ms_mascotresfilebase::getNumQueries().
p	is the 'hit' or 'rank' number. For a peptide summary, p should be in the range 1..10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20. For a protein summary, this will be in the range 1 to 50, so it is safest to call getMaxRankValue() to find the maximum value.
psmComponent	is the type of data to return: complete molecule, alpha peptide or beta peptide. Leave as default if the search is not a crosslinked search.

Returns: the full terminal residues string.

Implemented in ms_peptidesummary, and ms_proteinsummary.

◆ getThresholdForFDRAboveHomology() [1/2]

bool getThresholdForFDRAboveHomology	(	double	targetFDR,
		DECOY_STATS_COUNT_TYPE	countType,
		DB_MATCH_TYPE	dbType,
		double *	closestFDR,
		double *	minProbability,
		int *	numTargetMatches = `0`,
		int *	numDecoyMatches = `0`
	)

Given a target FDR for PSMs or sequences, return the probability threshold that gives the desired FDR using the homology threshold.

In Parser 2.5 and earlier, the FDR was calculated based on peptide-spectrum matches (PSMs). Parser 2.6 and later support calculating the FDR based on distinct peptide sequences. You can choose between PSMs and sequences with countType. The default, when countType is not given, is to count PSMs.

Parser 2.6 also added support for spectral libraries. However, there is no support for decoy spectral libraries, and thus there is no support for library FDR. If dbType is not DM_FASTA, the method returns false.

Otherwise functionality is the same as getThresholdForFDRAboveHomology(double, double*, double*, int*, int*).

From version 2.8.0, it is possible to set target FDR directly as a constructor argument. See ms_mascotresults_params::setTargetFDR(). If the search is an error tolerant target-decoy search, use the constructor argument to set target FDR.

If calling from outside C++, see Multiple return values in Perl, Java, Python and C#. For C#, numTargetMatches and numDecoyMatches must be supplied.

Parameters

[in]	targetFDR	an FDR to aim for.
[in]	countType	whether to count PSMs or sequences.
[in]	dbType	must be DM_FASTA.
[out]	closestFDR	an FDR closest to the target value, or -1 if FDR cannot be calculated.
[out]	minProbability	the probability threshold that gives closestFDR, or -1 if FDR cannot be calculated.
[out]	numTargetMatches	returns the number of target matches at this FDR, or -1 if the FDR cannot be calculated.
[out]	numDecoyMatches	returns the number of decoy matches at this FDR, or -1 if the FDR cannot be calculated.

Returns: true if the FDR was reached, false otherwise.

◆ getThresholdForFDRAboveHomology() [2/2]

bool getThresholdForFDRAboveHomology	(	double	targetFDR,
		double *	closestFDR,
		double *	minProbability,
		int *	numTargetMatches = `0`,
		int *	numDecoyMatches = `0`
	)

Given a target FDR, return the probability threshold that gives the desired FDR using the homology threshold.

For example, to find out which probability threshold gives 1% FDR above identity or homology threshold, call this function with targetFDR = 0.01, and feed the returned threshold as the minProbability parameter to the constructor. The FDR can be calculated as described in getNumHitsAboveHomology() and getNumDecoyHitsAboveHomology().

From version 2.8.0, it is possible to set target FDR directly as a constructor argument. See ms_mascotresults_params::setTargetFDR(). If the search is an error tolerant target-decoy search, use the constructor argument to set target FDR.

Otherwise functionality is the same as getThresholdForFDRAboveIdentity(double, double*, double*, int*, int*).

If calling from outside C++, see Multiple return values in Perl, Java, Python and C#. For C#, numTargetMatches and numDecoyMatches must be supplied.

Parameters

[in]	targetFDR	an FDR to aim for.
[out]	closestFDR	an FDR closest to the target value, or -1 if FDR cannot be calculated.
[out]	minProbability	the probability threshold that gives closestFDR, or -1 if FDR cannot be calculated.
[out]	numTargetMatches	returns the number of target matches at this FDR, or -1 if the FDR cannot be calculated.
[out]	numDecoyMatches	returns the number of decoy matches at this FDR, or -1 if the FDR cannot be calculated.

Returns: true if the FDR was reached, false otherwise.

◆ getThresholdForFDRAboveIdentity() [1/2]

bool getThresholdForFDRAboveIdentity	(	double	targetFDR,
		DECOY_STATS_COUNT_TYPE	countType,
		DB_MATCH_TYPE	dbType,
		double *	closestFDR,
		double *	minProbability,
		int *	numTargetMatches = `0`,
		int *	numDecoyMatches = `0`
	)

Given a target FDR for PSMs or sequences, return the probability threshold that gives the desired FDR using the identity threshold.

In Parser 2.5 and earlier, the FDR was calculated based on peptide-spectrum matches (PSMs). Parser 2.6 and later support calculating the FDR based on distinct peptide sequences. You can choose between PSMs and sequences with countType. The default, when countType is not given, is to count PSMs.

Parser 2.6 also added support for spectral libraries. However, there is no support for decoy spectral libraries, and thus there is no support for library FDR. If dbType is not DM_FASTA, the method returns false.

Otherwise functionality is the same as getThresholdForFDRAboveIdentity(double, double*, double*, int*, int*).

From version 2.8.0, it is possible to set target FDR directly as a constructor argument. See ms_mascotresults_params::setTargetFDR(). If the search is an error tolerant target-decoy search, use the constructor argument to set target FDR.

If calling from outside C++, see Multiple return values in Perl, Java, Python and C#. For C#, numTargetMatches and numDecoyMatches must be supplied.

Parameters

[in]	targetFDR	an FDR to aim for.
[in]	countType	whether to count PSMs or sequences.
[in]	dbType	must be DM_FASTA.
[out]	closestFDR	an FDR closest to the target value, or -1 if FDR cannot be calculated.
[out]	minProbability	the probability threshold that gives closestFDR, or -1 if FDR cannot be calculated.
[out]	numTargetMatches	returns the number of target matches at this FDR, or -1 if the FDR cannot be calculated.
[out]	numDecoyMatches	returns the number of decoy matches at this FDR, or -1 if the FDR cannot be calculated.

Returns: true if the FDR was reached, false otherwise.

◆ getThresholdForFDRAboveIdentity() [2/2]

bool getThresholdForFDRAboveIdentity	(	double	targetFDR,
		double *	closestFDR,
		double *	minProbability,
		int *	numTargetMatches = `0`,
		int *	numDecoyMatches = `0`
	)

Given a target FDR, return the probability threshold that gives the desired FDR using the identity threshold.

For example, to find out which probability threshold gives 1% FDR above identity threshold, call this function with targetFDR = 0.01, and feed the returned threshold as the minProbability parameter to the constructor. The FDR can be calculated as described in getNumHitsAboveIdentity() and getNumDecoyHitsAboveIdentity().

The function returns a value that maximises the number of peptide-spectrum matches while giving an FDR below the targetFDR value. In some cases, it would be possible to get an FDR closer to the supplied targetFDR, but with fewer target matches.

For a large file, this function can take some time to return. Feedback can be obtained by using ms_mascotresfilebase::outputKeepAlive with the fd= tag.

This function was first introduced in version 2.4.0 and the method for calculating the value was changed between version 2.4.0 and 2.4.1.

From version 2.8.0, it is possible to set target FDR directly as a constructor argument. See ms_mascotresults_params::setTargetFDR(). If the search is an error tolerant target-decoy search, use the constructor argument to set target FDR.

If this function returns false, there are several possibilities:

The function is being called for a ms_proteinsummary object
The supplied targetFDR is 0, or negative
It is not possible to get an FDR less than the targetFDR. In this case, the lowest possible FDR is given
The function is being called for an error tolerant search

If calling from outside C++, see Multiple return values in Perl, Java, Python and C#. For C#, numTargetMatches and numDecoyMatches must be supplied.

Parameters

[in]	targetFDR	an FDR to aim for.
[out]	closestFDR	an FDR closest to the target value, or -1 if FDR cannot be calculated.
[out]	minProbability	the probability threshold that gives closestFDR, or -1 if FDR cannot be calculated.
[out]	numTargetMatches	returns the number of target matches at this FDR, or -1 if the FDR cannot be calculated.
[out]	numDecoyMatches	returns the number of decoy matches at this FDR, or -1 if the FDR cannot be calculated.

Returns: true if the FDR was reached, false otherwise.

◆ getToleranceInDalton()

double getToleranceInDalton	(	bool &	needMass,
		const double *const	pMass = `NULL`
	)		const

Returns the tolerance in dalton, and whether a mass is needed if the unit is in % or ppm.

Returns the tolerance of the result file in dalton.

Parameters

needMass	Reference to a boolean to determine if the mass is needed to calculate the delta, namely when the tolerance unit is in % or ppm
pMass	Optional parameter to calculate the tolerance from the mass for % and ppm

Returns: The tolerance in dalton

◆ getTreeClusterNodes()

bool getTreeClusterNodes	(	const int	hit,
		std::vector< int > &	left,
		std::vector< int > &	right,
		std::vector< double > &	distance,
		TREE_CLUSTER_METHOD	tcm = `TCM_PAIRWISE_MAXIMUM`,
		double ***	reserved1 = `0`,
		unsigned int *	reserved2 = `0`
	)		const

virtual

Return distances and structure suitable for a dendrogram plot.

The 'distances' between each protein in the family are calculated using a tree clustering routine which, by default, implements pairwise maximum (or complete) linkage clustering. It is used for the dendrogram display in the reports introduced in Mascot 2.3.

The vectors returned by this function only contain values when MSRES_CLUSTER_PROTEINS has been specified and where there are at least two family members.

The code uses the cluster algorithm described at http://bonsai.hgc.jp/~mdehoon/software/cluster/

The License agreement for using this excellent Open Source code is included with Mascot Parser; see C Clustering library.

A value of 0 returned in the left or right vector corresponds to the lead protein as returned by getHit(). To find the protein associated with a positive node value returned in the left or right vector, call getNextFamilyProtein() with the id value set to the value in the vector.

The distances passed to the cluster algorithm are calculated by determining which peptides need to be excluded to make one protein a subset of another. Each unique peptide sequence is considered in turn, and the distance is calculated by taking the maximum difference between the score and the value returned by getPeptideThreshold(). Additionally, where a query has matches to different peptides above threshold in each protein, the delta of the scores is added to the distance.

For details of how to use the left, right and distance parameters, see Using STL vector classes vectori, vectord and VectorString in Perl, Java, Python and C#.

Parameters

hit	is the hit number for the lead protein.
left	returns a vector of 'left' values.
right	returns a vector of 'right' values.
distance	returns a vector of 'distance' values.
tcm	is the method to be used. See documentation referenced above
reserved1	is for Matrix Science use only
reserved2	is for Matrix Science use only

Returns: true unless the code causes an out of memory error.

Reimplemented in ms_peptidesummary.

◆ getUnassigned()

ms_peptide getUnassigned ( const int num ) const

Need to call createUnassignedList() before calling this.

See Unassigned peptides list for details of creating and using an unassigned list.

If this function is called before createUnassignedList() then an error ms_errs::ERR_UNASSIGNED_PROG will be set, and this function will return an empty ms_peptide.

Thread safe: This method is safe to use from multiple threads. See also Using Parser in multithreaded applications.

Parameters

num	Must be in the range 1..getNumberOfUnassigned().

Returns: If num is outside the range 1..getNumberOfUnassigned(), then an error ms_errs::ERR_UNASSIGNED_RANGE will be set, and this function will return an empty ms_peptide.

Examples: resfile_summary.cpp.

◆ getUnassignedIsBold()

bool getUnassignedIsBold ( const int num ) const

Returns true if the item indexed by num in the unassigned list should be bold.

This function returns true if this peptide should be displayed in bold in a Mascot report. Bold is used for the first time a query is shown in a report. See Unassigned peptides list for details of creating and using an unassigned list.

If this function is called before createUnassignedList() then an error ms_errs::ERR_UNASSIGNED_PROG will be set, and this function will return true.

Parameters

num	Must be in the range 1..getNumberOfUnassigned().

Returns: If num is outside the range 1..getNumberOfUnassigned(), then an error ms_errs::ERR_UNASSIGNED_RANGE will be set, and this function will return true.

◆ getUnassignedShowCheckbox()

bool getUnassignedShowCheckbox ( const int num ) const

Returns true if the item indexed by num in the unassigned list should have a check box next to it.

A check box is displayed if this is the first rank 1 match that has been displayed for this query. See also ms_peptide::getRank() and ms_peptide::getPrettyRank().

By definition, all unassigned queries will need a check box. See Unassigned peptides list for details of creating and using an unassigned list.

If this function is called before createUnassignedList() then an error ms_errs::ERR_UNASSIGNED_PROG will be set, and this function will return false.

Parameters

num	Must be in the range 1..getNumberOfUnassigned().

Returns: If num is outside the range 1..getNumberOfUnassigned(), then an error ms_errs::ERR_UNASSIGNED_RANGE will be set, and this function will return false.

◆ isEmPAIallowed()

bool isEmPAIallowed ( ) const

Return true if emPAI could be calculated using data in this results file.

emPAI is not available unless all of the following are true:

Results file has MS/MS data.
Results file is not an old-style error tolerant search.
Results file has not been opened in decoy mode (ms_mascotresults::MSRES_DECOY).
There are at least 100 queries.

Returns: true if emPAI is allowed.

◆ isNA()

bool isNA ( ) const

Returns TRUE for a search against a nucleic acid database.

There is no specific information in the results file for this. It is implemented by looking to see if there is a frame number that is non-zero in any result. If there are no matches to any query, then this method will return false and therefore may be wrong. (However, if there are no matches then it is unlikely that the outcome of this function is important!)

In versions prior to 2.3.01, this was determined by looking in protein matches. In version 2.3.01 and later, this is determined by looking in all peptide matches, regardless of score.

Returns: True if the search was against a nucleic acid database.

◆ isPeptideUnique()

bool isPeptideUnique	(	const int	q,
		const int	p,
		const UNIQUE_PEP_RULES	rules = `UPR_DEFAULT`
	)		const

pure virtual

Returns true if this peptide match is unique to one protein or one protein family.

Note: This function currently returns false for ms_proteinsummary. This function is only likely to be useful for a peptide summary.

This function is used, for example, in quantitation to only report ratios for peptide sequences that are unique to one protein hit. This can be specified as part of a Mascot quantitation method; see ms_quant_quality::isUniquePepseq().

No attempt is made to collapse peptides that just differ by, for example, I or L.

The specified q and p values must be for a peptide that has a protein match in the current report, i.e. not in the unassigned list. The function searches all proteins and not just those in the current report. For example, if peptide 'ABCDEFK' appears in protein hit one and protein hit 100, then it will return false even if only the top 10 hits have been requested.

If grouping is not selected when creating the ms_peptidesummary, (i.e. neither ms_mascotresults::MSRES_GROUP_PROTEINS nor ms_mascotresults::MSRES_CLUSTER_PROTEINS), then it is likely that this function will return false for most peptides.

At version 2.3, this function did not have the optional third parameter and the default value was the same as using ms_mascotresults::UPR_WITHIN_FAMILY, which is not the default value in version 2.4 and later. At version 2.3, the function would therefore return true for all peptides with a score above the homology threshold when ms_mascotresults::MSRES_CLUSTER_PROTEINS is specified.

Parameters

q	is the query number in the range 1 to ms_mascotresfilebase::getNumQueries().
p	is the 'hit' or 'rank' number. For a peptide summary, the top 10 matches are saved and hence p would normally be in the range 1 to 10. For an integrated error tolerant search (see Integrated error tolerant search) this can be in the range 1 to 20. For a protein summary, this will be in the range 1 to 50.
rules	should be passed appropriate ms_mascotresults::UNIQUE_PEP_RULES flags to specify which rules are to be used when considering whether a peptide is unique.

Returns: true if the peptide match is unique. Always false in ms_proteinsummary.

Implemented in ms_proteinsummary, and ms_peptidesummary.

◆ setSubsetsThreshold()

void setSubsetsThreshold ( const double scoreFraction )

virtual

Specifies which subset proteins should be reported.

If you have a primary hit with (say) 100 peptide matches, you are very interested in sub-set proteins with 99 matches but not if they only have 1 or 2 matches. These are the sub-set hits that clutter up the report if Show Sub-sets is checked in Mascot 2.1 and earlier. If you have a primary hit with (say) 2 peptide matches, you are more likely to be interested in sub-set proteins with just 1 match.

A scoreFraction of 1 indicates that all proteins with a subset of matches should be reported. A value of 0 indicates that no subsets will be reported. The flag MSRES_SHOW_SUBSETS must have been specified if a value > 0 is used, or no subsets will be shown.

See also: getNextSubsetProtein(), getNextSubsetProteinOf()

Parameters

scoreFraction is the fractional score required for a protein to be counted as a subset. Its score must be greater than or equal to main_protein_score * (1-scoreFraction). The default value is 1.0.

The documentation for this class was generated from the following files:

ms_mascotresults.hpp
ms_mascotresults.cpp

q	Specifies the query number. This should be in the range 1 .. ms_mascotresfilebase::getNumQueries().
p	is the 'rank' number, which should be in the range 1..10.
psmComponent	is the type of data to return: complete molecule, alpha peptide or beta peptide. Leave as default if the search is not a crosslinked search.

Public Types

Public Member Functions

Protected Member Functions

Detailed Description

Member Enumeration Documentation

◆ DB_MATCH_TYPE

◆ DECOY_STATS_COUNT_TYPE

◆ FIND_COMPARE_FLAGS

◆ FIND_FLAGS

◆ FLAGS

◆ HOMOLOGY_THRESHOLD_SOURCE

◆ IONS_HISTOGRAM

◆ QUANT_COMPONENT_STATUS

◆ sortBy

◆ THRESHOLD_TYPE

◆ TREE_CLUSTER_METHOD

◆ UNIQUE_PEP_RULES

Member Function Documentation

◆ anyEmPAI()

◆ anyNumDiscoveredMods()

◆ cancelCreateSummary()

◆ createSummary()

◆ createUnassignedList()

◆ findPeptides()

◆ findProteins()

◆ findProteinsByAccession()

◆ findProteinsByDescription()

◆ freeHit()

◆ getAllFamilyMembersWithThisPepMatch()

◆ getAllProteinsWithThisPepMatch() [1/2]

◆ getAllProteinsWithThisPepMatch() [2/2]

◆ getAvePeptideIdentityThreshold()

◆ getComponentIntensity()

◆ getComponentProtein()

◆ getComponentString()

◆ getCreateSummaryProgress()

◆ getDiscoveredErrTolModDeltas()

◆ getDiscoveredErrTolModNames()

◆ getDiscoveredLocalModDeltas()

◆ getDiscoveredLocalModNames()

◆ getErrTolModDelta()

◆ getErrTolModifiedNaSeq()

◆ getErrTolModMasterNeutralLoss()

◆ getErrTolModMasterString()

◆ getErrTolModName()

◆ getErrTolModNeutralLoss()

◆ getErrTolModPepNeutralLoss()

◆ getErrTolModPepString()

◆ getErrTolModReqPepNeutralLoss()

◆ getErrTolModReqPepString()

◆ getErrTolModSlaveNeutralLoss()

◆ getErrTolModSlaveString()

◆ getErrTolModString()

◆ getErrTolOriginalNaSeq()

◆ getErrTolProbabilityThreshold()

◆ getFlags()

◆ getFlags2()

◆ getHit()

◆ getHomologyThreshold()

◆ getHomologyThresholdForHistogram()

◆ getIonsScore()

◆ getIonsScoreCorrected()

◆ getIonsScoreHistogram()

◆ getLibraryModString()

◆ getMaxPeptideIdentityThreshold()

◆ getMaxRankValue()

◆ getMinPepLenInPepSummary()

◆ getNextFamilyProtein()

◆ getNextSimilarProtein()

◆ getNextSimilarProteinOf()

◆ getNextSubsetProtein()

◆ getNextSubsetProteinOf()

◆ getNumberOfFamilyMembers()

◆ getNumberOfHits()

◆ getNumberOfUnassigned()

◆ getNumDecoyHitsAboveHomology()

◆ getNumDecoyHitsAboveIdentity()

◆ getNumDiscoveredErrTolMods()

◆ getNumDiscoveredFixedMods()

◆ getNumDiscoveredIntactLinks()