Matrix Science Mascot Parser toolkit
 
Loading...
Searching...
No Matches
tools_aahelper.cpp

For calculating peptide and fragment masses.

/*
##############################################################################
# File: tools_aahelper.cpp #
# Mascot Parser toolkit example code #
# Example code for generating peptides, peptide and fragment masses #
##############################################################################
# COPYRIGHT NOTICE #
# Copyright 1998-2006 Matrix Science Limited All Rights Reserved. #
# #
##############################################################################
# $Source: parser/examples/test_cxx/tools_aahelper.cpp $
# $Author: villek@matrixscience.com $
# $Date: 2018-07-30 16:23:53 +0100 $
# $Revision: 1b450440f9c97e1e41d0fc6016a27d68951d4532 | MSPARSER_REL_3_0_0-2024-09-24-0-g93ebaeb4f4 $
##############################################################################
*/
#include "msparser.hpp"
#include <cstring>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>
// All the classes are part of the matrix_science namespace
using namespace matrix_science;
// Prints the number of fragments followed by a tab-separated table
// (one row per fragment) to std::cout.
static void printFragmentsTable(ms_fragmentvector & fragments);
// Prints a block of Mascot search query text to std::cout: the MODS / IT_MODS
// lines for the fixed and variable modifications, CHARGE, CLE (enzyme title),
// INSTRUMENT, and finally the peptide mass followed by an "ions(...)" list
// built from the masses of the supplied fragments.
static void displayMascotTestSearch(const ms_modvector vecFixed,
const ms_modvector vecVariable,
const ms_enzyme * enzyme,
const double mr,
const ms_fragmentvector fragments);
int main(int argc, char * argv[])
{
// We need an enzyme to build a list of peptides
// and mod_file file if we want to apply any modifications
if ( argc < 3 )
{
std::cout << "Location of enzymes file and mod_file has to be specified as parameters" << std::endl;
return 0;
}
ms_enzymefile enzymefile(argv[1]);
if ( !enzymefile.isValid() )
{
std::cout << "There are errors. Cannot continue. The last error description:" << std::endl;
std::cout << enzymefile.getLastErrorString() << std::endl;
return 1;
}
const ms_enzyme *enzyme = enzymefile.getEnzymeByName("Trypsin");
if ( enzyme == NULL )
{
std::cout << "Cannot find Trypsin enzyme in the file. Cannot continue." << std::endl;
return 1;
}
// we need masses file, but can use default masses anyway
ms_masses masses;
ms_modfile modfile(argv[2], &masses);
if ( !modfile.isValid() )
{
std::cout << "There are errors. Cannot continue. The last error description:" << std::endl;
std::cout << modfile.getLastErrorString() << std::endl;
return 1;
}
const ms_modification *oxidation = modfile.getModificationByName("Oxidation (M)");
const ms_modification *acetylNterm = modfile.getModificationByName("Acetyl (N-term)");
const ms_modification *phospho = modfile.getModificationByName("Phospho (STY)");
if ( oxidation == NULL || acetylNterm == NULL || phospho == NULL)
{
std::cout << "Cannot find necessary modifications in the mod_file. Cannot continue." << std::endl;
return 1;
}
ms_aahelper aahelper;
aahelper.setMasses(&masses);
aahelper.setEnzyme(enzyme);
// Now we can generate peptides for a given protein
char proteinStr[] = "MAIFRIDEIRNMSSEELEEELRKLEVELIRERGAVRAGGAPEKPGRIREIRRTIARMKTVQRERVRK";
aahelper.startIteratePeptides(proteinStr, static_cast<int>(strlen(proteinStr)), 0);// no missed cleavages are allowed
std::cout << "List of peptides" << std::endl;
while(aahelper.getNextPeptide())
{
int start = aahelper.getPepStart()-1; // the method returns 1-based position
int len = aahelper.getPepEnd() - aahelper.getPepStart()+1;
std::string peptideStr;
peptideStr.assign(proteinStr+start, len);
std::cout << peptideStr << std::endl;
}
std::cout << "End of list" << std::endl;
// create a list of fixed modifications
ms_modvector vecFixed;
vecFixed.appendModification(phospho);
// create a list of variable modifications
ms_modvector vecVariable;
vecVariable.appendModification(oxidation);
vecVariable.appendModification(acetylNterm);
aahelper.setAvailableModifications(&vecFixed, &vecVariable);
// ms_aahelper can also contain errors that might happen when applying modifications
// for instance, when we have a conflict between two modifications (same residue or same peptide end)
if ( !aahelper.isValid() )
{
std::cout << "There are errors. Cannot continue. The last error description:" << std::endl;
std::cout << aahelper.getLastErrorString() << std::endl;
return 1;
}
// we will need also a separate error-object for collecting peptide-specific errors
ms_errs err;
// Example of how to call calcPeptideMZ
// It will often be more convenient to create an ms_peptide and then
// call getMrCalc() on that object.
std::vector< int > numThatMustBeModded;
numThatMustBeModded.push_back(1); // 1 acetylNterm modification
numThatMustBeModded.push_back(1); // 1 site is oxidised
double mr = aahelper.calcPeptideMZ(proteinStr,
static_cast<int>(strlen(proteinStr)),
1, 10, // peptide ends (1-based)
numThatMustBeModded,
0, // no charge - i.e. mr
&err);
if ( !err.isValid() )
{
std::cout << "There have been errors while calculating peptide mass: " << std::endl;
std::cout << err.getLastErrorString() << std::endl;
// don't need to halt as they are not fatal errors
err.clearAllErrors(); // prepare to re-use it
}
else
{
std::cout << "Peptide mass calculated using 'calcPeptideMZ' is "
<< std::setprecision(3)
<< std::fixed
<< std::setw(8)
<< mr
<< std::endl << std::endl << std::endl;
}
// Create a peptide - which we can then fragment
//
// Specify which residues are modified by which modification.
// as it has to correspond to a modification string
// Nterm modification + 9 residues + Cterm modification
std::vector< int > numModded;
numModded.push_back(2); // N-term - modified by "Acetyl (N-term)"
numModded.push_back(1); // M - modified by "Oxidation (M)"
numModded.push_back(0); // A
numModded.push_back(0); // I
numModded.push_back(0); // F
numModded.push_back(0); // R
numModded.push_back(0); // I
numModded.push_back(0); // D
numModded.push_back(0); // E
numModded.push_back(0); // I
numModded.push_back(0); // R
numModded.push_back(0); // C-term
// we have to specify (or at least supply an empty vector) which neutral loss value to use
// in case there are more than one available for a modification
std::vector< int > whichNl;
whichNl.push_back(0); // N-term
whichNl.push_back(1); // M - has 2 neutral losses. Specify the first (-98)
whichNl.push_back(0); // A
whichNl.push_back(0); // I
whichNl.push_back(0); // F
whichNl.push_back(0); // R
whichNl.push_back(0); // I
whichNl.push_back(0); // D
whichNl.push_back(0); // E
whichNl.push_back(0); // I
whichNl.push_back(0); // R
whichNl.push_back(0); // C-term
ms_peptide peptide = aahelper.createPeptide(proteinStr,
static_cast<int>(strlen(proteinStr)),
1,10, // start, end positions
numModded, // modification string-like vector
whichNl, // which neutral loss to use
0, // no charge
&err); // collect errors in it
if ( !err.isValid() )
{
std::cout << "There have been errors while creating a peptide: " << std::endl;
std::cout << err.getLastErrorString() << std::endl;
// don't need to halt as they are not fatal errors
err.clearAllErrors(); // prepare to re-use it
}
else
{
std::cout
<< "Peptide has been created successfully: "
<< peptide.getPeptideStr()
<< std::endl;
}
std::vector< double > ions;
ms_fragmentvector fragments;
ms_fragmentvector all_fragments; // Keep a list of fragments from all series
ions = aahelper.calcFragments(&peptide, // that is why we needed to create a peptide object first
ms_fragmentationrules::FRAG_B_SERIES, // ions series ID
false, // single-charged ions only
100.0, // minimal fragment mass to return
mr, // maximal fragment mass to return
&fragments,
&err); // collect peptide-specific errors
std::cout << "b-ion series fragments: " << std::endl;
printFragmentsTable(fragments);
all_fragments.copyFrom(&fragments);
b_ions.copyFrom(&fragments);
ions = aahelper.calcFragments(&peptide, // that is why we needed to create a peptide object first
ms_fragmentationrules::FRAG_Y_SERIES, // ions series ID
false, // single-charged ions only
100.0, // minimal fragment mass to return
mr, // maximal fragment mass to return
&fragments,
&err); // collect peptide-specific errors
std::cout << "y-ion series fragments: " << std::endl;
printFragmentsTable(fragments);
int i;
for (i=0; i < fragments.getNumberOfFragments(); i++)
all_fragments.appendFragment(fragments.getFragmentByNumber(i));
aahelper.calcFragmentsEx(&peptide, // that is why we needed to create a peptide object first
ms_fragmentationrules::FRAG_Y_SERIES, // ions series ID
2, // double-charged ions only
100.0, // minimal fragment mass to return
mr, // maximal fragment mass to return
&fragments,
&err); // collect peptide-specific errors
std::cout << "y++-ion series fragments: " << std::endl;
printFragmentsTable(fragments);
for (i=0; i < fragments.getNumberOfFragments(); i++)
all_fragments.appendFragment(fragments.getFragmentByNumber(i));
ions = aahelper.calcFragments(&peptide, // that is why we created a peptide object first
ms_fragmentationrules::FRAG_INTERNAL_YB, // ions series ID
false, // single-charged ions only
100.0, // minimal fragment mass to return
700, // maximal fragment mass to return
&fragments,
&err); // collect peptide-specific errors
std::cout << "internal yb-ion series fragments: " << std::endl;
printFragmentsTable(fragments);
for (i=0; i < fragments.getNumberOfFragments(); i++)
all_fragments.appendFragment(fragments.getFragmentByNumber(i));
std::cout << "Run a search under Mascot to verify the output above" << std::endl;
std::cout << "Paste the following into a Mascot search query window:" << std::endl;
displayMascotTestSearch(vecFixed, vecVariable,
enzyme,
mr,
b_ions); // Or you can use all_fragments
return 0;
}
/*
 * Writes a summary of a fragment list to std::cout: first the number of
 * fragments, then a header row, then one tab-separated row per fragment
 * (column, start, end, label, mass, neutral loss, series name and the
 * immonium / internal / regular flags), and finally an empty line.
 *
 * Note that the stream is left in std::fixed mode with a precision of 2
 * once at least one row has been written.
 */
static void printFragmentsTable(ms_fragmentvector & fragments)
{
    const int fragmentCount = fragments.getNumberOfFragments();

    std::cout << "Number of fragments: " << fragmentCount << std::endl;
    std::cout << "Col\tStart\tEnd\tLabel\t\t Mass\t NL\tName\tImmon\tIntern\tReg" << std::endl;

    for (int row = 0; row < fragmentCount; ++row)
    {
        const ms_fragment * current = fragments.getFragmentByNumber(row);

        // Position information
        std::cout << current->getColumn() << "\t"
                  << current->getStart()  << "\t"
                  << current->getEnd()    << "\t";

        // Label (right-aligned in 10 characters) and mass with 2 decimals
        std::cout << std::setw(10) << current->getLabel() << "\t"
                  << std::fixed << std::setprecision(2) << std::setw(7)
                  << current->getMass() << "\t";

        // Neutral loss, series name and the three classification flags
        std::cout << current->getNeutralLoss() << "\t"
                  << current->getSeriesName()  << "\t"
                  << current->isImmonium()     << "\t"
                  << current->isInternal()     << "\t"
                  << current->isRegular()      << std::endl;
    }

    std::cout << std::endl;
}
static void displayMascotTestSearch(const ms_modvector vecFixed,
const ms_modvector vecVariable,
const ms_enzyme * enzyme,
const double mr,
const ms_fragmentvector fragments)
{
/* fragments contains a list of b-ions from a peptide
* vecVariable contains a list of variable mods applied to the peptide
* vecFixed contains a list of fixed mods applied to the peptide
* Use this information to generate an test search that can be run on Mascot.
*/
int i;
for (i=0; i < vecFixed.getNumberOfModifications(); i++)
{
std::cout << "MODS="
<< std::endl;
}
for (i=0; i < vecVariable.getNumberOfModifications(); i++)
{
std::cout << "IT_MODS="
<< vecVariable.getModificationByNumber(i)->getTitle()
<< std::endl;
}
std::cout << "CHARGE=Mr" << std::endl;
std::cout << "CLE=" << enzyme->getTitle() << std::endl;
std::cout << "INSTRUMENT=MALDI-TOF-TOF" << std::endl;
std::cout << std::setprecision(3) << mr << " ions(";
for(i=0; i < fragments.getNumberOfFragments(); i++)
{
if (i > 0)
std::cout << ", ";
std::cout << fragments.getFragmentByNumber(i)->getMass();
}
std::cout << ")" << std::endl;
}
/*
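Example output from a previous run. It was generated with a different protein
sequence and modification setup than the code above, so the exact peptides,
modification lines and masses printed by this program will differ: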
# tools_aahelper ../config/enzymes ../config/mod_file
List of peptides
M
MEDYLDELR
EDYLDELR
HK
IPSFIVELLK
NNLK
NR
NLTR
NQLNK
IVNR
VSDLYFGK
KPEDK
K
AAELTNK
INDLSHK
LDALMK
VATVSSATK
VSDDIK
K
EIDNLDELDL
End of list
Peptide mass calculated using 'calcPeptideMZ' is 1320.494
Peptide has been created successfully: MEDYLDELR
b-ion series fragments:
Number of fragments: 8
Col Start End Label Mass NL Name Immon Intern Reg
1 1 -1 b(1) 190.05 0.00 b 0 0 1
2 2 -1 b(2) 319.10 0.00 b 0 0 1
3 3 -1 b(3) 434.12 0.00 b 0 0 1
4 4 -1 b(4) -97 579.18 97.98 b 0 0 1
5 5 -1 b(5) -97 692.26 97.98 b 0 0 1
6 6 -1 b(6) -97 807.29 97.98 b 0 0 1
7 7 -1 b(7) -97 936.33 97.98 b 0 0 1
8 8 -1 b(8) -97 1049.41 97.98 b 0 0 1
y-ion series fragments:
Number of fragments: 8
Col Start End Label Mass NL Name Immon Intern Reg
8 8 -1 y(8) -97 1034.48 97.98 y 0 0 1
7 7 -1 y(7) -97 905.44 97.98 y 0 0 1
6 6 -1 y(6) -97 790.41 97.98 y 0 0 1
5 5 -1 y(5) 645.36 0.00 y 0 0 1
4 4 -1 y(4) 532.27 0.00 y 0 0 1
3 3 -1 y(3) 417.25 0.00 y 0 0 1
2 2 -1 y(2) 288.20 0.00 y 0 0 1
1 1 -1 y(1) 175.12 0.00 y 0 0 1
internal yb-ion series fragments:
Number of fragments: 18
Col Start End Label Mass NL Name Immon Intern Reg
2 2 3 ED 245.08 0.00 yb 0 1 0
2 2 4 EDY -97 390.13 97.98 yb 0 1 0
2 2 5 EDYL -97 503.21 97.98 yb 0 1 0
2 2 6 EDYLD -97 618.24 97.98 yb 0 1 0
3 3 4 DY -97 261.09 97.98 yb 0 1 0
3 3 5 DYL -97 374.17 97.98 yb 0 1 0
3 3 6 DYLD -97 489.20 97.98 yb 0 1 0
3 3 7 DYLDE -97 618.24 97.98 yb 0 1 0
4 4 5 YL -97 259.14 97.98 yb 0 1 0
4 4 6 YLD -97 374.17 97.98 yb 0 1 0
4 4 7 YLDE -97 503.21 97.98 yb 0 1 0
4 4 8 YLDEL -97 616.30 97.98 yb 0 1 0
5 5 6 LD 229.12 0.00 yb 0 1 0
5 5 7 LDE 358.16 0.00 yb 0 1 0
5 5 8 LDEL 471.24 0.00 yb 0 1 0
6 6 7 DE 245.08 0.00 yb 0 1 0
6 6 8 DEL 358.16 0.00 yb 0 1 0
7 7 8 EL 243.13 0.00 yb 0 1 0
Run a search under Mascot to verify the output above
Paste the following into a Mascot search query window:
MODS=Oxidation (M)
IT_MODS=Acetyl (N-term)
IT_MODS=Phospho (STY)
CHARGE=Mr
CLE=Trypsin
INSTRUMENT=MALDI-TOF-TOF
1320.494 ions(190.053, 319.096, 434.123, 579.176, 692.260, 807.287, 936.329, 1049.413)
*/
This class facilitates creation of peptides and calculation of MS-fragment masses.
Definition: ms_aahelper.hpp:35
std::vector< double > calcFragments(const ms_peptide *peptide, const int seriesID, const bool doubleCharged, const double minMass, const double maxMass, const MASS_TYPE massType, ms_fragmentvector *fragments, ms_errs *err, const FRAG_NL fragNlType=FRAG_NL_PRIMARY) const
Returns a vector of ion masses for the given series and peptide.
Definition: ms_aahelper.cpp:4072
int getPepEnd() const
Returns the last position of the current peptide.
Definition: ms_aahelper.cpp:1105
bool calcFragmentsEx(const ms_peptide *peptide, const int seriesID, const int fragCharge, const double minMass, const double maxMass, const MASS_TYPE massType, ms_fragmentvector *fragments, ms_errs *err, const FRAG_NL fragNlType=FRAG_NL_PRIMARY) const
Returns a vector of ion masses for the given series and peptide.
Definition: ms_aahelper.cpp:4124
double calcPeptideMZ(const char *proteinStr, const int proteinLen, const int startPos, const int endPos, const std::vector< int > numThatMustBeModded, const int charge, const MASS_TYPE massType, ms_errs *err) const
Calculates and returns a peptide m/z (or Mr) value without specifying exact positions of modified res...
Definition: ms_aahelper.cpp:1163
void setEnzyme(const ms_enzyme *enzyme)
Sets an enzyme to use.
Definition: ms_aahelper.cpp:494
void setAvailableModifications(const ms_modvector *fixedModVector, const ms_modvector *varModVector)
Applies fixed modifications to residue masses and stores variable modifications for later use.
Definition: ms_aahelper.cpp:550
bool getNextPeptide()
Call to iterate to the next peptide.
Definition: ms_aahelper.cpp:885
void setMasses(const ms_masses *masses)
Copies all residue and chemical element masses from the masses file instance.
Definition: ms_aahelper.cpp:512
void startIteratePeptides(const char *sequenceProtein, const int sizeProtein, const int pfa)
Initialises internal members in preparation for generating peptides.
Definition: ms_aahelper.cpp:837
ms_peptide createPeptide(const char *proteinStr, const int proteinLen, const int startPos, const int endPos, const std::vector< int > varMods, const std::vector< int > varModsWhichNl, const int charge, const MASS_TYPE massType, ms_errs *err) const
Creates and returns a peptide object.
Definition: ms_aahelper.cpp:1558
int getPepStart() const
Returns the first position of the current peptide.
Definition: ms_aahelper.cpp:1094
Represent a single entry in the enzymes file.
Definition: ms_enzyme.hpp:67
std::string getTitle() const
Returns a name of the enzyme as appears in the file.
Definition: ms_enzyme.cpp:318
Reads and parses the enzymes file that contains multiple enzyme definitions.
Definition: ms_enzyme.hpp:194
std::string getLastErrorString() const
Return the error description of the last error that occurred.
Definition: ms_errors.cpp:1488
bool isValid() const
Call this function to determine if there have been any errors.
Definition: ms_errors.cpp:1472
All errors are collected in an instance of this class.
Definition: ms_errors.hpp:37
std::string getLastErrorString() const
Return the last error number or an empty string.
Definition: ms_errors.cpp:1126
bool isValid() const
Returns TRUE if no errors occurred, FALSE otherwise.
Definition: ms_errors.cpp:707
void clearAllErrors()
Remove all errors from the current list of errors.
Definition: ms_errors.cpp:1019
Class representing a single peptide fragment.
Definition: ms_fragment.hpp:38
int getColumn() const
Returns the 1-based position index in case of a regular series.
Definition: ms_fragment.cpp:809
std::string getLabel(const LABEL_FORMAT fmt=LABEL_FORMAT_TRADITIONAL) const
Returns a symbolic label like "a*(6)++".
Definition: ms_fragment.cpp:758
std::string getSeriesName() const
Returns the ion series name.
Definition: ms_fragment.cpp:804
bool isImmonium() const
Returns TRUE for immonium ions.
Definition: ms_fragment.cpp:733
int getStart() const
Returns the 1-based start position within the peptide sequence.
Definition: ms_fragment.cpp:818
double getNeutralLoss() const
Returns the neutral loss (m/z) values applied to the fragment.
Definition: ms_fragment.cpp:748
int getEnd() const
Returns the 1-based last position in case of an internal fragment.
Definition: ms_fragment.cpp:828
bool isInternal() const
Returns TRUE for internal fragments ya and yb.
Definition: ms_fragment.cpp:728
double getMass() const
Returns the ion mass (m/z) value.
Definition: ms_fragment.cpp:743
bool isRegular() const
Returns TRUE for a, b, c, d, y, v, w, x, z series and their derivatives.
Definition: ms_fragment.cpp:713
Class for holding a list of ms_fragment objects.
Definition: ms_fragmentvector.hpp:47
int getNumberOfFragments() const
Returns a number of fragments currently held in the vector.
Definition: ms_fragmentvector.cpp:88
const ms_fragment * getFragmentByNumber(const unsigned int numFrag) const
Return a fragment object by its number.
Definition: ms_fragmentvector.cpp:121
void appendFragment(const ms_fragment *item)
Adds a new fragment to the end of the vector.
Definition: ms_fragmentvector.cpp:109
void copyFrom(const ms_fragmentvector *right)
Copies all content from another instance.
Definition: ms_fragmentvector.cpp:60
Reads and parses the masses file with residue and atom masses.
Definition: ms_masses.hpp:48
Use this class in order to read in the amino acid modification file.
Definition: ms_modfile.hpp:407
The class represents a single modification-entry in mod_file.
Definition: ms_modfile.hpp:134
std::string getTitle() const
Returns the modification title.
Definition: ms_modfile.cpp:171
General usage class for creating lists of modifications to be passed as parameters.
Definition: ms_modfile.hpp:339
const ms_modification * getModificationByNumber(const int numMod) const
Returns a modification object by its number.
Definition: ms_modfile.cpp:955
int getNumberOfModifications() const
Returns the number of modifications currently held in the vector.
Definition: ms_modfile.cpp:918
void appendModification(const ms_modification *item)
Adds a new modification at the end of the vector.
Definition: ms_modfile.cpp:933
This class encapsulates a peptide from the mascot results file.
Definition: ms_peptide.hpp:57
std::string getPeptideStr(bool substituteAmbiguous=true, const PSM psmComponent=PSM_COMPLETE) const
Returns the sequence found for the peptide.
Definition: ms_peptide.cpp:710
@ MASS_TYPE_MONO
monoisotopic mass index
Definition: ms_modfile.hpp:66