For calculating peptide and fragment masses.
#!/usr/local/bin/perl
##############################################################################
# file: tools_aahelper.pl #
# Mascot Parser toolkit example code #
##############################################################################
# COPYRIGHT NOTICE #
# Copyright 1998-2010 Matrix Science Limited All Rights Reserved. #
# #
##############################################################################
# $Source: parser/examples/test_perl/tools_aahelper.pl $
# $Author: villek@matrixscience.com $
# $Date: 2018-07-30 16:23:53 +0100 $
# $Revision: 1b450440f9c97e1e41d0fc6016a27d68951d4532 | MSPARSER_REL_3_0_0-2024-09-24-0-g93ebaeb4f4 $
##############################################################################
use strict;
##############################################################################
use msparser;
if (!defined($ARGV[0]) || !defined($ARGV[1])) {
# We need an enzyme to build a list of peptides and mod_file file if we
# want to apply any modifications.
print "Location of enzymes file and mod_file has to be specified as parameters\n";
exit 1;
}
my $enzymefile = open_enzymefile($ARGV[0]);
# Note: both $enzymefile *must* be kept in scope for as long as you use
# $Trypsin. See "Using the toolkit from Perl, Java and Python" in Mascot Parser
# manual.
my $Trypsin = $enzymefile->getEnzymeByName('Trypsin');
if (!$Trypsin) {
print "Cannot find 'Trypsin' in the enzyme file. Cannot continue.\n";
exit 1;
}
my $modfile = open_modfile($ARGV[1]);
my $aahelper = new msparser::ms_aahelper;
# Note: both $modfile and $Trypsin *must* be kept in scope for as
# long as you use $aahelper. See "Using the toolkit from Perl, Java and
# Python" in Mascot Parser manual.
$aahelper->setMasses($modfile->getMassFile);
$aahelper->setEnzyme($Trypsin);
# Now we can generate peptides for a given protein. This is
# RL29_METTP (50S ribosomal protein L29P OS=Methanosaeta thermophila (strain
# DSM 6194 / PT) GN=rpl29p PE=3 SV=1) from SwissProt 2010_09.
my $proteinStr = "MAIFRIDEIRNMSSEELEEELRKLEVELIRERGAVRAGGAPEKPGRIREIRRTIARMKTVQRERVRK";
# No missed cleavages are allowed (third parameter).
$aahelper->startIteratePeptides($proteinStr, length($proteinStr), 0);
print "List of peptides\n";
while ($aahelper->getNextPeptide) {
my $start = $aahelper->getPepStart();
my $len = $aahelper->getPepEnd() - $aahelper->getPepStart() + 1;
# getPepStart() returns one-based index.
my $peptideStr = substr($proteinStr, $start - 1, $len);
print $peptideStr, "\n";
}
print "End of list\n\n";
# Create a list of fixed modifications.
my $vecFixed = new msparser::ms_modvector();
$vecFixed->appendModification(
$modfile->getModificationByName('Phospho (Y)')
);
# Create a list of variable modifications.
my $vecVariable = new msparser::ms_modvector();
$vecVariable->appendModification(
$modfile->getModificationByName('Oxidation (M)')
);
$vecVariable->appendModification(
$modfile->getModificationByName('Acetyl (N-term)')
);
# Note: both $vecFixed and $vecVariable *must* be kept in scope for as
# long as you use $aahelper. See "Using the toolkit from Perl, Java and
# Python" in Mascot Parser manual.
$aahelper->setAvailableModifications($vecFixed, $vecVariable);
# ms_aahelper can also contain errors that might happen when applying
# modifications, for instance when we have a conflict between two
# modifications (same residue or same peptide end).
if (!$aahelper->isValid()) {
print "Error while setting available modifications: ";
print $aahelper->getLastErrorString(), "\n";
exit(1);
}
# We will need also a separate error object for collecting peptide-specific
# errors.
my $err = new msparser::ms_errs();
# Example of how to call calcPeptideMZ(). It will often be more convenient to
# create an ms_peptide instead, and then call getMrCalc() on that object.
my $numThatMustBeModded = new msparser::vectori();
$numThatMustBeModded->push(1); # 1 acetylNterm modification
$numThatMustBeModded->push(1); # 1 site is oxidised
my $mr = $aahelper->calcPeptideMZ(
$proteinStr,
length($proteinStr),
1,
10, # peptide ends (1-based)
$numThatMustBeModded,
0, # no charge - i.e. Mr
$msparser::MASS_TYPE_MONO,
$err
);
if (!$err->isValid()) {
print "Error while calculating peptide mass: ";
print $err->getLastErrorString(), "\n";
# Don't need to halt as they are not fatal errors.
$err->clearAllErrors();
} else {
printf "Peptide mass calculated using 'calcPeptideMZ' is %8.3f\n", $mr;
}
# Create a peptide - which we can then fragment.
#
# Specify which residues are modified by which modification as it has to
# correspond to a modification string:
#
# Nterm modification + 9 residues + Cterm modification
my $numModded = new msparser::vectori();
$numModded->push(2); # N-term - modified by "Acetyl (N-term)"
$numModded->push(1); # M - modified by "Oxidation (M)"
$numModded->push(0); # A
$numModded->push(0); # I
$numModded->push(0); # F
$numModded->push(0); # R
$numModded->push(0); # I
$numModded->push(0); # D
$numModded->push(0); # E
$numModded->push(0); # I
$numModded->push(0); # R
$numModded->push(0); # C-term
# We have to specify (or at least supply an empty vector) which neutral loss
# value to use, in case there are more than one available for a modification.
my $whichNl = new msparser::vectori();
$whichNl->push(0); # N-term
$whichNl->push(1); # M - has 2 neutral losses. Specify the first (-98)
$whichNl->push(0); # A
$whichNl->push(0); # I
$whichNl->push(0); # F
$whichNl->push(0); # R
$whichNl->push(0); # I
$whichNl->push(0); # D
$whichNl->push(0); # E
$whichNl->push(0); # I
$whichNl->push(0); # R
$whichNl->push(0); # C-term
my $peptide = $aahelper->createPeptide(
$proteinStr,
length($proteinStr),
1,
10, # end positions
$numModded, # modification string-like vector
$whichNl, # which neutral loss to use
0, # no charge
$msparser::MASS_TYPE_MONO,
$err
);
if (!$err->isValid()) {
print "Error while creating a peptide: ";
print $err->getLastErrorString(), "\n";
# Don't need to halt as they are not fatal errors.
$err->clearAllErrors();
} else {
print "\n\nPeptide has been created successfully: ";
print $peptide->getPeptideStr(), "\n";
}
# Keep a list of fragments from all series
my $all_fragments = new msparser::ms_fragmentvector;
my $b_ions = fragmentPeptide(
$aahelper,
$peptide,
$msparser::ms_fragmentationrules::FRAG_B_SERIES,
'b-ion series',
0, # single-charged ions only
$mr, # maximal fragment mass to return
);
# copyFrom() can only be used to populate the list for the first time.
$all_fragments->copyFrom($b_ions);
my $fragments = fragmentPeptide(
$aahelper,
$peptide,
$msparser::ms_fragmentationrules::FRAG_Y_SERIES,
'y-ion series',
0, # single-charged ions only
$mr, # maximal fragment mass to return
);
for my $i (0 .. $fragments->getNumberOfFragments - 1) {
$all_fragments->appendFragment($fragments->getFragmentByNumber($i));
}
$fragments = fragmentPeptide(
$aahelper,
$peptide,
$msparser::ms_fragmentationrules::FRAG_Y_SERIES,
'y++-ion series',
2, # double-charged ions only
$mr, # maximal fragment mass to return
);
for my $i (0 .. $fragments->getNumberOfFragments - 1) {
$all_fragments->appendFragment($fragments->getFragmentByNumber($i));
}
$fragments = fragmentPeptide(
$aahelper,
$peptide,
$msparser::ms_fragmentationrules::FRAG_INTERNAL_YB,
'internal yb-ion series',
0, # single-charged ions only
700, # maximal fragment mass to return
);
for my $i (0 .. $fragments->getNumberOfFragments - 1) {
$all_fragments->appendFragment($fragments->getFragmentByNumber($i));
}
print "Paste the following into a Mascot search query window to verify this output:\n";
displayMascotTestSearch(
$vecFixed, $vecVariable, $Trypsin, $peptide->getMrCalc,
$b_ions # or you can use $all_fragments
);
sub open_enzymefile {
my ($filename) = @_;
my $enzymefile = new msparser::ms_enzymefile($filename);
if (!$enzymefile->isValid) {
print "Error while opening enzyme file: ";
print $enzymefile->getLastErrorString() . "\n";
exit 1;
}
return $enzymefile;
}
sub open_modfile {
my ($filename) = @_;
# We can use the default masses in this example.
my $masses = new msparser::ms_masses;
my $modfile = new msparser::ms_modfile($filename, $masses);
if (!$modfile->isValid) {
print "Error while opening mod file: ";
print $modfile->getLastErrorString . "\n";
exit 1;
}
for ('Oxidation (M)', 'Acetyl (N-term)', 'Phospho (Y)') {
if (not $modfile->getModificationByName($_)) {
print "Cannot find '$_' in the mod file. Cannot continue.\n";
exit(1);
}
}
return $modfile;
}
sub fragmentPeptide {
my ($aa_helper, $peptide, $series, $series_label, $doubleCharged, $mass_max) = @_;
my $fragments = new msparser::ms_fragmentvector;
my $err = new msparser::ms_errs();
$aahelper->calcFragments(
$peptide,
$series,
$doubleCharged ? 1 : 0,
100.0,
$mass_max,
$msparser::MASS_TYPE_MONO,
$fragments,
$err
);
# Check $err here.
print $series_label, " fragments: \n";
printFragmentsTable($fragments);
return $fragments;
}
sub printFragmentsTable {
my ($fragments) = @_;
print "Number of fragments: ", $fragments->getNumberOfFragments(), "\n";
my $headerfmt = "%5s %5s %5s %-10s %7s %7s %4s %5s %6s %4s\n";
my $fmt = "%5s %5s %5s %-10s %7.2f %7.2f %4s %5s %6s %4s\n";
printf $headerfmt, qw(Col Start End Label Mass NL Name Immon Intern Reg);
for my $i (0 .. $fragments->getNumberOfFragments - 1) {
my $frag = $fragments->getFragmentByNumber($i);
printf $fmt,
$frag->getColumn(),
$frag->getStart(),
$frag->getEnd(),
$frag->getLabel(),
$frag->getMass(),
$frag->getNeutralLoss(),
$frag->getSeriesName(),
$frag->isImmonium(),
$frag->isInternal(),
$frag->isRegular(),
;
}
print "\n";
}
# $vecFixed contains a list of fixed mods applied to the peptide
# $vecVariable contains a list of variable mods applied to the peptide
# $enzyme is the enzyme used in fragmentation
# $mr is the peptide Mr(calc)
# $fragments contains a list of b-ions from a peptide
#
# Use this information to generate an test search that can be run on Mascot.
sub displayMascotTestSearch {
my ($vecFixed, $vecVariable, $enzyme, $mr, $fragments) = @_;
for my $i (0 .. $vecFixed->getNumberOfModifications-1) {
printf "MODS=%s\n", $vecFixed->getModificationByNumber($i)->getTitle();
}
for my $i (0 .. $vecVariable->getNumberOfModifications-1) {
printf "IT_MODS=%s\n", $vecVariable->getModificationByNumber($i)->getTitle();
}
print "CHARGE=Mr\n";
print "CLE=", $enzyme->getTitle(), "\n";
print "INSTRUMENT=MALDI-TOF-TOF\n";
printf "%.3f ions(", $mr;
print join(', ', map {
sprintf("%.3f", $fragments->getFragmentByNumber($_)->getMass)
} 0 .. $fragments->getNumberOfFragments-1
);
print ")\n";
}
=pod
Running the program as
perl -I../bin tools_aahelper.pl ../config/enzymes ../config/mod_file
will give the following output under Mascot 2.3:
List of peptides
M
MAIFR
AIFR
IDEIR
NMSSEELEEELR
K
LEVELIR
ER
GAVR
AGGAPEKPGR
IR
EIR
R
TIAR
MK
TVQR
ER
VR
K
End of list
Peptide mass calculated using 'calcPeptideMZ' is 1320.686
Peptide has been created successfully: MAIFRIDEIR
b-ion series fragments:
Number of fragments: 9
Col Start End Label Mass NL Name Immon Intern Reg
1 1 -1 b(1) 190.05 0.00 b 1
2 2 -1 b(2) 261.09 0.00 b 1
3 3 -1 b(3) 374.17 0.00 b 1
4 4 -1 b(4) 521.24 0.00 b 1
5 5 -1 b(5) 677.34 0.00 b 1
6 6 -1 b(6) 790.43 0.00 b 1
7 7 -1 b(7) 905.45 0.00 b 1
8 8 -1 b(8) 1034.50 0.00 b 1
9 9 -1 b(9) 1147.58 0.00 b 1
y-ion series fragments:
Number of fragments: 9
Col Start End Label Mass NL Name Immon Intern Reg
9 9 -1 y(9) 1132.65 0.00 y 1
8 8 -1 y(8) 1061.61 0.00 y 1
7 7 -1 y(7) 948.53 0.00 y 1
6 6 -1 y(6) 801.46 0.00 y 1
5 5 -1 y(5) 645.36 0.00 y 1
4 4 -1 y(4) 532.27 0.00 y 1
3 3 -1 y(3) 417.25 0.00 y 1
2 2 -1 y(2) 288.20 0.00 y 1
1 1 -1 y(1) 175.12 0.00 y 1
y++-ion series fragments:
Number of fragments: 8
Col Start End Label Mass NL Name Immon Intern Reg
9 9 -1 y(9)++ 566.83 0.00 y 1
8 8 -1 y(8)++ 531.31 0.00 y 1
7 7 -1 y(7)++ 474.77 0.00 y 1
6 6 -1 y(6)++ 401.23 0.00 y 1
5 5 -1 y(5)++ 323.18 0.00 y 1
4 4 -1 y(4)++ 266.64 0.00 y 1
3 3 -1 y(3)++ 209.13 0.00 y 1
2 2 -1 y(2)++ 144.61 0.00 y 1
internal yb-ion series fragments:
Number of fragments: 22
Col Start End Label Mass NL Name Immon Intern Reg
2 2 3 AI 185.13 0.00 yb 1
2 2 4 AIF 332.20 0.00 yb 1
2 2 5 AIFR 488.30 0.00 yb 1
2 2 6 AIFRI 601.38 0.00 yb 1
3 3 4 IF 261.16 0.00 yb 1
3 3 5 IFR 417.26 0.00 yb 1
3 3 6 IFRI 530.34 0.00 yb 1
3 3 7 IFRID 645.37 0.00 yb 1
4 4 5 FR 304.18 0.00 yb 1
4 4 6 FRI 417.26 0.00 yb 1
4 4 7 FRID 532.29 0.00 yb 1
4 4 8 FRIDE 661.33 0.00 yb 1
5 5 6 RI 270.19 0.00 yb 1
5 5 7 RID 385.22 0.00 yb 1
5 5 8 RIDE 514.26 0.00 yb 1
5 5 9 RIDEI 627.35 0.00 yb 1
6 6 7 ID 229.12 0.00 yb 1
6 6 8 IDE 358.16 0.00 yb 1
6 6 9 IDEI 471.24 0.00 yb 1
7 7 8 DE 245.08 0.00 yb 1
7 7 9 DEI 358.16 0.00 yb 1
8 8 9 EI 243.13 0.00 yb 1
Paste the following into a Mascot search query window to verify this output:
MODS=Phospho (Y)
IT_MODS=Oxidation (M)
IT_MODS=Acetyl (N-term)
CHARGE=Mr
CLE=Trypsin
INSTRUMENT=MALDI-TOF-TOF
1320.686 ions(190.053, 261.090, 374.174, 521.243, 677.344, 790.428, 905.455, 1034.498, 1147.582)
=cut