Matrix Science Mascot Parser toolkit
No Matches

For calculating peptide and fragment masses.

# file: #
# Mascot Parser toolkit example code #
# Copyright 1998-2010 Matrix Science Limited All Rights Reserved. #
# #
# $Source: parser/examples/test_perl/ $
# $Author: $
# $Date: 2018-07-30 16:23:53 +0100 $
# $Revision: 1b450440f9c97e1e41d0fc6016a27d68951d4532 | MSPARSER_REL_3_1_0-2025-07-27-0-gea47708fac $
use strict;
use msparser;
# Main script. Expects two command-line arguments: the enzymes file and the
# mod_file. It looks up the 'Trypsin' enzyme, iterates over the peptides of a
# protein sequence, sets up fixed and variable modifications, calculates the
# Mr of one modified peptide, creates that peptide, fragments it into several
# ion series and finally prints a query for pasting into a Mascot search form.
#
# NOTE(review): this listing appears to have lost lines -- closing braces,
# several call arguments and the declaration of $proteinStr are not visible
# here. Confirm against the original example before running.
if (!defined($ARGV[0]) || !defined($ARGV[1])) {
# We need an enzymes file to build a list of peptides, and a mod_file if we
# want to apply any modifications.
print "Location of enzymes file and mod_file has to be specified as parameters\n";
exit 1;
# NOTE(review): the closing brace of the argument check seems to be missing
# at this point.
my $enzymefile = open_enzymefile($ARGV[0]);
# Note: $enzymefile *must* be kept in scope for as long as you use
# $Trypsin. See "Using the toolkit from Perl, Java and Python" in Mascot Parser
# manual.
my $Trypsin = $enzymefile->getEnzymeByName('Trypsin');
if (!$Trypsin) {
print "Cannot find 'Trypsin' in the enzyme file. Cannot continue.\n";
exit 1;
# NOTE(review): the closing brace of the Trypsin check seems to be missing
# at this point.
my $modfile = open_modfile($ARGV[1]);
my $aahelper = new msparser::ms_aahelper;
# Note: both $modfile and $Trypsin *must* be kept in scope for as
# long as you use $aahelper. See "Using the toolkit from Perl, Java and
# Python" in Mascot Parser manual.
# NOTE(review): no call that hands $Trypsin or $modfile to $aahelper is
# visible here -- presumably lost from the listing; confirm.
# Now we can generate peptides for a given protein. This is
# RL29_METTP (50S ribosomal protein L29P OS=Methanosaeta thermophila (strain
# DSM 6194 / PT) GN=rpl29p PE=3 SV=1) from SwissProt 2010_09.
# NOTE(review): $proteinStr is used below but its declaration is not visible.
# No missed cleavages are allowed (third parameter).
$aahelper->startIteratePeptides($proteinStr, length($proteinStr), 0);
print "List of peptides\n";
while ($aahelper->getNextPeptide) {
my $start = $aahelper->getPepStart();
my $len = $aahelper->getPepEnd() - $aahelper->getPepStart() + 1;
# getPepStart() returns one-based index.
my $peptideStr = substr($proteinStr, $start - 1, $len);
print $peptideStr, "\n";
# NOTE(review): the closing brace of the while loop seems to be missing here.
print "End of list\n\n";
# Create a list of fixed modifications.
my $vecFixed = new msparser::ms_modvector();
# NOTE(review): the lookup below has no terminator and its result is unused;
# presumably it was the argument of a call that adds it to $vecFixed.
$modfile->getModificationByName('Phospho (Y)')
# Create a list of variable modifications.
my $vecVariable = new msparser::ms_modvector();
# NOTE(review): as above, the two lookups below presumably were arguments of
# calls that add them to $vecVariable; the enclosing calls are not visible.
$modfile->getModificationByName('Oxidation (M)')
$modfile->getModificationByName('Acetyl (N-term)')
# Note: both $vecFixed and $vecVariable *must* be kept in scope for as
# long as you use $aahelper. See "Using the toolkit from Perl, Java and
# Python" in Mascot Parser manual.
$aahelper->setAvailableModifications($vecFixed, $vecVariable);
# ms_aahelper can also contain errors that might happen when applying
# modifications, for instance when we have a conflict between two
# modifications (same residue or same peptide end).
if (!$aahelper->isValid()) {
print "Error while setting available modifications: ";
print $aahelper->getLastErrorString(), "\n";
# We will need also a separate error object for collecting peptide-specific
# errors.
my $err = new msparser::ms_errs();
# Example of how to call calcPeptideMZ(). It will often be more convenient to
# create an ms_peptide instead, and then call getMrCalc() on that object.
my $numThatMustBeModded = new msparser::vectori();
$numThatMustBeModded->push(1); # 1 acetylNterm modification
$numThatMustBeModded->push(1); # 1 site is oxidised
# NOTE(review): only two of the calcPeptideMZ() arguments are visible below;
# the remaining arguments and the closing ");" appear to be missing.
my $mr = $aahelper->calcPeptideMZ(
10, # peptide ends (1-based)
0, # no charge - i.e. Mr
if (!$err->isValid()) {
print "Error while calculating peptide mass: ";
print $err->getLastErrorString(), "\n";
# Don't need to halt as they are not fatal errors.
} else {
printf "Peptide mass calculated using 'calcPeptideMZ' is %8.3f\n", $mr;
# Create a peptide - which we can then fragment.
# Specify which residues are modified by which modification as it has to
# correspond to a modification string:
# Nterm modification + 9 residues + Cterm modification
my $numModded = new msparser::vectori();
$numModded->push(2); # N-term - modified by "Acetyl (N-term)"
$numModded->push(1); # M - modified by "Oxidation (M)"
$numModded->push(0); # A
$numModded->push(0); # I
$numModded->push(0); # F
$numModded->push(0); # R
$numModded->push(0); # I
$numModded->push(0); # D
$numModded->push(0); # E
$numModded->push(0); # I
$numModded->push(0); # R
$numModded->push(0); # C-term
# We have to specify (or at least supply an empty vector) which neutral loss
# value to use, in case there are more than one available for a modification.
my $whichNl = new msparser::vectori();
$whichNl->push(0); # N-term
$whichNl->push(1); # M - has 2 neutral losses. Specify the first (-98)
$whichNl->push(0); # A
$whichNl->push(0); # I
$whichNl->push(0); # F
$whichNl->push(0); # R
$whichNl->push(0); # I
$whichNl->push(0); # D
$whichNl->push(0); # E
$whichNl->push(0); # I
$whichNl->push(0); # R
$whichNl->push(0); # C-term
# NOTE(review): the createPeptide() argument list below looks incomplete and
# is not closed in this listing.
my $peptide = $aahelper->createPeptide(
10, # end positions
$numModded, # modification string-like vector
$whichNl, # which neutral loss to use
0, # no charge
if (!$err->isValid()) {
print "Error while creating a peptide: ";
print $err->getLastErrorString(), "\n";
# Don't need to halt as they are not fatal errors.
} else {
print "\n\nPeptide has been created successfully: ";
print $peptide->getPeptideStr(), "\n";
# Keep a list of fragments from all series
my $all_fragments = new msparser::ms_fragmentvector;
# NOTE(review): each fragmentPeptide() call below shows only its last three
# arguments (series label, double-charged flag, maximum mass); the leading
# arguments, the closing ");" and the bodies of the for loops are not visible.
my $b_ions = fragmentPeptide(
'b-ion series',
0, # single-charged ions only
$mr, # maximal fragment mass to return
# copyFrom() can only be used to populate the list for the first time.
my $fragments = fragmentPeptide(
'y-ion series',
0, # single-charged ions only
$mr, # maximal fragment mass to return
for my $i (0 .. $fragments->getNumberOfFragments - 1) {
$fragments = fragmentPeptide(
'y++-ion series',
2, # double-charged ions only
$mr, # maximal fragment mass to return
for my $i (0 .. $fragments->getNumberOfFragments - 1) {
$fragments = fragmentPeptide(
'internal yb-ion series',
0, # single-charged ions only
700, # maximal fragment mass to return
for my $i (0 .. $fragments->getNumberOfFragments - 1) {
print "Paste the following into a Mascot search query window to verify this output:\n";
# NOTE(review): the two lines below match the parameter list of
# displayMascotTestSearch(); the call itself is not visible in this listing.
$vecFixed, $vecVariable, $Trypsin, $peptide->getMrCalc,
$b_ions # or you can use $all_fragments
# Open the enzymes file and make sure it was loaded successfully.
#
# Parameters:
#   $filename - path of the enzymes file, passed to msparser::ms_enzymefile.
#
# Returns the ms_enzymefile object when its isValid() check passes.
# Otherwise prints the object's last error string to STDOUT and terminates
# the program with exit status 1.
sub open_enzymefile {
    my ($filename) = @_;

    my $enzymefile = new msparser::ms_enzymefile($filename);

    if (!$enzymefile->isValid) {
        print "Error while opening enzyme file: ";
        print $enzymefile->getLastErrorString() . "\n";
        exit 1;
    }

    return $enzymefile;
}
# Open the modifications file and check that it contains every modification
# this example relies on.
#
# Parameters:
#   $filename - path of the mod_file, passed to msparser::ms_modfile together
#               with a default-constructed msparser::ms_masses object.
#
# Returns the ms_modfile object.
# Prints a message to STDOUT and terminates with exit status 1 when the file
# is not valid or when one of 'Oxidation (M)', 'Acetyl (N-term)' or
# 'Phospho (Y)' cannot be found in it.
sub open_modfile {
    my ($filename) = @_;

    # We can use the default masses in this example.
    my $masses = new msparser::ms_masses;
    my $modfile = new msparser::ms_modfile($filename, $masses);

    if (!$modfile->isValid) {
        print "Error while opening mod file: ";
        print $modfile->getLastErrorString . "\n";
        exit 1;
    }

    for ('Oxidation (M)', 'Acetyl (N-term)', 'Phospho (Y)') {
        if (not $modfile->getModificationByName($_)) {
            print "Cannot find '$_' in the mod file. Cannot continue.\n";
            # Stop here, as the message says: the caller uses these
            # modifications without checking them again.
            exit 1;
        }
    }

    return $modfile;
}
# Fragment a peptide for one ion series, print a heading for the series and
# return the fragment vector.
#
# Parameters (as unpacked from @_):
#   $aa_helper, $peptide, $series - not used in the lines visible here;
#                    presumably consumed by the missing call (see note below).
#   $series_label  - text printed in front of " fragments: ".
#   $doubleCharged - truthy/falsy flag, normalised to 1 or 0 before use.
#   $mass_max      - callers describe it as the maximal fragment mass to
#                    return.
#
# Returns the msparser::ms_fragmentvector created inside the sub.
#
# NOTE(review): this listing appears to be missing lines -- the call that the
# argument line "$doubleCharged ? 1 : 0," belongs to, and the closing brace of
# the sub. Confirm against the original example.
sub fragmentPeptide {
my ($aa_helper, $peptide, $series, $series_label, $doubleCharged, $mass_max) = @_;
my $fragments = new msparser::ms_fragmentvector;
# Separate error object for this fragmentation; it is created but never
# inspected in the lines visible here.
my $err = new msparser::ms_errs();
$doubleCharged ? 1 : 0,
# Check $err here.
print $series_label, " fragments: \n";
return $fragments;
# Print the number of fragments in $fragments followed by a table with one
# row per fragment, using the column headings
# Col, Start, End, Label, Mass, NL, Name, Immon, Intern and Reg.
#
# Parameters:
#   $fragments - object providing getNumberOfFragments() and
#                getFragmentByNumber($i).
#
# NOTE(review): this listing appears to be missing lines -- the argument list
# of the per-row printf, the closing brace of the for loop and the closing
# brace of the sub. Confirm against the original example.
sub printFragmentsTable {
my ($fragments) = @_;
print "Number of fragments: ", $fragments->getNumberOfFragments(), "\n";
# Header and row formats share the same column widths; the row format prints
# the two numeric columns (Mass, NL) with two decimals.
my $headerfmt = "%5s %5s %5s %-10s %7s %7s %4s %5s %6s %4s\n";
my $fmt = "%5s %5s %5s %-10s %7.2f %7.2f %4s %5s %6s %4s\n";
printf $headerfmt, qw(Col Start End Label Mass NL Name Immon Intern Reg);
for my $i (0 .. $fragments->getNumberOfFragments - 1) {
my $frag = $fragments->getFragmentByNumber($i);
printf $fmt,
print "\n";
# Print a test search that can be pasted into a Mascot search query window.
#
# Parameters:
#   $vecFixed    - list of fixed mods applied to the peptide; each title is
#                  printed as a "MODS=" line.
#   $vecVariable - list of variable mods applied to the peptide; each title is
#                  printed as an "IT_MODS=" line.
#   $enzyme      - the enzyme used; its title is printed as the "CLE=" line.
#   $mr          - the peptide Mr(calc), printed with three decimals.
#   $fragments   - list of fragments (e.g. the b-ions of the peptide) whose
#                  masses are printed, comma-separated, inside "ions(...)".
#
# Everything is written to STDOUT; nothing meaningful is returned.
sub displayMascotTestSearch {
    my ($vecFixed, $vecVariable, $enzyme, $mr, $fragments) = @_;

    for my $i (0 .. $vecFixed->getNumberOfModifications-1) {
        printf "MODS=%s\n", $vecFixed->getModificationByNumber($i)->getTitle();
    }

    for my $i (0 .. $vecVariable->getNumberOfModifications-1) {
        printf "IT_MODS=%s\n", $vecVariable->getModificationByNumber($i)->getTitle();
    }

    print "CHARGE=Mr\n";
    print "CLE=", $enzyme->getTitle(), "\n";

    # Query line: peptide mass followed by the list of fragment masses.
    printf "%.3f ions(", $mr;
    print join(', ', map {
        sprintf("%.3f", $fragments->getFragmentByNumber($_)->getMass)
    } 0 .. $fragments->getNumberOfFragments-1);
    print ")\n";
}
Running the program as
perl -I../bin tools_aahelper.pl ../config/enzymes ../config/mod_file
will give the following output under Mascot 2.3:
List of peptides
End of list
Peptide mass calculated using 'calcPeptideMZ' is 1320.686
Peptide has been created successfully: MAIFRIDEIR
b-ion series fragments:
Number of fragments: 9
Col Start End Label Mass NL Name Immon Intern Reg
1 1 -1 b(1) 190.05 0.00 b 1
2 2 -1 b(2) 261.09 0.00 b 1
3 3 -1 b(3) 374.17 0.00 b 1
4 4 -1 b(4) 521.24 0.00 b 1
5 5 -1 b(5) 677.34 0.00 b 1
6 6 -1 b(6) 790.43 0.00 b 1
7 7 -1 b(7) 905.45 0.00 b 1
8 8 -1 b(8) 1034.50 0.00 b 1
9 9 -1 b(9) 1147.58 0.00 b 1
y-ion series fragments:
Number of fragments: 9
Col Start End Label Mass NL Name Immon Intern Reg
9 9 -1 y(9) 1132.65 0.00 y 1
8 8 -1 y(8) 1061.61 0.00 y 1
7 7 -1 y(7) 948.53 0.00 y 1
6 6 -1 y(6) 801.46 0.00 y 1
5 5 -1 y(5) 645.36 0.00 y 1
4 4 -1 y(4) 532.27 0.00 y 1
3 3 -1 y(3) 417.25 0.00 y 1
2 2 -1 y(2) 288.20 0.00 y 1
1 1 -1 y(1) 175.12 0.00 y 1
y++-ion series fragments:
Number of fragments: 8
Col Start End Label Mass NL Name Immon Intern Reg
9 9 -1 y(9)++ 566.83 0.00 y 1
8 8 -1 y(8)++ 531.31 0.00 y 1
7 7 -1 y(7)++ 474.77 0.00 y 1
6 6 -1 y(6)++ 401.23 0.00 y 1
5 5 -1 y(5)++ 323.18 0.00 y 1
4 4 -1 y(4)++ 266.64 0.00 y 1
3 3 -1 y(3)++ 209.13 0.00 y 1
2 2 -1 y(2)++ 144.61 0.00 y 1
internal yb-ion series fragments:
Number of fragments: 22
Col Start End Label Mass NL Name Immon Intern Reg
2 2 3 AI 185.13 0.00 yb 1
2 2 4 AIF 332.20 0.00 yb 1
2 2 5 AIFR 488.30 0.00 yb 1
2 2 6 AIFRI 601.38 0.00 yb 1
3 3 4 IF 261.16 0.00 yb 1
3 3 5 IFR 417.26 0.00 yb 1
3 3 6 IFRI 530.34 0.00 yb 1
3 3 7 IFRID 645.37 0.00 yb 1
4 4 5 FR 304.18 0.00 yb 1
4 4 6 FRI 417.26 0.00 yb 1
4 4 7 FRID 532.29 0.00 yb 1
4 4 8 FRIDE 661.33 0.00 yb 1
5 5 6 RI 270.19 0.00 yb 1
5 5 7 RID 385.22 0.00 yb 1
5 5 8 RIDE 514.26 0.00 yb 1
5 5 9 RIDEI 627.35 0.00 yb 1
6 6 7 ID 229.12 0.00 yb 1
6 6 8 IDE 358.16 0.00 yb 1
6 6 9 IDEI 471.24 0.00 yb 1
7 7 8 DE 245.08 0.00 yb 1
7 7 9 DEI 358.16 0.00 yb 1
8 8 9 EI 243.13 0.00 yb 1
Paste the following into a Mascot search query window to verify this output:
MODS=Phospho (Y)
IT_MODS=Oxidation (M)
IT_MODS=Acetyl (N-term)
1320.686 ions(190.053, 261.090, 374.174, 521.243, 677.344, 790.428, 905.455, 1034.498, 1147.582)