Matrix Science Mascot Parser toolkit
 
Loading...
Searching...
No Matches
tools_aahelper.java

For calculating peptide and fragment masses.

/*
##############################################################################
# File: tools_aahelper.java #
# Mascot Parser toolkit example code #
# Acknowledgment: Thanks to Tony Major, Celera Genomics for converting this #
# file from the C++ example #
##############################################################################
# COPYRIGHT NOTICE #
# Copyright 1998-2006 Matrix Science Limited All Rights Reserved. #
# #
##############################################################################
# $Source: parser/examples/test_java/tools_aahelper.java $
# $Author: villek@matrixscience.com $
# $Date: 2018-07-30 16:23:53 +0100 $
# $Revision: 1b450440f9c97e1e41d0fc6016a27d68951d4532 | MSPARSER_REL_3_0_0-2024-09-24-0-g93ebaeb4f4 $
##############################################################################
*/
import matrix_science.msparser.ms_enzymefile;
import matrix_science.msparser.ms_fragmentationrules;
import matrix_science.msparser.msparserConstants;
import matrix_science.msparser.*;
import java.text.*;
/**
 * Mascot Parser toolkit example: uses {@code ms_aahelper} to list the peptides
 * of a protein, calculate a peptide mass, create an {@code ms_peptide} and
 * print several fragment ion series for it.
 *
 * <p>Usage: {@code java tools_aahelper <enzymes file> <mod_file>}
 */
public class tools_aahelper {
    static {
        try {
            System.loadLibrary("msparserj");
        } catch (UnsatisfiedLinkError e) {
            System.err.println("Native code library failed to load. "
                    + "Is msparserj.dll on the path?\n" + e);
            // Nothing in this example works without the native library: report failure.
            System.exit(1);
        }
    }

    /**
     * Runs the example.
     *
     * @param argv {@code argv[0]} is the location of the enzymes file,
     *             {@code argv[1]} the location of the mod_file
     */
    public static void main(String[] argv) {
        // We need an enzyme to build a list of peptides
        // and mod_file file if we want to apply any modifications
        if (argv.length < 2) {
            System.out.println("Location of enzymes file and mod_file has to be specified as parameters");
            System.exit(1); // usage error, so do not report success
        }

        ms_enzymefile enzymefile = new ms_enzymefile(argv[0]);
        if (!enzymefile.isValid()) {
            System.out.println("There are errors. Cannot continue. The last error description:");
            System.out.println(enzymefile.getLastErrorString());
            System.exit(1);
        }

        ms_enzyme enzyme = enzymefile.getEnzymeByName("Trypsin");
        if (enzyme == null) {
            System.out.println("Cannot find Trypsin enzyme in the file. Cannot continue.");
            System.exit(1);
        }

        // we need masses file, but can use default masses anyway
        ms_masses masses = new ms_masses();
        ms_modfile modfile = new ms_modfile(argv[1], masses);
        if (!modfile.isValid()) {
            System.out.println("There are errors. Cannot continue. The last error description:");
            System.out.println(modfile.getLastErrorString());
            System.exit(1);
        }

        ms_modification oxidation = modfile.getModificationByName("Oxidation (M)");
        ms_modification acetylNterm = modfile.getModificationByName("Acetyl (N-term)");
        ms_modification phospho = modfile.getModificationByName("Phospho (STY)");
        if (oxidation == null || acetylNterm == null || phospho == null) {
            System.out.println("Cannot find necessary modifications in the mod_file. Cannot continue.");
            System.exit(1);
        }

        ms_aahelper aahelper = new ms_aahelper();
        aahelper.setMasses(masses);
        aahelper.setEnzyme(enzyme);

        // Now we can generate peptides for a given protein
        String proteinStr = "MAIFRIDEIRNMSSEELEEELRKLEVELIRERGAVRAGGAPEKPGRIREIRRTIARMKTVQRERVRK";
        aahelper.startIteratePeptides(proteinStr, proteinStr.length(), 0); // no missed cleavages are allowed
        System.out.println("List of peptides");
        while (aahelper.getNextPeptide()) {
            int start = aahelper.getPepStart() - 1; // the method returns 1-based position
            int end = aahelper.getPepEnd();
            System.out.println(proteinStr.substring(start, end));
        }
        System.out.println("End of list");

        // create a list of fixed modifications
        ms_modvector vecFixed = new ms_modvector();
        vecFixed.appendModification(phospho);

        // create a list of variable modifications
        ms_modvector vecVariable = new ms_modvector();
        vecVariable.appendModification(oxidation);
        vecVariable.appendModification(acetylNterm);

        aahelper.setAvailableModifications(vecFixed, vecVariable);
        // ms_aahelper can also contain errors that might happen when applying modifications
        // for instance, when we have a conflict between two modifications (same residue or same peptide end)
        if (!aahelper.isValid()) {
            System.out.println("There are errors. Cannot continue. The last error description:");
            System.out.println(aahelper.getLastErrorString());
            System.exit(1);
        }

        // we will need also a separate error-object for collecting peptide-specific errors
        ms_errs err = new ms_errs();

        // Example of how to call calcPeptideMZ
        // It will often be more convenient to create an ms_peptide and then
        // call getMrCalc() on that object.
        // NOTE(review): vecVariable holds "Oxidation (M)" then "Acetyl (N-term)";
        // the two counts below presumably follow that order - confirm against
        // the ms_aahelper documentation.
        vectori numThatMustBeModded = new vectori();
        numThatMustBeModded.add(1); // count for the first variable modification
        numThatMustBeModded.add(1); // count for the second variable modification
        double mr = aahelper.calcPeptideMZ(proteinStr, proteinStr.length(),
                1, 10, // peptide ends (1-based)
                numThatMustBeModded,
                0, // no charge
                msparserConstants.MASS_TYPE_MONO,
                err);
        // don't need to halt on errors here as they are not fatal
        if (!reportAndClearErrors(err, "There have been errors while calculating peptide mass: ")) {
            DecimalFormat massFmt = new DecimalFormat("0.000");
            System.out.println("Peptide mass calculated using 'calcPeptideMZ' is " + massFmt.format(mr));
        }

        // Create a peptide - which we can then fragment
        //
        // Specify which residues are modified by which modification.
        // as it has to correspond to a modification string
        // Nterm modification + 10 residues + Cterm modification
        vectori numModded = new vectori();
        numModded.add(2); // N-term - modified by "Acetyl (N-term)"
        numModded.add(1); // M - modified by "Oxidation (M)"
        numModded.add(0); // A
        numModded.add(0); // I
        numModded.add(0); // F
        numModded.add(0); // R
        numModded.add(0); // I
        numModded.add(0); // D
        numModded.add(0); // E
        numModded.add(0); // I
        numModded.add(0); // R
        numModded.add(0); // C-term

        // we have to specify (or at least supply an empty vector) which neutral loss value to use
        // in case there are more than one available for a modification
        vectori whichNl = new vectori();
        whichNl.add(0); // N-term
        // M - select the first neutral loss of its modification.
        // NOTE(review): an earlier comment quoted "-98" here, which does not look
        // like an "Oxidation (M)" value - confirm against the mod_file.
        whichNl.add(1);
        whichNl.add(0); // A
        whichNl.add(0); // I
        whichNl.add(0); // F
        whichNl.add(0); // R
        whichNl.add(0); // I
        whichNl.add(0); // D
        whichNl.add(0); // E
        whichNl.add(0); // I
        whichNl.add(0); // R
        whichNl.add(0); // C-term

        ms_peptide peptide = aahelper.createPeptide(proteinStr, proteinStr.length(),
                1, 10, // end positions
                numModded, // modification string-like vector
                whichNl, // which neutral loss to use
                0, // no charge
                msparserConstants.MASS_TYPE_MONO,
                err); // collect errors in it
        // don't need to halt on errors here as they are not fatal
        if (!reportAndClearErrors(err, "There have been errors while creating a peptide: ")) {
            System.out.println();
            System.out.println();
            System.out.print("Peptide has been created successfully: ");
            System.out.println(peptide.getPeptideStr());
        }

        final String fragmentErrorHeading = "There have been errors while calculating fragments: ";
        ms_fragmentvector fragments = new ms_fragmentvector();
        ms_fragmentvector allFragments = new ms_fragmentvector(); // Keep a list of fragments from all series

        aahelper.calcFragments(peptide, // that is why we needed to create a peptide object first
                ms_fragmentationrules.FRAG_B_SERIES, // ions series ID
                false, // single-charged ions only
                100.0, // minimal fragment mass to return
                mr, // maximal fragment mass to return
                msparserConstants.MASS_TYPE_MONO,
                fragments,
                err); // collect peptide-specific errors
        reportAndClearErrors(err, fragmentErrorHeading);
        System.out.println("b-ion series fragments: ");
        printFragmentsTable(fragments);
        allFragments.copyFrom(fragments);
        ms_fragmentvector bIons = new ms_fragmentvector();
        bIons.copyFrom(fragments);

        aahelper.calcFragments(peptide, // that is why we needed to create a peptide object first
                ms_fragmentationrules.FRAG_Y_SERIES, // ions series ID
                false, // single-charged ions only
                100.0, // minimal fragment mass to return
                mr, // maximal fragment mass to return
                msparserConstants.MASS_TYPE_MONO,
                fragments,
                err); // collect peptide-specific errors
        reportAndClearErrors(err, fragmentErrorHeading);
        System.out.println("y-ion series fragments: ");
        printFragmentsTable(fragments);
        appendAll(allFragments, fragments);

        aahelper.calcFragmentsEx(peptide, // that is why we needed to create a peptide object first
                ms_fragmentationrules.FRAG_Y_SERIES, // ions series ID
                2, // double-charged only
                100.0, // minimal fragment mass to return
                mr, // maximal fragment mass to return
                msparserConstants.MASS_TYPE_MONO,
                fragments,
                err); // collect peptide-specific errors
        reportAndClearErrors(err, fragmentErrorHeading);
        System.out.println("y++-ion series fragments: ");
        printFragmentsTable(fragments);
        appendAll(allFragments, fragments);

        aahelper.calcFragments(peptide, // that is why we needed to create a peptide object first
                ms_fragmentationrules.FRAG_INTERNAL_YB, // ions series ID
                false, // single-charged ions only
                100.0, // minimal fragment mass to return
                700.0, // maximal fragment mass to return
                msparserConstants.MASS_TYPE_MONO,
                fragments,
                err); // collect peptide-specific errors
        reportAndClearErrors(err, fragmentErrorHeading);
        System.out.println("internal yb-ion series fragments: ");
        printFragmentsTable(fragments);
        appendAll(allFragments, fragments);

        System.out.println("Run a search under Mascot to verify the output above");
        System.out.println("Paste the following into a Mascot search query window:");
        displayMascotTestSearch(vecFixed, vecVariable,
                enzyme,
                mr,
                bIons); // Or you can use allFragments
        System.exit(0);
    }

    /**
     * Prints the last error held by {@code err} under the given heading and
     * clears the error object so it can be re-used.
     *
     * @param err     error collector to inspect
     * @param heading line printed before the error description
     * @return {@code true} if {@code err} was not valid (an error was reported),
     *         {@code false} if there was nothing to report
     */
    private static boolean reportAndClearErrors(ms_errs err, String heading) {
        if (err.isValid()) {
            return false;
        }
        System.out.println(heading);
        System.out.println(err.getLastErrorString());
        err.clearAllErrors(); // prepare to re-use it
        return true;
    }

    /**
     * Appends every fragment of {@code source} to {@code target}, in order.
     *
     * @param target vector that receives the fragments
     * @param source vector whose fragments are appended
     */
    private static void appendAll(ms_fragmentvector target, ms_fragmentvector source) {
        for (int i = 0; i < source.getNumberOfFragments(); i++) {
            target.appendFragment(source.getFragmentByNumber(i));
        }
    }

    /**
     * Prints the number of fragments followed by a tab-separated table with one
     * row per fragment (column, start, end, label, mass, neutral loss, series
     * name and the immonium/internal/regular flags as 1 or 0).
     *
     * @param fragments fragments to print
     */
    public static void printFragmentsTable(ms_fragmentvector fragments) {
        System.out.print("Number of fragments: ");
        System.out.println(fragments.getNumberOfFragments());
        System.out.println("Col\tStart\tEnd\tLabel\t\t Mass\t NL\tName\tImmon\tIntern\tReg");

        // One formatter serves every row; no need to rebuild it per fragment.
        DecimalFormat fragMassFmt = new DecimalFormat("0.00");
        for (int i = 0; i < fragments.getNumberOfFragments(); i++) {
            ms_fragment frag = fragments.getFragmentByNumber(i);
            StringBuilder row = new StringBuilder();
            row.append(frag.getColumn()).append('\t');
            row.append(frag.getStart()).append('\t');
            row.append(frag.getEnd()).append('\t');
            row.append(padding(frag.getLabel(), 10, " ")).append('\t');
            row.append(padout(fragMassFmt.format(frag.getMass()), 7, " ")).append('\t');
            row.append(fragMassFmt.format(frag.getNeutralLoss())).append('\t');
            row.append(frag.getSeriesName()).append('\t');
            row.append(frag.isImmonium() ? '1' : '0').append('\t');
            row.append(frag.isInternal() ? '1' : '0').append('\t');
            row.append(frag.isRegular() ? '1' : '0').append('\n');
            System.out.print(row);
        }
        System.out.println();
    }

    /**
     * Prints a query that can be pasted into a Mascot search query window.
     *
     * @param vecFixed    fixed modifications, printed as {@code MODS=} lines
     * @param vecVariable variable modifications, printed as {@code IT_MODS=} lines
     * @param enzyme      enzyme whose title is printed on the {@code CLE=} line
     * @param mr          peptide mass printed in front of the ion list
     * @param fragments   fragments whose masses form the {@code ions(...)} list
     */
    public static void displayMascotTestSearch(ms_modvector vecFixed,
            ms_modvector vecVariable,
            ms_enzyme enzyme,
            double mr,
            ms_fragmentvector fragments) {
        /* fragments contains a list of b-ions from a peptide
         * vecVariable contains a list of variable mods applied to the peptide
         * vecFixed contains a list of fixed mods applied to the peptide
         * Use this information to generate a test search that can be run on Mascot.
         */
        for (int i = 0; i < vecFixed.getNumberOfModifications(); i++) {
            System.out.print("MODS=");
            System.out.println(vecFixed.getModificationByNumber(i).getTitle());
        }
        for (int i = 0; i < vecVariable.getNumberOfModifications(); i++) {
            System.out.print("IT_MODS=");
            System.out.println(vecVariable.getModificationByNumber(i).getTitle());
        }
        System.out.println("CHARGE=Mr");
        System.out.print("CLE=");
        System.out.println(enzyme.getTitle());
        System.out.println("INSTRUMENT=MALDI-TOF-TOF");

        DecimalFormat massFmt = new DecimalFormat("#0.000");
        StringBuilder query = new StringBuilder(massFmt.format(mr)).append(" ions(");
        for (int i = 0; i < fragments.getNumberOfFragments(); i++) {
            if (i > 0) {
                query.append(", ");
            }
            double mz = fragments.getFragmentByNumber(i).getMass();
            query.append(massFmt.format(mz));
        }
        System.out.println(query.append(")"));
    }

    /**
     * Left-pads {@code toPrint} by prepending one copy of {@code padding} for
     * every character it is shorter than {@code length}.
     *
     * @param toPrint text to pad
     * @param length  minimum length to pad to
     * @param padding text inserted once per missing character
     * @return the padded text, or {@code toPrint} unchanged if it is already
     *         at least {@code length} characters long
     */
    private static String padout(String toPrint, int length, String padding) {
        StringBuilder padded = new StringBuilder();
        for (int missing = length - toPrint.length(); missing > 0; missing--) {
            padded.append(padding);
        }
        return padded.append(toPrint).toString();
    }

    /**
     * Right-pads {@code toPrint} by appending one copy of {@code padding} for
     * every character it is shorter than {@code length}.
     *
     * @param toPrint text to pad
     * @param length  minimum length to pad to
     * @param padding text appended once per missing character
     * @return the padded text, or {@code toPrint} unchanged if it is already
     *         at least {@code length} characters long
     */
    private static String padding(String toPrint, int length, String padding) {
        StringBuilder padded = new StringBuilder(toPrint);
        for (int missing = length - toPrint.length(); missing > 0; missing--) {
            padded.append(padding);
        }
        return padded.toString();
    }
}
/*
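Example output recorded from a run of this example.
NOTE: it appears to come from a different revision of the example: the peptides
listed below do not occur in the proteinStr used above, the fixed/variable
modification assignment differs, and the y++ series section is missing.
Treat it as an illustration of the output format only.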
C:\msparser\example_java>java -classpath .;../java/msparser.jar tools_aahelper c:\Inetpub\MASCOT\config\enzymes c:\Inetpub\MASCOT\config\mod_file
List of peptides
M
MEDYLDELR
EDYLDELR
HK
IPSFIVELLK
NNLK
NR
NLTR
NQLNK
IVNR
VSDLYFGK
KPEDK
K
AAELTNK
INDLSHK
LDALMK
VATVSSATK
VSDDIK
K
EIDNLDELDL
End of list
Peptide mass calculated using 'calcPeptideMZ' is 1320.494
Peptide has been created successfully: MEDYLDELR
b-ion series fragments:
Number of fragments: 8
Col Start End Label Mass NL Name Immon Intern Reg
1 1 -1 b(1) 190.05 0.00 b 0 0 1
2 2 -1 b(2) 319.10 0.00 b 0 0 1
3 3 -1 b(3) 434.12 0.00 b 0 0 1
4 4 -1 b(4) -97 579.18 97.98 b 0 0 1
5 5 -1 b(5) -97 692.26 97.98 b 0 0 1
6 6 -1 b(6) -97 807.29 97.98 b 0 0 1
7 7 -1 b(7) -97 936.33 97.98 b 0 0 1
8 8 -1 b(8) -97 1049.41 97.98 b 0 0 1
y-ion series fragments:
Number of fragments: 8
Col Start End Label Mass NL Name Immon Intern Reg
8 8 -1 y(8) -97 1034.48 97.98 y 0 0 1
7 7 -1 y(7) -97 905.44 97.98 y 0 0 1
6 6 -1 y(6) -97 790.41 97.98 y 0 0 1
5 5 -1 y(5) 645.36 0.00 y 0 0 1
4 4 -1 y(4) 532.27 0.00 y 0 0 1
3 3 -1 y(3) 417.25 0.00 y 0 0 1
2 2 -1 y(2) 288.20 0.00 y 0 0 1
1 1 -1 y(1) 175.12 0.00 y 0 0 1
internal yb-ion series fragments:
Number of fragments: 18
Col Start End Label Mass NL Name Immon Intern Reg
2 2 3 ED 245.08 0.00 yb 0 1 0
2 2 4 EDY -97 390.13 97.98 yb 0 1 0
2 2 5 EDYL -97 503.21 97.98 yb 0 1 0
2 2 6 EDYLD -97 618.24 97.98 yb 0 1 0
3 3 4 DY -97 261.09 97.98 yb 0 1 0
3 3 5 DYL -97 374.17 97.98 yb 0 1 0
3 3 6 DYLD -97 489.20 97.98 yb 0 1 0
3 3 7 DYLDE -97 618.24 97.98 yb 0 1 0
4 4 5 YL -97 259.14 97.98 yb 0 1 0
4 4 6 YLD -97 374.17 97.98 yb 0 1 0
4 4 7 YLDE -97 503.21 97.98 yb 0 1 0
4 4 8 YLDEL -97 616.30 97.98 yb 0 1 0
5 5 6 LD 229.12 0.00 yb 0 1 0
5 5 7 LDE 358.16 0.00 yb 0 1 0
5 5 8 LDEL 471.24 0.00 yb 0 1 0
6 6 7 DE 245.08 0.00 yb 0 1 0
6 6 8 DEL 358.16 0.00 yb 0 1 0
7 7 8 EL 243.13 0.00 yb 0 1 0
Run a search under Mascot to verify the output above
Paste the following into a Mascot search query window:
MODS=Oxidation (M)
IT_MODS=Acetyl (N-term)
IT_MODS=Phospho (STY)
CHARGE=Mr
CLE=Trypsin
INSTRUMENT=MALDI-TOF-TOF
1320.494 ions(190.053, 319.096, 434.123, 579.176, 692.260, 807.287, 936.329, 1049.413)
*/