Example program extracting peptide ratios from a Reporter or Multiplex file.
#!/usr/local/bin/perl
##############################################################################
# file: resfile_ms2quantitation.pl #
# Mascot Parser toolkit example code #
##############################################################################
# COPYRIGHT NOTICE #
# Copyright 1998-2013 Matrix Science Limited All Rights Reserved. #
# #
##############################################################################
# $Source: parser/examples/test_perl/resfile_ms2quantitation.pl $
# $Author: robertog@matrixscience.com $
# $Date: 2024-09-04 10:23:46 +0100 $
# $Revision: 526921a73137894bb1eae0b0fc8ccb4bb52ea662 | MSPARSER_REL_3_0_0-2024-09-24-0-g93ebaeb4f4 $
##############################################################################
use strict;
use warnings;
##############################################################################
use msparser;
if (@ARGV != 4) {
print STDERR "Usage: $0 <quantitation schema 1 filepath> <quantitation schema 2 filepath> <unimod schema filepath> <results file>\n";
print STDERR "The location of schema files should be e.g.\n";
print STDERR " ../html/xmlns/schema/quantitation_1/quantitation_1.xsd\n";
print STDERR " ../html/xmlns/schema/quantitation_2/quantitation_2.xsd\n";
print STDERR " ../html/xmlns/schema/unimod_2/unimod_2.xsd\n";
print STDERR "if running from the Mascot cgi directory.\n";
exit 1;
}
my $quant_schema1_filepath = $ARGV[0];
my $quant_schema2_filepath = $ARGV[1];
my $unimod_schema_filepath = $ARGV[2];
my $resfile = msparser::ms_mascotresfilebase::createResfile($ARGV[3], 1);
if (not $resfile->isValid) {
print STDERR $resfile->getLastErrorString(), "\n";
exit 1;
}
# And print out related warnings, if any (we know these are only warnings,
# because isValid() returned true earlier):
dump_warnings($resfile->getErrorHandler());
$resfile->clearAllErrors();
$resfile->setXMLschemaFilePath(
$msparser::ms_mascotresfilebase::XML_SCHEMA_QUANTITATION,
"http://www.matrixscience.com/xmlns/schema/quantitation_2 $quant_schema2_filepath http://www.matrixscience.com/xmlns/schema/quantitation_1 $quant_schema1_filepath"
);
$resfile->setXMLschemaFilePath(
$msparser::ms_mascotresfilebase::XML_SCHEMA_UNIMOD,
"http://www.unimod.org/xmlns/schema/unimod_2 $unimod_schema_filepath",
);
my $qmethod = msparser::ms_quant_method->new();
if (not $resfile->getQuantitationMethod($qmethod)) {
print STDERR $resfile->getLastErrorString(), "\n";
exit 1;
}
my $pepsum;
do {
my $datfile = msparser::ms_datfile->new();
my (undef, $flags, $minprob, $maxhits, $iisb, $minpeplen, $use_pepsum, $flags2)
= $resfile->get_ms_mascotresults_params($datfile->getMascotOptions());
if (not $use_pepsum) {
print STDERR "Results file cannot be opened as a peptide summary\n";
exit 1;
}
$pepsum = msparser::ms_peptidesummary->new(
$resfile, $flags, $minprob, $maxhits, '', $iisb, $minpeplen, '', $flags2
);
if (not $resfile->isValid) {
print STDERR $resfile->getLastErrorString, "\n";
exit 1;
}
dump_warnings($resfile->getErrorHandler());
$resfile->clearAllErrors();
};
# Quantitate the file using ms_ms2quantitation:
my $ms2quant = msparser::ms_ms2quantitation->new($pepsum, $qmethod);
if (not $ms2quant->isValid) {
print STDERR $ms2quant->getLastErrorString(), "\n";
exit 1;
}
if ($ms2quant->hasIntensityNormalisation()) {
$ms2quant->normaliseIntensities();
} elsif ($ms2quant->hasPeptideRatioNormalisation()) {
$ms2quant->normalisePeptideRatios();
}
# Update the quantitation method in case ms_ms2quantitation changed any of
# the parameters:
$qmethod = $ms2quant->getQuantitationMethod();
dump_warnings($ms2quant->getErrorHandler());
$ms2quant->clearAllErrors();
# Now we can inspect peptide and protein data in $ms2quant.
# But first, dump quantitation method parameters to see what we have:
do {
my @comps = ();
for my $i (0 .. $qmethod->getNumberOfComponents()-1) {
my $comp = $qmethod->getComponentByNumber($i);
$comps[$i] = $comp->getName();
}
printf "Components: [%s]\n", join(', ', @comps);
};
my @rationames = ();
for my $i (0 .. $qmethod->getNumberOfReportRatios()-1) {
push @rationames, $qmethod->getReportRatioByNumber($i)->getName();
}
printf "Report ratios: [%s]\n", join(', ', @rationames);
printf "Protein ratio type = %s\n", $qmethod->getProteinRatioType();
printf "Min. number of peptides = %d\n", $qmethod->getMinNumPeptides();
if ($qmethod->haveQuality()) {
my $q = $qmethod->getQuality();
printf "Quality: min. precursor charge = %s\n", $q->getMinPrecursorCharge();
printf "Quality: pep. threshold type = %s\n", $q->getPepThresholdType();
printf "Quality: pep. threshold value = %s\n", $q->getPepThresholdValue();
} else {
printf "Quality: no restrictions\n";
}
if ($qmethod->haveNormalisation()) {
my $q = $qmethod->getNormalisation();
printf "Normalisation = %s\n", $q->getMethod();
if ($q->haveProteins()) {
my $p = $q->getProteins();
printf " - Accessions = %s\n", join(', ',
map { $p->getProtein($_)->getAccession() } 0 .. $p->getNumberOfProteins()-1
);
}
if ($q->havePeptides()) {
my $p = $q->getPeptides();
printf " - Sequences = %s\n", join(', ',
map { $p->getPeptide($_)->getSequence() } 0 .. $p->getNumberOfPeptides()-1
);
}
my @component_bases = ();
for (1 .. $qmethod->getNumberOfComponents()) {
my $base = $ms2quant->getIntensityNormalisationBase($qmethod->getComponentByNumber($_-1)->getName());
push @component_bases, $base;
}
printf "Intensity normalisation constants: [%s]\n", join(', ', @component_bases);
printf "Ratio normalisation constants: [%s]\n", join(', ',
map { $ms2quant->getPeptideRatioNormalisationBase($_) } @rationames
);
} else {
printf "Normalisation: none\n";
}
if ($qmethod->haveOutliers()) {
my $q = $qmethod->getOutliers();
printf "Outliers = %s\n", $q->getMethod();
} else {
printf "Outliers: none\n";
}
print "\n";
# Collect proteins we're interested in:
my @proteins = ();
for my $i (1 .. $pepsum->getNumberOfHits()) {
my $hit = $pepsum->getHit($i);
push @proteins, $hit;
my $j = 0;
while (my $protein = $pepsum->getNextFamilyProtein($i, ++$j)) {
push @proteins, $protein;
}
}
for my $protein (@proteins) {
printf "%d::%s:\n", $protein->getDB(), $protein->getAccession();
for (@rationames) {
my $ratio = $ms2quant->getProteinRatio($protein->getAccession(), $protein->getDB(), $_);
if ($ratio->isMissing()) {
printf "%10s: ---\n", $_;
} else {
printf "%10s = %10g, stderr = %10g, N = %d, normal-p = %6g, hypo-p = %6g\n",
$_, $ratio->getValue(), $ratio->getStandardError(),
$ratio->getSampleSize(), $ratio->getNormalityPvalue(),
$ratio->getHypothesisPvalue();
}
}
print "\n";
print join(' ', map { sprintf("%10s", $_) } 'Peptide', @rationames), "\n";
for my $i (1 .. $protein->getNumPeptides()) {
next if $protein->getPeptideDuplicate($i) == $msparser::ms_protein::DUPE_DuplicateSameQuery;
my $q = $protein->getPeptideQuery($i);
my $p = $protein->getPeptideP($i);
my $peptide = $pepsum->getPeptide($q, $p);
next if not $peptide;
my @values = ();
for (@rationames) {
# Note that the q,p arguments to ms_peptide_quant_key must be
# integers. Otherwise the wrong constructor is chosen. Here they
# are integers, as they are return values from Parser functions
# that return ints.
my $ratio = $ms2quant->getPeptideRatio(msparser::ms_peptide_quant_key->new($q, $p), $_);
if ($ratio->isMissing()) {
push @values, sprintf("%10s", '---');
next;
}
if ($ratio->isInfinite()) {
push @values, sprintf("%10s", '###');
} else {
push @values, sprintf("%10g", $ratio->getValue());
}
}
printf "%10s %s\n", sprintf("q%d_p%d", $q, $p), join(' ', @values);
}
print "\n";
}
sub dump_warnings {
my ($err) = @_;
for my $i (1 .. $err->getNumberOfErrors()) {
print 'Warning: ', $err->getErrorString($i), "\n";
}
}