Matrix Science Mascot Parser toolkit
 
Loading...
Searching...
No Matches
repeat_search.pl

Repeating a search from a Mascot results file.

#!/usr/local/bin/perl
##############################################################################
# file: repeat_search.pl #
# 'msparser' toolkit #
# Test harness / example code #
##############################################################################
# COPYRIGHT NOTICE #
# Copyright 1998-2010 Matrix Science Limited All Rights Reserved. #
# #
##############################################################################
# $Source: parser/examples/test_perl/repeat_search.pl $ #
# $Author: robertog@matrixscience.com $ #
# $Date: 2024-09-10 15:47:19 +0100 $ #
# $Revision: 5cb3eebb919bbd2919409b6a166299325b2a1a58 | MSPARSER_REL_3_0_0-2024-09-24-0-g93ebaeb4f4 $ #
# $NoKeywords:: $ #
##############################################################################
use strict;
##############################################################################
# This script must be run in the Mascot Server cgi directory.
use lib '../bin';
use msparser;
# A results file name is mandatory: without one, show the usage text and
# stop with a failure status.
unless (defined $ARGV[0]) {
    usage();
    exit(1);
}

# When this script runs as a cgi script, the environment may already carry
# a boundary string. It will not match the simple boundary string used for
# the request built in repeatSearch, so nph-mascot.exe would fail. Removing
# both variables avoids that.
delete @ENV{ 'CONTENT_TYPE', 'CONTENT_LENGTH' };

repeatSearch($ARGV[0]);
# Repeat the search stored in a Mascot results file and compare the outcome
# with the original search.
#
# Parameters:
#   $filename - name of the results file whose search is to be repeated.
#
# The sub builds a MIME form-data request from the search parameters and the
# per-query repeat-search strings of the original file, pipes it to
# nph-mascot.exe (whose output is redirected to tmp.txt in the current
# directory) and then scans tmp.txt:
#   * a line containing SUCCESS is followed by the new results file name,
#     which is handed to compareResults();
#   * a line containing ERROR is echoed together with everything after it.
#
# Returns nothing; every problem is reported on STDOUT.
sub repeatSearch {
    my ($filename) = @_;

    my $resfile = msparser::ms_mascotresfilebase::createResfile($filename);
    if (!$resfile->isValid) {
        print "Cannot open results file ", $filename, ": ";
        print $resfile->getLastErrorString(), "\n";
        return;
    }

    my @s = ();
    push @s, "----12345";
    push @s, 'Content-Disposition: form-data; name="QUE"';
    push @s, '';

    # Parameters section
    my $keys = msparser::VectorString->new;
    my $vals = msparser::VectorString->new;
    $resfile->getSearchParametersKeyValues($keys, $vals);

    for my $i (0 .. $keys->size - 1) {
        my $key = $keys->get($i);
        my $val = $vals->get($i);

        # Parameters with an empty (false) value are skipped, as are the
        # keys that are either dropped or replaced further down.
        # To search against a different database, add 'and key ne "DB"'
        if ($val
            and ($key ne "INTERMEDIATE")
            and ($key ne "RULES")
            and ($key ne "INTERNALS")
            and ($key ne "SEARCH")) {
            push @s, $key . "=" . $val;
        }
    }

    # To search against a different DB add e.g.
    # push @s, "DB=MY_DB";

    # Most flexible to repeat each search as a 'sequence' search.
    push @s, "SEARCH=SQ";

    # For ms-ms data, tell nph-mascot where to find the ions data
    push @s, "INTERMEDIATE=" . $filename;

    # Now the repeat search data
    for my $q (1 .. $resfile->getNumQueries) {
        push @s, $resfile->getRepeatSearchString($q);
    }

    # Terminating line for MIME format file
    push @s, "----12345--";

    # Start nph-mascot.exe, and redirect the output to tmp.txt
    # (Could use open2 here, but this can be unreliable with nph-mascot.exe)
    # For Unix systems, change nph-mascot.exe to ./nph-mascot.exe
    my $command = 'nph-mascot.exe 4 -commandline > tmp.txt';
    my $sock;
    if (!open($sock, '|-', $command)) {
        print "Cannot start nph-mascot.exe: $!\n";
        return;
    }
    print {$sock} $_, "\n" for @s;

    # Closing a pipe waits for the child; a false result means either a
    # system error ($! set) or a non-zero exit status ($? set). tmp.txt is
    # still inspected below because it may hold the error details.
    if (!close $sock) {
        print "nph-mascot.exe did not finish cleanly",
            ($! ? " ($!)" : " (exit status " . ($? >> 8) . ")"), "\n";
    }

    my $fh;
    if (!open($fh, '<', 'tmp.txt')) {
        print "Cannot read tmp.txt: $!\n";
        return;
    }

    while (my $line = <$fh>) {
        if ($line =~ /SUCCESS/) {
            # Next line contains the results file name
            my $buffer = <$fh>;
            if (!defined $buffer) {
                print "Search reported SUCCESS but no results file name followed\n";
                last;
            }
            chomp $buffer;
            compareResults($resfile, $buffer);
            next;
        }

        if ($line =~ /ERROR/) {
            print "Search failed: ", $line;
            # Print details of error messages (the rest of the file)
            while (my $detail = <$fh>) {
                print $detail;
            }
            last;
        }
    }

    close $fh;
    return;
}
# Compare the scores of a repeated search with those of the original search
# and print a short report on STDOUT.
#
# Parameters:
#   $originalSearch         - results file object of the original search.
#   $repeatedSearchFileName - file name of the results produced by the
#                             repeated search; it is opened here.
#
# If the original search reports anyPMF(), hit 1 of the two protein
# summaries is compared; otherwise the peptide returned by getPeptide($q, 1)
# is compared for every query of the original search (presumably the
# top-ranked match - verify against the msparser documentation).
#
# A line is printed whenever the repeated score exceeds the original score
# by more than 10. When nothing is printed, a single "Similar results" line
# is emitted instead.
# NOTE(review): only increases are reported; a repeated score that is lower
# by any amount still counts as similar - confirm this is intended.
#
# Returns nothing.
sub compareResults {
    my ($originalSearch, $repeatedSearchFileName) = @_;

    my $repeatedSearch = msparser::ms_mascotresfilebase::createResfile($repeatedSearchFileName);
    if (!$repeatedSearch->isValid()) {
        print "Invalid repeat search: ", $repeatedSearch->getLastErrorString(), "\n";
        return;
    }

    my $originalFileName = $originalSearch->getFileName();
    my $anyReport        = 0;

    if ($originalSearch->anyPMF()) {
        # Use protein summary
        my $originalResults = msparser::ms_proteinsummary->new($originalSearch);
        my $repeatedResults = msparser::ms_proteinsummary->new($repeatedSearch);

        my $originalProt = $originalResults->getHit(1);
        my $repeatedProt = $repeatedResults->getHit(1);

        if ($originalProt and $repeatedProt) {
            # Positive when the repeated search scored higher.
            my $diff = $repeatedProt->getScore() - $originalProt->getScore();
            if ($diff > 10.0) {
                print "Protein score is ", $diff, " higher for search ";
                print $repeatedSearchFileName;
                print " than ";
                print $originalFileName;
                print "\n";
                $anyReport = 1;
            }
        }
    }
    else {
        # Use peptide summary
        my $originalResults = msparser::ms_peptidesummary->new($originalSearch);
        my $repeatedResults = msparser::ms_peptidesummary->new($repeatedSearch);

        # Compare peptide scores
        for my $q (1 .. $originalSearch->getNumQueries) {
            my $pepOriginal = $originalResults->getPeptide($q, 1);
            my $pepRepeated = $repeatedResults->getPeptide($q, 1);

            # Nothing to compare if either summary has no peptide object.
            next unless $pepOriginal and $pepRepeated;

            # Positive when the repeated search scored higher.
            my $diff = $pepRepeated->getIonsScore() - $pepOriginal->getIonsScore();
            if ($diff > 10.0) {
                print "Query ", $q, " has score ", $diff;
                print " higher for search ", $repeatedSearchFileName;
                print " than ", $originalFileName, "\n";
                $anyReport = 1;
            }
        }
    }

    if (!$anyReport) {
        print "Similar results for ", $originalFileName;
        print " and ", $repeatedSearchFileName, "\n";
    }

    return;
}
# Print the command-line help for this script on the currently selected
# output handle (STDOUT unless changed).
sub usage {
    my @helpLines = (
        'Usage: repeat_search.pl <results file>',
        'Given an mascot results file name, repeat the search against the same data.',
        'The program must be run from the mascot cgi directory.',
    );

    # Each help line is terminated by a newline, exactly one per line.
    print join('', map { $_ . "\n" } @helpLines);
}