Matrix Science Mascot Parser toolkit
 
Loading...
Searching...
No Matches
repeat_search.cpp

Repeating a search from a Mascot results file.

/*
##############################################################################
# file: repeat_search.cpp #
# 'msparser' toolkit #
# Test harness / example code #
##############################################################################
# COPYRIGHT NOTICE #
# Copyright 1998-2005 Matrix Science Limited All Rights Reserved. #
# #
##############################################################################
# $Source: parser/examples/test_cxx/repeat_search.cpp $ #
# $Author: robertog@matrixscience.com $ #
# $Date: 2024-09-04 10:23:46 +0100 $ #
# $Revision: 526921a73137894bb1eae0b0fc8ccb4bb52ea662 | MSPARSER_REL_3_0_0-2024-09-24-0-g93ebaeb4f4 $ #
# $NoKeywords:: $ #
##############################################################################
*/
#include "msparser.hpp"
#include <stdio.h> // For 'popen'
#include <string.h> // For 'strstr' and 'strcspn'
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
// When building for Windows (_WIN32 defined), alias the popen/pclose names
// used in this file to the underscore-prefixed _popen/_pclose functions.
#ifdef _WIN32
#define popen _popen
#define pclose _pclose
#endif
// The Mascot Parser classes (ms_mascotresfilebase, ms_proteinsummary, ...)
// are used unqualified throughout this file.
using namespace matrix_science;
// forward declarations
// Repeats the search held in the results file 'filename'; true on success.
static bool repeatSearch(const char * filename);
// Prints the command line help text and returns the exit status 1.
static int usage(const char * progName);
// Compares the scores of the original search with those in the results
// file named 'repeatedSearchFileName' and prints the outcome.
static void compareResults(ms_mascotresfilebase & originalSearch,
const char * repeatedSearchFileName);
// Program entry point.
//
// Expects exactly one command line argument: the path of the results file
// whose search is to be repeated.
//
// Returns 0 if the repeat search succeeded, and 1 if it failed or if the
// wrong number of arguments was supplied (the value returned by usage()).
int main(int argc, char * argv[])
{
    if (argc != 2)
        return usage(argv[0]);

    // repeatSearch() reports success as 'true'. Returning that bool directly
    // would give exit status 1 on success and 0 on failure, which is the
    // reverse of the usual convention, so translate it explicitly.
    return repeatSearch(argv[1]) ? 0 : 1;
}
// Repeats the search stored in the results file 'filename'.
//
// The search parameters and the repeat-search string of every query are read
// from the results file and assembled into a MIME form-data body, which is
// piped to nph-mascot.exe. The output of nph-mascot.exe is redirected to
// tmp.txt in the current directory and then scanned line by line:
//   - the line following one that contains "SUCCESS" is passed to
//     compareResults() as the name of the repeated search results file;
//   - a line containing "ERROR", and everything after it, is echoed to stdout.
//
// Returns true if a "SUCCESS" line followed by another line was found,
// false otherwise (including when the results file, the pipe or tmp.txt
// cannot be opened).
static bool repeatSearch(const char * filename)
{
    ms_mascotresfilebase file = ms_mascotresfilebase::createResfile(filename);
    if (!file.isValid())
    {
        std::cout << "Cannot open results file " << filename
                  << " " << file.getLastErrorString() << std::endl;
        return false;
    }

    std::ostringstream s; // Build up a MIME format string with all parameters
    s << "----12345\n"
      << "Content-Disposition: form-data; name=\"QUE\""
      << "\n\n";

    // Parameters section
    std::vector<std::string> keys, values;
    file.getSearchParametersKeyValues(keys, values);
    // Only walk the pairs present in both vectors, so that a length mismatch
    // can never index past the end of 'values'.
    const std::vector<std::string>::size_type numParams =
        keys.size() < values.size() ? keys.size() : values.size();
    for (std::vector<std::string>::size_type i = 0; i < numParams; ++i)
    {
        const std::string & key = keys[i];
        const std::string & val = values[i];
        // INTERMEDIATE and SEARCH are replaced by the values written below;
        // RULES is not passed on either.
        // To search against a different database, add && key != "DB"
        if (!val.empty() && key != "INTERMEDIATE" && key != "RULES" && key != "SEARCH")
            s << key << "=" << val << "\n";
    }
    // To search against a different DB add: s << "DB=MY_DB" << "\n";

    // Most flexible to repeat each search as a 'sequence' search.
    s << "SEARCH=SQ" << "\n";
    // For ms-ms data, tell nph-mascot where to find the ions data
    s << "INTERMEDIATE=" << filename << "\n";

    // Now the repeat search data
    const int numQueries = file.getNumQueries();
    for (int q = 1; q <= numQueries; q++)
        s << file.getRepeatSearchString(q) << "\n";
    s << "----12345--\n"; // terminating line for MIME format file

    // Start nph-mascot.exe, and redirect the output to tmp.txt.
    // The command is given relative to the current directory.
    FILE * f = popen("./nph-mascot.exe 4 -commandline > tmp.txt", "w"); // 4 = repeat search
    if (!f)
    {
        std::cout << "Cannot open pipe to nph-mascot.exe to repeat search for file "
                  << filename << std::endl;
        return false;
    }

    // Take a single copy of the request and make sure all of it was written.
    const std::string request = s.str();
    const bool sent =
        fwrite(request.data(), 1, request.size(), f) == request.size();
    pclose(f); // waits for the command to finish, so tmp.txt is complete
    if (!sent)
    {
        // Still fall through and read tmp.txt: it may hold the reason.
        std::cout << "Could not send the complete search request to nph-mascot.exe for file "
                  << filename << std::endl;
    }

    f = fopen("tmp.txt", "r");
    if (!f)
    {
        std::cout << "Can't open tmp file. \n";
        return false;
    }

    bool success = false;
    char buf[1000];
    while (fgets(buf, sizeof(buf), f))
    {
        if (strstr(buf, "SUCCESS") && fgets(buf, sizeof(buf), f))
        {
            // The line after SUCCESS is used as the results file name.
            // Cut it at the line terminator; unlike indexing with
            // strlen(buf)-1 this is also safe if the line is empty.
            buf[strcspn(buf, "\r\n")] = '\0';
            compareResults(file, buf);
            success = true;
        }
        else if (strstr(buf, "ERROR"))
        {
            // Echo the error line and the remainder of the output
            std::cout << "Search failed: " << buf;
            while (fgets(buf, sizeof(buf), f))
                std::cout << buf;
        }
    }
    fclose(f);
    return success;
}
// Prints "<amount> higher for search <A> than <B>" where <A> is whichever of
// the two searches has the higher score. 'diff' is repeated minus original.
static void reportHigherSearch(double diff,
                               const std::string & originalName,
                               const std::string & repeatedName)
{
    const bool repeatedIsHigher = diff > 0.0;
    std::cout << (repeatedIsHigher ? diff : -diff)
              << " higher for search "
              << (repeatedIsHigher ? repeatedName : originalName)
              << " than "
              << (repeatedIsHigher ? originalName : repeatedName)
              << std::endl;
}

// Compares the results of the original search with those of the repeated
// search and prints the outcome to stdout.
//
// originalSearch         - the already opened original results file
// repeatedSearchFileName - name of the results file of the repeated search
//
// If the original search has any PMF queries (anyPMF()), the score of hit 1
// of a protein summary is compared. Otherwise the ions score of the rank 1
// peptide is compared for every query using a peptide summary. Scores that
// differ by more than 10 in either direction are reported, naming the search
// with the higher score first. If nothing is reported, a single
// "Similar results" line is printed instead.
static void compareResults(ms_mascotresfilebase & originalSearch,
                           const char * repeatedSearchFileName)
{
    // Scores further apart than this are reported as different.
    const double scoreTolerance = 10.0;

    ms_mascotresfilebase repeatedSearch = ms_mascotresfilebase::createResfile(repeatedSearchFileName);
    if (!repeatedSearch.isValid())
    {
        std::cout << "Invalid repeat search "
                  << repeatedSearch.getLastErrorString() << std::endl;
        return;
    }

    const std::string originalName = originalSearch.getFileName();
    const std::string repeatedName(repeatedSearchFileName ? repeatedSearchFileName : "");
    bool anyReport = false;

    if (originalSearch.anyPMF())
    {
        // Use protein summary
        ms_proteinsummary originalResults(originalSearch);
        ms_proteinsummary repeatedResults(repeatedSearch);
        ms_protein * originalProt = originalResults.getHit(1);
        ms_protein * repeatedProt = repeatedResults.getHit(1);
        if (originalProt && repeatedProt)
        {
            const double diff = repeatedProt->getScore() - originalProt->getScore();
            if (diff > scoreTolerance || diff < -scoreTolerance)
            {
                std::cout << "Protein score is ";
                reportHigherSearch(diff, originalName, repeatedName);
                anyReport = true;
            }
        }
    }
    else
    {
        // Use peptide summary
        ms_peptidesummary originalResults(originalSearch);
        ms_peptidesummary repeatedResults(repeatedSearch);
        // Compare peptide scores
        const int numQueries = originalSearch.getNumQueries();
        for (int q = 1; q <= numQueries; q++)
        {
            ms_peptide * pepOriginal = 0;
            ms_peptide * pepRepeated = 0;
            if (originalResults.getPeptide(q, 1, pepOriginal)
                && repeatedResults.getPeptide(q, 1, pepRepeated)
                && pepOriginal && pepRepeated)
            {
                const double diff = pepRepeated->getIonsScore()
                                  - pepOriginal->getIonsScore();
                if (diff > scoreTolerance || diff < -scoreTolerance)
                {
                    std::cout << "Query " << q << " has score ";
                    reportHigherSearch(diff, originalName, repeatedName);
                    anyReport = true;
                }
            }
        }
    }

    if (!anyReport)
        std::cout << "Similar results for "
                  << originalName
                  << " and "
                  << repeatedName
                  << std::endl;
}
// Prints the command line help text to stdout.
//
// progName - name the program was started with (normally argv[0]). It may be
//            a null pointer, in which case "repeat_search" is printed.
//
// Always returns 1, so that the caller can use the result directly as the
// exit status for a usage error.
static int usage(const char * progName)
{
    // Streaming a null 'const char *' is undefined behaviour and can leave
    // std::cout in a failed state, so substitute a fixed name.
    const char * name = progName ? progName : "repeat_search";
    std::cout << name << " results_file " << std::endl
              << "Given a mascot results file name, repeat the search "
              << "against the same data" << std::endl
              << " results_file is a full path to a results file" << std::endl
              << "The program must be run from the mascot cgi directory"
              << std::endl;
    return 1;
}
bool isValid() const
Call this function to determine if there have been any errors.
Definition: ms_errors.cpp:1472
Abstract base class of ms_mascotresfile_dat and ms_mascotresfile_msr.
Definition: ms_mascotresfilebase.hpp:72
std::string getLastErrorString() const
Return the last error string - or an empty string.
Definition: ms_mascotresfilebase.cpp:824
virtual bool anyPMF() const =0
Returns true if any of the queries in the search just contain a single peptide mass.
virtual std::string getRepeatSearchString(const int query, const bool fullQuery=false) const =0
Returns the string that needs to be built up in order to perform a repeat search.
virtual int getNumQueries(const int resfileID=0) const =0
Returns the number of queries (peptide masses or ms-ms spectra).
virtual std::string getFileName(const int id=1) const =0
Returns the name of the results file passed into the constructor.
This class encapsulates a peptide from the mascot results file.
Definition: ms_peptide.hpp:57
double getIonsScore() const
Returns the ions score.
Definition: ms_peptide.cpp:1050
Use this class to get peptide summary results.
Definition: ms_peptidesummary.hpp:51
This class encapsulates a protein in the mascot results file.
Definition: ms_protein.hpp:57
double getScore() const
Return the protein score for this protein.
Definition: ms_protein.cpp:574
Definition: ms_proteinsummary.hpp:45