Matrix Science Mascot Parser toolkit
 
Loading...
Searching...
No Matches
ms_spectral_lib_file.hpp
1/*
2##############################################################################
3# File: ms_spectral_lib_file.hpp #
4# Mascot Parser toolkit #
5# Encapsulates spectral library files (e.g. msp, sptext) #
6# #
7##############################################################################
8# COPYRIGHT NOTICE #
9# Copyright 2015 Matrix Science Limited All Rights Reserved. #
10# #
11##############################################################################
12# $Source: parser/inc/ms_spectral_lib_file.hpp $
13# $Author: dcreasy@matrixscience.com $
14# $Date: 2019-10-23 11:32:40 +0100 $
15# $Revision: e1e932b0e014aa84a197578948d47baeca33fd07 | MSPARSER_REL_3_0_0-2024-09-24-0-g93ebaeb4f4 $
16##############################################################################
17*/
18
19#ifndef MS_SPECTRAL_LIB_FILE
20#define MS_SPECTRAL_LIB_FILE
21
22
23// Includes from the standard template library
24
25#include <map>
26#include <set>
27#include <string>
28
29
30namespace matrix_science {
31
38
65 class MS_MASCOTRESFILE_API ms_spectral_lib_file : public ms_errors
66 {
67 public:
69 ms_spectral_lib_file(const char * fileName, const char * regexForAccession, const char * cdbFileName = 0);
70
71#ifndef SWIG
73 ms_spectral_lib_file(const char * fileName, const char * regexForAccession, const char * cdbFileName, const std::map<std::string, std::string> & modificationAliases);
74#endif
75
78
80 int getNumEntries() const;
81
83 int getNumResidues() const;
84
86 std::vector<int> findEntries(const char * sequence, const char * checksum = 0, const char * accession = 0, const char * mods=0) const;
87
89 ms_spectral_lib_entry getEntryFromNumber(const int number) const;
90
92 std::vector<std::string> getEntryFromNumberAsText(const int number) const;
93
94
96 std::string getAccessionFromNumber(const int number) const;
97
99 std::string getChecksumFromNumber(const int number) const;
100
102 std::string getSequenceFromNumber(const int number) const;
103
105 std::string getModsFromNumber(const int number) const;
106
108 double getPrecursorMZFromNumber(const int number) const;
109
111 int getPrecursorChargeFromNumber(const int number) const;
112
114 std::vector<std::string> getAllMods() const;
115
117 ms_spectral_lib::FILE_FORMAT getFormat() const;
118
120 bool saveAs(const char * fileName,
121 const bool replaceProteinName = true,
122 ms_spectral_lib::FILE_FORMAT fileFormat = ms_spectral_lib::FORMAT_NIST_MSP,
123 const int startNumber = 1,
124 const int endNumber = -1,
125 const ms_spectral_lib_entry::WHAT_TO_ANNOTATE whatToAnnotate = ms_spectral_lib_entry::ANNOTATE_REPLACE_QUESTION_MARKS,
126 const double annotateTol = 0.6,
127 const char * annotateTolu = "Da",
128 const ms_umod_configfile * unimod = 0) const;
129
131 std::string getStatsInformation() const;
132
134 std::string getFileName() const;
135
137 long getQmatch(double minMz, double maxMz) const;
138
140 bool verifyThatModsAreInUnimod(const ms_umod_configfile & unimod);
141
142 private:
143 //Do not copy this object
145 ms_spectral_lib_file& operator=(const ms_spectral_lib_file & right);
146
147 struct sequentialIndex_t {
148 std::string accession;
149 std::string checksum;
150 std::string sequence;
151 std::string mods;
152 double precursorMZ;
153 int precursorCharge;
154 OFFSET64_T fileOffset;
155 };
156 void initialise(const char * fileName, const char * regexForAccession, const char * cdbFileName, const std::map<std::string, std::string> & aliases);
157 bool readFile(int recordNumber) const;
158 void getline(std::string & str) const;
159 ms_errs::err getEntryAsText(const int number, std::vector<std::string> & lines, std::string & errorMsg) const;
160
161 void saveIndexes(const ms_spectral_lib_entry & msp,
162 const OFFSET64_T fileOffset);
163 OFFSET64_T getFileOffsetFromId(int number) const;
164 const char * getSpectrumDelimiter() const;
165
166 int applyModAliases(const std::map<std::string, std::string> & aliases);
167 void replaceModAliases(std::string & comment, const ms_spectral_lib_entry & spect) const;
168
169 // the following are always valid - with or without a cdb file
170 std::string fileName_;
171 mutable int numEntries_; // May be less than the total in the file if it's being read on demand
172 mutable int numResidues_; // May be less than the total in the file if it's being read on demand
173 ms_parserule parseRule_;
174 mutable bool wholeFileRead_;
175 FILE * ifs_;
176 mutable ms_spectral_lib::FILE_FORMAT fileFormat_;
177
178 // the following are valid when creating or using the cdb file
179 ms_tinycdb * cdb_;
180
181 // the following are only valid when no cdb specified *or* when *first creating* the cdb file
182 mutable OFFSET64_T offsetToNextRecord_;
183 typedef std::map<std::string, std::vector<int> > strLookup_t; // e.g. sequence => vector of spectra
184 std::vector<strLookup_t> lookup_;
185 std::vector<sequentialIndex_t> sequentialIndex_;
186 typedef std::map<std::string, std::string> modMap_t; // nameInMsp => nameInUnimod (or empty if no alias)
187 mutable modMap_t modsMap_;
188
189 typedef std::multiset<double> precursors_t;
190 mutable precursors_t precursors_;
191
192 }; // class ms_spectral_lib_file
193 // end of tools_group
195} // namespace matrix_science
196
197#endif // MS_SPECTRAL_LIB_FILE
198
199/*------------------------------- End of File -------------------------------*/
This class is used as a base class for several Mascot Parser classes.
Definition: ms_errors.hpp:696
msg_num_t err
Definitions for error numbers.
Definition: ms_errors.hpp:41
Represents a single regular expression parsing rule.
Definition: ms_parserule.hpp:38
This class is used to encapsulate a single entry, i.e. single spectrum from a NIST ....
Definition: ms_spectral_lib_entry.hpp:68
WHAT_TO_ANNOTATE
Used by annotatePeaks to specify if existing annotation should be overwritten.
Definition: ms_spectral_lib_entry.hpp:76
This class is used to encapsulate a complete NIST .msp, SpectraST .sptxt or X!Hunter ASL MGF file.
Definition: ms_spectral_lib_file.hpp:66
Wrapper for the public domain tinycdb package http://www.corpit.ru/mjt/tinycdb.html by Michael Tokare...
Definition: ms_tinycdb.hpp:124
This class represents the file unimod.xml.
Definition: ms_umod_configfile.hpp:54
FILE_FORMAT
Definition: ms_spectral_lib_peak.hpp:48