Matrix Science Mascot Parser toolkit
 
Loading...
Searching...
No Matches
ms_taxonomyrules.hpp
1/*
2##############################################################################
3# file: ms_taxonomyrules.hpp #
4# 'msparser' toolkit #
5# Encapsulates the "mascot.dat" file, which describes the most important parameters #
6##############################################################################
7# COPYRIGHT NOTICE #
8# Copyright 1998-2003 Matrix Science Limited All Rights Reserved. #
9# #
10##############################################################################
11# $Archive:: /Mowse/ms_mascotresfile/include/ms_taxonomyrules.hpp $ #
12# $Author: francoisr@matrixscience.com $ #
13# $Date: 2022-01-12 12:34:37 +0000 $ #
14# $Revision: b158d77079a10225ce667f6e66a6df8412823ac0 | MSPARSER_REL_3_0_0-2024-09-24-0-g93ebaeb4f4 $ #
15# $NoKeywords:: $ #
16##############################################################################
17*/
18
19#ifndef MS_TAXONOMYRULES_HPP
20#define MS_TAXONOMYRULES_HPP
21
22
23#include <string>
24#include <vector>
25
26
27namespace matrix_science {
39 {
// Body of enum TAX_SPECIES_FORMAT. The enum's introducer line and the
// enumerators on listing lines 40-45 are not present in this extract; per the
// listing's index they are:
//   TAX_SPECIES_NCBI      - NCBI names.dmp taxonomy species files
//   TAX_SPECIES_SWISSPROT - SwissProt
//   TAX_SPECIES_PDB       - PDB
//   TAX_SPECIES_GI2TAXID  - GI2TAXID
//   TAX_SPECIES_ACC2TAXID - simple 'accession taxID' (any whitespace)
//   TAX_SPECIES_EXPLICIT  - no lookup required; the ID is given in the
//                           description line
// NOTE(review): the sentinel below presumably evaluates to the number of real
// formats because it is kept last - confirm against the complete header.
46 TAX_SPECIES_FORMAT_COUNT /* Always leave this one last */
47 };
48
// Describes one entry of taxonomy species files: a TAX_SPECIES_FORMAT value
// together with a file name (see the private members at the bottom).
// Only declarations appear here; the member definitions are not in this header.
50 class MS_MASCOTRESFILE_API ms_taxspeciesfiles
51 {
// ms_datfile and ms_taxonomyrules are friends, so they can reach the private
// members, including getStringValue().
52 friend class ms_datfile;
53 friend class ms_taxonomyrules;
54
55 public:
// Listing lines 58, 61 and 64 carry no text in this extract.
58
61
64
// NOTE(review): presumably resets format_ and filename_ to defaults; the
// default values themselves are not visible in this header.
66 void defaultValues();
67
// Copies from another instance passed by pointer.
// NOTE(review): handling of a null 'right' is not visible here - confirm.
69 void copyFrom(const ms_taxspeciesfiles* right);
70
// The assignment operator is hidden from the SWIG wrapper generator.
71#ifndef SWIG
73 ms_taxspeciesfiles& operator=(const ms_taxspeciesfiles& right);
74#endif
// Accessors for the species-file format (presumably backed by format_).
76 TAX_SPECIES_FORMAT getFormat() const;
77
79 void setFormat(const TAX_SPECIES_FORMAT value);
80
// Accessors for the file name (presumably backed by filename_). The getter
// returns a std::string by value; the setter takes a C string.
82 std::string getFileName() const;
83
85 void setFileName(const char* name);
86
87 private:
88 TAX_SPECIES_FORMAT format_;
89 std::string filename_;
90
// Private helper, reachable only by this class and its friends.
// NOTE(review): the format of the returned string is not visible here.
91 std::string getStringValue() const;
92 }; // class ms_taxspeciesfiles
93
100 {
// Body of enum TAX_NODE_FORMAT. The enum's introducer line and the enumerators
// on listing lines 101-102 are not present in this extract; per the listing's
// index they are:
//   TAX_NODE_NCBI    - NCBI nodes.dmp format
//   TAX_NODE_GENCODE - NCBI gencode.dmp format
103 };
104
106
// Holds a file name and a TAX_NODE_FORMAT for a taxonomy nodes file or a
// genetic codes file (see the private members at the bottom).
// Only declarations appear here; the member definitions are not in this header.
108 class MS_MASCOTRESFILE_API ms_taxnodesfiles
109 {
// ms_datfile and ms_taxonomyrules are friends, so they can reach the private
// members, including getStringValue().
110 friend class ms_datfile;
111 friend class ms_taxonomyrules;
112 public:
// Listing lines 113-122 carry no text in this extract.
113
116
119
122
// NOTE(review): presumably resets format_ and filename_ to defaults; the
// default values themselves are not visible in this header.
124 void defaultValues();
125
// Copies from another instance passed by pointer.
// NOTE(review): handling of a null 'right' is not visible here - confirm.
127 void copyFrom(const ms_taxnodesfiles* right);
128
// The assignment operator is hidden from the SWIG wrapper generator.
129#ifndef SWIG
131 ms_taxnodesfiles& operator=(const ms_taxnodesfiles& right);
132#endif
// Accessors for the node-file format (presumably backed by format_).
134 TAX_NODE_FORMAT getFormat() const;
135
137 void setFormat(const TAX_NODE_FORMAT value);
138
// Accessors for the file name (presumably backed by filename_). The getter
// returns a std::string by value; the setter takes a C string.
140 std::string getFileName() const;
141
143 void setFileName(const char* name);
144
145 private:
146 TAX_NODE_FORMAT format_;
147 std::string filename_;
// Private helper, reachable only by this class and its friends.
// NOTE(review): the format of the returned string is not visible here.
148 std::string getStringValue() const;
149 }; // class ms_taxnodesfiles
150
// A regular-expression parse rule (ms_parserule) plus additional parameters:
// the species file type to search, a chop-source bit mask and a database name
// (see the private members at the bottom).
// Only declarations appear here; the member definitions are not in this header.
152 class MS_MASCOTRESFILE_API ms_parserule_plus
153 {
// ms_datfile and ms_taxonomyrules are friends, so they can reach the private
// members, including getStringValue().
154 friend class ms_datfile;
155 friend class ms_taxonomyrules;
156
157 public:
159
// Body of enum TAX_CHOP_TYPES (its introducer line is not present in this
// extract). The three constants are distinct single bits, so they can be
// combined into one TAX_CHOP_SRC value.
163 {
164 TAX_CHOP_PREFIX = 0x0001,
165 TAX_CHOP_SUFFIX = 0x0002,
166 TAX_CHOP_WORDS = 0x0004
167 };
168
// Type of the parameter that specifies how to chop a source line.
// NOTE(review): presumably a bitwise OR of TAX_CHOP_TYPES constants - confirm.
170 typedef unsigned int TAX_CHOP_SRC;
171
// Listing lines 174, 177 and 180 carry no text in this extract.
174
177
180
// NOTE(review): presumably resets all members to defaults; the default values
// themselves are not visible in this header.
182 void defaultValues();
183
// Copies from another instance passed by pointer.
// NOTE(review): handling of a null 'right' is not visible here - confirm.
185 void copyFrom(const ms_parserule_plus* right);
186
// The assignment operator is hidden from the SWIG wrapper generator.
187#ifndef SWIG
189 ms_parserule_plus& operator=(const ms_parserule_plus& right);
190#endif
// Accessors for the species file type (presumably backed by fileTypeToSearch_).
192 TAX_SPECIES_FORMAT getFileTypeToSearch() const;
193
195 void setFileTypeToSearch(const TAX_SPECIES_FORMAT value);
196
// Accessors for the parse rule. Both directions use a pointer, while the
// member rule_ is held by value.
// NOTE(review): presumably the getter points at rule_ and the setter copies
// from *src - confirm, since that determines the returned pointer's lifetime.
198 const ms_parserule* getRule() const;
199
201 void setRule(const ms_parserule* src);
202
// Accessors for the chop-source mask (presumably backed by chopSrc_).
204 TAX_CHOP_SRC getChopSource() const;
205
207 void setChopSource(const TAX_CHOP_SRC value);
208
// Accessors for the database name (presumably backed by nameOfDb_). The getter
// returns a std::string by value; the setter takes a C string.
210 std::string getNameOfDB() const;
211
213 void setNameOfDB(const char* name);
214
215 private:
216 TAX_SPECIES_FORMAT fileTypeToSearch_;
217 ms_parserule rule_;
218 TAX_CHOP_SRC chopSrc_;
219 std::string nameOfDb_;
// Private helper, reachable only by this class and its friends.
// NOTE(review): the format of the returned string is not visible here.
220 std::string getStringValue() const;
221 }; // ms_parserule_plus
222
224
// Represents a single Taxonomy_XXX section in mascot.dat. Derives from
// ms_customproperty, the base for property-containing classes.
// Only declarations appear here; the member definitions are not in this header.
// NOTE(review): the pairing of each accessor with a private member below is
// inferred from matching names and types - confirm against the implementation.
246 class MS_MASCOTRESFILE_API ms_taxonomyrules: public ms_customproperty
247 {
// ms_datfile is a friend, so it can reach the private members.
248 friend class ms_datfile;
249
250 public:
// Listing lines 253, 256 and 259 carry no text in this extract.
253
256
259
// NOTE(review): presumably resets all members to defaults; the default values
// themselves are not visible in this header.
261 void defaultValues();
262
// Copies from another instance passed by pointer.
// NOTE(review): handling of a null 'right' is not visible here - confirm.
264 void copyFrom(const ms_taxonomyrules* right);
265
// The assignment operator is hidden from the SWIG wrapper generator.
266#ifndef SWIG
268 ms_taxonomyrules& operator=(const ms_taxonomyrules& right);
269#endif
// --- Scalar settings: one getter/setter pair per value ---------------------
271 bool isSectionAvailable() const;
272
274 void setSectionAvailable(const bool value);
275
277 bool isEnabled() const;
278
280 void setEnabled(const bool flag);
281
283 std::string getIdentifier() const;
284
286 void setIdentifier(const char* str);
287
289 int getErrorLevel() const;
290
292 void setErrorLevel(const int value);
293
295 bool isFromRefFile() const;
296
298 void setFromRefFile(const bool flag);
299
301 bool isConcatRefFileLines() const;
302
304 void setConcatRefFileLines(const bool flag);
305
// The description-line separator is a single char.
307 char getDescriptionLineSep() const;
308
310 void setDescriptionLineSep(const char value);
311
// --- Indexed lists: count / get-by-index / clear / append ------------------
// NOTE(review): the behaviour for an out-of-range index is not visible in this
// header - confirm before relying on it.
313 int getNumberOfNoBreakDescLineIf() const;
314
316 std::string getNoBreakDescLineIf(const int index) const;
317
319 void clearNoBreakDescLineIf();
320
322 void appendNoBreakDescLineIf(const char* str);
323
325 int getNumberOfSpeciesFiles() const;
326
328 const ms_taxspeciesfiles * getSpeciesFile(const int index) const;
329
331 void clearSpeciesFiles();
332
334 void appendSpeciesFile(const ms_taxspeciesfiles * item);
335
// Legacy 'Str' accessors: each declaration below is marked as no longer in use
// by the existing notes (presumably backed by strStrFiles_ and strStrRule_).
336 // no longer in use
337 int getNumberOfStrFiles() const;
338 // no longer in use
339 const ms_taxspeciesfiles * getStrFile(const int index) const;
340 // no longer in use
341 void clearStrFiles();
342 // no longer in use
343 void appendStrFile(const ms_taxspeciesfiles * item);
344
345 // no longer in use
346 const ms_parserule_plus* getStrRule() const;
347 // no longer in use
348 void setStrRule(const ms_parserule_plus* src);
349
// Nodes files and gencode files both use the ms_taxnodesfiles entry type.
351 int getNumberOfNodesFiles() const;
352
354 const ms_taxnodesfiles * getNodesFile(const int index) const;
355
357 void clearNodesFiles();
358
360 void appendNodesFile(const ms_taxnodesfiles * item);
361
363 int getNumberOfGencodeFiles() const;
364
366 const ms_taxnodesfiles * getGencodeFile(const int index) const;
367
369 void clearGencodeFiles();
370
372 void appendGencodeFile(const ms_taxnodesfiles * item);
373
// Default rule: exchanged by pointer, stored by value in defaultRule_.
375 const ms_parserule_plus* getDefaultRule() const;
376
378 void setDefaultRule(const ms_parserule_plus* src);
379
381 int getNumberOfPrefixRemoves() const;
382
384 std::string getPrefixRemove(const int index) const;
385
387 void clearPrefixRemoves();
388
390 void appendPrefixRemove(const char * item);
391
393 int getNumberOfSuffixRemoves() const;
394
396 std::string getSuffixRemove(const int index) const;
397
399 void clearSuffixRemoves();
400
402 void appendSuffixRemove(const char * item);
403
// --- Parse rules: exchanged by pointer, stored by value --------------------
405 const ms_parserule* getSrcDatabaseRule() const;
406
408 void setSrcDatabaseRule(const ms_parserule* src);
409
411 int getNumberOfPerDbSrcRules() const;
412
414 const ms_parserule_plus * getPerDbSrcRule(const int index) const;
415
417 void clearPerDbSrcRules();
418
420 void appendPerDbSrcRule(const ms_parserule_plus * item);
421
423 const ms_parserule* getDoThisRuleFirst() const;
424
426 void setDoThisRuleFirst(const ms_parserule* src);
427
429 const ms_parserule* getAccFromSpeciesLine() const;
430
432 void setAccFromSpeciesLine(const ms_parserule* src);
433
435 std::string getQuickRefSearch() const;
436
438 void setQuickRefSearch(const char* str);
439
// Database-level taxonomy ID: besides get/set there is a presence query and a
// clear operation.
// NOTE(review): how "not set" is represented in dbLevelTaxId_ is not visible
// here - confirm before comparing against a sentinel.
441 int getDBLevelTaxId() const;
442
444 void setDBLevelTaxId(const int value);
445
447 bool isDBLevelTaxId() const;
448
450 void clearDBLevelTaxId();
451
453 bool isMitochondrialTranslation() const;
454
456 void setMitochondrialTranslation(const bool flag);
457
// Per-format regular expression strings. The setter's 'regex' parameter is a
// by-value std::string.
// NOTE(review): speciesFormatRegex_ is presumably indexed by
// TAX_SPECIES_FORMAT - confirm.
459 std::string getSpeciesFormatRegex(const TAX_SPECIES_FORMAT format) const;
460
462 void setSpeciesFormatRegex(const TAX_SPECIES_FORMAT format, const std::string regex);
463
// When SUPPRESS_MS_CUSTOMPROPERTY_INHERITANCE is defined, the contents of
// suppress_ms_customproperty.hpp are included inside this class body (its
// contents are not visible in this listing).
464#ifdef SUPPRESS_MS_CUSTOMPROPERTY_INHERITANCE
465#include "suppress_ms_customproperty.hpp"
466#endif
467
468 private:
469 bool sectionAvailable_;
470 bool enabled_;
471 std::string identifier_;
472 int errorLevel_;
473 bool fromRefFile_;
474 bool concatRefFileLines_;
475 char descriptionLineSep_;
476
// NOTE(review): several of the vectors below hold raw pointers; who allocates
// and frees the pointed-to objects is not visible in this header - confirm
// before changing copy or clear logic.
477 std::vector< std::string > noBreakDescLineIf_;
478 std::vector< ms_taxspeciesfiles* > speciesFiles_;
479 std::vector< ms_taxspeciesfiles* > strStrFiles_;
480 std::vector< ms_taxnodesfiles* > nodesFiles_;
481 std::vector< ms_taxnodesfiles* > gencodeFiles_;
482 std::vector< ms_parserule_plus* > perDbSrcRules_;
483 std::vector< std::string > prefixRemoves_;
484 std::vector< std::string > suffixRemoves_;
485 std::vector< std::string > speciesFormatRegex_;
486
// Rules held by value.
487 ms_parserule srcDatabaseRule_;
488 ms_parserule doThisRuleFirst_;
489 ms_parserule accFromSpeciesLine_;
490 ms_parserule_plus defaultRule_;
491 ms_parserule_plus strStrRule_;
492
493 std::string quickRefSearch_;
494 int dbLevelTaxId_;
495 bool mitochondrialTranslation_;
496 }; // class ms_taxonomyrules
497
498 class ms_tinycdb;
499 class ms_taxonomychoice;
500
502
// The complete taxonomy tree as built from one or more files such as
// nodes.dmp. Derives from ms_errors, a common base class for several Mascot
// Parser classes.
// Only declarations appear here; the member definitions are not in this header.
540 class MS_MASCOTRESFILE_API ms_taxonomytree : public ms_errors
541 {
542 public:
// Constructs the tree from a set of taxonomy rules. Only taxonomyRules is
// mandatory; the defaults are taxonomyDirectory "../taxonomy", useIndex true,
// createList false, preparingDirectory "../taxonomy/preparing" and
// oldDirectory "../taxonomy/old".
// NOTE(review): the relative defaults are presumably resolved against the
// process working directory, and the exact effect of useIndex/createList is
// not visible here - confirm.
544 ms_taxonomytree(const ms_taxonomyrules * taxonomyRules,
545 const char * taxonomyDirectory = "../taxonomy",
546 const bool useIndex = true,
547 const bool createList = false,
548 const char * preparingDirectory = "../taxonomy/preparing",
549 const char * oldDirectory = "../taxonomy/old");
550
// Copy constructor.
552 ms_taxonomytree(const ms_taxonomytree & src);
553
// Listing line 556 carries no text in this extract.
556
// Copies from another instance passed by pointer.
// NOTE(review): handling of a null 'right' is not visible here - confirm.
558 void copyFrom(const ms_taxonomytree * right);
559
// Everything inside this guard is hidden from the SWIG wrapper generator: the
// assignment operator, the node type and direct access to the node array.
560#ifndef SWIG
562 ms_taxonomytree& operator=(const ms_taxonomytree & right);
563
// One tree node: two integer fields.
// NOTE(review): presumably the parent's taxonomy ID and a genetic-code table
// ID (compare getParent's ttParent/ttGenTable) - confirm.
564 struct TAX_TREE_NODE {
565 int parentId;
566 int tableId;
567 };
568 typedef std::vector<TAX_TREE_NODE> TAX_TREE_NODES;
569
// Returns a non-const pointer to a node vector.
// NOTE(review): presumably points at nodes_, so callers could modify the tree
// in place - confirm.
571 TAX_TREE_NODES * getTaxIDArray();
572#endif
573
// NOTE(review): presumably reports usingCDB_ - confirm.
575 bool usingIndex() const;
576
578 std::string getTaxonomyAndIndexFiles() const;
579
// Note the argument order: the candidate parent ID comes first, then the ID
// being tested.
581 bool isSpeciesDescendantOf(const int parentID, const int id) const;
582
// Two results are passed back through the int reference parameters in addition
// to the bool return. In the SWIG build both are named OUTPUT so that the
// wrapper exposes them as extra return values.
583#ifndef SWIG
585 bool getParent(const int id, int & ttParent, int & ttGenTable) const;
586#else // SWIG Multiple return values
587 bool getParent(const int id, int & OUTPUT, int & OUTPUT) const;
588#endif
589
591 bool isIncludedIn(const int id, const ms_taxonomychoice * choice) const;
592
593 private:
594 bool usingCDB_;
595 std::vector<ms_taxnodesfiles> files_;
// NOTE(review): TAX_TREE_NODES is declared only when SWIG is not defined
// (see the guard above), yet this member uses it unconditionally.
596 TAX_TREE_NODES nodes_;
597 std::vector<std::string> fileNames_;
// Raw pointers to ms_tinycdb (a wrapper for the tinycdb package).
// NOTE(review): who allocates and frees them is not visible in this header.
598 std::vector<ms_tinycdb *> cdbFiles_;
599
// Private helper that reads a single file.
// NOTE(review): the meaning of the bool result is not visible here.
600 bool readFile(const std::string & filename,
601 const bool isMitochondrialTranslation);
602
603 };
604
605 // end of config_group
607} // namespace matrix_science
608
609#endif // MS_TAXONOMYRULES_HPP
610
611/*------------------------------- End of File -------------------------------*/
The class is used as a base for property-containing classes, such as ms_mascotoptions.
Definition: ms_customproperty.hpp:91
Encapsulates the mascot.dat file that contains the most important parameters.
Definition: ms_datfile.hpp:47
This class is used as a base class for several Mascot Parser classes.
Definition: ms_errors.hpp:696
Represents regular expression parse rule plus some additional parameters.
Definition: ms_taxonomyrules.hpp:153
unsigned int TAX_CHOP_SRC
Data type used for the parameter specifying how to chop a source line. This will be zero or more of the TAX_CHOP_TYPES constants combined.
Definition: ms_taxonomyrules.hpp:170
TAX_CHOP_TYPES
Constants used for combining TAX_CHOP_SRC values.
Definition: ms_taxonomyrules.hpp:163
Represents a single regular expression parsing rule.
Definition: ms_parserule.hpp:38
Filenames and formats for taxonomy nodes or genetic codes files.
Definition: ms_taxonomyrules.hpp:109
The class represents a single taxonomy choice entry in the taxonomy file.
Definition: ms_taxonomyfile.hpp:49
This class represents a single Taxonomy_XXX section in mascot.dat.
Definition: ms_taxonomyrules.hpp:247
The complete taxonomy tree as built from one or more files such as nodes.dmp.
Definition: ms_taxonomyrules.hpp:541
An instance of this class describes one entry of taxonomy species files.
Definition: ms_taxonomyrules.hpp:51
Wrapper for the public domain tinycdb package http://www.corpit.ru/mjt/tinycdb.html by Michael Tokarev.
Definition: ms_tinycdb.hpp:124
TAX_SPECIES_FORMAT
Definition: ms_taxonomyrules.hpp:39
TAX_NODE_FORMAT
Definition: ms_taxonomyrules.hpp:100
@ TAX_SPECIES_NCBI
NCBI names.dmp taxonomy species files.
Definition: ms_taxonomyrules.hpp:40
@ TAX_SPECIES_GI2TAXID
GI2TAXID.
Definition: ms_taxonomyrules.hpp:43
@ TAX_SPECIES_EXPLICIT
No lookup required, because the ID is given in the description line. For example: >IPI:IPI00000001....
Definition: ms_taxonomyrules.hpp:45
@ TAX_SPECIES_PDB
PDB.
Definition: ms_taxonomyrules.hpp:42
@ TAX_SPECIES_SWISSPROT
SwissProt.
Definition: ms_taxonomyrules.hpp:41
@ TAX_SPECIES_ACC2TAXID
Simple 'accession taxID' (any whitespace).
Definition: ms_taxonomyrules.hpp:44
@ TAX_NODE_GENCODE
NCBI gencode.dmp format.
Definition: ms_taxonomyrules.hpp:102
@ TAX_NODE_NCBI
NCBI nodes.dmp format.
Definition: ms_taxonomyrules.hpp:101