Matrix Science Mascot Parser toolkit
 
Loading...
Searching...
No Matches
ms_unigene.hpp
1/*
2##############################################################################
3# file: ms_unigene.hpp #
4# 'msparser' toolkit #
5# Encapsulates a Unigene file (from NCBI) #
6# #
7##############################################################################
8# COPYRIGHT NOTICE #
9# Copyright 1998-2003 Matrix Science Limited All Rights Reserved. #
10# #
11##############################################################################
12# $Archive:: /Mowse/ms_mascotresfile/include/ms_unigene.hpp $ #
13# $Author: robertog@matrixscience.com $ #
14# $Date: 2023-08-29 15:05:00 +0100 $ #
15# $Revision: 59a8ab98151c86e0bdeddc2efea60d287d6087ba | MSPARSER_REL_3_0_0-2024-09-24-0-g93ebaeb4f4 $ #
16# $NoKeywords:: $ #
17##############################################################################
18*/
19
20#ifndef MS_MASCOTRESUNIGENE_HPP
21#define MS_MASCOTRESUNIGENE_HPP
22
23
24// Includes from the C and C++ standard libraries
25#include <stdio.h>
26#include <string>
27#include <vector>
28
29
30
31namespace matrix_science {
32 class ms_tinycdb;
33
40
// This class encapsulates a single entry from a unigene file.
//
// All accessors below are inline and return the stored member by value.
45 class MS_MASCOTRESFILE_API ms_unigene_entry
46 {
47 public:
// Construct an entry from an open file handle and the owning ms_unigene
// object. NOTE(review): presumably reads one entry from `f` at its current
// position - the definition is not visible in this header, confirm.
49 ms_unigene_entry(FILE * f, ms_unigene & unigene);
50
// Construct an entry from explicit values; the parameters correspond
// one-to-one with the private data members declared at the end of the class.
// NOTE(review): the definition is not visible here - confirm that each
// argument is simply stored in the matching member.
52 ms_unigene_entry(const std::string & id,
53 const std::string & title,
54 const std::string & gene,
55 const std::string & cytoBand,
56 const std::string & locusLink,
57 const std::string & express,
58 const long chromosome,
59 const int numAccessions,
60 const OFFSET64_T fileOffset);
61
64
66
// Return the number of accessions (gi numbers or EMBL accessions) that
// comprise this entry.
71 int getNumAccessions() const { return numAccessions_; }
72
74
// Return the 'ID' of this entry - e.g. Hs.4.
77 std::string getID() const { return id_; }
78
80
// Return the 'title' of this entry - e.g. "alcohol dehydrogenase...".
83 std::string getTitle() const { return title_; }
84
86
// Return the gene name for this entry - e.g. "ADH1B".
89 std::string getGene() const { return gene_; }
90
92
// Return the CYTOBAND - e.g. 4q21-q23.
95 std::string getCytoBand() const { return cytoBand_; }
96
98
// Return the LocusLink - e.g. 125.
102 std::string getLocusLink() const { return locuslink_; }
103
105
// Return the EXPRESS entry. Can be very long - 5000 bytes. The string is
// returned by value, so each call copies it.
111 std::string getExpress() const { return express_; }
112
114
// Return the chromosome that contains the unigene entry.
117 long getChromosome() const { return chromosome_; }
118
120
// Return the offset into the unigene data file for this entry.
124 OFFSET64_T getFileOffset() const { return fileOffset_; }
125
126 protected:
127 // Not safe to copy or assign this object.
// The assignment operator is declared in the protected section so that code
// outside the class hierarchy cannot assign entries; it is hidden from SWIG.
// NOTE(review): no copy constructor is declared in this listing, so the
// implicitly generated public one would still be available - confirm whether
// that is intended given the comment above.
128#ifndef SWIG
130 ms_unigene_entry & operator=(const ms_unigene_entry & rhs);
131#endif
132 private:
// Private helpers taking a text buffer, an identifier and the identifier's
// length, and reporting success through the bool return value with the
// result written to the reference parameter(s).
// NOTE(review): presumably used to extract a string field, a numeric field
// and a gi-number/accession pair from one line of the unigene file -
// definitions are not visible here, confirm.
134 bool getString(const char * buf, const char * id,
135 const int idLen, std::string &res);
136
138 bool getLong(const char * buf, const char * id,
139 const int idLen, long &res);
140
142 bool getAccessions(const char * buf, const char * id, const int idLen,
143 std::string & giNumber, std::string & accession);
// Stored values exposed through the accessors above.
144 std::string id_;
145 std::string title_;
146 std::string gene_;
147 std::string cytoBand_;
148 std::string locuslink_;
149 std::string express_;
150 long chromosome_;
151 int numAccessions_;
152 OFFSET64_T fileOffset_;
153 };
154
156
// This class encapsulates a complete unigene file.
//
// It is an abstract base class: findEntry() and getUnigeneForAccession() are
// pure virtual and must be supplied by a derived class.
161 class MS_MASCOTRESFILE_API ms_unigene
162 {
// ms_unigene_entry is granted access to the protected members below.
163 friend class ms_unigene_entry;
164
165 public:
// Construct from a results file object and the name of the unigene file.
// NOTE(review): presumably stores these in resfile_ and filename_ - the
// definition is not visible here, confirm.
167 ms_unigene(const ms_mascotresfilebase& resfile, const char * filename);
168
169 virtual ~ms_unigene() = default;
170
// Return a pointer to the relevant unigene entry. The generated
// documentation describes the argument as an accession, while the parameter
// itself is named `id` - NOTE(review): confirm which identifier is expected
// and what is returned when nothing matches.
172 virtual const ms_unigene_entry * findEntry(const char * id) = 0;
173
// Return the Unigene 'accession' (ID) for a given EST accession.
// NOTE(review): `index` presumably selects one of several unigene entries
// mapped to the same accession (accessionToUnigene_ is a multimap) - confirm
// its base and the result for an out-of-range value.
175 virtual std::string getUnigeneForAccession(const std::string accession,
176 const int index) = 0;
177
178 protected:
179 // Not safe to copy or assign this object.
// Copy constructor and assignment operator are declared in the protected
// section so that outside code cannot copy the object; hidden from SWIG.
180#ifndef SWIG
181 ms_unigene(const ms_unigene & rhs);
182 ms_unigene & operator=(const ms_unigene & rhs);
183#endif
// NOTE(review): presumably records the pair in accessionToUnigene_ -
// definition not visible here, confirm.
185 void addAccessionUnigenePair(const ms_unigene_entry * unigene,
186 const std::string & accession);
187
188 protected:
189 const ms_mascotresfilebase& resfile_;
// Raw pointers to entries. NOTE(review): ownership is not visible in this
// header (the destructor is defaulted) - confirm who deletes these.
190 std::vector<ms_unigene_entry *> entries_;
191 std::string filename_;
192 int numAccessions_;
// Maps an accession string to the unigene entries associated with it; a
// multimap, so one accession may map to more than one entry.
// NOTE(review): std::multimap is declared in <map>, which is not among the
// headers included at the top of this file - confirm it is always provided
// by whatever includes this header.
193 typedef std::multimap<std::string, const ms_unigene_entry *> unigenesForAcc;
194 unigenesForAcc accessionToUnigene_;
195
196 };
197
198
// Concrete ms_unigene for use with an ms_mascotresfile_dat results file (the
// class for parsing and reading files in dat28 format). It declares
// implementations of the two pure virtual lookups of ms_unigene and holds a
// pointer to an ms_tinycdb object (a wrapper for the public domain tinycdb
// package).
199 class MS_MASCOTRESFILE_API ms_unigene_dat : public ms_unigene
200 {
201 friend class ms_unigene_entry;
202
203 public:
// Construct from a dat-format results file object and the name of the
// unigene file.
205 ms_unigene_dat(const ms_mascotresfile_dat &resfile, const char * filename);
206
// User-declared destructor. NOTE(review): presumably releases pcdb_ - the
// definition is not visible here, confirm.
207 virtual ~ms_unigene_dat();
208
// Same signature as the pure virtual ms_unigene::findEntry.
210 virtual const ms_unigene_entry * findEntry(const char * id);
211
// Same signature as the pure virtual ms_unigene::getUnigeneForAccession.
213 virtual std::string getUnigeneForAccession(const std::string accession,
214 const int index);
215
216 protected:
217 // Not safe to copy or assign this object.
// Copy constructor and assignment operator are declared in the protected
// section so that outside code cannot copy the object; hidden from SWIG.
218#ifndef SWIG
219 ms_unigene_dat(const ms_unigene_dat & rhs);
220 ms_unigene_dat & operator=(const ms_unigene_dat & rhs);
221#endif
222 private:
223
// NOTE(review): presumably a tinycdb index used to speed up lookups in the
// unigene file - usage and ownership are not visible here, confirm.
224 ms_tinycdb * pcdb_;
225
226 }; // end of resfile_group
228} // matrix_science namespace
229
230
231#endif // MS_MASCOTRESUNIGENE_HPP
232
233/*------------------------------- End of File -------------------------------*/
Class for parsing and reading files in dat28 format.
Definition: ms_mascotresfile_dat.hpp:121
Abstract base class of ms_mascotresfile_dat and ms_mascotresfile_msr.
Definition: ms_mascotresfilebase.hpp:72
Wrapper for the public domain tinycdb package http://www.corpit.ru/mjt/tinycdb.html by Michael Tokare...
Definition: ms_tinycdb.hpp:124
This class encapsulates a single entry from a unigene file.
Definition: ms_unigene.hpp:46
int getNumAccessions() const
Return the number of accessions (gi numbers or EMBL accessions) that comprise this entry.
Definition: ms_unigene.hpp:71
std::string getLocusLink() const
Return the LocusLink - e.g. 125.
Definition: ms_unigene.hpp:102
std::string getCytoBand() const
Return the CYTOBAND - e.g. 4q21-q23.
Definition: ms_unigene.hpp:95
long getChromosome() const
Return the chromosome that contains the unigene entry.
Definition: ms_unigene.hpp:117
std::string getID() const
Return the 'ID' of this entry - e.g. Hs.4.
Definition: ms_unigene.hpp:77
std::string getTitle() const
Return the 'title' of this entry - e.g. "alcohol dehydrogenase...".
Definition: ms_unigene.hpp:83
std::string getExpress() const
Return the EXPRESS entry. Can be very long - 5000 bytes.
Definition: ms_unigene.hpp:111
std::string getGene() const
Return the gene name for this entry - e.g. "ADH1B".
Definition: ms_unigene.hpp:89
OFFSET64_T getFileOffset() const
Return the offset into the unigene data file for this entry.
Definition: ms_unigene.hpp:124
This class encapsulates a complete unigene file.
Definition: ms_unigene.hpp:162
virtual const ms_unigene_entry * findEntry(const char *id)=0
Given an accession, return a pointer to the relevant unigene entry.
virtual std::string getUnigeneForAccession(const std::string accession, const int index)=0
Return the Unigene 'accession' (ID) for a given EST accession.