Matrix Science Mascot Parser toolkit
 
Loading...
Searching...
No Matches
ms_imputation.hpp
1/*
2##############################################################################
3# file: ms_imputation.hpp
4# 'msparser' toolkit
5#
6##############################################################################
7# COPYRIGHT NOTICE
8# Copyright 1998-2024 Matrix Science Limited All Rights Reserved.
9#
10##############################################################################
11* @(#)$Source: parser/inc/ms_imputation.hpp $
12* @(#)$Revision: bb42496f256d654703dbcb86842d07846b167b8b | MSPARSER_REL_3_0_0-2024-09-24-0-g93ebaeb4f4 $
13* @(#)$Date: 2024-09-20 16:45:19 +0100 $
14##############################################################################
15 */
16
17#if !defined(ms_imputation_INCLUDED_)
18#define ms_imputation_INCLUDED_
19#include <vector>
20
21namespace matrix_science
22{
23 // Forward declarations
24 class ms_imputation_method;
25 class ms_imputation_fixed_value;
26 class ms_imputation_average_value;
27 class ms_imputation_knn;
28 class ms_imputation_missforest;
29
// A single data value that may be missing: pairs a "missing" flag with a numeric value.
// NOTE(review): this text is a documentation source listing; the number at the start of each
// line is the listing's own line number, and the two constructor declaration lines
// (listing lines 38 and 44) are absent from this extract.
33 class MS_MASCOTRESFILE_API ms_imputation_missing_val {
34
35 public:
36
// Default constructor, ms_imputation_missing_val(): initialises the value as missing, with val_ = 0.0.
39 : isMissing_(true)
40 , val_(0.0)
41 { }
42
// Constructor ms_imputation_missing_val(bool isMissing, double val): stores the missing flag
// and the numerical value exactly as passed.
45 bool isMissing,
46 double val)
47 : isMissing_(isMissing)
48 , val_(val) {}
49
// Get the missing status for this value.
51 bool isMissing() const { return isMissing_; }
52
// Mark the value as missing; the stored number is reset to 0.0.
54 void setMissing() { isMissing_ = true; val_ = 0.0; }
55
// Get the numerical value. No check is made here that the value is not missing.
57 double getVal() const { return val_; }
58
// Set the numerical value and clear the missing flag.
60 void setVal(double val) { val_ = val; isMissing_ = false; }
61
62 private:
63
// Both members default to the "missing" state.
64 bool isMissing_ = true;
65 double val_ = 0.0;
66 };
67
// Immutable pair of values describing one peptide for imputation: its retention time and
// the subproject it belongs to.
// NOTE(review): the constructor declaration line (listing line 83,
// ms_imputation_peptide_data(double retentionTime, int subproject)) is absent from this extract.
78 class MS_MASCOTRESFILE_API ms_imputation_peptide_data {
79
80 public:
81
// Constructor: initialises the parameters required for imputation from the two arguments.
84 double retentionTime,
85 int subproject)
86 : retentionTime_(retentionTime)
87 , subproject_(subproject) {}
88
89
91
// Get the retention time for this peptide (returned by value).
93 const double retentionTime() const { return retentionTime_; }
94
// Get the 1-based subproject index (returned by value).
96 const int subproject() const { return subproject_; }
97
98 private:
99
100 // Internal variables
101 double retentionTime_;
102 int subproject_; // This is 1 based
103 };
104
// Front-end class for imputation: it is constructed from ms1 quantitation, ms2 quantitation or
// a caller-supplied array of peptide data, together with a reference to an ms_imputation_method.
// NOTE(review): this text is a documentation source listing; several declaration lines
// (e.g. the start of the IMPUTATION_VARIABLE enum and listing lines 135 and 189-203) are
// absent from this extract.
110 class MS_MASCOTRESFILE_API ms_imputation : public ms_errors
111 {
112 public:
113
115
// Tail of enum IMPUTATION_VARIABLE ("what type of data is being imputed"). The enumerators
// IMPUTE_PROTEIN_RATIO and IMPUTE_PEPTIDE_RATIO precede this one but their lines are absent here.
123 IMPUTE_RETENTION_TIME
124 };
125
// Construct from an ms1 quantitation object, the imputation method to use and the variable to impute.
127 ms_imputation(const ms_ms1quantitation* ms1quantitation, ms_imputation_method& m, IMPUTATION_VARIABLE imputationVariableChoice);
128
// Construct from an ms2 quantitation object, the imputation method to use and the variable to impute.
130 ms_imputation(const ms_ms2quantitation* ms2quantitation, ms_imputation_method& m, IMPUTATION_VARIABLE imputationVariableChoice);
131
// Construct from a caller-supplied 2-D array of peptide data and the imputation method to use.
133 ms_imputation(std::vector<std::vector<ms_imputation_peptide_data>> imputationPeptides, ms_imputation_method& m);
134
136
// Get what the imputation object is currently imputing.
138 const IMPUTATION_VARIABLE getImputationVariableChoice() const { return IMPUTATION_VARIABLE_; }
139
// Set what the imputation object should impute.
141 void setImputationVariableChoice(const IMPUTATION_VARIABLE imputationVariableChoiceIn) { IMPUTATION_VARIABLE_ = imputationVariableChoiceIn; }
142
// True when the IMPUTE_IF_EXCLUDED_BY_FRACTION_THR bit is set in IMPUTATION_MS1_PEPTIDE_CHOICE_.
145 bool getImputeMs1PeptideExcludedByFractionThr() { return (IMPUTATION_MS1_PEPTIDE_CHOICE_ & IMPUTATION_MS1_PEPTIDE_CHOICE::IMPUTE_IF_EXCLUDED_BY_FRACTION_THR) != 0; }
146
// Declaration only; presumably sets or clears the fraction-threshold bit - confirm against the implementation.
149 void setImputeMs1PeptideExcludedByFractionThr(bool imputeMs1PeptideExcludedByFractionThr);
150
// True when the IMPUTE_IF_EXCLUDED_BY_CORRELATION_THR bit is set in IMPUTATION_MS1_PEPTIDE_CHOICE_.
153 bool getImputeMs1PeptideExcludedByCorrelationThr() { return (IMPUTATION_MS1_PEPTIDE_CHOICE_ & IMPUTATION_MS1_PEPTIDE_CHOICE::IMPUTE_IF_EXCLUDED_BY_CORRELATION_THR) != 0; }
154
// Declaration only; presumably sets or clears the correlation-threshold bit - confirm against the implementation.
157 void setImputeMs1PeptideExcludedByCorrelationThr(bool imputeMs1PeptideExcludedByCorrelationThr);
158
// True when the IMPUTE_IF_EXCLUDED_BY_STDERR_THR bit is set in IMPUTATION_MS1_PEPTIDE_CHOICE_.
161 bool getImputeMs1PeptideExcludedByStdErrThr() { return (IMPUTATION_MS1_PEPTIDE_CHOICE_ & IMPUTATION_MS1_PEPTIDE_CHOICE::IMPUTE_IF_EXCLUDED_BY_STDERR_THR) != 0; }
162
// Declaration only; presumably sets or clears the standard-error-threshold bit - confirm against the implementation.
165 void setImputeMs1PeptideExcludedByStdErrThr(bool imputeMs1PeptideExcludedByStdErrThr);
166
// True when the IMPUTE_IF_NEGATIVE bit is set in IMPUTATION_MS1_PEPTIDE_CHOICE_ (set by default, see below).
169 bool getImputeMs1PeptideNegativeRatio() { return (IMPUTATION_MS1_PEPTIDE_CHOICE_ & IMPUTATION_MS1_PEPTIDE_CHOICE::IMPUTE_IF_NEGATIVE) != 0; }
170
// Declaration only; presumably sets or clears the negative-ratio bit - confirm against the implementation.
173 void setImputeMs1PeptideNegativeRatio(bool imputeMs1PeptideNegativeRatio);
174
// Get whether excluded ms2 peptide ratios are treated as missing (default true).
177 bool getImputeExcludedMs2PeptideRatios() { return imputation_ms2_peptide_choice_; }
178
// Set whether excluded ms2 peptide ratios are treated as missing.
181 void setImputeExcludedMs2PeptideRatios(bool imputationMs2PeptideChoiceIn) { imputation_ms2_peptide_choice_ = imputationMs2PeptideChoiceIn; }
182
// Declaration only; returns a 2-D array of doubles. The definition is not part of this header.
186 std::vector<std::vector<double>> impute();
187
188 private:
189
197
// Declaration only; behaviour is defined outside this header.
204 void appendImputationPeptides();
205
// Declaration only; takes one peptide-data item by value.
212 void appendObservation(ms_imputation_peptide_data);
213
215
// Flags describing which excluded ms1 peptide ratios to impute. The non-zero values are
// distinct bits, and the getters above test them with a bitwise AND.
221 enum IMPUTATION_MS1_PEPTIDE_CHOICE {
222 IMPUTE_ALL_EXCLUDED_RATIOS = 0,
223 IMPUTE_IF_EXCLUDED_BY_FRACTION_THR = 1,
224 IMPUTE_IF_EXCLUDED_BY_CORRELATION_THR = 2,
225 IMPUTE_IF_EXCLUDED_BY_STDERR_THR = 4,
226 IMPUTE_IF_NEGATIVE = 8,
227 IMPUTE_NO_EXCLUDED_RATIOS = 16
228 };
229
230 // Default excluded behaviour
231 int IMPUTATION_MS1_PEPTIDE_CHOICE_ = 8; // i.e. IMPUTATION_MS1_PEPTIDE_CHOICE::IMPUTE_IF_NEGATIVE
232 bool imputation_ms2_peptide_choice_ = true; // Treat excluded ms2 peptide ratios as missing
233 // Initialise member pointers to null
234 const ms_peptidesummary* pepSum_ = nullptr;
235 const ms_quant_method* quantMethod_ = nullptr;
236 const ms_ms1quantitation* ms1_quant_ = nullptr;
237 const ms_ms2quantitation* ms2_quant_ = nullptr;
238 ms_imputation_fixed_value* fixed_value_imputation_ = nullptr;
239 ms_imputation_average_value* average_value_imputation_ = nullptr;
240 ms_imputation_knn* knn_imputation_ = nullptr;
241 ms_imputation_missforest* missforest_imputation_ = nullptr;
242 // Member variables
// Reference member: the ms_imputation_method passed to the constructor must outlive this object.
243 ms_imputation_method& imputation_method_;
244 std::vector<std::vector<ms_imputation_missing_val>> dataWithMissing_;
245 std::vector<std::vector<ms_imputation_peptide_data>> imputationPeptides_;
246 std::vector<ms_imputation_peptide_data> aObservationSet_;
247 // Default ratio choice
248 IMPUTATION_VARIABLE IMPUTATION_VARIABLE_ = IMPUTATION_VARIABLE::IMPUTE_PROTEIN_RATIO;
249 // Methods
250 void populateArraysForImputation();
251 void extractProteinRatios();
252 void extractPeptideRatios();
// NOTE(review): "hitNim" is presumably a typo for "hitNum" - confirm against the implementation.
253 std::vector<ms_ms1quant_match> getMatchesList(int hitNim, int memNum);
254 void pullProteinsFrom(std::vector<matrix_science::ms_protein>& proteinsOut);
255 void extractRetentionTimes();
256 void extractMs1PeptideStatus(int ratioStatus, std::vector<bool>& boolStatus);
257 };
258
// Abstract base class for the imputation methods: it holds the data array to be imputed and
// declares the pure virtual impute() that each concrete method overrides.
// NOTE(review): the defaulted constructor ms_imputation_method() = default is listed in the
// documentation but its declaration line (around listing lines 278-279) is absent from this extract.
262 class MS_MASCOTRESFILE_API ms_imputation_method : public ms_errors
263 {
264 public:
265
// Get the data array that the imputation method will impute (returned by non-const reference).
267 std::vector<std::vector<ms_imputation_missing_val>>& getDataWithMissing() { return dataWithMissing_; }
268
269 // Pure virtual: each concrete imputation method overrides this to execute the imputation calculation
270 virtual std::vector<std::vector<double>> impute() = 0;
271
// ms_imputation is granted access to the protected members below.
272 friend class ms_imputation;
273
// Virtual destructor, declared here and defined outside this header.
274 virtual ~ms_imputation_method();
275
276 protected:
277
280 // Internal variables
281 std::vector<std::vector<ms_imputation_missing_val>> dataWithMissing_;
282 std::vector<std::vector<double>> completeObservations_;
283 std::vector<double> averageValues_;
284 // Methods
// Declarations only; behaviour is defined outside this header. Apart from setDataWithMissing,
// the helpers that take vectors take them by value.
285 void setDataWithMissing(const std::vector<std::vector<ms_imputation_missing_val>>& dataWithMissingIn);
286 std::vector<double> changems_imputation_missing_valVecToDoubleVec(std::vector<ms_imputation_missing_val> missingObservation);
287 std::vector<std::vector<double>> changems_imputation_missing_valArrayToDoubleArray(std::vector<std::vector<ms_imputation_missing_val>> dataProcess);
288 std::vector<int> getMissingIndexes(std::vector<ms_imputation_missing_val>);
289 std::vector<double> getKnownValues(std::vector<ms_imputation_missing_val>);
290 std::vector<std::vector<int>> removeDuplicateIndexes(std::vector<std::vector<int>> duplicateIndexes);
291 void averageFindAverage();
292 };
293
// Imputation method that writes a single fixed value in place of all missing values.
329 class MS_MASCOTRESFILE_API ms_imputation_fixed_value : public ms_imputation_method
330 {
// Inherit the base-class constructors.
331 using ms_imputation_method::ms_imputation_method;
332
333 public:
334
// Get the single fixed value that will be written in place of all missing values.
336 double getFixedValue() { return fixedValue_; }
337
// Set the single fixed value that will be written in place of all missing values.
339 void setFixedValue(double fixedValueIn) { fixedValue_ = fixedValueIn; }
340
// Override of the base-class pure virtual; defined outside this header.
342 std::vector<std::vector<double>> impute() override;
343
344 friend class ms_imputation;
345
347
348 private:
349
350 // Internal fixed value
// Defaults to 0 until setFixedValue() is called.
351 double fixedValue_ = 0;
352 };
353
// Imputation method that writes a per-variable average value in place of missing values.
387 class MS_MASCOTRESFILE_API ms_imputation_average_value : public ms_imputation_method
388 {
// Inherit the base-class constructors.
389 using ms_imputation_method::ms_imputation_method;
390
391 public:
392
// Get the vector of average values, one average per variable (returns a copy of the
// averageValues_ member declared in the base class).
394 std::vector<double> getAverageValues() { return averageValues_; }
395
// Set the vector of average values, one average per variable (the argument is copied into averageValues_).
397 void setAverageValues(std::vector<double> averageValuesIn) { averageValues_ = averageValuesIn; }
398
// Override of the base-class pure virtual; defined outside this header.
400 std::vector<std::vector<double>> impute() override;
401
402 friend class ms_imputation;
403
405 };
406
// Imputation method based on nearest neighbours, configured by a neighbour count and a
// weighted-average switch.
447 class MS_MASCOTRESFILE_API ms_imputation_knn : public ms_imputation_method
448 {
// Inherit the base-class constructors.
449 using ms_imputation_method::ms_imputation_method;
450
451 public:
452
// Get the number of nearest neighbours used when calculating an imputed value. Default value = 5.
454 int getKnnNumNeighbours() { return knnNumNeighbours_; }
455
// Set the number of nearest neighbours used when calculating an imputed value. The value is stored unchecked.
457 void setKnnNumNeighbours(int knnNumNeighboursIn) { knnNumNeighbours_ = knnNumNeighboursIn; }
458
// Get whether a weighted average of the nearest neighbours is used. Default value = true.
460 bool getKnnUseWeightedAverage() { return knnUseWeightedAverage_; }
461
// Set whether a weighted average of the nearest neighbours is used.
463 void setKnnUseWeightedAverage(bool knnUseWeightedAverageIn) { knnUseWeightedAverage_ = knnUseWeightedAverageIn; }
464
// Override of the base-class pure virtual; defined outside this header.
466 std::vector<std::vector<double>> impute() override;
467
468 friend class ms_imputation;
469
471
472 private:
473
474 // Internal knn parameters
475 int knnNumNeighbours_ = 5;
476 bool knnUseWeightedAverage_ = true;
477 std::vector<std::vector<int>> missingCombinations_;
478
479 // Internal methods
// Declarations only; behaviour is defined outside this header.
480 void knnImpute();
481 void knnProcess();
482 void getCompleteObsAndMissingIndexes();
483 };
484
// Concrete imputation method; it exposes no tunable parameters in this header, only impute().
525 class MS_MASCOTRESFILE_API ms_imputation_missforest : public ms_imputation_method
526 {
// Inherit the base-class constructors.
527 using ms_imputation_method::ms_imputation_method;
528
529 public:
530
// Override of the base-class pure virtual; defined outside this header.
532 std::vector<std::vector<double>> impute() override;
533
534 friend class ms_imputation;
535
537
538 private:
539
540 // Internal methods
// Declarations only; behaviour is defined outside this header. IdentifyMissing reports its
// results through the two reference parameters; CompareDiffs takes its vector by value.
541 void IdentifyMissing(std::vector<int>& missingCounts, std::vector<std::vector<int>>& missingRows);
542 double GetDifferences(std::vector<std::vector<double>>& currData, std::vector<std::vector<double>>& prevData);
543 double GetDoubleDifferences(std::vector<std::vector<double>>& currData, std::vector<std::vector<double>>& prevData);
544 bool CompareDiffs(std::vector<double> iterationDifferences);
545 };
546 // end of tools_group
548} // namespace matrix_science
549
550#endif // !defined(ms_imputation_INCLUDED_)
551
552/*------------------------------- End of File -------------------------------*/
This class is used as a base class for several Mascot Parser classes.
Definition: ms_errors.hpp:696
Definition: ms_imputation.hpp:388
void setAverageValues(std::vector< double > averageValuesIn)
Set the vector of average values, one average per variable, that will be written in place of all miss...
Definition: ms_imputation.hpp:397
std::vector< double > getAverageValues()
Get the vector of average values, one average per variable, that will be written in place of all miss...
Definition: ms_imputation.hpp:394
Definition: ms_imputation.hpp:330
void setFixedValue(double fixedValueIn)
Set the single fixed value that will be written in place of all missing values.
Definition: ms_imputation.hpp:339
double getFixedValue()
Get the single fixed value that will be written in place of all missing values.
Definition: ms_imputation.hpp:336
Definition: ms_imputation.hpp:448
void setKnnNumNeighbours(int knnNumNeighboursIn)
Set the number of nearest neighbours used when calculating the imputed value. Default value = 5.
Definition: ms_imputation.hpp:457
int getKnnNumNeighbours()
Get the number of nearest neighbours used when calculating an imputed value. Default value = 5.
Definition: ms_imputation.hpp:454
void setKnnUseWeightedAverage(bool knnUseWeightedAverageIn)
Set the bool to signify if a weighted average of nearest neighbours is used when calculating an imput...
Definition: ms_imputation.hpp:463
bool getKnnUseWeightedAverage()
Get the bool to signify if a weighted average of nearest neighbours is used when calculating an imput...
Definition: ms_imputation.hpp:460
Definition: ms_imputation.hpp:263
std::vector< std::vector< ms_imputation_missing_val > > & getDataWithMissing()
Get the data array that the imputation method will impute.
Definition: ms_imputation.hpp:267
ms_imputation_method()=default
Default constructor.
Definition: ms_imputation.hpp:526
Definition: ms_imputation.hpp:33
bool isMissing() const
Get missing status for this value.
Definition: ms_imputation.hpp:51
void setVal(double val)
Set the numerical value and status as not missing.
Definition: ms_imputation.hpp:60
ms_imputation_missing_val()
Default constructor; initialises the value as missing.
Definition: ms_imputation.hpp:38
ms_imputation_missing_val(bool isMissing, double val)
Constructor to initialise whether or not a value is missing and, if not missing, the numerical value.
Definition: ms_imputation.hpp:44
void setMissing()
Set the status and value as missing.
Definition: ms_imputation.hpp:54
double getVal() const
Get the numerical value (if this is not missing).
Definition: ms_imputation.hpp:57
Definition: ms_imputation.hpp:78
const double retentionTime() const
Get retention time for this peptide.
Definition: ms_imputation.hpp:93
ms_imputation_peptide_data(double retentionTime, int subproject)
Constructor to initialise the parameters required for imputation.
Definition: ms_imputation.hpp:83
const int subproject() const
Get the 1-based subproject index.
Definition: ms_imputation.hpp:96
Definition: ms_imputation.hpp:111
void setImputationVariableChoice(const IMPUTATION_VARIABLE imputationVariableChoiceIn)
Set the imputation object to impute protein/peptide ratios from ms1 or ms2 quantitation,...
Definition: ms_imputation.hpp:141
bool getImputeMs1PeptideExcludedByCorrelationThr()
Get how the imputation object is currently treating ms1 peptide ratios excluded because of the correl...
Definition: ms_imputation.hpp:153
bool getImputeMs1PeptideExcludedByStdErrThr()
Get how the imputation object is currently treating ms1 peptide ratios excluded because of the std er...
Definition: ms_imputation.hpp:161
IMPUTATION_VARIABLE
What type of data is being imputed.
Definition: ms_imputation.hpp:120
@ IMPUTE_PROTEIN_RATIO
Missing protein ratios are imputed.
Definition: ms_imputation.hpp:121
@ IMPUTE_PEPTIDE_RATIO
Missing peptide ratios are imputed. How to treat excluded peptide ratios is set via setImputeMs1Pepti...
Definition: ms_imputation.hpp:122
bool getImputeExcludedMs2PeptideRatios()
Get how the imputation object is currently treating ms2 excluded ratios. I.e. are excluded ratios tre...
Definition: ms_imputation.hpp:177
bool getImputeMs1PeptideExcludedByFractionThr()
Get how the imputation object is currently treating ms1 peptide ratios excluded because of the fracti...
Definition: ms_imputation.hpp:145
void setImputeExcludedMs2PeptideRatios(bool imputationMs2PeptideChoiceIn)
Set the imputation object to treat excluded ms2 ratios as missing. Not used when imputing ms1 quantita...
Definition: ms_imputation.hpp:181
bool getImputeMs1PeptideNegativeRatio()
Get how the imputation object is currently treating negative ms1 peptide ratios. Not used when imputi...
Definition: ms_imputation.hpp:169
const IMPUTATION_VARIABLE getImputationVariableChoice() const
Get what the imputation object is currently imputing. I.e. protein/peptide ratios from ms1 or ms2 qua...
Definition: ms_imputation.hpp:138
The result of quantitation calculations applied to peptide summary.
Definition: ms_ms1quantitation.hpp:92
Peptide and protein quantitation in a Reporter or Multiplex Mascot results file.
Definition: ms_ms2quantitation.hpp:127