Matrix Science Mascot Parser toolkit
 
Loading...
Searching...
No Matches
ms_imputation.hpp
1/*
2##############################################################################
3# file: ms_imputation.hpp
4# 'msparser' toolkit
5#
6##############################################################################
7# COPYRIGHT NOTICE
8# Copyright 1998-2024 Matrix Science Limited All Rights Reserved.
9#
10##############################################################################
11* @(#)$Source: parser/inc/ms_imputation.hpp $
12* @(#)$Revision: 3dc976f32db83ed8da39bffa0537780f93efbfd0 | MSPARSER_REL_3_1_0-2025-07-27-0-gea47708fac $
13* @(#)$Date: 2024-11-13 17:07:42 +0000 $
14##############################################################################
15 */
16
17#if !defined(ms_imputation_INCLUDED_)
18#define ms_imputation_INCLUDED_
19#include <vector>
20
21namespace matrix_science
22{
23 // Forward declarations
24 class ms_imputation_method;
25 class ms_imputation_fixed_value;
26 class ms_imputation_average_value;
27 class ms_imputation_knn;
28 class ms_imputation_missforest;
29
// Holds one numerical value together with a flag recording whether that value is missing.
// NOTE(review): this listing omits the original doc comments and both constructor
// declarator lines (listing lines 38 and 44); only their initialiser lists are visible.
33 class MS_MASCOTRESFILE_API ms_imputation_missing_val {
34
35 public:
36
// Default constructor (declarator not shown): initialises the value as missing, with val_ = 0.0.
39 : isMissing_(true)
40 , val_(0.0)
41 { }
42
// Constructor (declarator not shown) taking the missing flag and the numerical value;
// both arguments are stored unchanged.
45 bool isMissing,
46 double val)
47 : isMissing_(isMissing)
48 , val_(val) {}
49
// Get the missing status for this value.
51 bool isMissing() const { return isMissing_; }
52
// Mark the value as missing; the stored number is reset to 0.0 as well.
54 void setMissing() { isMissing_ = true; val_ = 0.0; }
55
// Get the stored numerical value. The flag is not checked here, so the result is
// whatever val_ holds even when the value is flagged as missing.
57 double getVal() const { return val_; }
58
// Store a numerical value and clear the missing flag.
60 void setVal(double val) { val_ = val; isMissing_ = false; }
61
62 private:
63
// In-class defaults match the default constructor: missing, 0.0.
64 bool isMissing_ = true;
65 double val_ = 0.0;
66 };
67
// Per-peptide input record for imputation: a retention time plus a 1-based subproject index.
// NOTE(review): the original doc comments and the constructor declarator line
// (listing line 86) are absent from this listing.
78 class MS_MASCOTRESFILE_API ms_imputation_peptide_data {
79
80 public:
81
// Constructor (declarator not shown): stores the retention time and the subproject index unchanged.
87 double retentionTime,
88 int subproject)
89 : retentionTime_(retentionTime)
90 , subproject_(subproject) {}
91
92
94
// Get the retention time for this peptide.
// NOTE(review): the top-level const on a by-value return has no effect for callers.
96 const double retentionTime() const { return retentionTime_; }
97
// Get the 1-based subproject index.
100 const int subproject() const { return subproject_; }
101
102 private:
103
104 // Internal variables
105 double retentionTime_;
106 int subproject_; // This is 1 based
107 };
108
// Front-end object for imputation. It is constructed from ms1 quantitation, ms2 quantitation,
// or a caller-supplied array of ms_imputation_peptide_data, together with a reference to an
// ms_imputation_method. Derives from ms_errors.
// NOTE(review): the original doc comments and several lines (including the
// IMPUTATION_VARIABLE enum header, listing lines 124-126) are absent from this listing.
114 class MS_MASCOTRESFILE_API ms_imputation : public ms_errors
115 {
116 public:
117
119
// Tail of enum IMPUTATION_VARIABLE (what type of data is being imputed). The enumerators
// not shown here are IMPUTE_PROTEIN_RATIO and IMPUTE_PEPTIDE_RATIO.
// NOTE(review): IMPUTE_RETENTION_TIME presumably selects retention-time imputation - confirm.
127 IMPUTE_RETENTION_TIME
128 };
129
// Construct from ms1 quantitation results, an imputation method and the choice of variable to impute.
131 ms_imputation(const ms_ms1quantitation* ms1quantitation, ms_imputation_method& m, IMPUTATION_VARIABLE imputationVariableChoice);
132
// Construct from ms2 quantitation results, an imputation method and the choice of variable to impute.
134 ms_imputation(const ms_ms2quantitation* ms2quantitation, ms_imputation_method& m, IMPUTATION_VARIABLE imputationVariableChoice);
135
// Construct from a caller-supplied 2-D array of peptide data (taken by value), an imputation
// method and a variable count.
137 ms_imputation(std::vector<std::vector<ms_imputation_peptide_data>> imputationPeptides, ms_imputation_method& m, size_t numVariables);
138
140
// Get what the imputation object is currently imputing.
142 const IMPUTATION_VARIABLE getImputationVariableChoice() const { return IMPUTATION_VARIABLE_; }
143
// Set what the imputation object should impute.
145 void setImputationVariableChoice(const IMPUTATION_VARIABLE imputationVariableChoiceIn) { IMPUTATION_VARIABLE_ = imputationVariableChoiceIn; }
146
// True when the IMPUTE_IF_EXCLUDED_BY_FRACTION_THR bit is set in IMPUTATION_MS1_PEPTIDE_CHOICE_.
149 bool getImputeMs1PeptideExcludedByFractionThr() { return (IMPUTATION_MS1_PEPTIDE_CHOICE_ & IMPUTATION_MS1_PEPTIDE_CHOICE::IMPUTE_IF_EXCLUDED_BY_FRACTION_THR) != 0; }
150
// Declared only; defined elsewhere. Presumably sets/clears the fraction-threshold bit - confirm.
153 void setImputeMs1PeptideExcludedByFractionThr(bool imputeMs1PeptideExcludedByFractionThr);
154
// True when the IMPUTE_IF_EXCLUDED_BY_CORRELATION_THR bit is set in IMPUTATION_MS1_PEPTIDE_CHOICE_.
157 bool getImputeMs1PeptideExcludedByCorrelationThr() { return (IMPUTATION_MS1_PEPTIDE_CHOICE_ & IMPUTATION_MS1_PEPTIDE_CHOICE::IMPUTE_IF_EXCLUDED_BY_CORRELATION_THR) != 0; }
158
// Declared only; defined elsewhere. Presumably sets/clears the correlation-threshold bit - confirm.
161 void setImputeMs1PeptideExcludedByCorrelationThr(bool imputeMs1PeptideExcludedByCorrelationThr);
162
// True when the IMPUTE_IF_EXCLUDED_BY_STDERR_THR bit is set in IMPUTATION_MS1_PEPTIDE_CHOICE_.
165 bool getImputeMs1PeptideExcludedByStdErrThr() { return (IMPUTATION_MS1_PEPTIDE_CHOICE_ & IMPUTATION_MS1_PEPTIDE_CHOICE::IMPUTE_IF_EXCLUDED_BY_STDERR_THR) != 0; }
166
// Declared only; defined elsewhere. Presumably sets/clears the std-err-threshold bit - confirm.
169 void setImputeMs1PeptideExcludedByStdErrThr(bool imputeMs1PeptideExcludedByStdErrThr);
170
// True when the IMPUTE_IF_NEGATIVE bit is set in IMPUTATION_MS1_PEPTIDE_CHOICE_.
173 bool getImputeMs1PeptideNegativeRatio() { return (IMPUTATION_MS1_PEPTIDE_CHOICE_ & IMPUTATION_MS1_PEPTIDE_CHOICE::IMPUTE_IF_NEGATIVE) != 0; }
174
// Declared only; defined elsewhere. Presumably sets/clears the negative-ratio bit - confirm.
177 void setImputeMs1PeptideNegativeRatio(bool imputeMs1PeptideNegativeRatio);
178
// Get how excluded ms2 peptide ratios are currently treated (true = treated as missing).
181 bool getImputeExcludedMs2PeptideRatios() { return imputation_ms2_peptide_choice_; }
182
// Set whether excluded ms2 peptide ratios are treated as missing.
185 void setImputeExcludedMs2PeptideRatios(bool imputationMs2PeptideChoiceIn) { imputation_ms2_peptide_choice_ = imputationMs2PeptideChoiceIn; }
186
// Declared only; returns a 2-D array of doubles.
// NOTE(review): presumably the data set with missing values filled in by imputation_method_ - confirm.
190 std::vector<std::vector<double>> impute();
191
192 private:
193
201
// Declared only. NOTE(review): purpose not visible here; judging by the member names it
// probably moves aObservationSet_ into imputationPeptides_ - confirm.
208 void appendImputationPeptides();
209
// Declared only; takes one peptide-data record by value.
216 void appendObservation(ms_imputation_peptide_data);
217
219
// Flags describing which excluded ms1 peptide ratios are handled by imputation. The non-zero
// values are distinct powers of two and are tested with bitwise & in the getters above.
225 enum IMPUTATION_MS1_PEPTIDE_CHOICE {
226 IMPUTE_ALL_EXCLUDED_RATIOS = 0,
227 IMPUTE_IF_EXCLUDED_BY_FRACTION_THR = 1,
228 IMPUTE_IF_EXCLUDED_BY_CORRELATION_THR = 2,
229 IMPUTE_IF_EXCLUDED_BY_STDERR_THR = 4,
230 IMPUTE_IF_NEGATIVE = 8,
231 IMPUTE_NO_EXCLUDED_RATIOS = 16
232 };
233
234 // Default excluded behaviour
// Stored as int so that several flags can be combined; by default only IMPUTE_IF_NEGATIVE is set.
235 int IMPUTATION_MS1_PEPTIDE_CHOICE_ = 8; // i.e. IMPUTATION_MS1_PEPTIDE_CHOICE::IMPUTE_IF_NEGATIVE
236 bool imputation_ms2_peptide_choice_ = true; // Treat excluded ms2 peptide ratios as missing
237 // Initialise member pointers to null
238 const ms_peptidesummary* pepSum_ = nullptr;
239 const ms_quant_method* quantMethod_ = nullptr;
240 const ms_ms1quantitation* ms1_quant_ = nullptr;
241 const ms_ms2quantitation* ms2_quant_ = nullptr;
242 ms_imputation_fixed_value* fixed_value_imputation_ = nullptr;
243 ms_imputation_average_value* average_value_imputation_ = nullptr;
244 ms_imputation_knn* knn_imputation_ = nullptr;
245 ms_imputation_missforest* missforest_imputation_ = nullptr;
246 // Member variables
// Reference member bound from the constructor argument 'm'.
// NOTE(review): the referenced method object presumably has to outlive this object - confirm.
247 ms_imputation_method& imputation_method_;
248 std::vector<std::vector<ms_imputation_missing_val>> dataWithMissing_;
249 std::vector<std::vector<ms_imputation_peptide_data>> imputationPeptides_;
250 std::vector<ms_imputation_peptide_data> aObservationSet_;
// No in-class initialiser; must be set by the constructors (definitions not visible here).
251 size_t numVariables_;
252 // Default ratio choice
253 IMPUTATION_VARIABLE IMPUTATION_VARIABLE_ = IMPUTATION_VARIABLE::IMPUTE_PROTEIN_RATIO;
254 // Methods
// Private helpers; all are declared only and defined elsewhere.
255 void populateArraysForImputation();
256 void extractProteinRatios();
257 void extractPeptideRatios();
// NOTE(review): 'hitNim' looks like a typo for 'hitNum'.
258 std::vector<ms_ms1quant_match> getMatchesList(int hitNim, int memNum);
259 void pullProteinsFrom(std::vector<matrix_science::ms_protein>& proteinsOut);
260 void extractRetentionTimes();
261 void extractMs1PeptideStatus(int ratioStatus, std::vector<bool>& boolStatus);
262 };
263
// Abstract base class for the imputation methods in this header (fixed value, average value,
// knn, missforest). It owns the data array to be imputed and declares the pure virtual impute().
// NOTE(review): the original doc comments and listing lines 283-284 are absent from this listing.
267 class MS_MASCOTRESFILE_API ms_imputation_method : public ms_errors
268 {
269 public:
270
// Get the data array that the imputation method will impute. Returns a mutable reference
// to the internal array, not a copy.
272 std::vector<std::vector<ms_imputation_missing_val>>& getDataWithMissing() { return dataWithMissing_; }
273
274 // Pure virtual: each concrete imputation method overrides this to execute the imputation calculation
275 virtual std::vector<std::vector<double>> impute() = 0;
276
// ms_imputation is given access to the protected members below.
277 friend class ms_imputation;
278
// Virtual destructor, declared here and defined elsewhere.
279 virtual ~ms_imputation_method();
280
281 protected:
282
// NOTE(review): the page's reference notes list `ms_imputation_method()=default` (default
// constructor); it is presumably declared on the lines missing here - confirm.
285 // Internal variables
286 std::vector<std::vector<ms_imputation_missing_val>> dataWithMissing_;
287 std::vector<std::vector<double>> completeObservations_;
288 std::vector<double> averageValues_;
289 // Methods
// Helpers shared by the derived methods; all are declared only and defined elsewhere.
// Apart from setDataWithMissing, every vector parameter is taken by value.
290 void setDataWithMissing(const std::vector<std::vector<ms_imputation_missing_val>>& dataWithMissingIn);
291 std::vector<double> changems_imputation_missing_valVecToDoubleVec(std::vector<ms_imputation_missing_val> missingObservation);
292 std::vector<std::vector<double>> changems_imputation_missing_valArrayToDoubleArray(std::vector<std::vector<ms_imputation_missing_val>> dataProcess);
293 std::vector<int> getMissingIndexes(std::vector<ms_imputation_missing_val>);
294 std::vector<double> getKnownValues(std::vector<ms_imputation_missing_val>);
295 std::vector<std::vector<int>> removeDuplicateIndexes(std::vector<std::vector<int>> duplicateIndexes);
296 void averageFindAverage();
297 };
298
// Imputation method that writes one fixed value in place of all missing values.
// NOTE(review): the original doc comments are absent from this listing.
334 class MS_MASCOTRESFILE_API ms_imputation_fixed_value : public ms_imputation_method
335 {
// Inherit the base-class constructors.
336 using ms_imputation_method::ms_imputation_method;
337
338 public:
339
// Get the single fixed value that will be written in place of all missing values.
341 double getFixedValue() { return fixedValue_; }
342
// Set the single fixed value that will be written in place of all missing values.
344 void setFixedValue(double fixedValueIn) { fixedValue_ = fixedValueIn; }
345
// Override of the base-class pure virtual; defined elsewhere.
347 std::vector<std::vector<double>> impute() override;
348
// ms_imputation is given access to this class's private members.
349 friend class ms_imputation;
350
352
353 private:
354
355 // Internal fixed value
// Defaults to 0 until setFixedValue() is called.
356 double fixedValue_ = 0;
357 };
358
// Imputation method that writes a per-variable average in place of missing values.
// The averages live in the inherited averageValues_ member.
// NOTE(review): the original doc comments are absent from this listing.
392 class MS_MASCOTRESFILE_API ms_imputation_average_value : public ms_imputation_method
393 {
// Inherit the base-class constructors.
394 using ms_imputation_method::ms_imputation_method;
395
396 public:
397
// Get the vector of average values, one average per variable. Returns a copy.
399 std::vector<double> getAverageValues() { return averageValues_; }
400
// Set the vector of average values, one average per variable. The argument is taken by
// value and copy-assigned into averageValues_.
402 void setAverageValues(std::vector<double> averageValuesIn) { averageValues_ = averageValuesIn; }
403
// Override of the base-class pure virtual; defined elsewhere.
405 std::vector<std::vector<double>> impute() override;
406
// ms_imputation is given access to this class's private members.
407 friend class ms_imputation;
408
410 };
411
// Imputation method based on nearest neighbours. It is configured by the number of
// neighbours and by whether a weighted average of the neighbours is used.
// NOTE(review): the original doc comments are absent from this listing.
452 class MS_MASCOTRESFILE_API ms_imputation_knn : public ms_imputation_method
453 {
// Inherit the base-class constructors.
454 using ms_imputation_method::ms_imputation_method;
455
456 public:
457
// Get the number of nearest neighbours used when calculating an imputed value. Default value = 5.
459 int getKnnNumNeighbours() { return knnNumNeighbours_; }
460
// Set the number of nearest neighbours used when calculating an imputed value.
// The value is stored as given; no range check is made here.
462 void setKnnNumNeighbours(int knnNumNeighboursIn) { knnNumNeighbours_ = knnNumNeighboursIn; }
463
// Get whether a weighted average of nearest neighbours is used. Default value = true.
465 bool getKnnUseWeightedAverage() { return knnUseWeightedAverage_; }
466
// Set whether a weighted average of nearest neighbours is used.
468 void setKnnUseWeightedAverage(bool knnUseWeightedAverageIn) { knnUseWeightedAverage_ = knnUseWeightedAverageIn; }
469
// Override of the base-class pure virtual; defined elsewhere.
471 std::vector<std::vector<double>> impute() override;
472
// ms_imputation is given access to this class's private members.
473 friend class ms_imputation;
474
476
477 private:
478
479 // Internal knn parameters
480 int knnNumNeighbours_ = 5;
481 bool knnUseWeightedAverage_ = true;
482 std::vector<std::vector<int>> missingCombinations_;
483
484 // Internal methods
// All are declared only and defined elsewhere.
485 void knnImpute();
486 void knnProcess();
487 void getCompleteObsAndMissingIndexes();
488 };
489
// Imputation method named "missforest". It has no public configuration beyond impute().
// NOTE(review): the original doc comments are absent from this listing; the algorithm
// itself is not visible in this header.
530 class MS_MASCOTRESFILE_API ms_imputation_missforest : public ms_imputation_method
531 {
// Inherit the base-class constructors.
532 using ms_imputation_method::ms_imputation_method;
533
534 public:
535
// Override of the base-class pure virtual; defined elsewhere.
537 std::vector<std::vector<double>> impute() override;
538
// ms_imputation is given access to this class's private members.
539 friend class ms_imputation;
540
542
543 private:
544
545 // Internal methods
// All are declared only and defined elsewhere. IdentifyMissing reports through its two
// non-const reference parameters. GetDifferences and GetDoubleDifferences each take the
// current and previous data arrays and return a double.
// NOTE(review): CompareDiffs presumably decides whether iteration should stop - confirm.
546 void IdentifyMissing(std::vector<int>& missingCounts, std::vector<std::vector<int>>& missingRows);
547 double GetDifferences(std::vector<std::vector<double>>& currData, std::vector<std::vector<double>>& prevData);
548 double GetDoubleDifferences(std::vector<std::vector<double>>& currData, std::vector<std::vector<double>>& prevData);
549 bool CompareDiffs(std::vector<double> iterationDifferences);
550 };
551 // end of tools_group
553} // namespace matrix_science
554
555#endif // !defined(ms_imputation_INCLUDED_)
556
557/*------------------------------- End of File -------------------------------*/
This class is used as a base class for several Mascot Parser classes.
Definition: ms_errors.hpp:696
Definition: ms_imputation.hpp:393
void setAverageValues(std::vector< double > averageValuesIn)
Set the vector of average values, one average per variable, that will be written in place of all miss...
Definition: ms_imputation.hpp:402
std::vector< double > getAverageValues()
Get the vector of average values, one average per variable, that will be written in place of all miss...
Definition: ms_imputation.hpp:399
Definition: ms_imputation.hpp:335
void setFixedValue(double fixedValueIn)
Set the single fixed value that will be written in place of all missing values.
Definition: ms_imputation.hpp:344
double getFixedValue()
Get the single fixed value that will be written in place of all missing values.
Definition: ms_imputation.hpp:341
Definition: ms_imputation.hpp:453
void setKnnNumNeighbours(int knnNumNeighboursIn)
Set the number of nearest neighbours used when calculating the imputed value. Default value = 5.
Definition: ms_imputation.hpp:462
int getKnnNumNeighbours()
Get the number of nearest neighbours used when calculating an imputed value. Default value = 5.
Definition: ms_imputation.hpp:459
void setKnnUseWeightedAverage(bool knnUseWeightedAverageIn)
Set the bool to signify if a weighted average of nearest neighbours is used when calculating an imput...
Definition: ms_imputation.hpp:468
bool getKnnUseWeightedAverage()
Get the bool to signify if a weighted average of nearest neighbours is used when calculating an imput...
Definition: ms_imputation.hpp:465
Definition: ms_imputation.hpp:268
std::vector< std::vector< ms_imputation_missing_val > > & getDataWithMissing()
Get the data array that the imputation method will impute.
Definition: ms_imputation.hpp:272
ms_imputation_method()=default
Default constructor.
Definition: ms_imputation.hpp:531
Definition: ms_imputation.hpp:33
bool isMissing() const
Get missing status for this value.
Definition: ms_imputation.hpp:51
void setVal(double val)
Set the numerical value and status as not missing.
Definition: ms_imputation.hpp:60
ms_imputation_missing_val()
Default constructor; initialises the value as missing.
Definition: ms_imputation.hpp:38
ms_imputation_missing_val(bool isMissing, double val)
Constructor to initialise whether or not a value is missing and, if not missing, the numerical value.
Definition: ms_imputation.hpp:44
void setMissing()
Set the status and value as missing.
Definition: ms_imputation.hpp:54
double getVal() const
Get the numerical value (if this is not missing).
Definition: ms_imputation.hpp:57
Definition: ms_imputation.hpp:78
const double retentionTime() const
Get retention time for this peptide.
Definition: ms_imputation.hpp:96
ms_imputation_peptide_data(double retentionTime, int subproject)
Constructor to initialise the parameters required for imputation. Subproject property value to be set ...
Definition: ms_imputation.hpp:86
const int subproject() const
Get the 1-based subproject index. See constructor description for guidelines.
Definition: ms_imputation.hpp:100
Definition: ms_imputation.hpp:115
void setImputationVariableChoice(const IMPUTATION_VARIABLE imputationVariableChoiceIn)
Set the imputation object to impute protein/peptide ratios from ms1 or ms2 quantitation,...
Definition: ms_imputation.hpp:145
bool getImputeMs1PeptideExcludedByCorrelationThr()
Get how the imputation object is currently treating ms1 peptide ratios excluded because of the correlation...
Definition: ms_imputation.hpp:157
bool getImputeMs1PeptideExcludedByStdErrThr()
Get how the imputation object is currently treating ms1 peptide ratios excluded because of the std er...
Definition: ms_imputation.hpp:165
IMPUTATION_VARIABLE
What type of data is being imputed.
Definition: ms_imputation.hpp:124
@ IMPUTE_PROTEIN_RATIO
Missing protein ratios are imputed.
Definition: ms_imputation.hpp:125
@ IMPUTE_PEPTIDE_RATIO
Missing peptide ratios are imputed. How to treat excluded peptide ratios is set via setImputeMs1Pepti...
Definition: ms_imputation.hpp:126
bool getImputeExcludedMs2PeptideRatios()
Get how the imputation object is currently treating ms2 excluded ratios. I.e. are excluded ratios tre...
Definition: ms_imputation.hpp:181
bool getImputeMs1PeptideExcludedByFractionThr()
Get how the imputation object is currently treating ms1 peptide ratios excluded because of the fracti...
Definition: ms_imputation.hpp:149
void setImputeExcludedMs2PeptideRatios(bool imputationMs2PeptideChoiceIn)
Set the imputation object to treat excluded ms2 ratios as missing. Not used when imputing ms1 quantita...
Definition: ms_imputation.hpp:185
bool getImputeMs1PeptideNegativeRatio()
Get how the imputation object is currently treating negative ms1 peptide ratios. Not used when imputi...
Definition: ms_imputation.hpp:173
const IMPUTATION_VARIABLE getImputationVariableChoice() const
Get what the imputation object is currently imputing. I.e. protein/peptide ratios from ms1 or ms2 qua...
Definition: ms_imputation.hpp:142
The result of quantitation calculations applied to peptide summary.
Definition: ms_ms1quantitation.hpp:92
Peptide and protein quantitation in a Reporter or Multiplex Mascot results file.
Definition: ms_ms2quantitation.hpp:127