17#if !defined(ms_imputation_INCLUDED_)
18#define ms_imputation_INCLUDED_
21namespace matrix_science
24 class ms_imputation_method;
25 class ms_imputation_fixed_value;
26 class ms_imputation_average_value;
27 class ms_imputation_knn;
28 class ms_imputation_missforest;
47 : isMissing_(isMissing)
// Returns the stored numerical value (val_) without modifying the object.
// The value is returned as-is, whatever the state of isMissing_.
// NOTE(review): callers presumably check the missing status before using the result — confirm.
57 double getVal()
const {
return val_; }
// Stores `val` in val_ and clears the missing flag (isMissing_ = false),
// so the entry is treated as holding a value after this call.
60 void setVal(
double val) { val_ = val; isMissing_ =
false; }
64 bool isMissing_ =
true;
89 : retentionTime_(retentionTime)
90 , subproject_(subproject) {}
105 double retentionTime_;
127 IMPUTE_RETENTION_TIME
153 void setImputeMs1PeptideExcludedByFractionThr(
bool imputeMs1PeptideExcludedByFractionThr);
161 void setImputeMs1PeptideExcludedByCorrelationThr(
bool imputeMs1PeptideExcludedByCorrelationThr);
169 void setImputeMs1PeptideExcludedByStdErrThr(
bool imputeMs1PeptideExcludedByStdErrThr);
177 void setImputeMs1PeptideNegativeRatio(
bool imputeMs1PeptideNegativeRatio);
190 std::vector<std::vector<double>> impute();
208 void appendImputationPeptides();
225 enum IMPUTATION_MS1_PEPTIDE_CHOICE {
226 IMPUTE_ALL_EXCLUDED_RATIOS = 0,
227 IMPUTE_IF_EXCLUDED_BY_FRACTION_THR = 1,
228 IMPUTE_IF_EXCLUDED_BY_CORRELATION_THR = 2,
229 IMPUTE_IF_EXCLUDED_BY_STDERR_THR = 4,
230 IMPUTE_IF_NEGATIVE = 8,
231 IMPUTE_NO_EXCLUDED_RATIOS = 16
// Initial value 8 equals IMPUTE_IF_NEGATIVE in the IMPUTATION_MS1_PEPTIDE_CHOICE enum.
// NOTE(review): the non-zero enum values are powers of two, so this is presumably a bit mask
// of those choices — confirm, and consider initialising from the enumerator instead of a literal.
235 int IMPUTATION_MS1_PEPTIDE_CHOICE_ = 8;
236 bool imputation_ms2_peptide_choice_ =
true;
238 const ms_peptidesummary* pepSum_ =
nullptr;
239 const ms_quant_method* quantMethod_ =
nullptr;
240 const ms_ms1quantitation* ms1_quant_ =
nullptr;
241 const ms_ms2quantitation* ms2_quant_ =
nullptr;
242 ms_imputation_fixed_value* fixed_value_imputation_ =
nullptr;
243 ms_imputation_average_value* average_value_imputation_ =
nullptr;
244 ms_imputation_knn* knn_imputation_ =
nullptr;
245 ms_imputation_missforest* missforest_imputation_ =
nullptr;
247 ms_imputation_method& imputation_method_;
248 std::vector<std::vector<ms_imputation_missing_val>> dataWithMissing_;
249 std::vector<std::vector<ms_imputation_peptide_data>> imputationPeptides_;
250 std::vector<ms_imputation_peptide_data> aObservationSet_;
251 size_t numVariables_;
253 IMPUTATION_VARIABLE IMPUTATION_VARIABLE_ = IMPUTATION_VARIABLE::IMPUTE_PROTEIN_RATIO;
255 void populateArraysForImputation();
256 void extractProteinRatios();
257 void extractPeptideRatios();
// Declaration only; the definition is not part of this listing.
// Returns a vector of ms_ms1quant_match by value.
// NOTE(review): the first parameter is spelled `hitNim`; next to `memNum` this looks like a
// typo for `hitNum` — confirm against the definition before renaming.
258 std::vector<ms_ms1quant_match> getMatchesList(
int hitNim,
int memNum);
259 void pullProteinsFrom(std::vector<matrix_science::ms_protein>& proteinsOut);
260 void extractRetentionTimes();
261 void extractMs1PeptideStatus(
int ratioStatus, std::vector<bool>& boolStatus);
// Returns a non-const reference to the dataWithMissing_ member, so the caller
// can read or modify the stored array in place (no copy is made).
272 std::vector<std::vector<ms_imputation_missing_val>>&
getDataWithMissing() {
return dataWithMissing_; }
275 virtual std::vector<std::vector<double>> impute() = 0;
286 std::vector<std::vector<ms_imputation_missing_val>> dataWithMissing_;
287 std::vector<std::vector<double>> completeObservations_;
288 std::vector<double> averageValues_;
290 void setDataWithMissing(
const std::vector<std::vector<ms_imputation_missing_val>>& dataWithMissingIn);
291 std::vector<double> changems_imputation_missing_valVecToDoubleVec(std::vector<ms_imputation_missing_val> missingObservation);
292 std::vector<std::vector<double>> changems_imputation_missing_valArrayToDoubleArray(std::vector<std::vector<ms_imputation_missing_val>> dataProcess);
293 std::vector<int> getMissingIndexes(std::vector<ms_imputation_missing_val>);
294 std::vector<double> getKnownValues(std::vector<ms_imputation_missing_val>);
295 std::vector<std::vector<int>> removeDuplicateIndexes(std::vector<std::vector<int>> duplicateIndexes);
296 void averageFindAverage();
336 using ms_imputation_method::ms_imputation_method;
347 std::vector<std::vector<double>> impute()
override;
356 double fixedValue_ = 0;
394 using ms_imputation_method::ms_imputation_method;
// Replaces averageValues_ with the contents of averageValuesIn.
// The argument is taken by value and then copy-assigned into the member.
402 void setAverageValues(std::vector<double> averageValuesIn) { averageValues_ = averageValuesIn; }
405 std::vector<std::vector<double>> impute()
override;
454 using ms_imputation_method::ms_imputation_method;
471 std::vector<std::vector<double>> impute()
override;
480 int knnNumNeighbours_ = 5;
481 bool knnUseWeightedAverage_ =
true;
482 std::vector<std::vector<int>> missingCombinations_;
487 void getCompleteObsAndMissingIndexes();
532 using ms_imputation_method::ms_imputation_method;
537 std::vector<std::vector<double>> impute()
override;
546 void IdentifyMissing(std::vector<int>& missingCounts, std::vector<std::vector<int>>& missingRows);
547 double GetDifferences(std::vector<std::vector<double>>& currData, std::vector<std::vector<double>>& prevData);
548 double GetDoubleDifferences(std::vector<std::vector<double>>& currData, std::vector<std::vector<double>>& prevData);
549 bool CompareDiffs(std::vector<double> iterationDifferences);
This class is used as a base class for several Mascot Parser classes.
Definition: ms_errors.hpp:696
Definition: ms_imputation.hpp:393
void setAverageValues(std::vector< double > averageValuesIn)
Set the vector of average values, one average per variable, that will be written in place of all miss...
Definition: ms_imputation.hpp:402
std::vector< double > getAverageValues()
Get the vector of average values, one average per variable, that will be written in place of all miss...
Definition: ms_imputation.hpp:399
Definition: ms_imputation.hpp:335
void setFixedValue(double fixedValueIn)
Set the single fixed value that will be written in place of all missing values.
Definition: ms_imputation.hpp:344
double getFixedValue()
Get the single fixed value that will be written in place of all missing values.
Definition: ms_imputation.hpp:341
Definition: ms_imputation.hpp:453
void setKnnNumNeighbours(int knnNumNeighboursIn)
Set the number of nearest neighbours used when calculating the imputed value. Default value = 5.
Definition: ms_imputation.hpp:462
int getKnnNumNeighbours()
Get the number of nearest neighbours used when calculating an imputed value. Default value = 5.
Definition: ms_imputation.hpp:459
void setKnnUseWeightedAverage(bool knnUseWeightedAverageIn)
Set the bool to signify if a weighted average of nearest neighbours is used when calculating an imput...
Definition: ms_imputation.hpp:468
bool getKnnUseWeightedAverage()
Get the bool to signify if a weighted average of nearest neighbours is used when calculating an imput...
Definition: ms_imputation.hpp:465
Definition: ms_imputation.hpp:268
std::vector< std::vector< ms_imputation_missing_val > > & getDataWithMissing()
Get the data array that the imputation method will impute.
Definition: ms_imputation.hpp:272
ms_imputation_method()=default
Default constructor.
Definition: ms_imputation.hpp:531
Definition: ms_imputation.hpp:33
bool isMissing() const
Get missing status for this value.
Definition: ms_imputation.hpp:51
void setVal(double val)
Set the numerical value and status as not missing.
Definition: ms_imputation.hpp:60
ms_imputation_missing_val()
Default constructor; initialises the value as missing.
Definition: ms_imputation.hpp:38
ms_imputation_missing_val(bool isMissing, double val)
Constructor to initialise whether or not a value is missing and, if not missing, the numerical value.
Definition: ms_imputation.hpp:44
void setMissing()
Set the status and value as missing.
Definition: ms_imputation.hpp:54
double getVal() const
Get the numerical value (if this is not missing).
Definition: ms_imputation.hpp:57
Definition: ms_imputation.hpp:78
const double retentionTime() const
Get retention time for this peptide.
Definition: ms_imputation.hpp:96
ms_imputation_peptide_data(double retentionTime, int subproject)
Constructor to initialise the parameters required for imputation. Subproject property value to be set ...
Definition: ms_imputation.hpp:86
const int subproject() const
Get the 1-based subproject index. See constructor description for guidelines.
Definition: ms_imputation.hpp:100
Definition: ms_imputation.hpp:115
void setImputationVariableChoice(const IMPUTATION_VARIABLE imputationVariableChoiceIn)
Set the imputation object to impute protein/peptide ratios from ms1 or ms2 quantitation,...
Definition: ms_imputation.hpp:145
bool getImputeMs1PeptideExcludedByCorrelationThr()
Get how the imputation object is currently treating ms1 peptide ratios excluded because of the correlation threshold...
Definition: ms_imputation.hpp:157
bool getImputeMs1PeptideExcludedByStdErrThr()
Get how the imputation object is currently treating ms1 peptide ratios excluded because of the std er...
Definition: ms_imputation.hpp:165
IMPUTATION_VARIABLE
What type of data is being imputed.
Definition: ms_imputation.hpp:124
@ IMPUTE_PROTEIN_RATIO
Missing protein ratios are imputed.
Definition: ms_imputation.hpp:125
@ IMPUTE_PEPTIDE_RATIO
Missing peptide ratios are imputed. How to treat excluded peptide ratios is set via setImputeMs1Pepti...
Definition: ms_imputation.hpp:126
bool getImputeExcludedMs2PeptideRatios()
Get how the imputation object is currently treating ms2 excluded ratios. I.e. are excluded ratios tre...
Definition: ms_imputation.hpp:181
bool getImputeMs1PeptideExcludedByFractionThr()
Get how the imputation object is currently treating ms1 peptide ratios excluded because of the fracti...
Definition: ms_imputation.hpp:149
void setImputeExcludedMs2PeptideRatios(bool imputationMs2PeptideChoiceIn)
Set the imputation object to treat excluded ms2 ratios as missing. Not used when imputing ms1 quantita...
Definition: ms_imputation.hpp:185
bool getImputeMs1PeptideNegativeRatio()
Get how the imputation object is currently treating negative ms1 peptide ratios. Not used when imputi...
Definition: ms_imputation.hpp:173
const IMPUTATION_VARIABLE getImputationVariableChoice() const
Get what the imputation object is currently imputing. I.e. protein/peptide ratios from ms1 or ms2 qua...
Definition: ms_imputation.hpp:142
The result of quantitation calculations applied to peptide summary.
Definition: ms_ms1quantitation.hpp:92
Peptide and protein quantitation in a Reporter or Multiplex Mascot results file.
Definition: ms_ms2quantitation.hpp:127