19 #include <unordered_map>
25 #include "dirtyAllocator.h"
27 #include "operators.h"
28 #include "marginalTrek++.h"
36 unsigned int parse_formula(
const char* formula,
37 std::vector<double>& isotope_masses,
38 std::vector<double>& isotope_probabilities,
41 unsigned int* confSize,
42 bool use_nominal_masses =
false);
49 class ISOSPEC_EXPORT_SYMBOL
Iso {
58 void setupMarginals(
const double* _isotopeMasses,
59 const double* _isotopeProbabilities);
70 bool doMarginalsNeedSorting()
const;
85 const int* _isotopeNumbers,
86 const int* _atomCounts,
87 const double* _isotopeMasses,
88 const double* _isotopeProbabilities
92 const int* _isotopeNumbers,
93 const int* _atomCounts,
94 const double*
const * _isotopeMasses,
95 const double*
const * _isotopeProbabilities
99 Iso(
const char* formula,
bool use_nominal_masses =
false);
102 inline Iso(
const std::string& formula,
bool use_nominal_masses =
false) :
Iso(formula.c_str(), use_nominal_masses) {}
113 static Iso FromFASTA(
const char* fasta,
bool use_nominal_masses =
false,
bool add_water =
true);
116 static inline Iso FromFASTA(
const std::string& fasta,
bool use_nominal_masses =
false,
bool add_water =
true) {
return FromFASTA(fasta.c_str(), use_nominal_masses, add_water); }
122 Iso& operator=(
const Iso& other) =
delete;
129 Iso(
const Iso& other,
bool fullcopy);
135 double getLightestPeakMass()
const;
138 double getHeaviestPeakMass()
const;
145 double getMonoisotopicPeakMass()
const;
148 double getModeLProb()
const;
151 double getUnlikeliestPeakLProb()
const;
154 double getModeMass()
const;
157 double getTheoreticalAverageMass()
const;
160 double variance()
const;
163 double stddev()
const {
return sqrt(variance()); }
172 void addElement(
int atomCount,
int noIsotopes,
const double* isotopeMasses,
const double* isotopeProbabilities);
175 void saveMarginalLogSizeEstimates(
double* priorities,
double target_total_prob)
const;
186 const double mode_lprob;
204 virtual double lprob()
const {
return partialLProbs[0]; }
210 virtual double mass()
const {
return partialMasses[0]; }
216 virtual double prob()
const {
return partialProbs[0]; }
240 std::priority_queue<void*, pod_vector<void*>,
ConfOrder> pq;
255 bool advanceToNextConfiguration()
override final;
264 int* c = getConf(topConf);
269 for(
int ii = 0; ii < dimNumber; ii++)
271 memcpy(space, marginalResults[ii]->confs()[c[ii]], isotopeNumbers[ii]*
sizeof(
int));
272 space += isotopeNumbers[ii];
299 double* maxConfsLPSum;
300 const double Lcutoff;
305 const double* lProbs_ptr;
306 const double* lProbs_ptr_start;
307 double* partialLProbs_second;
308 double partialLProbs_second_val, lcfmsv;
317 counter[0] = lProbs_ptr - lProbs_ptr_start;
318 if(marginalOrder !=
nullptr)
320 for(
int ii = 0; ii < dimNumber; ii++)
322 int jj = marginalOrder[ii];
323 memcpy(space, marginalResultsUnsorted[ii]->get_conf(counter[jj]), isotopeNumbers[ii]*
sizeof(
int));
324 space += isotopeNumbers[ii];
329 for(
int ii = 0; ii < dimNumber; ii++)
331 memcpy(space, marginalResultsUnsorted[ii]->get_conf(counter[ii]), isotopeNumbers[ii]*
sizeof(
int));
332 space += isotopeNumbers[ii];
346 IsoThresholdGenerator(
Iso&& iso,
double _threshold,
bool _absolute =
true,
int _tabSize = 1000,
int _hashSize = 1000,
bool reorder_marginals =
true);
356 if(ISOSPEC_LIKELY(*lProbs_ptr >= lcfmsv))
364 lProbs_ptr = lProbs_ptr_start;
366 int * cntr_ptr = counter;
368 while(idx < dimNumber-1)
376 partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->
get_lProb(counter[idx]);
377 if(partialLProbs[idx] + maxConfsLPSum[idx-1] >= Lcutoff)
379 partialMasses[idx] = partialMasses[idx+1] + marginalResults[idx]->
get_mass(counter[idx]);
380 partialProbs[idx] = partialProbs[idx+1] * marginalResults[idx]->
get_prob(counter[idx]);
391 ISOSPEC_FORCE_INLINE
double lprob() const override final {
return partialLProbs_second_val + (*(lProbs_ptr)); }
392 ISOSPEC_FORCE_INLINE
double mass() const override final {
return partialMasses[1] + marginalResults[0]->
get_mass(lProbs_ptr - lProbs_ptr_start); }
393 ISOSPEC_FORCE_INLINE
double prob() const override final {
return partialProbs[1] * marginalResults[0]->
get_prob(lProbs_ptr - lProbs_ptr_start); }
396 void terminate_search();
408 size_t count_confs();
412 ISOSPEC_FORCE_INLINE
void recalc(
int idx)
414 for(; idx > 0; idx--)
416 partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->
get_lProb(counter[idx]);
417 partialMasses[idx] = partialMasses[idx+1] + marginalResults[idx]->
get_mass(counter[idx]);
418 partialProbs[idx] = partialProbs[idx+1] * marginalResults[idx]->
get_prob(counter[idx]);
420 partialLProbs_second_val = *partialLProbs_second;
421 partialLProbs[0] = *partialLProbs_second + marginalResults[0]->
get_lProb(counter[0]);
422 lcfmsv = Lcutoff - partialLProbs_second_val;
425 ISOSPEC_FORCE_INLINE
void short_recalc(
int idx)
427 for(; idx > 0; idx--)
428 partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->get_lProb(counter[idx]);
429 partialLProbs_second_val = *partialLProbs_second;
430 partialLProbs[0] = *partialLProbs_second + marginalResults[0]->
get_lProb(counter[0]);
431 lcfmsv = Lcutoff - partialLProbs_second_val;
443 double* maxConfsLPSum;
444 double currentLThreshold, lastLThreshold;
449 const double* lProbs_ptr;
450 const double* lProbs_ptr_start;
451 const double** resetPositions;
452 double* partialLProbs_second;
453 double partialLProbs_second_val, lcfmsv, last_lcfmsv;
454 bool marginalsNeedSorting;
463 counter[0] = lProbs_ptr - lProbs_ptr_start;
464 if(marginalOrder !=
nullptr)
466 for(
int ii = 0; ii < dimNumber; ii++)
468 int jj = marginalOrder[ii];
469 memcpy(space, marginalResultsUnsorted[ii]->get_conf(counter[jj]), isotopeNumbers[ii]*
sizeof(
int));
470 space += isotopeNumbers[ii];
475 for(
int ii = 0; ii < dimNumber; ii++)
477 memcpy(space, marginalResultsUnsorted[ii]->get_conf(counter[ii]), isotopeNumbers[ii]*
sizeof(
int));
478 space += isotopeNumbers[ii];
483 inline double get_currentLThreshold()
const {
return currentLThreshold; }
485 IsoLayeredGenerator(Iso&& iso,
int _tabSize = 1000,
int _hashSize = 1000,
bool reorder_marginals =
true,
double t_prob_hint = 0.99);
487 ~IsoLayeredGenerator();
493 if(advanceToNextConfigurationWithinLayer())
495 }
while(IsoLayeredGenerator::nextLayer(-2.0));
499 ISOSPEC_FORCE_INLINE
bool advanceToNextConfigurationWithinLayer()
504 if(ISOSPEC_LIKELY(*lProbs_ptr >= lcfmsv))
511 ISOSPEC_FORCE_INLINE
double lprob() const override final {
return partialLProbs_second_val + (*(lProbs_ptr)); };
512 ISOSPEC_FORCE_INLINE
double mass() const override final {
return partialMasses[1] + marginalResults[0]->
get_mass(lProbs_ptr - lProbs_ptr_start); };
513 ISOSPEC_FORCE_INLINE
double prob() const override final {
return partialProbs[1] * marginalResults[0]->
get_prob(lProbs_ptr - lProbs_ptr_start); };
516 void terminate_search();
520 ISOSPEC_FORCE_INLINE
void recalc(
int idx)
522 for(; idx > 0; idx--)
524 partialLProbs[idx] = partialLProbs[idx+1] + marginalResults[idx]->
get_lProb(counter[idx]);
525 partialMasses[idx] = partialMasses[idx+1] + marginalResults[idx]->
get_mass(counter[idx]);
526 partialProbs[idx] = partialProbs[idx+1] * marginalResults[idx]->
get_prob(counter[idx]);
528 partialLProbs_second_val = *partialLProbs_second;
529 partialLProbs[0] = partialLProbs_second_val + marginalResults[0]->
get_lProb(counter[0]);
530 lcfmsv = currentLThreshold - partialLProbs_second_val;
531 last_lcfmsv = lastLThreshold - partialLProbs_second_val;
534 bool nextLayer(
double offset);
545 size_t to_sample_left;
546 const double precision;
547 const double beta_bias;
550 size_t current_count;
555 ISOSPEC_FORCE_INLINE
size_t count()
const {
return current_count; }
557 ISOSPEC_FORCE_INLINE
double mass() const override final {
return ILG.
mass(); }
559 ISOSPEC_FORCE_INLINE
double prob() const override final {
return static_cast<double>(count()); }
561 ISOSPEC_FORCE_INLINE
double lprob() const override final {
return log(
prob()); }
572 double curr_conf_prob_left, current_prob;
574 if(to_sample_left <= 0)
577 if(confs_prob < chasing_prob)
584 current_prob = ILG.
prob();
585 confs_prob += current_prob;
586 while(confs_prob <= chasing_prob)
590 current_prob = ILG.
prob();
591 confs_prob += current_prob;
593 if(to_sample_left <= 0)
595 curr_conf_prob_left = confs_prob - chasing_prob;
603 current_prob = ILG.
prob();
604 confs_prob += current_prob;
605 curr_conf_prob_left = current_prob;
608 double prob_left_to_1 = precision - chasing_prob;
609 double expected_confs = curr_conf_prob_left * to_sample_left / prob_left_to_1;
611 if(expected_confs <= beta_bias)
614 chasing_prob += rdvariate_beta_1_b(to_sample_left) * prob_left_to_1;
615 while(chasing_prob <= confs_prob)
619 if(to_sample_left == 0)
621 prob_left_to_1 = precision - chasing_prob;
622 chasing_prob += rdvariate_beta_1_b(to_sample_left) * prob_left_to_1;
624 if(current_count > 0)
630 size_t rbin = rdvariate_binom(to_sample_left, curr_conf_prob_left/prob_left_to_1);
631 current_count += rbin;
632 to_sample_left -= rbin;
633 chasing_prob = confs_prob;
634 if(current_count > 0)
The generator of isotopologues.
virtual void get_conf_signature(int *space) const =0
Write the signature of configuration into target memory location. It must be large enough to accomoda...
virtual bool advanceToNextConfiguration()=0
Advance to the next, not yet visited, most probable isotopologue.
virtual double mass() const
Get the mass of the current isotopologue.
virtual double lprob() const
Get the log-probability of the current isotopologue.
virtual double prob() const
Get the probability of the current isotopologue.
The Iso class for the calculation of the isotopic distribution.
double stddev() const
Get the standard deviation of the theoretical distribution.
static Iso FromFASTA(const std::string &fasta, bool use_nominal_masses=false, bool add_water=true)
Constructor (named) from aminoacid FASTA sequence as C++ std::string. See above for details.
int getDimNumber() const
Get the number of elements in the chemical formula of the molecule.
int getAllDim() const
Get the total number of isotopes of elements present in a chemical formula.
Iso(const std::string &formula, bool use_nominal_masses=false)
Constructor from C++ std::string chemical formula.
ISOSPEC_FORCE_INLINE double lprob() const override final
Get the log-probability of the current isotopologue.
ISOSPEC_FORCE_INLINE double mass() const override final
Get the mass of the current isotopologue.
ISOSPEC_FORCE_INLINE void recalc(int idx)
Recalculate the current partial log-probabilities, masses, and probabilities.
ISOSPEC_FORCE_INLINE bool advanceToNextConfiguration() override final
Advance to the next, not yet visited, most probable isotopologue.
void get_conf_signature(int *space) const override final
Write the signature of configuration into target memory location. It must be large enough to accomoda...
ISOSPEC_FORCE_INLINE double prob() const override final
Get the probability of the current isotopologue.
The generator of isotopologues sorted by their probability of occurrence.
void get_conf_signature(int *space) const override final
Save the counts of isotopes in the space.
ISOSPEC_FORCE_INLINE double mass() const override final
Get the mass of the current isotopologue.
ISOSPEC_FORCE_INLINE void get_conf_signature(int *space) const override final
Write the signature of configuration into target memory location. It must be large enough to accomoda...
ISOSPEC_FORCE_INLINE double lprob() const override final
Get the log-probability of the current isotopologue.
ISOSPEC_FORCE_INLINE bool advanceToNextConfiguration() override final
Advance to the next, not yet visited, most probable isotopologue.
ISOSPEC_FORCE_INLINE double prob() const override final
Get the probability of the current isotopologue.
The generator of isotopologues above a given threshold value.
ISOSPEC_FORCE_INLINE double lprob() const override final
Get the log-probability of the current isotopologue.
void get_conf_signature(int *space) const override final
Write the signature of configuration into target memory location. It must be large enough to accomoda...
ISOSPEC_FORCE_INLINE bool advanceToNextConfiguration() override final
Advance to the next, not yet visited, most probable isotopologue.
ISOSPEC_FORCE_INLINE double prob() const override final
Get the probability of the current isotopologue.
ISOSPEC_FORCE_INLINE double mass() const override final
Get the mass of the current isotopologue.
double get_mass(int idx) const
get the mass of the idx-th subisotopologue, see details in PrecalculatedMarginal::get_mass.
double get_prob(int idx) const
get the probability of the idx-th subisotopologue, see details in PrecalculatedMarginal::get_eProb.
double get_lProb(int idx) const
get the log-probability of the idx-th subisotopologue, see details in PrecalculatedMarginal::get_lPro...
The marginal distribution class (a subisotopologue).
The marginal distribution class (a subisotopologue).
Precalculated Marginal class.
const double & get_lProb(int idx) const
Get the log-probability of the idx-th subisotopologue.
const double & get_prob(int idx) const
Get the probability of the idx-th subisotopologue.
const double & get_mass(int idx) const
Get the mass of the idx-th subisotopologue.