GLnexus
Scalable datastore for population genome sequencing, with on-demand joint genotyping
 All Classes Functions Variables
genotyper.h
1 #ifndef GLNEXUS_GENOTYPER_H
2 #define GLNEXUS_GENOTYPER_H
3 
4 #include "data.h"
5 #include "types.h"
6 #include <fstream>
7 #include <memory>
8 #include "service_config.h"
9 
10 namespace GLnexus {
11 
12 Status genotype_site(const genotyper_config& cfg, MetadataCache& cache, BCFData& data,
13  const unified_site& site,
14  const std::string& sampleset, const std::vector<std::string>& samples,
15  const bcf_hdr_t* hdr, std::shared_ptr<bcf1_t>& ans, consolidated_loss& losses_for_site);
16 
17 // LossTracker handles the low-level housekeeping of loss accounting for a
18 // single unified_site and a single sample. Computation in this class assumes a
19 // diploid genome.
20 // The get() function returns the computed loss as a loss_stats for higher
21 // level record keeping.
22 class LossTracker {
23 
24 public:
25  // Simple wrapper struct to store information about an original call
26  // for a unified site, used bfor computation of loss
27  struct orig_call {
28  orig_call(range pos_, bool is_gvcf_) : pos(pos_), is_gvcf(is_gvcf_) {}
29 
30  range pos;
31  bool is_gvcf;
32 
33  bool operator==(const orig_call& rhs) const noexcept { return pos == rhs.pos && is_gvcf == rhs.is_gvcf; }
34  bool operator<(const orig_call& rhs) const noexcept { return pos < rhs.pos; }
35  bool operator<=(const orig_call& rhs) const noexcept { return pos <= rhs.pos; }
36  };
37 
38  // Constructor
39  LossTracker(const range rng_) noexcept : rng(rng_) {}
40 
41  Status add_call_for_site(const range call, int n_calls, bool is_gvcf) noexcept;
42  Status finalize_loss_for_site(int n_no_calls) noexcept;
43  Status get(loss_stats& ans) const noexcept;
44 
45  // Did we lose calls on this site?
46  bool is_loss() const noexcept {
47  return n_calls_lost > 0;
48  }
49 
50 private:
51  // Range of joint-called unified_site being considered
52  range rng;
53 
54  // Original calls (identified by effective range within site) and
55  // count of calls. Calls which are different in the bcf record but
56  // share the same effective range within the site will be collapsed
57  // into the same key.
58  std::map<orig_call, int> orig_calls_for_site;
59 
60  int n_calls_total=0, n_bp_total=0;
61  int n_gvcf_calls_total=0, n_gvcf_bp_total=0;
62  int n_calls_lost=0, n_bp_lost=0;
63  int n_gvcf_calls_lost=0, n_gvcf_bp_lost=0;
64  int n_no_calls_total = 0;
65 
66  bool is_finalized = false;
67 };
68 
69 using LossTrackers = std::vector<LossTracker>;
70 
71 } // namespace GLnexus
72 #endif
Definition: genotyper.h:27
Genomic range (chromosome id, begin coordinate, end coordinate)
Definition: types.h:99
Function status (return) codes.
Definition: types.h:30
Definition: types.h:236
Definition: genotyper.h:22