1 #ifndef GLNEXUS_TYPES_H
2 #define GLNEXUS_TYPES_H
16 #include "yaml-cpp/yaml.h"
18 #define UNPAIR(p,nm1,nm2) auto nm1 = (p).first; auto nm2 = (p).second;
19 template<
typename T>
inline void ignore_retval(T) {}
23 enum class StatusCode { OK, FAILURE, INVALID, NOT_FOUND, EXISTS, IO_ERROR, NOT_IMPLEMENTED };
33 std::unique_ptr<std::string> noun_;
40 Status(StatusCode code = StatusCode::OK,
const char* msg =
nullptr) noexcept
41 : code_(code), msg_(msg) {}
48 Status(StatusCode code,
const char* msg,
const std::string& noun)
49 : code_(code), msg_(msg) {
50 noun_ = std::make_unique<std::string>(noun);
53 bool ok() const noexcept {
return code_ == StatusCode::OK; }
54 bool bad() const noexcept {
return !ok(); }
55 operator StatusCode() const noexcept {
return code_; }
56 operator int() const noexcept {
return static_cast<int>(code_); }
58 static Status OK() noexcept {
return Status(StatusCode::OK); }
60 #define STATUS_SUGAR(nm,code) \
61 static Status nm(const char* msg = nullptr) noexcept { return Status(code, msg); } \
62 static Status nm(const char *msg, const std::string& noun) { return Status(code, msg, noun); }
64 STATUS_SUGAR(Failure,StatusCode::FAILURE)
65 STATUS_SUGAR(Invalid,StatusCode::INVALID)
66 STATUS_SUGAR(NotFound,StatusCode::NOT_FOUND)
67 STATUS_SUGAR(Exists,StatusCode::EXISTS)
68 STATUS_SUGAR(IOError,StatusCode::IO_ERROR)
69 STATUS_SUGAR(NotImplemented,StatusCode::NOT_IMPLEMENTED)
71 std::
string str()
const {
72 std::ostringstream ans;
74 case StatusCode::OK: ans <<
"OK";
break;
75 case StatusCode::INVALID: ans <<
"Invalid";
break;
76 case StatusCode::NOT_FOUND: ans <<
"NotFound";
break;
77 case StatusCode::EXISTS: ans <<
"Exists";
break;
78 case StatusCode::IO_ERROR: ans <<
"IOError";
break;
79 case StatusCode::NOT_IMPLEMENTED: ans <<
"NotImplemented";
break;
80 default: ans <<
"Failure";
85 ans <<
" (" << *noun_ <<
")";
93 #define S(st) s = st; if (s.bad()) return s;
96 bool is_dna(
const std::string&);
100 int rid=-1, beg=-1, end=-1;
102 range(
int rid_,
int beg_,
int end_) noexcept : rid(rid_), beg(beg_), end(end_) {
104 throw std::invalid_argument(
"invalid range (beginning > end)");
113 assert(bcf !=
nullptr);
116 end = bcf->pos+bcf->rlen;
119 range(
const std::shared_ptr<const bcf1_t>& bcf) noexcept :
range(bcf.get()) {}
121 size_t size() const noexcept {
return end-beg; }
123 bool operator==(
const range& r)
const noexcept {
return rid == r.rid && beg == r.beg && end == r.end; }
124 bool operator!=(
const range& r)
const noexcept {
return !(*
this == r); }
125 bool operator<(
const range& r)
const noexcept {
126 return rid < r.rid || (rid == r.rid && (beg < r.beg || (beg == r.beg && end < r.end)));
128 bool operator<=(
const range& r)
const noexcept {
return *
this < r || *
this == r; }
130 bool overlaps(
const range& r)
const noexcept {
return rid == r.rid && end > r.beg && beg < r.end; }
131 bool within(
const range& r)
const noexcept {
return rid == r.rid && beg >= r.beg && end <= r.end; }
132 bool contains(
const range& r)
const noexcept {
return r.within(*
this); }
134 std::unique_ptr<range> intersect(
const range& r)
const {
135 if (!overlaps(r))
return nullptr;
136 return std::make_unique<range>(rid, std::max(beg,r.beg), std::min(end,r.end));
139 std::string str(
const std::vector<std::pair<std::string,size_t> >& contigs)
const {
140 std::ostringstream os;
141 if (rid >= 0 && rid < (
int)contigs.size()) {
142 os << std::get<0>(contigs[rid]);
144 os << '<' << rid << '>
';
146 os << ':
' << (beg+1) << '-
' << end;
149 std::string str() const {
158 allele(const range& pos_, const std::string& dna_) : pos(pos_), dna(dna_) {
159 // Note; dna.size() may not equal pos.size(), for indel alleles
160 if (!is_dna(dna)) throw std::invalid_argument("allele(): invalid DNA " + dna);
164 bool operator==(const allele& rhs) const noexcept { return pos == rhs.pos && dna == rhs.dna; }
167 bool operator<(const allele& rhs) const noexcept { return pos < rhs.pos || (pos == rhs.pos && dna < rhs.dna); }
168 bool operator<=(const allele& rhs) const noexcept { return *this < rhs || *this == rhs; }
170 std::string str() const {
171 std::ostringstream os;
172 os << pos.str() << "(" << dna << ")";
177 struct discovered_allele_info {
179 float observation_count;
181 bool operator==(const discovered_allele_info& rhs) const noexcept { return is_ref == rhs.is_ref && observation_count == rhs.observation_count; }
183 std::string str() const {
184 std::ostringstream os;
185 os << "[ is_ref: " << std::boolalpha << is_ref << " observation count: " << std::setprecision(1) << observation_count << "]";
189 using discovered_alleles = std::map<allele,discovered_allele_info>;
190 Status merge_discovered_alleles(const discovered_alleles& src, discovered_alleles& dest);
191 Status yaml_of_discovered_alleles(const discovered_alleles&,
192 const std::vector<std::pair<std::string,size_t> >&,
194 Status discovered_alleles_of_yaml(const YAML::Node&,
195 const std::vector<std::pair<std::string,size_t> >&,
196 discovered_alleles&);
198 struct unified_site {
206 std::vector<std::string> alleles;
211 std::map<allele,int> unification;
213 std::vector<float> observation_count;
214 //std::vector<float> genotype_prior;
216 bool operator==(const unified_site& rhs) const noexcept {
217 return pos == rhs.pos && alleles == rhs.alleles && unification == rhs.unification
218 && observation_count == rhs.observation_count;
220 bool operator<(const unified_site& rhs) const noexcept{
221 if (pos != rhs.pos) return pos < rhs.pos;
222 if (alleles != rhs.alleles) return alleles < rhs.alleles;
223 if (unification != rhs.unification) return unification < rhs.unification;
224 return observation_count < rhs.observation_count;
227 unified_site(const range& pos_) noexcept : pos(pos_) {}
229 Status yaml(const std::vector<std::pair<std::string,size_t> >& contigs,
230 YAML::Emitter& out) const;
231 static Status of_yaml(const YAML::Node& yaml,
232 const std::vector<std::pair<std::string,size_t> >& contigs,
237 int n_calls_total=0, n_bp_total=0;
238 int n_gvcf_calls_total=0, n_gvcf_bp_total=0;
239 int n_no_calls_total = 0;
241 // captures loss of both vcf entries and gvcf entries
242 int n_calls_lost=0, n_bp_lost=0;
244 // loss specific to gvcf entries
245 int n_gvcf_calls_lost=0, n_gvcf_bp_lost=0;
247 // Merges another loss_stats and increment the count
248 // variables accordingly
249 void operator+=(const loss_stats& loss) {
250 n_calls_total += loss.n_calls_total;
251 n_bp_total += loss.n_bp_total;
253 n_gvcf_calls_total += loss.n_gvcf_calls_total;
254 n_gvcf_bp_total += loss.n_gvcf_bp_total;
256 n_calls_lost += loss.n_calls_lost;
257 n_bp_lost += loss.n_bp_lost;
259 n_gvcf_calls_lost += loss.n_gvcf_calls_lost;
260 n_gvcf_bp_lost += loss.n_gvcf_bp_lost;
262 n_no_calls_total += loss.n_no_calls_total;
265 std::string str() const noexcept {
266 std::ostringstream ans;
268 ans << n_no_calls_total << " no call(s).\n";
270 // stop here if no no calls
271 if (!n_no_calls_total)
274 ans << "This is made up of a loss of " << n_calls_lost << " original call(s) which cover " << n_bp_lost << " bp.\n";
275 ans << "The loss is " << std::setprecision(3) << prop_calls_lost() << "% of " << n_calls_total << " calls; or " << prop_bp_lost() << "% of " << n_bp_total << " bp processed from the original dataset(s).\n";
277 ans << "Looking at gvcf entries, there is a loss of " << n_gvcf_calls_lost << " call(s) which cover " << n_gvcf_bp_lost << " bp.\n";
278 ans << "The loss is " << prop_gvcf_calls_lost() << "% of " << n_gvcf_calls_total << " calls; or " << prop_gvcf_bp_lost() << "% of " << n_gvcf_bp_total << " bp of gvcf entries processed.\n";
284 // Returns proportion of calls lost as a percentage
285 float prop_calls_lost() const noexcept {
286 if (!n_calls_total) return 0;
287 return n_calls_lost / (float) n_calls_total * 100;
290 // Returns proportion of bp coverage lost as a percentage
291 float prop_bp_lost() const noexcept {
292 if (!n_bp_total) return 0;
293 return n_bp_lost / (float) n_bp_total * 100;
296 // Returns proportion of gvcf bp coverage lost as a percentage
297 float prop_gvcf_bp_lost() const noexcept {
298 if (!n_gvcf_bp_total) return 0;
299 return n_gvcf_bp_lost / (float) n_gvcf_bp_total * 100;
302 // Returns proportion of gvcf calls lost as a percentage
303 float prop_gvcf_calls_lost() const noexcept {
304 if (!n_gvcf_calls_total) return 0;
305 return n_gvcf_calls_lost / (float) n_gvcf_calls_total * 100;
309 // per-sample loss_stats
310 using consolidated_loss = std::map<std::string, loss_stats>;
311 Status merge_loss_stats(const consolidated_loss& src, consolidated_loss& dest);
313 // Statistics collected during range queries
314 struct StatsRangeQuery {
315 int64_t nBCFRecordsRead; // how many BCF records were read from the DB
316 int64_t nBCFRecordsInRange; // how many were in the requested range
321 nBCFRecordsInRange = 0;
325 StatsRangeQuery(const StatsRangeQuery &srq) {
326 nBCFRecordsRead = srq.nBCFRecordsRead;
327 nBCFRecordsInRange = srq.nBCFRecordsInRange;
331 StatsRangeQuery& operator+=(const StatsRangeQuery& srq) {
332 nBCFRecordsRead += srq.nBCFRecordsRead;
333 nBCFRecordsInRange += srq.nBCFRecordsInRange;
337 // return a human readable string
339 std::ostringstream os;
340 os << "Num BCF records read " << std::to_string(nBCFRecordsRead)
341 << " query hits " << std::to_string(nBCFRecordsInRange);
346 } //namespace GLnexus
Genomic range (chromosome id, begin coordinate, end coordinate)
Definition: types.h:99
Status(StatusCode code=StatusCode::OK, const char *msg=nullptr) noexcept
Default Status constructor.
Definition: types.h:40
Function status (return) codes.
Definition: types.h:30
range(const bcf1_t *bcf) noexcept
Definition: types.h:112
Status(StatusCode code, const char *msg, const std::string &noun)
Extended Status constructor.
Definition: types.h:48