GLnexus
Scalable datastore for population genome sequencing, with on-demand joint genotyping
 All Classes Functions Variables
data.h
1 #ifndef GLNEXUS_DATA_H
2 #define GLNEXUS_DATA_H
3 
4 #include <string>
5 #include <vector>
6 #include <map>
7 #include <memory>
8 #include <set>
9 #include <vcf.h>
10 #include "types.h"
11 
12 namespace GLnexus {
13 
/// Abstract interface for querying database metadata.
///
/// Every operation is pure virtual, returns a Status code, and delivers its
/// result through the `ans` reference parameter rather than the return value.
23 class Metadata {
24 
25 public:
26  virtual ~Metadata() = default;
27 
    /// Fills `ans` with a list of (string, size_t) pairs, one per contig.
    /// NOTE(review): presumably (contig name, contig length) -- confirm
    /// against the implementations.
31  virtual Status contigs(std::vector<std::pair<std::string,size_t> >& ans) const = 0;
32 
    /// Looks up the sample set named `sampleset` and points `ans` at an
    /// immutable, shared set of strings.
    /// NOTE(review): presumably the names of the samples in that set; behavior
    /// for an unknown sample set is not visible here.
38  virtual Status sampleset_samples(const std::string& sampleset,
39  std::shared_ptr<const std::set<std::string> >& ans) const = 0;
40 
    /// Given `sample`, writes a string into `ans`.
    /// NOTE(review): presumably the name of the data set containing the
    /// sample -- confirm.
44  virtual Status sample_dataset(const std::string& sample, std::string& ans) const = 0;
45 
    /// Writes a string into `ans`.
    /// NOTE(review): presumably the name of a sample set covering all samples.
    /// This is the only non-const operation of the interface, so
    /// implementations may modify state when serving it -- confirm.
52  virtual Status all_samples_sampleset(std::string& ans) = 0;
53 
    /// Return the count of all samples in the database.
55  virtual Status sample_count(size_t& ans) const = 0;
56 };
57 
58 
/// Metadata implementation constructed from another Metadata instance.
///
/// The constructor is private and the copy constructor is deleted; instances
/// are obtained only through Start(). All state is kept in the
/// forward-declared `body` struct owned through `body_`.
/// NOTE(review): the name suggests answers are cached in memory after being
/// read from `inner` -- confirm in data.cc.
61 class MetadataCache : public Metadata {
62  struct body;
63  std::unique_ptr<body> body_;
64 
65  MetadataCache();
66  MetadataCache(const MetadataCache&) = delete;
67 
68 public:
    /// Factory: on success `ptr` is expected to own a new MetadataCache built
    /// from `inner`.
    /// NOTE(review): whether `inner` must outlive the cache is not visible
    /// from this header -- confirm in data.cc.
69  static Status Start(Metadata& inner, std::unique_ptr<MetadataCache>& ptr);
70  virtual ~MetadataCache();
71 
    // Metadata interface; the definitions live in data.cc.
72  Status contigs(std::vector<std::pair<std::string,size_t> >& ans) const override;
73  Status sampleset_samples(const std::string& sampleset,
74  std::shared_ptr<const std::set<std::string> >& ans) const override;
75  Status sample_dataset(const std::string& sample, std::string& ans) const override;
76  Status all_samples_sampleset(std::string& ans) override;
77  Status sample_count(size_t& ans) const override;
78 
    /// Convenience overload: returns the contig list by const reference
    /// instead of through a Status plus out-parameter.
79  const std::vector<std::pair<std::string,size_t> >& contigs() const;
    /// For the sample set named `sampleset`, outputs two immutable shared
    /// string sets through `samples` and `datasets`.
    /// NOTE(review): presumably the sample names and the names of the data
    /// sets containing them -- confirm.
80  Status sampleset_datasets(const std::string& sampleset,
81  std::shared_ptr<const std::set<std::string> >& samples,
82  std::shared_ptr<const std::set<std::string>>& datasets) const;
83 };
84 
/// Iterate over BCF records within some range.
///
/// Abstract interface: concrete iterators implement next().
86 class RangeBCFIterator {
87 public:
88  virtual ~RangeBCFIterator() = default;
89 
    /// Advances the iterator. The outputs are a data set name (`dataset`),
    /// a shared BCF header (`hdr`) and a list of shared BCF records
    /// (`records`).
    /// NOTE(review): presumably each call yields the records of one data set
    /// and a distinguished Status signals exhaustion -- confirm against the
    /// implementations.
92  virtual Status next(std::string& dataset, std::shared_ptr<const bcf_hdr_t>& hdr,
93  std::vector<std::shared_ptr<bcf1_t>>& records) = 0;
94 };
95 
/// Interface for reading BCF headers and records.
///
/// dataset_header() and dataset_range() are pure virtual and must be provided
/// by implementations. dataset_range_and_header() and sampleset_range() are
/// virtual with definitions in data.cc, so overriding them is optional.
98 class BCFData {
99 public:
100  virtual ~BCFData() = default;
101 
     /// Retrieve the BCF header for a data set.
103  virtual Status dataset_header(const std::string& dataset,
104  std::shared_ptr<const bcf_hdr_t>& hdr) const = 0;
105 
     /// Outputs BCF records of `dataset` for the genomic range `pos`
     /// (chromosome id, begin coordinate, end coordinate) into `records`.
     /// NOTE(review): presumably `hdr` must be the header of `dataset` as
     /// returned by dataset_header(); the exact overlap rule for `pos` is not
     /// visible here -- confirm.
114  virtual Status dataset_range(const std::string& dataset, const bcf_hdr_t* hdr, const range& pos,
115  std::vector<std::shared_ptr<bcf1_t> >& records) = 0;
116 
     /// Outputs both the header (`hdr`) and the records (`records`) of
     /// `dataset` for the range `pos`.
     /// NOTE(review): the definition in data.cc presumably combines
     /// dataset_header() and dataset_range() -- confirm.
119  virtual Status dataset_range_and_header(const std::string& dataset, const range& pos,
120  std::shared_ptr<const bcf_hdr_t>& hdr,
121  std::vector<std::shared_ptr<bcf1_t> >& records);
122 
125  //
     /// For the sample set named `sampleset` and the range `pos`, outputs the
     /// `samples` and `datasets` string sets together with a list of
     /// RangeBCFIterator objects in `iterators`.
     /// NOTE(review): how the work is divided among the returned iterators is
     /// decided in data.cc and is not visible from this header.
132  virtual Status sampleset_range(const MetadataCache& metadata, const std::string& sampleset,
133  const range& pos,
134  std::shared_ptr<const std::set<std::string>>& samples,
135  std::shared_ptr<const std::set<std::string>>& datasets,
136  std::vector<std::unique_ptr<RangeBCFIterator>>& iterators);
137 };
138 
139 }
140 
141 #endif
virtual Status sampleset_range(const MetadataCache &metadata, const std::string &sampleset, const range &pos, std::shared_ptr< const std::set< std::string >> &samples, std::shared_ptr< const std::set< std::string >> &datasets, std::vector< std::unique_ptr< RangeBCFIterator >> &iterators)
Definition: data.cc:189
Genomic range (chromosome id, begin coordinate, end coordinate)
Definition: types.h:99
Status sample_count(size_t &ans) const override
Return the count of all samples in the database.
Definition: data.cc:82
Definition: data.h:23
Status sampleset_samples(const std::string &sampleset, std::shared_ptr< const std::set< std::string > > &ans) const override
Definition: data.cc:48
Definition: data.h:98
Status sample_dataset(const std::string &sample, std::string &ans) const override
Definition: data.cc:63
Status all_samples_sampleset(std::string &ans) override
Definition: data.cc:77
virtual Status dataset_range_and_header(const std::string &dataset, const range &pos, std::shared_ptr< const bcf_hdr_t > &hdr, std::vector< std::shared_ptr< bcf1_t > > &records)
Definition: data.cc:115
Function status (return) codes.
Definition: types.h:30
virtual Status sample_count(size_t &ans) const =0
Return the count of all samples in the database.
Definition: data.h:61
virtual Status dataset_range(const std::string &dataset, const bcf_hdr_t *hdr, const range &pos, std::vector< std::shared_ptr< bcf1_t > > &records)=0
virtual Status sample_dataset(const std::string &sample, std::string &ans) const =0
virtual Status contigs(std::vector< std::pair< std::string, size_t > > &ans) const =0
virtual Status sampleset_samples(const std::string &sampleset, std::shared_ptr< const std::set< std::string > > &ans) const =0
Status contigs(std::vector< std::pair< std::string, size_t > > &ans) const override
Definition: data.cc:22
virtual Status next(std::string &dataset, std::shared_ptr< const bcf_hdr_t > &hdr, std::vector< std::shared_ptr< bcf1_t >> &records)=0
virtual Status all_samples_sampleset(std::string &ans)=0
virtual Status dataset_header(const std::string &dataset, std::shared_ptr< const bcf_hdr_t > &hdr) const =0
Retrieve the BCF header for a data set.
Iterate over BCF records within some range.
Definition: data.h:86