mapgd  0.4
A program for the Maximum-likelihood analysis of population genomic data.
 All Data Structures Functions Variables Friends Groups Pages
vcf_file.h
1 /* A VCF file */
2 
3 #ifndef _VCF_FILE_H_
4 #define _VCF_FILE_H_
5 
6 #include <iostream>
7 #include <cfloat>
8 #include <iomanip>
9 #include <vector>
10 
11 #ifndef NOHTS
12 #include <htslib/hts.h>
13 #include <htslib/vcf.h>
14 
15 #ifndef NOLZ4
16 #include "state.h"
17 #endif
18 
19 #include "external-file.h"
20 #include "external-data.h"
21 
22 #include "typedef.h"
23 #include "datatypes.h"
24 #include "raw.h"
25 #include "stream_tools.h"
26 
28 
31 class Info { // Holder for the Key enumeration of VCF INFO-field identifiers; it declares nothing else.
32 public:
33  enum Key { // Unscoped enum, so enumerators are numbered consecutively from AA == 0. Descriptions below come from the same-named Vcf_data members; assumes the keys share those meanings.
34  AA, // Ancestral allele.
35  BQ, // RMS base quality at this position.
36  CIGAR, // Cigar string describing how to align an alternate allele to the reference allele.
37  DB, // presumably dbSNP membership, as in the VCF specification -- TODO confirm
38  DP, // Combined depth across samples.
39  END, // End position of the variant described in this record.
40  H2, // Membership in hapmap2.
41  H3, // presumably membership in hapmap3 (no matching Vcf_data member is visible) -- TODO confirm
42  MQ, // RMS mapping quality.
43  MQ0, // Number of MAPQ == 0 reads covering this record.
44  NS, // Number of samples with data.
45  SB, // Strand bias at this position.
46  SOMATIC, // Record is a somatic mutation (cancer genomics).
47  VALIDATED, // Validated by follow-up experiment.
48  G // NOTE(review): meaning is not evident from this header -- confirm
49  };
50 
51 };
52 
53 class Vcf_data : public External_data { // VCF record data passed to Vcf_file::read/write; External_data is the interface used to read/write data from outside of mapgd.
54 private:
55  // Reference notes only: htslib record functions. Nothing is declared in this private section.
56 // bcf_init
57 // bcf_destroy
58 // bcf_read(htsFile *fp, const bcf_hdr_t *h, bcf1_t *v)
59 // bcf_write(htsFile *fp, const bcf_hdr_t *h, bcf1_t *v)
60 
62 
63 public:
64  Vcf_data (); // Default constructor; defined in vcf_file.cc.
65 
66  //These should all be private, make Vcf_file a friend?
67  bcf1_t *record_; // htslib record handle. NOTE(review): ownership/lifetime of this raw pointer is not visible in this header.
68  bcf_hdr_t *header_; // htslib header handle. NOTE(review): same ownership question as record_.
69  size_t sample_size_; // presumably the number of individuals sampled -- TODO confirm against vcf_file.cc
70 
71  void set_header(const File_index &, const std::vector <std::string> &); // Takes a File_index and a list of strings (presumably sample names -- confirm).
72 
73  void put (const Data *, ...); // Generic variadic write entry point; the Data interface requires it to be defined in the child class.
74  void get (Data *, ...) const; // Generic variadic counterpart of put.
75 
76  void put (const File_index &, const Allele &, const Population &); // Typed put taking allele summary statistics and population genotypes.
77  id1_t get (const File_index &, Population &) const; // Typed get that fills a Population; meaning of the returned id1_t is not visible here.
78 
79 #ifndef NOLZ4
80  id1_t get (State &) const; // Only declared when NOLZ4 is not defined (State comes from state.h).
81 #endif
82  std::vector<std::string> get_sample_names (void) const;
83  File_index get_index (void) const;
84 
85  //The mandatory fields.
86  std::string id;
87  Base ref; // Base converts human-readable bases to bit flags.
88  std::vector <std::string> alt;
89  float_t qual;
90  bool filter;
91  std::vector <std::string> info;
92 
95  // Allele count in genotypes, for each ALT allele, in the same order as listed.
97  std::vector <count_t> AC;
98  // Allele frequency for each ALT allele in the same order as listed.
100  // use this when estimated from primary data, not called genotypes
101  std::vector <float_t> AF;
102  // Total number of alleles in called genotypes.
104  count_t AN;
105  // RMS base quality at this position.
107  float_t BQ;
108  // Cigar string describing how to align an alternate allele to the reference allele (member currently commented out).
110  //cigar CIGAR;
111  // NOTE(review): the generated docs show the CIGAR description for DB; presumably DB is dbSNP membership -- TODO confirm
113  bool DB;
114  // Combined depth across samples, e.g. DP=154.
116  count_t DP;
117  // End position of the variant described in this record (esp. for CNVs).
119  id1_t END;
120  // Membership in hapmap2.
122  bool H2;
123  // RMS mapping quality, e.g. MQ=52.
125  float_t MQ;
126  // Number of MAPQ == 0 reads covering this record.
128  count_t MQ0;
129  // Number of samples with data.
131  count_t NS;
132  // Strand bias at this position.
134  float_t SB;
135  // Indicates that the record is a somatic mutation, for cancer genomics.
137  bool SOMATIC;
138  // Validated by follow-up experiment.
140  bool VALIDATED;
141 };
142 
143 
144 class Vcf_file : public External_file <Vcf_data> { // File wrapper for Vcf_data; External_file is the interface for reading and writing data specified outside of mapgd.
145 private:
146  using Base_file::in_; // Input stream pointer: all data is read from it.
147  using Base_file::out_; // Output stream pointer: all data is written to it.
148  using Base_file::open_; // Flag indicating whether the stream opened successfully.
150  htsFile *file_; // htslib file handle. NOTE(review): presumably set by open() and released by close() -- confirm in vcf_file.cc.
151 public:
152  void open(const std::ios_base::openmode &); // Open with a mode only; the source/target used is not visible in this header.
153  void open(const char *, const std::ios_base::openmode &); // Open taking a C string (presumably a file name -- confirm) and a mode.
154  void close(void);
155  void read (Vcf_data &); // Takes the record by non-const reference, so it can be filled in place.
156  void write (const Vcf_data &); // Takes the record by const reference.
157  void write_header (const Vcf_data &); // Takes the Vcf_data whose header is to be written (presumably its header_ -- confirm).
158  Vcf_data read_header(); // Returns a Vcf_data by value.
159 };
160 
161 /*
162 typedef struct {
163  int32_t n[3]; // n:the size of the dictionary block in use, (allocated size, m, is below to preserve ABI)
164  bcf_idpair_t *id[3];
165  void *dict[3]; // ID dictionary, contig dict and sample dict
166  char **samples; // Presumably sample names.
167  bcf_hrec_t **hrec; // Header records (presumably; see htslib vcf.h).
168  int nhrec, dirty; //
169  int ntransl, *transl[2]; // for bcf_translate()
170  int nsamples_ori; // for bcf_hdr_set_samples()
171  uint8_t *keep_samples;
172  kstring_t mem;
173  int32_t m[3]; // m: allocated size of the dictionary block in use (see n above)
174 } bcf_hdr_t;
175 */
176 
177 #endif
178 #endif
A class that converts human-readable bases to bit flags.
Definition: base.h:16
Definition: vcf_file.h:144
bool VALIDATED
Validated by follow-up experiment.
Definition: vcf_file.h:140
Population genotypes.
Definition: population.h:13
Vcf_data()
number of individuals sampled
Definition: vcf_file.cc:49
float_t BQ
RMS base quality at this position.
Definition: vcf_file.h:107
bool SOMATIC
Indicates that the record is a somatic mutation, for cancer genomics.
Definition: vcf_file.h:137
void open_no_extention(const char *, const std::ios::openmode &)
The function that opens an indexed_file (if file).
Definition: map_file.cc:59
An interface that transforms pairs of name and position keys into record numbers. ...
Definition: file_index.h:23
count_t AN
Total number of alleles in called genotypes.
Definition: vcf_file.h:104
An interface for reading and writing data specified outside of mapgd.
Definition: external-file.h:36
count_t NS
Number of samples with data.
Definition: vcf_file.h:131
float_t MQ
RMS mapping quality, e.g. MQ=52.
Definition: vcf_file.h:125
std::istream * in_
All data is read from in.
Definition: map_file.h:85
count_t MQ0
Number of MAPQ == 0 reads covering this record.
Definition: vcf_file.h:128
Definition: state.h:31
An interface used to read/write data from outside of mapgd.
Definition: external-data.h:17
std::vector< float_t > AF
Allele frequency for each ALT allele in the same order as listed:
Definition: vcf_file.h:101
std::ostream * out_
All data is written to out.
Definition: map_file.h:86
id1_t END
End position of the variant described in this record (esp. for CNVs)
Definition: vcf_file.h:119
count_t DP
Combined depth across samples, e.g. DP=154.
Definition: vcf_file.h:116
std::vector< count_t > AC
Allele count in genotypes, for each ALT allele, in the same order as listed.
Definition: vcf_file.h:97
bool DB
Cigar string describing how to align an alternate allele to the reference allele. ...
Definition: vcf_file.h:113
bool H2
Membership in hapmap2.
Definition: vcf_file.h:122
A key is (i.e. what the user called it).
Definition: key.h:17
Definition: vcf_file.h:53
float_t SB
Strand bias at this position.
Definition: vcf_file.h:134
bool open_
indicates whether the iostream opened successfully
Definition: map_file.h:60
A class which can be written as flat text file or into an SQL database.
Definition: data.h:34
void put(const Data *,...)
The write function must be defined in the child class.
Definition: vcf_file.cc:150
Summary statistics from the allele command.
Definition: allele.h:19
Base AA
Ancestral allele.
Definition: vcf_file.h:94
Because of the god awful mess that are vcf header lines.
Definition: vcf_file.h:31