StochHMM  v0.34
Flexible Hidden Markov Model C++ Library and Application
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
trellis.h
Go to the documentation of this file.
1 //
2 // trellis.h
3 // StochHMM
4 //
5 //Permission is hereby granted, free of charge, to any person obtaining a copy of
6 //this software and associated documentation files (the "Software"), to deal in
7 //the Software without restriction, including without limitation the rights to
8 //use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9 //the Software, and to permit persons to whom the Software is furnished to do so,
10 //subject to the following conditions:
11 //
12 //The above copyright notice and this permission notice shall be included in all
13 //copies or substantial portions of the Software.
14 //
15 //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 //IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17 //FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18 //COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19 //IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 //CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 
22 #ifndef __StochHMM__new_trellis__
23 #define __StochHMM__new_trellis__
24 
25 #include <iostream>
26 #include <vector>
27 #include <stdint.h>
28 #include <vector>
29 #include <string>
30 #include <fstream>
31 #include <bitset>
32 #include <map>
33 #include "stochTypes.h"
34 #include "sequences.h"
35 #include "hmm.h"
36 #include "traceback_path.h"
37 #include "stochMath.h"
38 #include <stdint.h>
39 #include <iomanip>
40 #include "stochTable.h"
41 #include "sparseArray.h"
42 
43 namespace StochHMM{
44 
45 
46 
47 
48 
49  typedef std::vector<std::vector<int16_t> > int_2D;
50  typedef std::vector<std::vector<float> > float_2D;
51  typedef std::vector<std::vector<double> > double_2D;
52  typedef std::vector<std::vector<long double> > long_double_2D;
53 
54  typedef std::vector<std::vector<std::vector<uint16_t> > > int_3D;
55  typedef std::vector<std::vector<std::vector<float> > > float_3D;
56  typedef std::vector<std::vector<std::vector<double> > > double_3D;
57  typedef std::vector<std::vector<std::vector<long double> > > long_double_3D;
58 // typedef std::vector<std::vector<std::vector<std::pair<int16_t,int16_t> >
59 
60  class nthScore{
61  public:
62  int16_t st_tb;
63  int16_t score_tb;
64  double score;
65  nthScore():st_tb(0),score_tb(0),score(-INFINITY){};
66  nthScore(int16_t st, int16_t tb, double sc):st_tb(st),score_tb(tb),score(sc){};
67 
68  };
69 
70 // struct nthTrace{
71 // int16_t st_tb;
72 // int16_t score_tb;
73 // nthTrace():st_tb(0),score_tb(0){};
74 // nthTrace(int16_t st, int16_t tb):st_tb(st),score_tb(tb){};
75 // };
76 
77  class nthTrace{
78  public:
79  std::map<int32_t,int32_t> tb;
80 
81  inline void assign(int16_t st, int16_t n_score, int16_t tb_ptr, int16_t tb_sc){
82  tb[(((int32_t) st) << 16 | n_score)] = (((int32_t)tb_ptr) << 16) | tb_sc;
83  }
84 
85  inline void get(int16_t& st, int16_t& n_score){
86  int32_t key_val = (((int32_t) st) << 16 | n_score);
87  if (tb.count(key_val)){
88  st = -1;
89  }
90  int32_t val = tb[key_val];
91  st = (val >> 16) & 0xFFFF;
92  n_score = val & 0xFFFF;
93  }
94  };
95 
96 
97  /*! \class Trellis
98  * \brief Implements the HMM scoring trellis and algorithms
99  *
100  *
101  *
102  */
103  class trellis{
104  public:
105  trellis();
106  trellis(model* h , sequences* sqs);
107  ~trellis();
108  void reset();
109  inline model* getModel(){return hmm;}
110  inline sequences* getSeq(){return seqs;}
111 
112  /*----------- Decoding Algorithms ------------*/
113 
114  //TODO: Fix these functions so that they evaluate the model and choose a
115  // a default algorithm based on the whether the model defines explicit duration
116  // states
117 
118  void viterbi();
119  void viterbi(model* h, sequences* sqs);
120 
121  void forward();
122  void forward(model* h, sequences* sqs);
123 
124 // void forward_viterbi();
125 // void forward_viterbi(model* h, sequences* sqs);
126 
127  void backward();
128  void backward(model* h, sequences* sqs);
129 
130  void posterior();
131  void posterior(model* h, sequences* sqs);
132 
133  void stochastic_viterbi();
134  void stochastic_viterbi(model* h, sequences* sqs);
135 
136  void stochastic_forward();
137  void stochastic_forward(model* h, sequences* sqs);
138 
139  void nth_viterbi();
140  void nth_viterbi(model* h, sequences* sqs);
141 
142  void baum_welch();
143 
144 
145  /*----------- Naive Model Decoding Algorithms -------------*/
146  /* These algorithms are the simply coded algorithms with little to no
147  optimizations.
148  */
149 
150  void naive_forward();
151  void naive_forward(model* h, sequences* sqs);
152 
153  void naive_backward();
154  void naive_backward(model* h, sequences* sqs);
155 
156  void naive_viterbi();
157  void naive_viterbi(model* h, sequences* sqs);
158 
159  void naive_baum_welch();
160  void naive_baum_welch(model* h, sequences* sqs);
161 
164 
167 
168  void naive_nth_viterbi(size_t n);
169  void naive_nth_viterbi(model* h, sequences* sqs, size_t n);
170 
171  /*----------- Simple Model Decoding Algorithms ------------*/
172  /* These algorithms are for use with models that do not define explicit
173  duration states or external functions. Therefore, probabilities can
174  be easily computed or referenced.
175 
176  */
177 
178  void simple_viterbi();
179  void simple_viterbi(model* h, sequences* sqs);
180 
181 // void simple_forward_viterbi();
182 // void simple_forward_viterbi(model* h, sequences* sqs);
183 
184  void simple_forward();
185  void simple_forward(model* h, sequences* sqs);
186 
187  void simple_backward();
188  void simple_backward(model* h, sequences* sqs);
189 
190  void simple_posterior();
191  void simple_posterior(model* h, sequences* sqs);
192 
195 // void simple_alt_stochastic_viterbi(model* h, sequences* sqs);
197 
200 
201  void simple_baum_welch();
202  void simple_baum_welch(model* h, sequences* sqs);
203 
204  void simple_nth_viterbi(size_t n);
205  void simple_nth_viterbi(model* h, sequences* sqs, size_t n);
206 
207 
208  /*----------- Fast Complex Model Decoding Algorithms ----------*/
209  /* These algorithms are for use with models that define external functions
210  or explicit duration states.
211 
212  These algorithms store the associated emissions and transitions
213  probabilities that were calculated during viterbi algorith. For later
214  use in the forward/backward algorithms.
215 
216  The values are stored in dense arrays, so the memory is accessed faster
217  at the cost of amount of memory required.
218  */
219 
220  void fast_complex_viterbi();
221  void fast_complex_viterbi(model* h, sequences* sqs);
222 //
223 // void fast_complex_forward_viterbi();
224 // void fast_complex_forward_viterbi(model* h, sequences* sqs);
225 
226  void fast_complex_backward();
227  void fast_complex_backward(model* h, sequences* sqs);
228 
231 
234 
236  void fast_complex_baum_welch(model* h, sequences* sqs);
237 
238 
239  /*----------- Sparse Complex Model Decoding Algorithms --------*/
240  /* These algorithms store the associated emissions and transitions
241  probabilities that were calculated during viterbi algorithm. For later
242  use in the forward/backward algorithms.
243 
244  The values are stored in sparse tables (hash map), so access to values
245  is slower but with lower amount of memory required.
246  */
247 
248  void sparse_complex_viterbi();
249  void sparse_complex_viterbi(model* h, sequences* sqs);
250 
251 // void sparse_complex_foward_viterbi();
252 // void sparse_complex_foward_viterbi(model* h, sequences* sqs);
253 
255  void sparse_complex_backward(model* h, sequences* sqs);
256 
259 
262 
263 
264  /*--------- Standard Traceback Algorithms --------*/
265 
266  void traceback(traceback_path& path);
267  void traceback(traceback_path& path ,size_t position, size_t state);
268  void traceback_nth(traceback_path& path, size_t n);
270 
272  void traceback_stoch_posterior(multiTraceback& paths , size_t reps);
273 
274 
275 
277 
279  void stochastic_traceback(multiTraceback&,size_t);
280 
281 
282  inline bool store(){return store_values;}
283  inline void store(bool val){store_values=val; return;}
284 
285  void print();
286  std::string stringify();
287  void export_trellis(std::ifstream&);
288  void export_trellis(std::string& file);
289  inline model* get_model(){return hmm;}
290 
291  //Model Baum-Welch Updating
292  void update_transitions();
293  void update_emissions();
294 
295 
296  /*----------- Accessors ---------------*/
301 
302 
306 
309 
310 
311  private:
312  double getEndingTransition(size_t);
313  double getTransition(state* st, size_t trans_to_state, size_t sequencePosition);
314  size_t get_explicit_duration_length(transition* trans, size_t sequencePosition,size_t state_iter, size_t to_state);
315  double transitionFuncTraceback(state* st, size_t position, transitionFuncParam* func);
316 
317 
318  model* hmm; //HMM model
319  sequences* seqs; //Digitized Sequences
320  size_t nth_size; //Size of N to calculate;
321 
322 
323  size_t state_size; //Number of States
324  size_t seq_size; //Length of Sequence
325 
327 
330 
331  //Traceback Tables
332  int_2D* traceback_table; //Simple traceback table
333 // int_3D* nth_traceback_table;//Nth-Viterbi traceback table
335 // alt_stochTable* alt_stochastic_table;
337 
338  //Score Tables
339  float_2D* viterbi_score; //Storing Viterbi scores
340  float_2D* forward_score; //Storing Forward scores
341  float_2D* backward_score; //Storing Backward scores
342  double_2D* posterior_score; //Storing Posterior scores
343 
349  std::vector<std::vector<std::vector<nthScore >* > >* naive_nth_scores;
350 
351 
352  std::vector<nthTrace>* nth_traceback_table;
353 
354  //Ending Cells
359  std::vector<nthScore>* ending_nth_viterbi;
360 
361 // std::vector<tb_score>* ending_stoch_tb;
362 // std::vector<tb_score>* ending_nth_viterbi;
363 
364  //Cells used for scoring each cell
365  std::vector<double>* scoring_current;
366  std::vector<double>* scoring_previous;
367  std::vector<double>* alt_scoring_current;
368  std::vector<double>* alt_scoring_previous;
369 
370  std::vector<size_t>* explicit_duration_current;
371  std::vector<size_t>* explicit_duration_previous;
372  std::vector<size_t>* swap_ptr_duration;
373 
374  std::vector<double>* swap_ptr;
375 
376  std::vector<std::vector<double>* > complex_emissions;
377  std::vector<std::vector<std::map<uint16_t,double>* >* >* complex_transitions;
378 
379  std::vector<std::vector<nthScore> >* nth_scoring_current;
380  std::vector<std::vector<nthScore> >* nth_scoring_previous;
381  std::vector<std::vector<nthScore> >* nth_swap_ptr;
382  };
383 
384  void sort_scores(std::vector<nthScore>& nth_scores);
385  bool _vec_sort(const nthScore& i, const nthScore& j);
386 
387 }
388 
389 #endif /* defined(__StochHMM__new_trellis__) */