StochHMM  v0.34
Flexible Hidden Markov Model C++ Library and Application
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
nth_best.cpp
Go to the documentation of this file.
1 //
2 // nth_best.cpp
3 // StochHMM
4 //
5 // Created by Paul Lott on 2/6/13.
6 // Copyright (c) 2013 Korf Lab, Genome Center, UC Davis, Davis, CA. All rights reserved.
7 //
8 
9 #include "trellis.h"
10 
11 namespace StochHMM{
12 
13 
14 
15 
16  //!Sort the viterbi scores in the nth trellis cells
17  void sort_scores(std::vector<nthScore>& nth_scores){
18  sort(nth_scores.begin(), nth_scores.end(), _vec_sort );
19  return;
20  }
21 
22  //Sort vector of pairs using the first value in the pair
23  bool _vec_sort(const nthScore& i, const nthScore& j){
24  return (i.score > j.score);
25  }
26 
27 
28  //TODO: Currently adapt naive algorithm to optimized algorithm
29 
30  //As written it's slower than the naive and may use as much memory....
32  if (!hmm->isBasic()){
33  std::cerr << "Model isn't a simple/basic HMM. Use complex algorithms\n";
34  return;
35  }
36 
37  //Initialize the traceback table
38  if (nth_traceback_table != NULL){delete nth_traceback_table;}
39  if (ending_nth_viterbi != NULL){delete ending_nth_viterbi;}
40  if (nth_scoring_previous!= NULL){delete nth_scoring_previous;}
41  if (nth_scoring_current != NULL){delete nth_scoring_current;}
42 
43  nth_scoring_previous = new (std::nothrow) std::vector<std::vector<nthScore> > (state_size);
44  nth_scoring_current = new (std::nothrow) std::vector<std::vector<nthScore> > (state_size);
45  nth_traceback_table = new (std::nothrow) std::vector<nthTrace>(seq_size);
46  ending_nth_viterbi = new(std::nothrow) std::vector<nthScore>;
47 
48  if (nth_scoring_previous == NULL || nth_scoring_current == NULL || nth_traceback_table == NULL){
49  std::cerr << "Can't allocate Viterbi score and traceback table. OUT OF MEMORY" << std::endl;
50  exit(2);
51  }
52 
53 
54  std::bitset<STATE_MAX> next_states;
55  std::bitset<STATE_MAX> current_states;
56 
57  double viterbi_temp(-INFINITY);
58  double emission(-INFINITY);
59  double trans(-INFINITY);
60  bool exDef_position(false);
61 
62 
63  state* init = hmm->getInitial();
64 
65  std::bitset<STATE_MAX>* initial_to = hmm->getInitialTo();
66  std::bitset<STATE_MAX>* from_trans(NULL);
67 
68  //Calculate Viterbi from transitions from INIT (initial) state
69  for(size_t st = 0; st < state_size; ++st){
70  if ((*initial_to)[st]){ //if the bitset is set (meaning there is a transition to this state), calculate the viterbi
71 
72  viterbi_temp = (*hmm)[st]->get_emission_prob(*seqs,0) + getTransition(init, st, 0);
73 
74  if (viterbi_temp > -INFINITY){
75  (*nth_scoring_current)[st].push_back(nthScore(-1,-1,viterbi_temp));
76 
77  next_states |= (*(*hmm)[st]->getTo());
78  }
79  }
80  }
81 
82  //Each position in the sequence
83  for(size_t position = 1; position < seq_size ; ++position ){
84 
85  //Swap current and previous viterbi scores
86  nth_scoring_previous->assign(state_size,std::vector<nthScore>());
90 
91  //Swap current_states and next states sets
92 
93  current_states.reset();
94  current_states |= next_states;
95  next_states.reset();
96 
97  if (exDef_defined){
98  exDef_position = seqs->exDefDefined(position);
99  }
100 
101  //Current states
102  for (size_t st_current = 0; st_current < state_size; ++st_current){ //Current state that emits value
103 
104  //Check to see if current state is valid
105  if (!current_states[st_current]){
106  continue;
107  }
108 
109  //Get emission of current state
110  emission = (*hmm)[st_current]->get_emission_prob(*seqs, position);
111 
112 
113  if (exDef_defined && exDef_position){
114  emission += seqs->getWeight(position, st_current);
115  }
116 
117  if (emission == -INFINITY){
118  continue;
119  }
120 
121  //Get list of states that are valid previous states
122  from_trans = (*hmm)[st_current]->getFrom();
123 
124  for (size_t st_previous = 0; st_previous < state_size ; ++st_previous) { //for previous states
125  if (!(*from_trans)[st_previous]){
126  continue;
127  }
128 
129  //Check that previous state has transition to current state
130  //and that the previous viterbi score is not -INFINITY
131  if (!(*nth_scoring_previous)[st_previous].empty()){
132 
133  trans = getTransition((*hmm)[st_previous], st_current, position);
134  if (trans== -INFINITY){
135  continue;
136  }
137 
138  for(size_t vit_previous=0; vit_previous < (*nth_scoring_previous)[st_previous].size(); vit_previous++){
139  viterbi_temp = emission + trans + (*nth_scoring_previous)[st_previous][vit_previous].score;
140  (*nth_scoring_current)[st_current].push_back(nthScore(st_previous,vit_previous,viterbi_temp));
141  }
142 
143  next_states |= (*(*hmm)[st_current]->getTo());
144  }
145  }
146 
147  sort_scores((*nth_scoring_current)[st_current]);
148  if ((*nth_scoring_current)[st_current].size() > nth_size){
149  (*nth_scoring_current)[st_current].resize(nth_size);
150  }
151 
152  for (size_t i=0; i < (*nth_scoring_current)[st_current].size(); i++){
153  (*nth_traceback_table)[position].assign(st_current, i , (*nth_scoring_current)[st_current][i].st_tb, (*nth_scoring_current)[st_current][i].score_tb);
154  }
155  }
156  }
157 
158  //TODO: Calculate ending and set the final viterbi and traceback pointer
159  //Swap current and previous viterbi scores
160  nth_scoring_previous->assign(state_size,std::vector<nthScore>());
164 
165 
166  //Calculate ending viterbi score and traceback from END state
167  for(size_t st_previous = 0; st_previous < state_size ;++st_previous){
168  if (!(*nth_scoring_previous)[st_previous].empty()){
169 
170  trans = (*hmm)[st_previous]->getEndTrans();
171 
172  if (trans == -INFINITY){
173  continue;
174  }
175 
176  for(size_t vit_previous=0; vit_previous < (*nth_scoring_previous)[st_previous].size(); vit_previous++){
177  viterbi_temp = trans + (*nth_scoring_previous)[st_previous][vit_previous].score;
178  ending_nth_viterbi->push_back(nthScore(st_previous,vit_previous,viterbi_temp));
179  }
180  }
181  }
182 
184  if (ending_nth_viterbi->size() > nth_size){
185  ending_nth_viterbi->resize(nth_size);
186  }
187 
188  delete nth_scoring_previous;
189  delete nth_scoring_current;
190  nth_scoring_previous = NULL;
191  nth_scoring_current = NULL;
192  }
193 
195  nth_size = n;
196 
197  if (naive_nth_scores != NULL){delete naive_nth_scores; naive_nth_scores=NULL;}
198  if (ending_nth_viterbi != NULL){delete ending_nth_viterbi; ending_nth_viterbi = NULL;}
199 
200  naive_nth_scores = new (std::nothrow) std::vector<std::vector<std::vector<nthScore >* > >(seq_size, std::vector<std::vector<nthScore>* >(state_size,NULL));
201 
202  std::vector<nthScore>* temp_scores;
203 
204  double emission(-INFINITY);
205  double viterbi_temp(-INFINITY);
206  double trans(-INFINITY);
207  bool exDef_position(false);
208 
209  state* init = hmm->getInitial();
210 
211  //Calculate from Initial states
212  for(size_t st = 0; st < state_size; ++st){
213  viterbi_temp = (*hmm)[st]->get_emission_prob(*seqs,0) + getTransition(init, st, 0);
214 
215  if (viterbi_temp == -INFINITY){
216  continue;
217  }
218 
219  temp_scores = new (std::nothrow) std::vector<nthScore>;
220  temp_scores->push_back(nthScore(-1,-1,viterbi_temp));
221  //nth_table[0][st]=temp_scores;
222  (*naive_nth_scores)[0][st]=temp_scores;
223  }
224 
225  //Calculate Forward for all states
226  for (size_t position = 1 ; position < seq_size ; ++position){
227 
228  if (exDef_defined){
229  exDef_position = seqs->exDefDefined(position);
230  }
231 
232  for (size_t st_current = 0; st_current < state_size; ++st_current){
233  //Calc emissions
234  emission = (*hmm)[st_current]->get_emission_prob(*seqs, position);
235 
236  if (exDef_defined && exDef_position){
237  emission += seqs->getWeight(position, st_current);
238  }
239 
240  if (emission == -INFINITY){
241  continue;
242  }
243 
244  temp_scores = new (std::nothrow) std::vector<nthScore>;
245  for (size_t st_previous = 0; st_previous < state_size; ++st_previous){
246  trans = getTransition(hmm->getState(st_previous), st_current, position);
247  if (trans== -INFINITY){
248  continue;
249  }
250 
251  if ((*naive_nth_scores)[position-1][st_previous] == NULL){
252  continue;
253  }
254 
255  for(size_t vit_previous=0; vit_previous < (*naive_nth_scores)[position-1][st_previous]->size(); vit_previous++){
256  viterbi_temp = emission + trans + (*(*naive_nth_scores)[position-1][st_previous])[vit_previous].score;
257  temp_scores->push_back(nthScore(st_previous,vit_previous,viterbi_temp));
258  }
259  }
260 
261  sort_scores(*temp_scores);
262  if (temp_scores->size() > nth_size){
263  temp_scores->resize(nth_size);
264  }
265  (*naive_nth_scores)[position][st_current]=temp_scores;
266  }
267  }
268 
269  //Calculate Ending Transition
270  temp_scores = new(std::nothrow) std::vector<nthScore>;
271  for (size_t st_previous = 0; st_previous < state_size; ++st_previous){
272 
273  //Check validity of transition to end for the state
274  if ((*hmm)[st_previous]->getEndTrans() != -INFINITY){
275 
276  //Check to see that previous viterbi scores are defined for the state
277  if ((*naive_nth_scores)[seq_size-1][st_previous] != NULL){
278 
279  //Calculate viterbi score for each score in previous cell
280  for (size_t prev_scores=0; prev_scores < (*naive_nth_scores)[seq_size-1][st_previous]->size(); prev_scores++){
281 
282  viterbi_temp = (*(*naive_nth_scores)[seq_size-1][st_previous])[prev_scores].score + (*hmm)[st_previous]->getEndTrans();
283  temp_scores->push_back(nthScore(st_previous, prev_scores, viterbi_temp));
284  }
285 
286  }
287  }
288  }
289 
290  sort_scores(*temp_scores);
291  if (temp_scores->size() > nth_size){
292  temp_scores->resize(nth_size);
293  }
294 
295  ending_nth_viterbi = temp_scores;
296 
297  return;
298  }
299 
300 }