StochHMM  v0.34
Flexible Hidden Markov Model C++ Library and Application
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
posterior.cpp
Go to the documentation of this file.
1 //
2 // posterior.cpp
3 // StochHMM
4 //
5 // Created by Paul Lott on 2/4/13.
6 // Copyright (c) 2013 Korf Lab, Genome Center, UC Davis, Davis, CA. All rights reserved.
7 //
8 
9 #include "trellis.h"
10 
11 namespace StochHMM {
12 
14  //if (hmm->isBasic()){
16  //}
17  }
18 
20  hmm = h;
21  seqs = sqs;
22  seq_size = seqs->getLength();
25 
26  if (posterior_score!=NULL){
27  delete posterior_score;
28  posterior_score = NULL;
29  }
30  ending_backward_prob = -INFINITY;
31  ending_forward_prob = -INFINITY;
32 
33  posterior();
34  }
35 
37  hmm = h;
38  seqs = sqs;
39  seq_size = seqs->getLength();
42 
43  if (posterior_score!=NULL){
44  delete posterior_score;
45  posterior_score = NULL;
46  }
47  ending_backward_prob = -INFINITY;
48  ending_forward_prob = -INFINITY;
49 
50  if (hmm->isBasic()){
52  }
53  }
54 
55 
57 
58 // if (!hmm->isBasic()){
59 // std::cerr << "Model isn't a simple/basic HMM. Use complex algorithms\n";
60 // return;
61 // }
62 
63  //posterior_score = new (std::nothrow) float_2D(seq_size, std::vector<float>(state_size,-INFINITY));
64  posterior_score = new (std::nothrow) double_2D(seq_size, std::vector<double>(state_size,-INFINITY));
65 
66  scoring_current = new (std::nothrow) std::vector<double> (state_size,-INFINITY);
67  scoring_previous= new (std::nothrow) std::vector<double> (state_size,-INFINITY);
68 
69  if (scoring_current == NULL || scoring_previous == NULL || posterior_score == NULL){
70  std::cerr << "Can't allocate Posterior score table. OUT OF MEMORY" << std::endl;
71  exit(2);
72  }
73 
74  std::bitset<STATE_MAX> next_states;
75  std::bitset<STATE_MAX> current_states;
76 
77  double forward_temp(-INFINITY);
78  double emission(-INFINITY);
79  bool exDef_position(false);
80 
81  state* init = hmm->getInitial();
82 
83  std::bitset<STATE_MAX>* initial_to = hmm->getInitialTo();
84  std::bitset<STATE_MAX>* from_trans(NULL);
85 
86 
87  //Calculate Forward from transitions from INIT (initial) state
88  for(size_t i = 0; i < state_size; ++i){
89  if ((*initial_to)[i]){ //if the bitset is set (meaning there is a transition to this state), calculate the viterbi
90  forward_temp = (*hmm)[i]->get_emission_prob(*seqs,0) + getTransition(init, i, 0);
91 
92  if (forward_temp > -INFINITY){
93  (*scoring_current)[i] = forward_temp;
94  next_states |= (*(*hmm)[i]->getTo());
95  }
96  }
97  }
98 
99 
100  for(size_t position = 1; position < seq_size ; ++position ){
101  (*posterior_score)[position-1].assign(scoring_current->begin(), scoring_current->end());
102 
103  //Swap current and previous viterbi scores
104  scoring_previous->assign(state_size,-INFINITY);
108 
109  //Swap current_states and next states sets
110  current_states.reset();
111  current_states |= next_states;
112  next_states.reset();
113 
114  if (exDef_defined){
115  exDef_position = seqs->exDefDefined(position);
116  }
117 
118  for (size_t current = 0; current < state_size; ++current){ //i is current state that emits value
119  if (!current_states[current]){
120  continue;
121  }
122 
123  emission = (*hmm)[current]->get_emission_prob(*seqs, position);
124 
125  if (exDef_defined && exDef_position){
126  emission += seqs->getWeight(position, current);
127  }
128 
129  if (emission == -INFINITY){
130  continue;
131  }
132 
133  from_trans = (*hmm)[current]->getFrom();
134 
135  for (size_t previous = 0; previous < state_size ; ++previous) { //j is previous state
136  if (!(*from_trans)[previous]){
137  continue;
138  }
139 
140 
141 
142  if ((*scoring_previous)[previous] != -INFINITY){
143  forward_temp = (*scoring_previous)[previous] + emission + getTransition((*hmm)[previous], current , position);
144 
145  if ((*scoring_current)[current] == -INFINITY){
146  (*scoring_current)[current] = forward_temp;
147  }
148  else{
149  (*scoring_current)[current] = addLog(forward_temp, (*scoring_current)[current]);
150  }
151 
152  next_states |= (*(*hmm)[current]->getTo());
153  }
154  }
155  }
156  }
157 
158  (*posterior_score)[seq_size-1].assign(scoring_current->begin(), scoring_current->end());
159 
160  //Swap current and previous scores
161  scoring_previous->assign(state_size,-INFINITY);
165 
166 
167  ending_forward_prob = -INFINITY;
168  for(size_t i = 0; i < state_size ;++i){
169  if ((*scoring_previous)[i] != -INFINITY){
170  forward_temp = (*scoring_previous)[i] + (*hmm)[i]->getEndTrans();
171 
172  if (forward_temp > -INFINITY){
173  if (ending_forward_prob == -INFINITY){
174  ending_forward_prob = forward_temp;
175  }
176  else{
178  }
179  }
180  }
181  }
182 
183  // Perform Backward Algorithm
184  double backward_temp(-INFINITY);
185  scoring_previous->assign(state_size, -INFINITY);
186  scoring_current->assign(state_size, -INFINITY);
187 // std::vector<float> posterior_sum(seq_size,-INFINITY);
188 
189  std::vector<double> posterior_sum(seq_size,-INFINITY);
190 
191 
192  std::bitset<STATE_MAX>* ending_from = hmm->getEndingFrom();
193 
194  //Calculate initial Backward from ending state
195  for(size_t st_current = 0; st_current < state_size; ++st_current){
196  if ((*ending_from)[st_current]){ //if the bitset is set (meaning there is a transition to this state)
197 
198  backward_temp = (*hmm)[st_current]->getEndTrans();
199 
200  if (backward_temp > -INFINITY){
201  (*scoring_current)[st_current] = backward_temp;
202  next_states[st_current] = 1;
203  }
204  }
205  }
206 
207  for(size_t position = seq_size-2; position != SIZE_MAX ; --position ){
208 
209  //Swap current_states and next states sets
210  current_states.reset();
211  current_states |= next_states;
212  next_states.reset();
213 
214  for (size_t i=0;i<state_size;++i){
215  if ((*posterior_score)[position+1][i] != -INFINITY && (*scoring_current)[i]!= -INFINITY){
216  (*posterior_score)[position+1][i] = ((double)(*posterior_score)[position+1][i] + (double)(*scoring_current)[i]) - ending_forward_prob;
217  if ((*posterior_score)[position+1][i] > -7.6009){ //Above significant value;
218  posterior_sum[position+1] = addLog(posterior_sum[position+1], (*posterior_score)[position+1][i]);
219  }
220  }
221  }
222 
223  //Swap current and previous viterbi scores
224  scoring_previous->assign(state_size,-INFINITY);
228 
229 
230 
231  if (exDef_defined){
232  exDef_position = seqs->exDefDefined(position);
233  }
234 
235 
236  for (size_t st_previous = 0; st_previous < state_size; ++st_previous){ //i is current state that emits value
237  if (!current_states[st_previous]){
238  continue;
239  }
240 
241  emission = (*hmm)[st_previous]->get_emission_prob(*seqs, position+1);
242 
243  if (exDef_defined && exDef_position){
244  emission += seqs->getWeight(position+1, st_previous);
245  }
246 
247  if (emission == -INFINITY){
248  continue;
249  }
250 
251  from_trans = (*hmm)[st_previous]->getFrom();
252 
253  for (size_t st_current = 0; st_current < state_size ; ++st_current) { //j is previous state
254  if (!(*from_trans)[st_current]){
255  continue;
256  }
257 
258  if ((*scoring_previous)[st_previous] != -INFINITY){
259 
260  backward_temp =(*scoring_previous)[st_previous] + emission + getTransition((*hmm)[st_current], st_previous , position);
261 
262  if ((*scoring_current)[st_current] == -INFINITY){
263  (*scoring_current)[st_current] = backward_temp;
264  }
265  else{
266  (*scoring_current)[st_current] = addLog(backward_temp, (*scoring_current)[st_current]);
267  }
268 
269  next_states[st_current] = 1;
270  }
271  }
272  }
273  }
274 
275  for (size_t i=0;i<state_size;++i){
276  (*posterior_score)[0][i] = ((double)(*posterior_score)[0][i] + (double)(*scoring_current)[i]) - ending_forward_prob;
277  posterior_sum[0] = addLog(posterior_sum[0], (*posterior_score)[0][i]);
278  }
279 
280 
281 
282  ending_backward_prob = -INFINITY;
283  init = hmm->getInitial();
284  for(size_t i = 0; i < state_size ;++i){
285 
286  if ((*scoring_current)[i] != -INFINITY){
287  backward_temp = (*scoring_current)[i] + (*hmm)[i]->get_emission_prob(*seqs,0) + getTransition(init, i, 0);
288 
289  if (backward_temp > -INFINITY){
290  if (ending_backward_prob == -INFINITY){
291  ending_backward_prob = backward_temp;
292  }
293  else{
295  }
296  }
297  }
298  }
299 
300  if (abs(ending_backward_prob - ending_forward_prob) > 0.0000001){
301  std::cerr << "Ending sequence probabilities calculated by Forward and Backward algorithm are different. They should be the same.\t" << __FUNCTION__ << std::endl;
302  }
303 
304 
305 
306  for (size_t i=0;i<seq_size;i++){
307  for(size_t j=0;j<state_size;j++){
308  if ((*posterior_score)[i][j] == -INFINITY){
309  continue;
310  }
311 // std::cout << "Sum:\t" << exp(posterior_sum[i]) << std::endl;
312 // std::cout << "Score:\t" << exp((*posterior_score)[i][j]) << std::endl;
313 
314  if ((*posterior_score)[i][j] > -7.6009){ //Above significant value;
315  (*posterior_score)[i][j] -= posterior_sum[i];
316  }
317  else{
318  (*posterior_score)[i][j] = -INFINITY;
319  }
320  }
321  }
322 
323 
324  delete scoring_previous;
325  delete scoring_current;
326  scoring_previous = NULL;
327  scoring_current = NULL;
328  }
329 
330 
332  if (posterior_score == NULL){
333  std::cerr << __FUNCTION__ << " called before trellis::posterior was completed\n";
334  exit(2);
335  }
336 
337  double max(-INFINITY);
338  int16_t max_ptr(-1);
339  for(size_t position=seq_size-1; position != SIZE_MAX ;position--){
340  max = -INFINITY;
341  max_ptr = -1 ;
342 
343  for (size_t st=0; st < state_size; st++){
344  if ((*posterior_score)[position][st] > max){
345  max = (*posterior_score)[position][st];
346  max_ptr = st;
347  }
348 
349  }
350  path.push_back(max_ptr);
351  }
352  return;
353  }
354 
356  for (size_t position =seq_size-1; position != SIZE_MAX; --position){
357  double random=((double)rand()/((double)(RAND_MAX)+(double)(1)));
358  double cumulative_prob(0.0);
359 
360  for (size_t st = 0; st < state_size ; ++st){
361  cumulative_prob+=exp((*posterior_score)[position][st]);
362  if (random < cumulative_prob){
363  path.push_back( (int16_t) st);
364  }
365  }
366  }
367  return;
368  }
369 
371  for (size_t i = 0; i < reps; i++){
372  traceback_path path(hmm);
374  paths.assign(path);
375  }
376  return;
377  }
378 
379  // void trellis::simple_posterior(){
380  //
381  // if (!hmm->isBasic()){
382  // std::cerr << "Model isn't a simple/basic HMM. Use complex algorithms\n";
383  // return;
384  // }
385  //
386  // if (forward_score == NULL){
387  // simple_forward();
388  // }
389  //
390  // if (backward_score == NULL){
391  // simple_backward();
392  // }
393  //
394  // if (abs(ending_backward_prob - ending_forward_prob) > 0.0000001){
395  // std::cerr << "Ending Forward and Backward Probabilities are not equal.\n Check the model.\t" << __FUNCTION__ << std::endl;
396  // exit(2);
397  // }
398  //
399  // posterior_score = new(std::nothrow) float_2D(seq_size, std::vector<float>(state_size,-INFINITY));
400  //
401  // for(size_t position = 0;position < seq_size; ++position){
402  // for(size_t state = 0; state < state_size; ++state){
403  // (*posterior_score)[position][state] = ((*forward_score)[position][state] + (*backward_score)[position][state]) - ending_forward_prob;
404  // }
405  // }
406  //
407  // return;
408  // }
409 
410 
411 }