StochHMM  v0.34
Flexible Hidden Markov Model C++ Library and Application
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
trellis.cpp
Go to the documentation of this file.
1 //
2 // new_trellis.cpp
3 // StochHMM
4 //
5 // Created by Paul Lott on 11/13/12.
6 // Copyright (c) 2012 Korf Lab, Genome Center, UC Davis, Davis, CA. All rights reserved.
7 //
8 
9 #include "trellis.h"
10 
11 
12 namespace StochHMM {
14  hmm=NULL;
15  seqs=NULL;
17 
18  seq_size=0;
19  state_size=0;
20 
21  type = SIMPLE;
22  store_values=false;
23  exDef_defined=false;
24 
25  traceback_table = NULL;
26  stochastic_table = NULL;
27 
28  viterbi_score = NULL;
29  forward_score = NULL;
30  backward_score = NULL;
31  posterior_score = NULL;
32  dbl_forward_score = NULL;
33  dbl_viterbi_score = NULL;
34  dbl_backward_score = NULL;
35  dbl_posterior_score = NULL;
36 
37  ending_viterbi_score = -INFINITY;
38  ending_viterbi_tb = -1;
39 // ending_posterior = -INFINITY;
40  ending_forward_prob = -INFINITY;
41  ending_backward_prob= -INFINITY;
42 
43  naive_nth_scores = NULL;
44  ending_nth_viterbi = NULL;
45  nth_traceback_table = NULL;
47  nth_scoring_current = NULL;
48 
49  scoring_current = NULL;
50  scoring_previous= NULL;
51  alt_scoring_current = NULL;
52  alt_scoring_previous = NULL;
53  }
54 
56  hmm=h;
57  seqs=sqs;
59 
60  seq_size = seqs->getLength();
62 
63  type = SIMPLE;
64  store_values=false;
66 
67  traceback_table = NULL;
68  stochastic_table = NULL;
69  nth_traceback_table = NULL;
70  viterbi_score = NULL;
71  forward_score = NULL;
72  backward_score = NULL;
73  posterior_score = NULL;
74  dbl_forward_score = NULL;
75  dbl_viterbi_score = NULL;
76  dbl_backward_score = NULL;
77  dbl_posterior_score = NULL;
78 
79  ending_viterbi_score = -INFINITY;
80  ending_viterbi_tb = -1;
81 // ending_posterior = -INFINITY;
82  ending_forward_prob = -INFINITY;
83  ending_backward_prob= -INFINITY;
84 
85  naive_nth_scores = NULL;
86  ending_nth_viterbi = NULL;
87  nth_traceback_table = NULL;
89  nth_scoring_current = NULL;
90 
91  scoring_current = NULL;
92  scoring_previous= NULL;
93  alt_scoring_current = NULL;
94  alt_scoring_previous = NULL;
95 
96  }
97 
98 
99 
101  delete traceback_table;
102  delete stochastic_table;
103 
104  delete viterbi_score;
105  delete forward_score;
106  delete backward_score;
107  delete posterior_score;
108  delete dbl_forward_score;
109  delete dbl_viterbi_score;
110  delete dbl_backward_score;
111  delete dbl_posterior_score;
112 
113  delete naive_nth_scores;
114  delete ending_nth_viterbi;
115  delete nth_traceback_table;
116  delete nth_scoring_current;
117  delete nth_scoring_previous;
118 
119  delete scoring_previous;
120  delete scoring_current;
121  delete alt_scoring_previous;
122  delete alt_scoring_current;
123 
124  traceback_table = NULL;
125  stochastic_table = NULL;
126 
127  viterbi_score = NULL;
128  forward_score = NULL;
129  backward_score = NULL;
130  posterior_score = NULL;
131  dbl_forward_score = NULL;
132  dbl_viterbi_score = NULL;
133  dbl_backward_score = NULL;
134  dbl_posterior_score = NULL;
135 
136  naive_nth_scores = NULL;
137  ending_nth_viterbi = NULL;
138  nth_traceback_table = NULL;
139  nth_scoring_previous= NULL;
140  nth_scoring_current = NULL;
141 
142  scoring_previous = NULL;
143  scoring_current = NULL;
144  alt_scoring_previous= NULL;
145  alt_scoring_current = NULL;
146  }
147 
149  hmm=NULL;
150  seqs=NULL;
152  state_size=0;
153  seq_size=0;
154  type= SIMPLE;
155  store_values = false;
156  exDef_defined = false;
157 
158  delete traceback_table;
159  delete stochastic_table;
160 
161  delete viterbi_score;
162  delete forward_score;
163  delete backward_score;
164  delete posterior_score;
165 
166  delete scoring_current;
167  delete scoring_previous;
168 
169  delete alt_scoring_current;
170  delete alt_scoring_previous;
171 
172  delete dbl_forward_score;
173  delete dbl_backward_score;
174  delete dbl_viterbi_score;
175  delete dbl_posterior_score;
176 
177  delete naive_nth_scores;
178  delete ending_nth_viterbi;
179  delete nth_traceback_table;
180  delete nth_scoring_current;
181  delete nth_scoring_previous;
182 
183  traceback_table = NULL;
184  stochastic_table = NULL;
185  nth_traceback_table = NULL;
186 
187  viterbi_score = NULL;
188  forward_score = NULL;
189  backward_score = NULL;
190  posterior_score = NULL;
191 
192  scoring_current = NULL;
193  scoring_previous = NULL;
194 
195  alt_scoring_current = NULL;
196  alt_scoring_previous= NULL;
197 
198  dbl_forward_score = NULL;
199  dbl_viterbi_score = NULL;
200  dbl_backward_score = NULL;
201  dbl_posterior_score = NULL;
202 
203  naive_nth_scores = NULL;
204  ending_nth_viterbi = NULL;
205  nth_traceback_table = NULL;
206  nth_scoring_previous= NULL;
207  nth_scoring_current = NULL;
208 
209  ending_viterbi_score = -INFINITY;
210  ending_viterbi_tb = -1;
211 // ending_posterior = -INFINITY;
212  ending_forward_prob = -INFINITY;
213  ending_backward_prob= -INFINITY;
214 
215 
216  }
217 
218 
219  //TODO: Fix getTransitions to work with all transition types
220  double trellis::getTransition(state* st, size_t trans_to_state, size_t sequencePosition){
221  double transition_prob(-INFINITY);
222  transition* trans = st->getTrans(trans_to_state);
223  if (trans==NULL){
224  return transition_prob;
225  }
226 
227  transType trans_type= trans->getTransitionType();
228 
229 
230 
231 
232  if (trans_type == STANDARD ){ //if the transition type is standard then just return the standard probability
233 
234  transition_prob= trans->getTransition(0,NULL);
235  }
236  else if (trans_type == DURATION){
237 
238  //If the traceback_table isn't defined, then we need to generate it using the viterbi algorithm
239  if (traceback_table==NULL){
240  (*this).viterbi();
241  }
242 
243  //Calculate the duration length from the traceback table
244  size_t size = get_explicit_duration_length(trans,sequencePosition, st->getIterator(), trans_to_state);
245  transition_prob=trans->getTransition(size,NULL);
246  }
247  else if (trans_type == LEXICAL){
248  transition_prob=trans->getTransition(sequencePosition, seqs);
249  }
250 
251  //Is external function define for the transition
252  if (trans->FunctionDefined()){
253 
254  //If the traceback_table isn't defined, then we need to generate it using the viterbi algorithm
255  if (traceback_table==NULL){
256  (*this).viterbi();
257  }
258 
259  transition_prob+=transitionFuncTraceback(st, sequencePosition, trans->getExtFunction());
260  }
261 
262  return transition_prob;
263  }
264 
265 
266  //! Traceback to get the duration length of the state.
267  //! If duration is already being tracked in the table then it will return
268  //! value +1. Otherwise, it will traceback through the trellis until the
269  //! traceback identifier is reached.
270  //! \return length of traceback (Giving duration)
271  size_t trellis::get_explicit_duration_length(transition* trans, size_t sequencePosition, size_t state_iter, size_t to_state){
272 
273  if ((*explicit_duration_previous)[state_iter]!=0){
274 // std::cout << "Duration:\t" << (*explicit_duration_previous)[state_iter]+1 << std::endl;
275  return (*explicit_duration_previous)[state_iter]+1;
276  }
277 
278 
279  //If duration hasn't been defined then traceback until the ending parameter is reached
280  size_t length(0);
281 
282  size_t tbState(state_iter); //First traceback pointer to use in traceback_table
283 
284  //tracebackIdentifier traceback_identifier = previousState->transi[transitionTo].traceback_identifier;
285  tracebackIdentifier traceback_identifier = trans->getTracebackIdentifier();
286 
287  //string identifier = previousState->transi[transitionTo].traceback_string;
288  std::string identifier = trans->getTracebackString();
289 
290  //Traceback through trellis until the correct identifier is reached.
291  for(size_t trellPos=sequencePosition-1 ; trellPos != SIZE_MAX ;trellPos--){
292  length++;
293  tbState = (*traceback_table)[trellPos][tbState];
294 
295  if (tbState == SIZE_MAX){
296  break;
297  }
298 
299  state* st = hmm->getState(tbState);
300 
301  //Check to see if stop conditions of traceback are met, if so break;
302  if(traceback_identifier == START_INIT && tbState == SIZE_MAX) {break;}
303  else if (traceback_identifier == DIFF_STATE && state_iter != st->getIterator()) { break;}
304  else if (traceback_identifier == STATE_NAME && identifier.compare(st->getName())==0) { break;}
305  else if (traceback_identifier == STATE_LABEL && identifier.compare(st->getLabel())==0) { break;}
306  else if (traceback_identifier == STATE_GFF && identifier.compare(st->getGFF())==0) { break;}
307 
308  }
309 // std::cout << "Duration:\t" << length+1 << std::endl;
310  return length+1;
311 
312  }
313 
314  //! When a transitionFunc is to be called it must performs a traceback
315  //! and get the required sequence to pass to the function
316  //! \param
317  double trellis::transitionFuncTraceback(state* st, size_t position,transitionFuncParam* func){
318 
319  std::vector<int> tracebackPath;
320  std::vector<std::string> tracebackString;
321 
322  //How far to traceback
323  tracebackIdentifier traceback_identifier = func->getTracebackType();
324  const std::string& tracebackIdentifierName = func->getTracebackName();
325 
326 
327  //What to combine
328  combineIdentifier combineIdent = func->getCombineType();
329  const std::string& combineIdentName = func->getCombineName();
330 
331 
332  //Deterimine which track to use
333  track* alphaTrack = func->getTrack();
334  size_t trackIndex = alphaTrack->getIndex();
335  if (!alphaTrack->isAlpha()){
336 
337  std::cerr << "External transition function called on track that isn't discrete (ALPHANUMERIC)\n";
338  exit(2);
339 
340  }
341 
342  const sequence* seq = seqs->getSeq(trackIndex);
343  int16_t tb_state(st->getIterator());
344  int16_t starting_state = tb_state;
345 
346 
347  for(size_t trellisPos = position-1; trellisPos != SIZE_MAX ; --trellisPos){
348 
349  tracebackPath.push_back(tb_state);
350 
351  if ((combineIdent == FULL) ||
352  (combineIdent == STATENAME && combineIdentName.compare(st->getName())==0)||
353  (combineIdent == STATELABEL && combineIdentName.compare(st->getLabel())==0)||
354  (combineIdent == STATEGFF && combineIdentName.compare(st->getGFF())==0))
355 
356  {
357  tracebackString.push_back(seq->getSymbol(trellisPos));
358  }
359 
360  tb_state= (*traceback_table)[trellisPos][tb_state];
361  state* temp_st = hmm->getState(tb_state);
362 
363  //Check to see if stop conditions of traceback are met, if so break;
364  if(traceback_identifier == START_INIT && tb_state == -1) {break;}
365  else if (traceback_identifier == DIFF_STATE && starting_state != tb_state) { break;}
366  else if (traceback_identifier == STATE_NAME && tracebackIdentifierName.compare(temp_st->getName())==0){ break;}
367  else if (traceback_identifier == STATE_LABEL && tracebackIdentifierName.compare(temp_st->getLabel())==0) { break;}
368  else if (traceback_identifier == STATE_GFF && tracebackIdentifierName.compare(temp_st->getGFF())==0) { break;}
369  }
370 
371  size_t length=tracebackPath.size();
372  std::string CombinedString;
373 
374  size_t maxSymbolSize = alphaTrack->getAlphaMax();
375  //For single letter characters
376  if (maxSymbolSize ==1){
377  for(std::vector<std::string>::reverse_iterator rit = tracebackString.rbegin(); rit!=tracebackString.rend();++rit){
378  CombinedString+=(*rit);
379  }
380  }
381  else{ // For kmer words > 1 in length
382  std::vector<std::string>::reverse_iterator rit = tracebackString.rbegin();
383  CombinedString+=(*rit);
384  ++rit;
385  for(; rit!=tracebackString.rend();++rit){
386  CombinedString+="," + (*rit);
387  }
388  }
389 
390  //Call the transitionFunc and get the score back
391  double transitionValue = func->evaluate(seqs->getUndigitized(trackIndex), position, &CombinedString, length);
392 
393  return transitionValue;
394  }
395 
396 
397 
398 
399 
400  //!Perform traceback through trellis
401  //!\return path trackback_path
403 
404  if (seq_size==0 || traceback_table == NULL){
405  return;
406  }
407 
408  if (path.getModel() == NULL){
409  path.setModel(hmm);
410  }
411 
412  if (ending_viterbi_score == -INFINITY){
413  return;
414  }
415  else{
418 
419  int16_t pointer = ending_viterbi_tb;
420 
421  for( size_t position = seq_size -1 ; position>0 ; position--){
422  pointer = (*traceback_table)[position][pointer];
423 
424  if (pointer == -1){
425  std::cerr << "No valid path at Position: " << position << std::endl;
426  return;
427  }
428 
429  path.push_back(pointer);
430  }
431  }
432  return;
433  }
434 
435 
436  //!Perform a traceback starting at position and state given
437  //! \param [out] traceback_path
438  //! \param position Position to start traceback
439  //! \param state State to begin traceback
440  void trellis::traceback(traceback_path& path, size_t position, size_t state){
441  if (seq_size == 0 || traceback_table == NULL){
442  return;
443  }
444 
445  if (path.getModel() == NULL){
446  path.setModel(hmm);
447  }
448 
449  int16_t pointer = (*traceback_table)[position][state];
450  if (pointer == -1){
451  std::cerr << "No valid path at State: " << state << " from Position: " << position << std::endl;
452  return;
453  }
454 
455  for( size_t pos = position - 1 ; pos>0 ; pos--){
456  pointer = (*traceback_table)[pos][pointer];
457 
458  if (pointer == -1){
459  std::cerr << "No valid path at State: " << state << " from Position: " << position << std::endl;
460  return;
461  }
462 
463  path.push_back(pointer);
464  }
465 
466  return;
467  }
468 
469 
471 
473  return;
474  }
475 
476 
478  for(size_t i=0;i<reps;i++){
479  traceback_path pth(hmm);
481  paths.assign(pth);
482  }
483  return;
484  }
485 
486 
487 
488  //TODO: Finish nth traceback function
489  void trellis::traceback_nth(traceback_path& path, size_t n){
490  if (seq_size == 0 || n > nth_size || (nth_traceback_table == NULL && naive_nth_scores == NULL)){
491  return;
492  }
493 
494  if (nth_traceback_table != NULL){
495  int16_t st_pointer = (*ending_nth_viterbi)[n].st_tb;
496  int16_t sc_pointer = (*ending_nth_viterbi)[n].score_tb;
497 
498  path.setScore((*ending_nth_viterbi)[n].score);
499  path.push_back(st_pointer);
500 
501 
502  for( size_t position = seq_size -1 ; position>0 ; position--){
503  (*nth_traceback_table)[position].get(st_pointer,sc_pointer);
504 
505  if (st_pointer == -1){
506  std::cerr << "No valid path at Position: " << position << std::endl;
507  return;
508  }
509 
510  path.push_back(st_pointer);
511  }
512 
513  }
514  else{
515  int16_t st_pointer = (*ending_nth_viterbi)[n].st_tb;
516  int16_t sc_pointer = (*ending_nth_viterbi)[n].score_tb;
517 
518  path.setScore((*ending_nth_viterbi)[n].score);
519  path.push_back(st_pointer);
520 
521  for( size_t position = seq_size -1 ; position>0 ; position--){
522  nthScore& temp = (*(*naive_nth_scores)[position][st_pointer])[sc_pointer];
523  st_pointer = temp.st_tb;
524  sc_pointer = temp.score_tb;
525  if (st_pointer == -1){
526  std::cerr << "No valid path at Position: " << position << std::endl;
527  return;
528  }
529  path.push_back(st_pointer);
530  }
531  }
532  return;
533  }
534 
535 
536 
537 
538 }
539 
540 
541