StochHMM  v0.34
Flexible Hidden Markov Model C++ Library and Application
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
stochTable.cpp
Go to the documentation of this file.
1 //
2 // stoch_table.cpp
3 // StochHMM
4 //
5 // Created by Paul Lott on 2/4/13.
6 // Copyright (c) 2013 Korf Lab, Genome Center, UC Davis, Davis, CA. All rights reserved.
7 //
8 
9 #include "stochTable.h"
10 
11 namespace StochHMM {
12 
13  //! stochTable constructor
14  //! \param [in] seq_size Size of sequence
15  stochTable::stochTable(size_t seq_size):state_val(NULL){
16 
17  state_val = new(std::nothrow) std::vector<stoch_value>;
18  state_val->reserve(seq_size);
19 
20  position = new(std::nothrow) std::vector<size_t>(seq_size,0);
21  if (state_val == NULL){
22  std::cerr << "Cannot allocate stochTable- OUT OF MEMORY" << std::endl;
23  delete state_val;
24  delete position;
25  state_val = NULL;
26  position = NULL;
27  exit(2);
28  }
29  last_position = 0;
30  return;
31  }
32 
34  delete state_val;
35  delete position;
36  state_val = NULL;
37  position = NULL;
38  }
39 
40  //! Pushes the information for traceback pointer onto the table
41  //! \param pos Position of current traceback pointer in sequence
42  //! \param st Current State
43  //! \param st_to Previous State
44  //! \param value Unnormalized value of traceback pointer
45  //! \sa stochTable::finalize
46  void stochTable::push(size_t pos, size_t st, size_t st_to, float value){
47  if (pos != last_position){
48  (*position)[last_position] = state_val->size()-1;
49  last_position = pos;
50  }
51 
52  state_val->push_back( stoch_value(st,st_to,value));
53  }
54 
55  //! Finalized stochTable
56  //! Normalizes the traceback pointer values and calculates the previous cell
57  //! iterator within the previous position segment. This speeds up the
58  //! the referencing necessary for tracebacks across the traceback table.
60  (*position)[last_position] = state_val->size()-1;
61 
62  last_position = 0;
63 
64  double sum(-INFINITY);
65  uint16_t current_state(SIZE_MAX);
66  size_t state_start(-1);
67 
68  //For each position in the sequence we want to normalize the viterbi/forward values
69  //for the state from previous states
70  for(size_t i=0;i<position->size()-1;++i){
71 
72  //Need to sum the values. If we see another state then we'll need to
73  //normalize the previous states that contributed to the sum.
74  //Then set the sum for the current state.
75  for (size_t j = (i==0) ? 0 : (*position)[i-1]+1; j <= (*position)[i] ; ++j ){
76 
77  //If this is a different state then we'll need to apply the sum
78  //if this is the first state then we'll just set the current state
79  //and set the sum value. Also need to keep track of which state
80  // is the first state so when we normalize we only normalize the
81  // the previous state
82  if ((*state_val)[j].state_id != current_state){
83 
84  //Normalize
85  for(size_t k = state_start; k < j ;++k){
86  (*state_val)[k].prob = exp((*state_val)[k].prob - sum);
87  }
88 
89  //Set state and sum value and starting state
90  current_state = (*state_val)[j].state_id;
91  sum = (*state_val)[j].prob;
92  state_start = j;
93  }
94  else{
95  sum = addLog(sum,(double) (*state_val)[j].prob);
96  }
97  }
98 
99  //Normalize the last states seen (b/c) we'll exit out of for loop before
100  //we have done these
101  for(size_t k = state_start; k <= (*position)[i]; ++k){
102  (*state_val)[k].prob = exp((*state_val)[k].prob - sum);
103  }
104 
105  //Set values for next loop
106  state_start = (*position)[i] + 1;
107  current_state = SIZE_MAX;
108  }
109 
110 
111  //Process and normalize the ending cell
112  //Calculate ending state sum
113  sum = -INFINITY;
114  for (size_t j=state_start; j < state_val->size();j++){
115  sum=addLog(sum,(double)(*state_val)[j].prob);
116  }
117 
118  //Normalize the ending cell probability
119  for (size_t j=state_start; j < state_val->size();j++){
120  (*state_val)[j].prob = exp((*state_val)[j].prob - sum);
121  }
122 
123 
124 
125 
126  //Assign previous cell relative to (position) value. Used when traceback
127  //That way function won't have to search for correct value;
128  for (size_t i =1; i < position->size(); ++i){
129  for (size_t j = (*position)[i-1]+1; j <= (*position)[i] ; ++j){
130  (*state_val)[j].prev_cell = get_state_position(i-1, (*state_val)[j].state_prev);
131  }
132  }
133 
134  return;
135  }
136 
137  //! Returns the states iterator position within the array
138  //! \param pos Position within the sequence
139  //! \param state Iterator of state that you want
140  //! \return Position within the table where state can be found
141  size_t stochTable::get_state_position(size_t pos, uint16_t state){
142  size_t start_val = (pos == 0) ? 0 : (*position)[pos-1]+1;
143  for (size_t i = start_val ; i <= (*position)[pos] ; ++i){
144  if ((*state_val)[i].state_id == state){
145  return i-start_val;
146  }
147  }
148 
149  return UINT16_MAX;
150  }
151 
152  //! Prints the stochTable to stdout
154  std::cout << stringify();
155  return;
156  }
157 
158  //! Stringify the stochTable
159  std::string stochTable::stringify(){
160  std::stringstream str;
161 
162  last_position = 0;
163 
164  for(size_t i=0;i<position->size();++i){
165  for (size_t j = (i==0) ? 0 : (*position)[i-1]+1 ; j <= (*position)[i] ; ++j ){
166  str << (*state_val)[j].state_id <<":"<< (*state_val)[j].state_prev << " : " << (*state_val)[j].prob << "\t";
167  }
168  str << std::endl;
169  }
170  return str.str();
171  }
172 
173 
174  //TODO: Test stochastic tracebacks
175  //! Traceback through the table using the traceback probabilities
176  //! \param[out] path Reference to traceback_path
178 
179  double random((double)rand()/((double)(RAND_MAX)+(double)(1)));
180  double cumulative_prob(0.0);
181  //std::cout << random << std::endl;
182 
183  size_t offset(0);
184  uint16_t state_prev(UINT16_MAX);
185 
186  //Get traceback from END state
187  for(size_t i = (*position)[position->size()-2]+1 ; i <= position->back() ; ++i){
188  cumulative_prob += (*state_val)[i].prob;
189  if (random <= cumulative_prob){
190  state_prev = (*state_val)[i].state_prev;
191  path.push_back(state_prev);
192  offset = (*state_val)[i].prev_cell;
193  //std::cout << "Chose:\t" << state_prev << std::endl;
194  break;
195  }
196  }
197 
198  //For the rest of the table traceback to the beginning of the sequence
199  for(size_t i = position->size()-2; i != SIZE_MAX ; --i){
200  random = (double)rand()/((double)(RAND_MAX)+(double)(1));
201  //std::cout << random << std::endl;
202  cumulative_prob = 0;
203 
204  for (size_t cells = (i == 0) ? 0 + offset : (*position)[i-1]+offset+1; cells <= (*position)[i] ; ++cells){
205 // if ((*state_val)[cells].state_id != state_prev ){
206 // std::cout << "Houston, We have an error!" << std::endl;
207 // }
208 
209  cumulative_prob+=(*state_val)[cells].prob;
210 
211  if (random <= cumulative_prob){
212  state_prev = (*state_val)[cells].state_prev;
213  path.push_back(state_prev);
214  offset = (*state_val)[cells].prev_cell;
215  //std::cout << "Chose:\t" << state_prev << std::endl;
216  break;
217  }
218  }
219  }
220  return;
221  }
222 
223 
224 
225 
226 // void alt_stochTable::push(size_t pos, size_t state, size_t state_to, float val){
227 // (*table)[state][pos].push_back(stoch_val(state_to,val));
228 // return;
229 // }
230 //
231 // void alt_stochTable::push_ending(size_t state_to, float val){
232 // ending.push_back(stoch_val(state_to,val));
233 // return;
234 // }
235 //
236 //
237 // //! Traceback through the table using the traceback probabilities
238 // //! \param[out] path Reference to traceback_path
239 // void alt_stochTable::traceback(traceback_path& path){
240 //
241 // double random((double)rand()/((double)(RAND_MAX)+(double)(1)));
242 // double cumulative_prob(0.0);
243 //
244 // uint16_t state_prev(UINT16_MAX);
245 //
246 // //Get traceback from END state
247 // for(size_t i = 0; i< ending.size(); ++i){
248 // cumulative_prob += (ending)[i].prob;
249 // if (random <= cumulative_prob){
250 // state_prev = ending[i].previous_state;
251 // path.push_back(state_prev);
252 // break;
253 // }
254 // }
255 //
256 // //For the rest of the table traceback to the beginning of the sequence
257 // for(size_t position = table[state_prev].size()-1; position != SIZE_MAX ; --position){
258 // random = (double)rand()/((double)(RAND_MAX)+(double)(1));
259 // cumulative_prob = 0;
260 //
261 // for (size_t st = 0; st < (*table)[state_prev][position].size(); st++){
262 //
263 // cumulative_prob+=(*table)[state_prev][position][st].prob;
264 //
265 // if (random <= cumulative_prob){
266 // state_prev = (*table)[state_prev][position][st].previous_state;
267 // path.push_back(state_prev);
268 // break;
269 // }
270 // }
271 // }
272 // return;
273 // }
274 //
275 // alt_stochTable::alt_stochTable(size_t st, size_t length){
276 // states = st;
277 // seq_length = length;
278 //
279 // table = new (std::nothrow) std::vector<sparseArray<std::vector<stoch_val> > >(states);
280 //
281 // for (size_t i=0;i<states;i++){
282 // (*table)[i].resize(seq_length);
283 // }
284 // }
285 //
286 // alt_stochTable::~alt_stochTable(){
287 // delete table;
288 // table=NULL;
289 // }
290 //
291 // //! Finalized stochTable
292 // //! Normalizes the traceback pointer values and calculates the previous cell
293 // //! iterator within the previous position segment. This speeds up the
294 // //! the referencing necessary for tracebacks across the traceback table.
295 // void alt_stochTable::finalize(){
296 //
297 // double sum(-INFINITY);
298 //
299 // //For each position in the sequence we want to normalize the viterbi/forward values
300 // //for the state from previous states
301 // for(size_t position=0; position < seq_length;++position){
302 //
303 // for(size_t state=0; state < states; ++state){
304 // if ((*table)[state].defined(position)){
305 // sum = -INFINITY;
306 // for(size_t value=0; value < (*table)[state][position].size();value++){
307 // sum = addLog(sum,(double) (*table)[state][position][value].prob);
308 // }
309 //
310 // for(size_t value=0; value < (*table)[state][position].size();value++){
311 // (*table)[state][position][value].prob = exp((*table)[state][position][value].prob-sum);
312 // }
313 // }
314 // }
315 // }
316 //
317 // sum = -INFINITY;
318 // for(size_t i=0;i<ending.size();i++){
319 // sum = addLog(sum,(double) ending[i].prob);
320 // }
321 //
322 // for(size_t i=0;i<ending.size();i++){
323 // ending[i].prob=exp(ending[i].prob-sum);
324 // }
325 //
326 // return;
327 // }
328 //
329 // std::string alt_stochTable::stringify(){
330 //
331 // std::stringstream str;
332 // for(size_t position = 0; position < seq_length; position++){
333 // str << position ;
334 // for(size_t state = 0; state < states; state++){
335 // if ((*table)[state].defined(position)){
336 // for (size_t val = 0; val < (*table)[state][position].size();val++){
337 // str << "\t" << state << ":" << (*table)[state][position][val].previous_state;
338 // }
339 // }
340 // }
341 // str << "\n";
342 // }
343 //
344 // return str.str();
345 // }
346 //
347 // void alt_stochTable::print(){
348 // std::cout << stringify() << std::endl;
349 // return;
350 // }
351 
352 
353  alt_simple_stochTable::alt_simple_stochTable(size_t st, size_t length): states(st), seq_length(length){
354  table = new (std::nothrow) std::vector<std::vector<std::vector<stoch_val> > > ;
355  table->resize(seq_length,std::vector<std::vector<stoch_val> > (states, std::vector<stoch_val>()));
356  }
357 
359  delete table;
360  }
361 
362  void alt_simple_stochTable::push(size_t pos, size_t state, size_t state_to, float val){
363  (*table)[pos][state].push_back(stoch_val(state_to,val));
364  return;
365  }
366 
367  void alt_simple_stochTable::push_ending(size_t state_to, float val){
368  ending.push_back(stoch_val(state_to,val));
369  return;
370  }
371 
372  //! Finalized stochTable
373  //! Normalizes the traceback pointer values and calculates the previous cell
374  //! iterator within the previous position segment. This speeds up the
375  //! the referencing necessary for tracebacks across the traceback table.
377 
378  double sum(-INFINITY);
379 
380  //For each position in the sequence we want to normalize the viterbi/forward values
381  //for the state from previous states
382  for(size_t position=0; position < seq_length;++position){
383 
384  for(size_t state=0; state < states; ++state){
385  if ((*table)[position][state].size() > 0 ){
386  sum = -INFINITY;
387  for(size_t value=0; value < (*table)[position][state].size(); value++){
388  sum = addLog(sum,(double) (*table)[position][state][value].prob);
389  }
390 
391  for(size_t value=0; value < (*table)[position][state].size();value++){
392  (*table)[position][state][value].prob = exp((*table)[position][state][value].prob-sum);
393  }
394  }
395  }
396  }
397 
398  sum = -INFINITY;
399  for(size_t i=0;i<ending.size();i++){
400  sum = addLog(sum,(double) ending[i].prob);
401  }
402 
403  for(size_t i=0;i<ending.size();i++){
404  ending[i].prob=exp(ending[i].prob-sum);
405  }
406 
407  return;
408  }
409 
410  //! Traceback through the table using the traceback probabilities
411  //! \param[out] path Reference to traceback_path
413 
414  double random((double)rand()/((double)(RAND_MAX)+(double)(1)));
415  double cumulative_prob(0.0);
416 
417  uint16_t state_prev(UINT16_MAX);
418 
419  //Get traceback from END state
420  for(size_t i = 0; i< ending.size(); ++i){
421  cumulative_prob += (ending)[i].prob;
422  if (random <= cumulative_prob){
423  state_prev = ending[i].previous_state;
424  path.push_back(state_prev);
425  break;
426  }
427  }
428 
429  //For the rest of the table traceback to the beginning of the sequence
430  for(size_t position = table[state_prev].size()-1; position != SIZE_MAX ; --position){
431  random = (double)rand()/((double)(RAND_MAX)+(double)(1));
432  cumulative_prob = 0;
433 
434  for (size_t st = 0; st < (*table)[position][state_prev].size(); st++){
435 
436  cumulative_prob+=(*table)[position][state_prev][st].prob;
437 
438  if (random <= cumulative_prob){
439  state_prev = (*table)[position][state_prev][st].previous_state;
440  path.push_back(state_prev);
441  break;
442  }
443  }
444  }
445  return;
446  }
447 
449 
450  std::stringstream str;
451  for(size_t position = 0; position < seq_length-1; position++){
452  str << position ;
453  for(size_t state = 0; state < states; state++){
454  for (size_t val = 0; val < (*table)[position][state].size();val++){
455  str << "\t" << state << ":" << (*table)[position][state][val].previous_state << " : " << (*table)[position][state][val].prob ;
456  }
457  }
458  str << "\n";
459  }
460 
461  return str.str();
462  }
463 
465  std::cout << stringify() << std::endl;
466  return;
467  }
468 
469 
470 }
471