StochHMM  v0.34
Flexible Hidden Markov Model C++ Library and Application
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
alignment.cpp
Go to the documentation of this file.
1 //
2 // alignment.cpp
3 // StochHMM
4 //
5 // Created by Paul Lott on 5/18/12.
6 // Copyright (c) 2012 University of California, Davis. All rights reserved.
7 //
8 
9 #include "alignment.h"
10 
11 namespace StochHMM{
12 
13 
14  cell::cell():traceback(NONE),score(0.0){
15  scores.insert(scores.begin(),3,0.0);
16  }
17 
18 
19  alignment::alignment():gap(0),gap_ext(0),high_score(0),start_position(0),target(NULL),query(NULL),tr(NULL){
20 
21 
22  }
23 
25  target=NULL;
26  query=NULL;
27  tr=NULL;
28  delete mMatrix;
29  mMatrix=NULL;
30  gap=0;
31  gap_ext=0;
32  high_score=0;
34  end_position=0;
35  return;
36  }
37 
39  target=NULL;
40  query=NULL;
41  delete mMatrix;
42  mMatrix=NULL;
43  high_score=0;
45  end_position=0;
46  }
47 
48  void alignment::setMatch(double value){
49  if (target==NULL && query==NULL){
50  std::cerr << "Either target or query sequence must be set before defining match matrix" << std::endl;
51  return;
52  }
53  else{
54  size_t alphaSize=(target!=NULL) ? target->size() : query->size();
55  if (mMatrix==NULL){
56  std::vector<double> row (alphaSize, 0.0);
57  mMatrix = new mmMatrix;
58  mMatrix->insert(mMatrix->begin(),alphaSize,row);
59  //std::cout << mMatrix->size() <<std::endl;
60  //std::cout << (*mMatrix)[0].size();
61  }
62  else if (mMatrix->size()!=alphaSize){
63  delete mMatrix;
64  mMatrix=NULL;
65  std::vector<double> row (alphaSize,0.0);
66  mMatrix = new mmMatrix;
67  mMatrix->insert(mMatrix->begin(),alphaSize,row);
68  }
69 
70  for(size_t i=0;i<alphaSize;++i){
71  (*mMatrix)[i][i]=value;
72  }
73 
74  }
75  return;
76  }
77 
79  if (target==NULL && query==NULL){
80  std::cerr << "Either target or query sequence must be set before defining match matrix" << std::endl;
81  return;
82  }
83  else{
84  size_t alphaSize=(target!=NULL) ? target->size() : query->size();
85  if (mMatrix==NULL){
86  mMatrix=&matrix;
87  }
88  else{
89  delete mMatrix;
90  mMatrix=&matrix;
91  }
92  }
93  return;
94  }
95 
96  void alignment::setMismatch(double value){
97  if (target==NULL && query==NULL){
98  std::cerr << "Either target or query sequence must be set before defining match matrix" << std::endl;
99  return;
100  }
101  else{
102  size_t alphaSize=(target!=NULL) ? target->size() : query->size();
103  if (mMatrix==NULL){
104  std::vector<double> row (0.0,alphaSize);
105  mMatrix = new mmMatrix;
106  mMatrix->insert(mMatrix->begin(),alphaSize,row);
107  }
108  else if (mMatrix->size()!=alphaSize){
109  delete mMatrix;
110  mMatrix=NULL;
111  std::vector<double> row (0.0,alphaSize);
112  mMatrix = new mmMatrix;
113  mMatrix->insert(mMatrix->begin(),alphaSize,row);
114  }
115 
116  for(size_t row=0;row<alphaSize;++row){
117  for(size_t col=0;col<alphaSize;++col){
118  if (row!=col){
119  (*mMatrix)[row][col]=value;
120  }
121  }
122  }
123  }
124  return;
125  }
126 
128  track* temp = seq.getTrack();
129  if (tr==temp){
130  target= &seq;
131  high_score=0;
132  start_position=0;
133  end_position=0;
134  }
135  else if (tr==NULL){
136  target = &seq;
137  tr=temp;
138  }
139  else{
140  _resetSeqs();
141  target = &seq;
142  tr=temp;
143  }
144  return;
145  }
146 
148  track* temp = seq.getTrack();
149  if (tr==temp){
150  query= &seq;
151  high_score=0;
152  start_position=0;
153  end_position=0;
154  }
155  else if (tr==NULL){
156  query = &seq;
157  tr=temp;
158  }
159  else{
160  _resetSeqs();
161  query = &seq;
162  tr=temp;
163  }
164  return;
165  }
166 
167  double alignment::_getGapScore(size_t queryIndex, size_t targetIndex, tracebackDirection dir){
168  if (gap == gap_ext){
169  return gap;
170  }
171  else{
172  if (dir == LEFT){
173  if ( (*trellis)[queryIndex][targetIndex].getTraceback() == LEFT){
174  return gap_ext;
175  }
176  else{
177  return gap;
178  }
179  }
180  else{
181  if ( (*trellis)[queryIndex][targetIndex].getTraceback() == UP){
182  return gap_ext;
183  }
184  else{
185  return gap;
186  }
187  }
188  }
189  }
190 
191 
192  void alignment::_initTrellis(size_t rows, size_t columns){
193  if (trellis!=NULL){
194  delete trellis;
195  trellis=NULL;
196  }
197 
198  cell x;
199  x.setTraceback(NONE);
200  x.setScore(0.0);
201  x.setDiag(0.0);
202  x.setLeft(0.0);
203  x.setUp(0.0);
204  std::vector<cell> row (columns+1,x);
205  trellis= new std::vector<std::vector<cell> > (rows+1, row);
206 
207  (*trellis)[0][0].setTraceback(NONE);
208  (*trellis)[0][1].setScore(gap);
209  (*trellis)[0][1].setTraceback(LEFT);
210  (*trellis)[1][0].setScore(gap);
211  (*trellis)[1][0].setTraceback(UP);
212 
213  for(size_t i=2;i<=columns;i++){
214  (*trellis)[0][i].setScore((*trellis)[0][i-1].getScore()+gap_ext);
215  (*trellis)[0][i].setTraceback(LEFT);
216  }
217 
218  for(size_t j=2;j<=rows;j++){
219  (*trellis)[j][0].setScore((*trellis)[j-1][0].getScore()+gap_ext);
220  (*trellis)[j][0].setTraceback(UP);
221  }
222  return;
223  }
224 
225 
227  for (size_t row=0;row<trellis->size();++row){
228  for (size_t col = 0; col<(*trellis)[row].size();++col){
229  std::cout << (*trellis)[row][col].getScore() << " " << (*trellis)[row][col].getTraceback() << "\t";
230  }
231  std::cout << std::endl;
232  }
233  }
234 
235  double alignment::align(sequence& tgt, sequence& qu, alignment_type typ, double mtch, double mismtch, double gp, double gp_ext){
236  setTarget(tgt);
237  setQuery(qu);
238  setMatch(mtch);
239  setMismatch(mismtch);
240  setGap(gp);
241  setGapExt(gp_ext);
242  return align(typ);
243  }
244 
245 
246 
247  double alignment::align (alignment_type typ){ //(sequence &top, sequence &side, double gap, double mismatch, double match){
248 
249  int top_size=target->size();
250  int side_size=query->size();
251 
252  _initTrellis(side_size, top_size);
253  double max_score(-INFINITY);
254  size_t max_row_index(0);
255  size_t max_col_index(0);
256 
257  //Fill
258  for(size_t top_index=1;top_index<=top_size;top_index++){
259  for(size_t side_index=1;side_index<=side_size;side_index++){
260  //table[side_index-1][top_index-1].setDiag();
261 
262  short targetLetter = query->seqValue(side_index-1);
263  short queryLetter = target->seqValue(top_index-1);
264 
265  double diagonal_score = (*trellis)[side_index-1][top_index-1].getScore() + (*mMatrix)[queryLetter][targetLetter];
266 
267 
268 
269  double left_score=(*trellis)[side_index][top_index-1].getScore()+_getGapScore(side_index, top_index-1, LEFT);
270 
271 
272  double up_score=(*trellis)[side_index-1][top_index].getScore()+_getGapScore(side_index-1, top_index, UP);
273 
274 
275 
276  if (typ == cLocal){
277  if (diagonal_score<0.0){
278  diagonal_score=0.0;
279  }
280  else if (diagonal_score>max_score){
281  max_score = diagonal_score;
282  max_row_index=side_index;
283  max_col_index=top_index;
284  }
285 
286  if (left_score<0.0){
287  left_score=0.0;
288  }
289  else if (left_score>max_score){
290  max_score = left_score;
291  max_row_index=side_index;
292  max_col_index=top_index;
293  }
294 
295  if (up_score<0.0){
296  up_score=0.0;
297  }
298  else if (up_score>max_score){
299  max_score = up_score;
300  max_row_index=side_index;
301  max_col_index=top_index;
302  }
303  }
304 
305 
306  if (diagonal_score>=up_score && diagonal_score >= left_score){
307  (*trellis)[side_index][top_index].setScore(diagonal_score);
308  (*trellis)[side_index][top_index].setTraceback(DIAGONAL);
309  }
310  else if(up_score>=left_score){
311  (*trellis)[side_index][top_index].setScore(up_score);
312  (*trellis)[side_index][top_index].setTraceback(UP);
313  }
314  else{
315  (*trellis)[side_index][top_index].setScore(left_score);
316  (*trellis)[side_index][top_index].setTraceback(LEFT);
317  }
318  }
319  }
320 
321  std::string align_top;
322  std::string align_side;
323  std::string align_status;
324 
325  size_t top_iter;
326  size_t side_iter;
327 
328  if (typ == cGlobal){
329  top_iter=top_size;
330  side_iter=side_size;
331  }
332  else if (typ == cLocal){
333  top_iter=max_col_index;
334  side_iter=max_row_index;
335  }
336 
337 
338  while (top_iter!=0 || side_iter!=0){
339 
340  std::cout << side_iter << "," << top_iter << "\t" << (*trellis)[side_iter][top_iter].getScore() << std::endl;
341 
342  switch ((*trellis)[side_iter][top_iter].getTraceback()){
343  case DIAGONAL:
344  align_side+=query->getSymbol(--side_iter);
345  align_top+=target->getSymbol(--top_iter);
346  align_status+= (query->seqValue(side_iter) != target->seqValue(top_iter)) ? ' ' : '|';
347  break;
348  case UP:
349  align_side+=query->getSymbol(--side_iter);
350  align_top+="-";
351  align_status+=" ";
352  break;
353  case LEFT:
354  //cout << "Case LEFT:" << side_size << "\t" << top_size << endl;
355 
356  align_top+=target->getSymbol(--top_iter);
357  align_side+="-";
358  align_status+=" ";
359  break;
360  default:
361  //cout << "We have a problem Houston\n" << endl;
362  break;
363  }
364 
365 
366  if (typ == cLocal && fabs((*trellis)[side_iter][top_iter].getScore())<0.001){
367  break;
368  }
369  }
370 
371  std::reverse(align_top.begin(), align_top.end());
372  std::reverse(align_side.begin(), align_side.end());
373  std::reverse(align_status.begin(), align_status.end());
374 
375  std::cout << align_top << std::endl << align_status << std::endl << align_side << std::endl;
376 
377  printTrellis();
378  return (*trellis)[side_size][top_size].getScore();
379  }
380 
381 
382  double alignment::calcLambda(std::vector<double>& frequencies){
383 
384  return 0;
385  }
386 
388  size_t alphaSize = mMatrix->size();
389  double estimatedFreq= 1/static_cast<double>(alphaSize);
390 
391  double expectedScore(0);
392  for ( size_t row=0; row<alphaSize; ++row){
393  for ( size_t col=0; col<alphaSize; ++col){
394 
395  }
396  }
397  }
398 
399 
401  std::vector<double> scores;
402 
403  for(int i=0;i<100;++i){
404  sequence* oldQuery=query;
405  sequence shuffled=shuffle(query);
406  setQuery(shuffled);
407  double score = align(cGlobal);
408  scores.push_back(score);
409  std::cout<< score << std::endl;
410  query=oldQuery;
411  }
412 
413  return 0.0;
414  }
415 
416 
417 
418 // alignment local(std::string &top, std::string &side,int match,int mismatch, int gap, int min_score){
419 // int top_size=top.size();
420 // int side_size=side.size();
421 // int max_top_index;
422 // int max_side_index;
423 // int max_score=0;
424 // alignment results;
425 //
426 // //Initialize
427 // cell x;
428 // x.traceback=NONE;
429 // x.score=0;
430 // std::vector<cell> row (top_size+1,x);
431 // std::vector<std::vector<cell> > table (side_size+1,row);
432 //
433 // //Fill
434 // for(int top_index=1;top_index<=top_size;top_index++){
435 // for(int side_index=1;side_index<=side_size;side_index++){
436 // int diagonal_score;
437 // int up_score;
438 // int left_score;
439 //
440 // diagonal_score = (top[top_index-1] == side[side_index-1]) ? table[side_index-1][top_index-1].score+match : table[side_index-1][top_index-1].score-mismatch;
441 // left_score=table[side_index][top_index-1].score-gap;
442 // up_score=table[side_index-1][top_index].score-gap;
443 //
444 //
445 // if (diagonal_score>=up_score && diagonal_score >= left_score){
446 // if (diagonal_score>0){
447 // table[side_index][top_index].score=diagonal_score;
448 // table[side_index][top_index].traceback=DIAGONAL;
449 // }
450 // }
451 // else if(up_score>=left_score){
452 // if (up_score>0){
453 // table[side_index][top_index].score=up_score;
454 // table[side_index][top_index].traceback=UP;
455 // }
456 // }
457 // else{
458 // if (left_score>0){
459 // table[side_index][top_index].score=left_score;
460 // table[side_index][top_index].traceback=LEFT;
461 // }
462 // }
463 //
464 // if (table[side_index][top_index].score>max_score){
465 // max_side_index=side_index;
466 // max_top_index=top_index;
467 // max_score=table[side_index][top_index].score;
468 // }
469 // }
470 // }
471 //
472 // /*
473 // //Print table
474 // for(int side_index=0;side_index<=side_size;side_index++){
475 // for(int top_index=0;top_index<=top_size;top_index++){
476 // cout << table[side_index][top_index].score << ",";
477 // }
478 // cout << endl;
479 // }
480 // */
481 //
482 // std::string align_top;
483 // std::string align_side;
484 //
485 // if (max_score<min_score){
486 // results.score=max_score;
487 // return results;
488 // }
489 //
490 // //cout << "Side_INDEX:\t" << max_side_index << endl;
491 // //cout << "Top_INDEX:\t" << max_top_index << endl;
492 //
493 // if (max_side_index<side_size){
494 // int difference=side_size-max_side_index;
495 // results.end_position=max_top_index+difference;
496 // results.begin_position=results.end_position-(side_size-1);
497 // }
498 // else{
499 // results.end_position=max_top_index;
500 // results.begin_position=max_top_index-(side_size-1);
501 // }
502 //
503 //
504 // top_size=max_top_index;
505 // side_size=max_side_index;
506 // int pointer=table[side_size][top_size].traceback;
507 //
508 // //cout << side_size << "\t" << top_size << endl;
509 // while (pointer!=NONE){
510 // //cout << "SIDE_SIZE\t" << side_size << endl;
511 // //cout << "TOP_SIZE\t" << top_size << endl;
512 // //cout << "TRACEBACK\t" << table[side_size][top_size].traceback << endl;
513 // //cout << "SCORE\t" << table[side_size][top_size].score << endl;
514 //
515 //
516 // switch (pointer){
517 // case DIAGONAL:
518 // //cout << "Case Diagonal:" << side_size << "\t" << top_size << endl;
519 // align_side+=side[side_size-1];
520 // align_top+=top[top_size-1];
521 // top_size--;
522 // side_size--;
523 // pointer=table[side_size][top_size].traceback;
524 // break;
525 // case UP:
526 // //cout << "Case UP:" << side_size << "\t" << top_size << endl;
527 //
528 // align_side+=side[side_size-1];
529 // align_top+="-";
530 // side_size--;
531 // pointer=table[side_size][top_size].traceback;
532 // break;
533 // case LEFT:
534 // //cout << "Case LEFT:" << side_size << "\t" << top_size << endl;
535 //
536 // align_top+=top[top_size-1];
537 // align_side+="-";
538 // top_size--;
539 // pointer=table[side_size][top_size].traceback;
540 // break;
541 // default:
542 // //cout << "We have a problem Houston\n" << endl;
543 // break;
544 // }
545 // }
546 //
547 //
548 // results.target=align_top;
549 // results.query=align_side;
550 // results.score=max_score;
551 //
552 // //cout << align_top << endl << align_side <<endl<< max_score <<endl;
553 //
554 // return results;
555 // }
556 }
557