StochHMM  v0.34
Flexible Hidden Markov Model C++ Library and Application
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
emm.cpp
Go to the documentation of this file.
1 //emm.cpp
2  //Copyright (c) 2007-2012 Paul C Lott
3  //University of California, Davis
4  //Genome and Biomedical Sciences Facility
5  //UC Davis Genome Center
6  //Ian Korf Lab
7  //Website: www.korflab.ucdavis.edu
8  //Email: lottpaul@gmail.com
9  //
10  //Permission is hereby granted, free of charge, to any person obtaining a copy of
11  //this software and associated documentation files (the "Software"), to deal in
12  //the Software without restriction, including without limitation the rights to
13  //use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
14  //the Software, and to permit persons to whom the Software is furnished to do so,
15  //subject to the following conditions:
16  //
17  //The above copyright notice and this permission notice shall be included in all
18  //copies or substantial portions of the Software.
19  //
20  //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  //IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
22  //FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
23  //COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
24  //IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
25  //CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 
27 #include "emm.h"
28 namespace StochHMM{
29 
30  //! Create an emission
32  real_number = false;
33  complement = false;
34  continuous = false;
35  multi_continuous = false;
36  realTrack = NULL;
37 
38  function = false;
39  lexFunc = NULL;
40 
41  pdf = NULL;
42  dist_parameters = NULL;
43 
44  multiPdf = NULL;
45  number_of_tracks = 0;
46  trcks = NULL;
47  pass_values = NULL;
48  track_indices = NULL;
49 
50  tagFunc = NULL;
51  }
52 
53 
54  //! Destroy an emission
56  delete lexFunc;
57  delete tagFunc;
58  function = false;
59 
60  lexFunc = NULL;
61  tagFunc = NULL;
62  }
63 
64  //!Parse an emission from text model file
65  //!\param txt String representation of emission
66  //!\param trks Tracks used by the model
67  //!\param wts Weights used by the model
68  //!\param funcs State functions used by the model
69  bool emm::parse(std::string& txt,tracks& trks, weights* wts, StateFuncs* funcs){
70  if (!_processTags(txt,trks, wts, funcs)){
71  return false;
72  }
73 
74  stringList ln;
75  ln.splitString(txt,"\n");
76  size_t idx;
77  if (ln.contains("EMISSION")){
78  idx = ln.indexOf("EMISSION");
79  }
80  else{
81  std::cerr << "Missing EMISSION tag from emission. Please check the formatting. This is what was handed to the emission class:\n " << txt << std::endl;
82  return false;
83  }
84 
85  stringList line;
86  line.splitString(ln[idx], "\t,: ");
87 
88  size_t typeBegin(0);
89 
90  //Determine Emission Type and set appropriate flags
91  valueType valtyp(PROBABILITY);
92  if (line.contains("P(X)")){
93  typeBegin = line.indexOf("P(X)");
94  valtyp=PROBABILITY;
95  }
96  else if (line.contains("LOG")){
97  typeBegin = line.indexOf("LOG");
98  valtyp=LOG_PROB;
99  }
100  else if (line.contains("COUNTS")){
101  typeBegin = line.indexOf("COUNTS");
102  valtyp=COUNTS ;
103  }
104  else if (line.contains("REAL_NUMBER")){
105  typeBegin = line.indexOf("REAL_NUMBER");
106  real_number = true;
107  if (line.contains("COMPLEMENT") || line.contains("1-P(X)")) {
108  complement=true;
109  }
110  }
111  else if (line.contains("MULTI_CONTINUOUS")){
112  typeBegin = line.indexOf("MULTI_CONTINUOUS");
113  multi_continuous=true;
114  if (line.contains("COMPLEMENT") || line.contains("1-P(X)")) {
115  complement=true;
116  }
117  }
118  else if (line.contains("CONTINUOUS")){
119  typeBegin = line.indexOf("CONTINUOUS");
120  continuous=true;
121  if (line.contains("COMPLEMENT") || line.contains("1-P(X)")) {
122  complement=true;
123  }
124  }
125 
126  else if (line.contains("FUNCTION")){
127  typeBegin = line.indexOf("FUNCTION");
128  function=true;
129  }
130  else {
131  std::string info = "Couldn't parse Value type in the Emission: " + txt + " Please check the formatting. The allowed types are: P(X), LOG, COUNTS, or REAL_NUMBER. \n";
132  std::cerr << info << std::endl;
133 
134  //errorInfo(sCantParseLine, info.c_str());
135  }
136 
137 
138  //remaining tracks and Orders then set Track
139  std::vector<track*> tempTracks;
140  for(size_t i=1;i<typeBegin;i++){
141  track* tk = trks.getTrack(line[i]);
142  if (tk==NULL){
143  std::cerr << "Emissions tried to add a track named: " << line[i] << " . However, there isn't a matching track in the model. Please check to model formatting.\n";
144  return false;
145  }
146  else{
147  tempTracks.push_back(tk);
148  }
149  }
150 
151  //Real Number Emissions
152  if (real_number){
153 
154  if (tempTracks.size()>1){
155  std::cerr << "Multiple tracks listed under Real Track Emission Definition\n";
156  return false;
157  }
158 
159  realTrack = tempTracks[0];
160  return true;
161  }
162  //Multivariate Continuous PDF emission
163  else if (multi_continuous){
164  if (tempTracks.size()==1){
165  std::cerr << "Only a single track listed under MULTI_CONTINUOUS\n\
166  Use CONTINUOUS instead of MULTI-CONTINUOUS\n";
167  return false;
168  }
169 
170  //Assign track information
171  track_indices = new std::vector<size_t>;
172  trcks = new std::vector<track*> (tempTracks);
173  number_of_tracks = trcks->size();
174  pass_values = new std::vector<double> (number_of_tracks,-INFINITY);
175  for(size_t i = 0; i < number_of_tracks ; ++i){
176  track_indices->push_back((*trcks)[i]->getIndex());
177  }
178 
179  idx = ln.indexOf("PDF");
180  line.splitString(ln[idx],"\t:, ");
181 
182  size_t function_idx = line.indexOf("PDF") + 1;
183  multiPdfName = line[function_idx];
185 
186 
187  size_t parameter_idx = line.indexOf("PARAMETERS");
188 
189  dist_parameters = new(std::nothrow) std::vector<double>;
190 
191  if(parameter_idx != SIZE_MAX){
192  for(size_t i = parameter_idx+1 ; i< line.size() ; i++){
193  double value;
194  stringToDouble(line[i], value);
195  dist_parameters->push_back(value);
196  }
197  }
198 
199  return true;
200 
201  }
202  //U
203  else if (continuous){
204 
205  if (tempTracks.size()>1){
206  std::cerr << "Multiple tracks listed under CONTINUOUS Track Emission Definition\n\
207  Must use MULTI_CONTINUOUS for multivariate emissions\n";
208  return false;
209  }
210 
211  realTrack = tempTracks[0];
212 
213  idx = ln.indexOf("PDF");
214  line.splitString(ln[idx],"\t:, ");
215 
216  size_t function_idx = line.indexOf("PDF") + 1;
217  pdfName = line[function_idx];
218 
219  size_t parameter_idx = line.indexOf("PARAMETERS");
220  dist_parameters = new(std::nothrow) std::vector<double>;
221 
222 
223  if(parameter_idx != SIZE_MAX){
224  for(size_t i = parameter_idx+1 ; i< line.size() ; i++){
225  double value;
226  stringToDouble(line[i], value);
227  dist_parameters->push_back(value);
228  }
229  }
230 
231  pdf = funcs->getPDFFunction(pdfName);
232 
233  return true;
234  }
235  else if (function){
236  //Get function name
237  std::string& functionName = line[typeBegin+1];
238 
239  //Set parameters for function
240  lexFunc = new(std::nothrow) emissionFuncParam(functionName,funcs,tempTracks[0]);
241 
242  if (lexFunc==NULL){
243  std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
244  exit(1);
245  }
246 
247  return true;
248  }
249  else{ //Traditional Lexcical Emission
250 
251 
252  if (ln.contains("ORDER")){
253  idx=ln.indexOf("ORDER");
254  }
255  else{
256  std::cerr << "Couldn't find ORDER in non-Real_Number emission. Please check the formatting" << std::endl;
257  return false;
258  //errorInfo(sCantParseLine, "Couldn't find ORDER in non-Real_Number emission. Please check the formatting\n");
259  }
260 
261  std::vector<int> tempOrder;
262  line.splitString(ln[idx],"\t:, ");
263 
264  size_t orderIdx = line.indexOf("ORDER");
265  orderIdx++;
266 
267  size_t ambIdx;
268  bool containsAmbig = line.contains("AMBIGUOUS");
269 
270  if (containsAmbig){ambIdx=line.indexOf("AMBIGUOUS");}
271  else{ ambIdx= line.size();}
272 
273  for(size_t i=orderIdx;i<ambIdx;i++){
274 
275  int tempValue;
276  if (!stringToInt(line[i], tempValue)){
277  std::cerr << "Emission Order not numeric" << std::endl;
278  return false;
279  }
280 
281  if (tempValue>32){
282  std::cerr << "Emission order is greater than 32. Must be 32 or less" << std::endl;
283  return false;
284  }
285 
286  tempOrder.push_back(tempValue);
287  }
288 
289  if (tempOrder.size() == tempTracks.size()){
290  for(size_t i=0;i<tempOrder.size();i++){
291  scores.addTrack(tempTracks[i], tempOrder[i]);
292  }
293  }
294  else{
295  std::cerr << "Different number of tracks and orders parsed in Emission: " << txt << " Check the formatting of the Emission" << std::endl;
296  return false;
297  }
298 
299 
300  //Parse Ambiguous Tag Info
301  if (containsAmbig){
302  ambIdx++;
303  if (line.size()<=ambIdx){
304  std::cerr << "No scoring type after AMBIGUOUS label\nAssuming AVG\n";
306  }
307  else if (line[ambIdx].compare("AVG")==0){scores.setUnkScoreType(AVERAGE_SCORE);}
308  else if (line[ambIdx].compare("MAX")==0){scores.setUnkScoreType(HIGHEST_SCORE);}
309  else if (line[ambIdx].compare("MIN")==0){scores.setUnkScoreType(LOWEST_SCORE);}
310 
311  //Constant values assigned for Ambiguous characters
312  //Can either be passed as P(X) or LOG
313  else if (line[ambIdx].compare("P(X)")==0)
314  {
316 
317  ambIdx++;
318  if (ambIdx>=line.size()){
319  std::cerr << "Missing Ambiguous Value" << std::endl;
320 
321  return false;
322  }
323 
324  double tempValue;
325  if (!stringToDouble(line[ambIdx], tempValue)){
326  std::cerr << "Ambiguous Value couldn't be parsed: "<< line[ambIdx] << std::endl;
327  return false;
328  }
329 
330  scores.setUnkScore(log(tempValue));
331  }
332  else if (line[ambIdx].compare("LOG")==0){
334  ambIdx++;
335 
336  if (ambIdx>=line.size()){
337  std::cerr << "Missing Ambiguous Value" << std::endl;
338 
339  return false;
340  }
341 
342  double tempValue;
343  if (!stringToDouble(line[ambIdx], tempValue)){
344  std::cerr << "Ambiguous Value couldn't be parsed: "<< line[ambIdx] << std::endl;
345  return false;
346  }
347 
348  scores.setUnkScore(tempValue);
349  }
350  }
351 
352  //Get Emission Tables
353  size_t expectedColumns(1);
354  size_t expectedRows(1);
355  for(size_t i = 0; i<scores.getNumberOfAlphabets(); i++){
356  expectedColumns*=scores.getAlphaSize(i);
357  expectedRows*=POWER[scores.getOrder(i)][scores.getAlphaSize(i)-1];
358  }
359 
360  std::vector<std::vector<double> >* log_prob = scores.getLogProbabilityTable();
361  std::vector<std::vector<double> >* prob = scores.getProbabilityTable();
362  std::vector<std::vector<double> >* counts = scores.getCountsTable();
363 
364 
365  for (size_t iter = 2; iter< ln.size();iter++){
366 
367 
368  //If it's the first line check for a '#' indicating that the column header is present
369  if (iter==2 && ln[iter][0]=='@'){
370  continue;
371  }
372 
373  line.splitString(ln[iter],"\t ");
374 
375  //Check for Row header
376  if (line[0][0]=='@'){
377  line.pop_ith(0);
378  }
379 
380 
381  std::vector<double> temp = line.toVecDouble();
382  if (temp.size() != expectedColumns){
383  std::string info = "The following line couldn't be parsed into the required number of columns. Expected Columns: " + int_to_string(expectedColumns) + "\n The line appears as: " + ln[iter] ;
384 
385  std::cerr << info << std::endl;
386  return false;
387  //errorInfo(sCantParseLine, info.c_str());
388  }
389  else{
390  if (valtyp == PROBABILITY){
391  prob->push_back(temp);
392  logVector(temp);
393  log_prob->push_back(temp);
394  }
395  else if (valtyp == LOG_PROB){
396  log_prob->push_back(temp);
397  expVector(temp);
398  prob->push_back(temp);
399  }
400  else if (valtyp == COUNTS){
401  counts->push_back(temp);
402  probVector(temp);
403  prob->push_back(temp);
404  logVector(temp);
405  log_prob->push_back(temp);
406  }
407  }
408  }
409 
410  if (log_prob->size() != expectedRows){
411  std::cerr << " The Emission table doesn't contain enough rows. Expected Rows: " << expectedRows << " \n Please check the Emission Table and formatting for " << txt << std::endl;
412  return false;
413  }
414 
416 
417  }
418 
419  return true;
420  }
421 
422  //!Parse an emission from text
423  //!\param txt String representation of emission
424  //!\param trks Tracks used by the model
425  //!\param wts Weights used by the model
426  //!\param funcs State functions used by the model
427  bool emm::parse(std::string& txt,track* trk){
428 
429  stringList ln;
430  ln.splitString(txt,"\n");
431  size_t idx;
432  if (ln.contains("EMISSION")){
433  idx = ln.indexOf("EMISSION");
434  }
435  else{
436  std::cerr << "Missing EMISSION tag from emission. Please check the formatting. This is what was handed to the emission class:\n " << txt << std::endl;
437  return false;
438  }
439 
440  stringList line;
441  line.splitString(ln[idx], "\t,: ");
442  //size_t typeBegin(0);
443 
444  valueType valtyp(PROBABILITY);
445  if (line.contains("P(X)")){
446  //typeBegin = line.indexOf("P(X)");
447  valtyp=PROBABILITY;
448  }
449  else if (line.contains("LOG")){
450  //typeBegin = line.indexOf("LOG");
451  valtyp=LOG_PROB;
452  }
453  else if (line.contains("COUNTS")){
454  //typeBegin = line.indexOf("COUNTS");
455  valtyp=COUNTS ;
456  }
457  else {
458  std::string info = "Couldn't parse Value type in the Emission: " + txt + " Please check the formatting. The allowed types are: P(X), LOG, COUNTS, or REAL_NUMBER. \n";
459  std::cerr << info << std::endl;
460 
461  //errorInfo(sCantParseLine, info.c_str());
462  }
463 
464 
465  //remaining tracks and Orders then set Track
466  std::vector<track*> temp_tracks;
467  temp_tracks.push_back(trk);
468 
469 
470  if (ln.contains("ORDER")){
471  idx=ln.indexOf("ORDER");
472  }
473  else{
474  std::cerr << "Couldn't find ORDER in non-Real_Number emission. Please check the formatting" << std::endl;
475  return false;
476  //errorInfo(sCantParseLine, "Couldn't find ORDER in non-Real_Number emission. Please check the formatting\n");
477  }
478 
479  std::vector<int> temp_order;
480  line.splitString(ln[idx],"\t:,");
481 
482  size_t orderIdx = line.indexOf("ORDER");
483  orderIdx++;
484 
485  size_t ambIdx;
486  bool containsAmbig = line.contains("AMBIGUOUS");
487 
488  if (containsAmbig){ambIdx=line.indexOf("AMBIGUOUS");}
489  else{ ambIdx= line.size();}
490 
491  for(size_t i=orderIdx;i<ambIdx;i++){
492 
493  int temp_value;
494  if (!stringToInt(line[i], temp_value)){
495  std::cerr << "Emission Order not numeric" << std::endl;
496  return false;
497  }
498 
499  if (temp_value>32){
500  std::cerr << "Emission order is greater than 32. Must be 32 or less" << std::endl;
501  return false;
502  }
503 
504  temp_order.push_back(temp_value);
505  }
506 
507  if (temp_order.size() == temp_tracks.size()){
508  for(size_t i=0;i<temp_order.size();i++){
509  scores.addTrack(temp_tracks[i], temp_order[i]);
510  }
511  }
512  else{
513  std::cerr << "Different number of tracks and orders parsed in Emission: " << txt << " Check the formatting of the Emission" << std::endl;
514  return false;
515  }
516 
517 
518  //Parse Ambiguous Tag Info
519  if (containsAmbig){
520  ambIdx++;
521  if (line.size()<=ambIdx){
522  std::cerr << "No scoring type after AMBIGUOUS label\nAssuming AVG\n";
524  }
525  else if (line[ambIdx].compare("AVG")==0){scores.setUnkScoreType(AVERAGE_SCORE);}
526  else if (line[ambIdx].compare("MAX")==0){scores.setUnkScoreType(HIGHEST_SCORE);}
527  else if (line[ambIdx].compare("MIN")==0){scores.setUnkScoreType(LOWEST_SCORE);}
528  else if (line[ambIdx].compare("P(X)")==0)
529  {
531 
532  ambIdx++;
533  if (ambIdx>=line.size()){
534  std::cerr << "Missing Ambiguous Value" << std::endl;
535 
536  return false;
537  }
538 
539  double tempValue;
540  if (!stringToDouble(line[ambIdx], tempValue)){
541  std::cerr << "Ambiguous Value couldn't be parsed: "<< line[ambIdx] << std::endl;
542  return false;
543  }
544 
545  scores.setUnkScore(log(tempValue));
546  }
547  else if (line[ambIdx].compare("LOG")==0){
549  ambIdx++;
550 
551  if (ambIdx>=line.size()){
552  std::cerr << "Missing Ambiguous Value" << std::endl;
553 
554  return false;
555  }
556 
557  double tempValue;
558  if (!stringToDouble(line[ambIdx], tempValue)){
559  std::cerr << "Ambiguous Value couldn't be parsed: "<< line[ambIdx] << std::endl;
560  return false;
561  }
562 
563  scores.setUnkScore(tempValue);
564  }
565  }
566 
567  //Get Tables
568  size_t expectedColumns(1);
569  size_t expectedRows(1);
570  for(size_t i = 0; i<scores.getNumberOfAlphabets(); i++){
571  expectedColumns*=scores.getAlphaSize(i);
572  expectedRows*=POWER[scores.getOrder(i)][scores.getAlphaSize(i)-1];
573  }
574 
575  std::vector<std::vector<double> >* log_prob = scores.getLogProbabilityTable();
576  std::vector<std::vector<double> >* prob = scores.getProbabilityTable();
577  std::vector<std::vector<double> >* counts = scores.getCountsTable();
578 
579 
580  for (size_t iter = 2; iter< ln.size();iter++){
581 
582 
583  //If it's the first line check for a '#' indicating that the column header is present
584  if (iter==2 && ln[iter][0]=='@'){
585  continue;
586  }
587 
588  line.splitString(ln[iter],"\t ");
589 
590  //Check for Row header
591  if (line[0][0]=='@'){
592  line.pop_ith(0);
593  }
594 
595 
596  std::vector<double> temp = line.toVecDouble();
597  if (temp.size() != expectedColumns){
598  std::string info = "The following line couldn't be parsed into the required number of columns. Expected Columns: " + int_to_string(expectedColumns) + "\n The line appears as: " + ln[iter] ;
599 
600  std::cerr << info << std::endl;
601  return false;
602  //errorInfo(sCantParseLine, info.c_str());
603  }
604  else{
605  if (valtyp == PROBABILITY){
606  prob->push_back(temp);
607  logVector(temp);
608  log_prob->push_back(temp);
609  }
610  else if (valtyp == LOG_PROB){
611  log_prob->push_back(temp);
612  expVector(temp);
613  prob->push_back(temp);
614  }
615  else if (valtyp == COUNTS){
616  counts->push_back(temp);
617  probVector(temp);
618  prob->push_back(temp);
619  logVector(temp);
620  log_prob->push_back(temp);
621  }
622  }
623  }
624 
625  if (log_prob->size() != expectedRows){
626  std::cerr << " The Emission table doesn't contain enough rows. Expected Rows: " << expectedRows << " \n Please check the Emission Table and formatting for " << txt << std::endl;
627  return false;
628  }
629 
631 
632  if (tagFunc != NULL){
633  std::cerr << "Not NULL" << std::endl;
634  }
635  return true;
636  }
637 
638 
639  //!Parses the Emission Function Tag information from the text model definition
640  bool emm::_processTags(std::string& txt, tracks& trks,weights* wts, StateFuncs* funcs){
641  stringList lst = extractTag(txt);
642 
643  if (lst.size() == 0){
644  return true;
645  }
646 
647  tagFunc=new(std::nothrow) emissionFuncParam();
648 
649 
650  if (tagFunc==NULL){
651  std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
652  exit(1);
653  }
654 
655 
656  if (!tagFunc->parse(lst, trks, wts,funcs)){
657  std::cerr << "Couldn't parse Emission Tag: " << lst.stringify() << std::endl;
658  return false;
659  }
660  std::string trackName = tagFunc->getTrackName();
661 
662 
663  //Don't need if we check in the parsing of emissionFuncParam....
664  if (!trks.isTrackDefined(trackName)){
665  std::cerr << "No Track defined with name:\t" << trackName << "\nEmission Tag:\n" << txt << std::endl;
666  return false;
667  }
668 
669 
670  return true;
671  }
672 
673 
674 
675  //!Calculate the emission value given a position in the sequence
676  //!If emission is a real number it will return the value from the real number track
677  //!If emission is a sequence then it will get the value and return it
678  //!\param seqs Sequences to use
679  //!\param iter Position within the sequences
680  //!\return double log(prob) value of emission
681  double emm::get_emission(sequences& seqs,size_t pos){
682  double final_emission(-INFINITY);
683 
684  if (real_number){
685 
686  final_emission=seqs.realValue(realTrack->getIndex(),pos);
687  if (complement){
688  final_emission=log(1-exp(final_emission));
689  }
690  }
691  else if (function){
692  final_emission=lexFunc->evaluate(seqs, pos);
693  }
694  else if (multi_continuous){
695  //Get all values from the tracks
696  for (size_t i = 0; i < number_of_tracks ; ++i){
697  (*pass_values)[i] = seqs.realValue((*track_indices)[i], pos);
698  }
699 
700  final_emission = (*multiPdf)(pass_values, dist_parameters);
701 
702  if (complement){
703  final_emission=log(1-exp(final_emission));
704  }
705  }
706  else if (continuous){
707 
708  final_emission = (*pdf)(seqs.realValue(realTrack->getIndex(),pos),dist_parameters);
709 
710  if (complement){
711  final_emission=log(1-exp(final_emission));
712  }
713  }
714  else{
715  final_emission=scores.getValue(seqs, pos);
716  }
717 
718 
719  if (tagFunc!=NULL){
720  final_emission+=tagFunc->evaluate(seqs, pos);
721  }
722 
723  return final_emission;
724  }
725 
726 
727  //!Calculate the emission value given a position in the sequence
728  //!If emission is a real number it will return the value from the real number track
729  //!If emission is a sequence then it will get the value and return it
730  //!\param seqs Sequences to use
731  //!\param iter Position within the sequences
732  //!\return double log(prob) value of emission
733  double emm::get_emission(sequence& seq,size_t pos){
734  double final_emission;
735 
736  if (real_number){
737 
738  final_emission=seq.realValue(pos);
739  if (complement){
740  final_emission=log(1-exp(final_emission));
741  }
742  }
743  else if (function){
744  final_emission=lexFunc->evaluate(seq, pos);
745  }
746  else if (multi_continuous){
747  //Get all values from the tracks
748  for (size_t i = 0; i < number_of_tracks ; ++i){
749  (*pass_values)[i] = seq.realValue(pos);
750  }
751 
752  final_emission = (*multiPdf)(pass_values, dist_parameters);
753 
754  if (complement){
755  final_emission=log(1-exp(final_emission));
756  }
757  }
758  else if (continuous){
759 
760  final_emission = (*pdf)(seq.realValue(pos),dist_parameters);
761 
762  if (complement){
763  final_emission=log(1-exp(final_emission));
764  }
765  }
766  else{
767  final_emission=scores.getValue(seq, pos);
768  }
769 
770 
771  if (tagFunc!=NULL){
772  final_emission+=tagFunc->evaluate(seq, pos);
773  }
774 
775  return final_emission;
776  }
777 
778 
779 
780  //!Is the emission from a real Number track
781  //!\return true if track is real number track
782  //!\return false if track is alphanumerical track
783  bool emm::isReal(){
785  return true;
786  }
787  else{
788  return false;
789  }
790  }
791 
792 
793  //Get the string representation of the emission
794  //\return std::string
795  std::string emm::stringify(){
796  std::string emissionString("EMISSION:\t");
797 
798  if (real_number){
799 
800  emissionString+=realTrack->getName();
801  emissionString+=":\t";
802 
803  emissionString+="REAL_NUMBER";
804  if (complement){
805  emissionString+=":\tCOMPLEMENT\t";
806  }
807  else{
808  emissionString+="\t";
809  }
810 
811 
812  if (tagFunc){
813  emissionString+=tagFunc->stringify();
814  }
815  emissionString+="\n";
816  }
817  //Univariate Continuous PDF emission
818  else if (continuous){
819  emissionString+=realTrack->getName();
820  emissionString+=":\tCONTINUOUS";
821 
822  if (tagFunc){
823  emissionString+=tagFunc->stringify();
824  }
825  emissionString+="\n\t";
826 
827  emissionString+="PDF:\t";
828  emissionString+=pdfName + "\tPARAMETERS:\t";
829  emissionString+=join(*dist_parameters, ',');
830  emissionString+= "\n";
831 
832  }
833  else if (multi_continuous){
834  for (size_t i=0; i < number_of_tracks; i++) {
835  if (i>0){
836  emissionString+=",";
837  }
838  emissionString+=(*trcks)[i]->getName();
839  }
840  emissionString+=":\tMULTI_CONTINUOUS";
841 
842  if (tagFunc){
843  emissionString+=tagFunc->stringify();
844  }
845  emissionString+="\n\t";
846 
847  emissionString+="PDF:\t";
848  emissionString+=pdfName + "\tPARAMETERS:\t";
849  emissionString+=join(*dist_parameters, ',');
850  emissionString+= "\n";
851  }
852  else if (function){
853  emissionString+=lexFunc->getTrack()->getName();
854  emissionString+=":\tFUNCTION:\t";
855  emissionString+=lexFunc->getName();
856  emissionString+="\t";
857 
858  if (tagFunc){
859  emissionString += tagFunc->stringify();
860  }
861 
862  emissionString+="\n";
863  }
864  else{
865 
866  for(size_t i=0;i<scores.trackSize();i++){
867  if (i>0){
868  emissionString+=",";
869  }
870  emissionString+=scores.getTrack(i)->getName();
871  }
872 
873  emissionString+=":\t";
874 
875  emissionString+="LOG";
876 
877  if (tagFunc){
878 
879  emissionString += "\t";
880 
881  emissionString += tagFunc->stringify();
882  }
883 
884  emissionString+="\n\tORDER:\t";
885 
886  for(size_t i=0;i<scores.trackSize();i++){
887  if (i>0){
888  emissionString+=",";
889  }
890  emissionString+=int_to_string(scores.getOrder(i));
891  }
892 
894 
895  if (ambTemp!=NO_SCORE){
896  emissionString+="\tAMBIGUOUS:\t";
897  emissionString+=(ambTemp==HIGHEST_SCORE)? "MAX":
898  (ambTemp==LOWEST_SCORE)? "MIN":
899  (ambTemp==AVERAGE_SCORE)? "AVG": "LOG:" + double_to_string(scores.getAmbDefinedScore());
900  }
901  emissionString+="\n";
902 
903  emissionString+=scores.stringify();
904  //scores.stringifyAmbig();
905  }
906  emissionString+="\n";
907 
908  return emissionString;
909  }
910 
911 }