StochHMM  v0.34
Flexible Hidden Markov Model C++ Library and Application
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
hmm.cpp
Go to the documentation of this file.
1 //hmm.cpp
2 //Copyright (c) 2007-2012 Paul C Lott
3 //University of California, Davis
4 //Genome and Biomedical Sciences Facility
5 //UC Davis Genome Center
6 //Ian Korf Lab
7 //Website: www.korflab.ucdavis.edu
8 //Email: lottpaul@gmail.com
9 //
10 //Permission is hereby granted, free of charge, to any person obtaining a copy of
11 //this software and associated documentation files (the "Software"), to deal in
12 //the Software without restriction, including without limitation the rights to
13 //use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
14 //the Software, and to permit persons to whom the Software is furnished to do so,
15 //subject to the following conditions:
16 //
17 //The above copyright notice and this permission notice shall be included in all
18 //copies or substantial portions of the Software.
19 //
20 //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 //IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
22 //FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
23 //COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
24 //IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
25 //CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 #include "hmm.h"
27 namespace StochHMM{
28 
29 
30  //! Import multiple models
31  //! \param modelFile Path to multiple model file
32  //! \param funcs Pointer to state Functions
33  void models::importModels(std::string& modelFile, StateFuncs* funcs){
34  std::ifstream MOD;
35  MOD.open(modelFile.c_str());
36  if (MOD.fail()){
37  std::string info = "Model file not found: " + modelFile;
38  std::cerr << info << std::endl;
39  exit(1);
40  }
41 
42  //Input is assigned to "input" & get first line
43  std::string input;
44 
45  //Check file header for correct type
46  getline(MOD, input, '\n');
47 #ifdef DEBUG_MODEL
48  std::cout << input <<std::endl;
49 #endif
50  if (input.compare("#STOCHHMM MODEL FILE")==0){
51  MOD.close();
52  model* temp= new(std::nothrow) model;
53 
54  if (temp==NULL){
55  std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
56  exit(1);
57  }
58 
59  hmms.push_back(temp);
60  hmms[0]->import(modelFile,funcs);
61  }
62  else if (input.compare("#STOCHHMM MODELS")==0){
63  std::vector<std::string> filenames;
64 
65  while (getline(MOD,input,'\n')){
66 #ifdef DEBUG_MODEL
67  sd::cout << input <<endl;
68 #endif
69  //Need to check input to make sure its valid;
70  //cout << input <<endl;
71  if (input.compare("")==0){
72  break;
73  }
74  else{
75  filenames.push_back(input);
76  }
77  }
78  MOD.close();
79 
80  for(size_t i=0;i<filenames.size();i++){
81  model* temp=new(std::nothrow) model;
82 
83  if (temp==NULL){
84  std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
85  exit(1);
86  }
87 
88  hmms.push_back(temp);
89  }
90 
91  for(size_t i=0;i<filenames.size();i++){
92  hmms[i]->import(filenames[i],funcs);
93  hmms[i]->print();
94  }
95  }
96  else{
97  std::cerr << "Header for " << modelFile << "does not indicate that it is a StochHMM model file.\n";
98  std::cerr << "Header should be: #STOCHHMM MODEL FILE \n" << "Header given: " << input <<std::endl;
99  }
100  return;
101  }
102 
103 
104  // TODO: Change to getModelByAttrib();
105  //
106  ////----------------------------------------------------------------------------//
107  //// Description: getGCModel(float gc)
108  //// Returns a pointer to the model that has the closest GC range(calculated from
109  //// midpoint to the sequence's gc percentage
110  ////
111  ////----------------------------------------------------------------------------//
112  //model* models::getGCModel(float gc){
113  //
114  // model *model=NULL;
115  // gc*=100.0f; //Get percentage
116  // float min=100.0f;
117  // size_t minIterator=0;
118  //
119  // for(size_t iter=0; iter<models.size(); iter++){
120  // //get distance from midpoint to sequences GC%
121  // float start=models[iter]->getLowerRange();
122  // float stop=models[iter]->getUpperRange();
123  // float midpoint=(stop-start)/2.0f;
124  // midpoint+=start;
125  //
126  // float distance =fabs(midpoint - gc);
127  //
128  // if (distance<min){ //if it is a minimum keep track of it
129  // minIterator=iter;
130  // min=distance;
131  // }
132  // }
133  //
134  // if (min<100.0f){ //If we've seen a min then return it else return NULL
135  // model=models[minIterator];
136  // }
137  //
138  // return model; //return pointer to HMM
139  //}
140 
141  //!Get pointer to model at index position
142  //! \param iter Index iterator for position
143  //! \return pointer to model
144  model* models::getModel(size_t iter) {
145  if (iter<this->size()) {
146  return hmms[iter];
147  }
148  else{
149  return NULL;
150  }
151  }
152 
153 
154 
155  //!Create a model
157  ending=new(std::nothrow) state();
158 
159  if (ending==NULL){
160  std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
161  exit(1);
162  }
163 
164  finalized=false;
165  basicModel=true;
166  attribTwo=false;
167 
168  initial = NULL;
169  scaling = NULL;
170  templatedStates = NULL;
174 
175  range[0]=-INFINITY;
176  range[1]=-INFINITY;
177 
178  return;
179  }
180 
181  // //TODO: more model constructors based on additional templates and weights to pass to import/parse
182  // //!Create a model file
183  // //! \param modelFile Filename of model file
184  // //! \param funcs Pointer to State functions defined by programmer
185  // model::model(std::string& modelFile, StateFuncs* funcs){
186  // ending=new state();
187  // finalized=false;
188  // basicModel=true;
189  // initial=NULL;
190  // scaling=NULL;
191  // templatedStates=NULL;
192  // attribTwo=false;
193  // import(modelFile,funcs);
194  // return;
195  // }
196 
197  //TODO: multiple import function for templates and weights...
198  //!Import the model file and parse it
199  //! \param modelFile Model filename
200  //! \param funcs Pointer to State functions defined by programmer
201  bool model::import(std::string& modelFile, StateFuncs* funcs){
202  std::string modelString=slurpFile(modelFile);
203  return parse(modelString,funcs,NULL,NULL);
204  }
205 
206 
207  bool model::import(std::string& modelFile, StateFuncs* funcs, templates* tmpls, weights* scl){
208  std::string modelString=slurpFile(modelFile);
209  return parse(modelString, funcs, tmpls, scl);
210  }
211 
212  bool model::import(std::string& modelFile){
213  std::string modelString=slurpFile(modelFile);
214  return parse(modelString,NULL,NULL,NULL);
215  }
216 
217 
218  bool model::importFromString(std::string& modelString, StateFuncs* funcs){
219  return parse(modelString,funcs,NULL,NULL);
220  }
221 
222 
223  bool model::importFromString(std::string& modelString, StateFuncs* funcs, templates* tmpls, weights* scl){
224  return parse(modelString, funcs, tmpls, scl);
225  }
226 
227 
228  bool model::importFromString(std::string& modelString){
229  return parse(modelString,NULL,NULL,NULL);
230  }
231 
232  //!Parses text model file
233  //!Splits the model into sections that are then parsed by the individiual classes
234  //!parse() functions.
235  bool model::parse(const std::string& model, StateFuncs* funcs, templates* tmpls, weights* scl ){
236 
237  templatedStates=tmpls;
238  scaling=scl;
239 
240  //std::cout << model <<std::endl;
241 
242  size_t header = model.find("MODEL INFORMATION");
243  size_t track = model.find("TRACK SYMBOL DEFINITIONS");
244  size_t ambig = model.find("AMBIGUOUS SYMBOL DEFINITIONS");
245  size_t templ = model.find("TEMPLATED STATES");
246  size_t scale = model.find("SCALING FUNCTIONS");
247  size_t st = model.find("STATE DEFINITIONS");
248  size_t blank;
249  size_t nlChar;
250 
251  //Parse Model Informaton (Optional)
252  if (header!=std::string::npos){
253 
254  blank=model.find("\n\n",header); //Get coordinates of splitting Model Information
255 
256  size_t nlCharEq = model.rfind("####\n",blank);
257  size_t nlCharNum= model.rfind("====\n",blank);
258  //Check for optional dividing line
259  if (nlCharEq!=std::string::npos){
260  nlChar=nlCharEq+5;
261  }
262  else if (nlCharNum!=std::string::npos){
263  nlChar=nlCharNum+5;
264  }
265  else{ //No divider line
266  nlChar=model.find("\n",header);
267  nlChar++;
268  }
269 
270  std::string head = model.substr(nlChar,blank-nlChar);
271  if (!_parseHeader(head)){
272  return false;
273  }
274  }
275 
276 
277  //Parse Tracks (Required)
278  if (track!=std::string::npos){
279 
280  blank=model.find("\n\n",track);
281 
282 
283  size_t nlCharEq = model.rfind("####\n",blank);
284  size_t nlCharNum= model.rfind("====\n",blank);
285  //Check for optional dividing line
286  if (nlCharEq!=std::string::npos){
287  nlChar=nlCharEq+5;
288  }
289  else if (nlCharNum!=std::string::npos){
290  nlChar=nlCharNum+5;
291  }
292  else{ //No divider line
293  nlChar=model.find("\n",track);
294  nlChar++;
295  }
296 
297 
298  std::string trck (model.substr(nlChar,blank-nlChar));
299 
300  if (!_parseTracks(trck)){
301  return false;
302  }
303  }
304  else{
305  std::cerr << "Required section: TRACK SYMBOL DEFINITIONS missing from the model" << std::endl;
306  return false;
307  }
308 
309  // else{
310  // errorInfo(sMissingTrackDefinition, "Required section: TRACK SYMBOL DEFINITIONS missing from the model")
311  // }
312 
313  //Parse Ambiguous Characters (Optional)
314  if (ambig!=std::string::npos){
315  blank=model.find("\n\n",ambig);
316 
317  size_t nlCharEq = model.rfind("####\n",blank);
318  size_t nlCharNum= model.rfind("====\n",blank);
319  //Check for optional dividing line
320  if (nlCharEq!=std::string::npos){
321  nlChar=nlCharEq+5;
322  }
323  else if (nlCharNum!=std::string::npos){
324  nlChar=nlCharNum+5;
325  }
326  else{ //No divider line
327  nlChar=model.find("\n",ambig);
328  nlChar++;
329  }
330 
331  std::string amb(model.substr(nlChar,blank-nlChar));
332 
333  if (!_parseAmbiguous(amb)){
334 
335  std::cerr << "Model Line:\t" << amb << std::endl;
336  exit(2);
337  //return false;
338  }
339  //std::cout << ambig << "\t" << amb << std::endl;
340  }
341 
342  //Parse Scaling Function (Optional)
343  if (scale!=std::string::npos){
344  blank=model.find("\n\n",scale);
345 
346 
347  size_t nlCharEq = model.rfind("####\n",blank);
348  size_t nlCharNum= model.rfind("====\n",blank);
349  //Check for optional dividing line
350  if (nlCharEq!=std::string::npos){
351  nlChar=nlCharEq+5;
352  }
353  else if (nlCharNum!=std::string::npos){
354  nlChar=nlCharNum+5;
355  }
356  else{ //No divider line
357  nlChar=model.find("\n",scale);
358  nlChar++;
359  }
360 
361 
362 
363  std::string scaleTxt = model.substr(nlChar,blank-nlChar);
364  if(!_parseScaling(scaleTxt)){
365  return false;
366  }
367  //std::cout << scale << "\t" << scaleTxt << std::endl;
368  }
369 
370 
371 
372  //Parse Templated Tracks (Optional)
373  if (templ!=std::string::npos){
374  blank=model.find("\n\n",templ);
375 
376 
377  size_t nlCharEq = model.rfind("####\n",blank);
378  size_t nlCharNum= model.rfind("====\n",blank);
379  //Check for optional dividing line
380  if (nlCharEq!=std::string::npos){
381  nlChar=nlCharEq+5;
382  }
383  else if (nlCharNum!=std::string::npos){
384  nlChar=nlCharNum+5;
385  }
386  else{ //No divider line
387  nlChar=model.find("\n",templ);
388  nlChar++;
389  }
390 
391  std::string tempTxt = model.substr(nlChar,blank-nlChar);
392  if (!_parseTemplates(tempTxt)){
393  return false;
394  }
395  //std::cout << templ << "\t" << tempTxt << std::endl;
396  }
397 
398 
399  //Parse State Definitions (Required)
400  if (st!=std::string::npos){
401  size_t blankNum = model.find("####\n",st);
402  size_t blankEq = model.find("====\n",st);
403 
404  blank=model.find("####\n",st);
405 
406 
407  if (blankEq!=std::string::npos){
408  blank=blankEq+5;
409  }
410  else if (blankNum!=std::string::npos){
411  blank=blankNum+5;
412  }
413  else{
414  blank=model.find("\n",st);
415  blank++;
416  }
417 
418 
419  nlChar=model.find("\n//END");
420  if (nlChar==std::string::npos){
421  nlChar=model.size()-1;
422  }
423 
424  std::string stateTxt= model.substr(blank,nlChar-blank);
425  if (!_parseStates(stateTxt,funcs)){
426  return false;
427  }
428 
429  }
430  else{
431  std::cerr << "Required sections <STATE DEFINITIONS> missing from the model" << std::endl;
432  return false;
433  //errorInfo(sMissingStateDefinition, "Required sections <STATE DEFINITIONS> missing from the model")
434  }
435 
436  return true;
437  }
438 
439 
440  //Parse model header information
441  bool model::_parseHeader(std::string& txt){
442  stringList lst;
443  size_t index;
444  bool first(false);
445  bool second(false);
446 
447  //std::string headers[] = {"MODEL_NAME", "MODEL_DESCRIPTION", "MODEL_CREATION_DATE","MODEL_CREATION_COMMAND", "MODEL_AUTHOR","MODEL_NUM_ATTRIB","MODEL_NUM_ATTRIB_UPPER","MODEL_NUM_ATTRIB_LOWER"};
448  std::string headers[] = {"NAME", "DESCRIPTION", "CREATION_DATE","CREATION_COMMAND", "AUTHOR","NUM_ATTRIB","UPPER","LOWER"};
449 
450  std::string* head[]={&name, &desc, &date, &command,&author};
451 
452  lst.fromTxt(txt);
453 
454 
455  for(int i=0;i<5;i++){
456  if (lst.contains(headers[i])){
457  index = lst.indexOf(headers[i]);
458  if (index+1 < lst.size()){
459  index++;
460  (*head[i])=lst[index];
461 
462  }
463  else{
464  std::cerr << "Couldn't parse " << headers[i] << " from \"MODEL INFORMATION\" section." << std::endl;
465  return false;
466  }
467  }
468  }
469 
470  //Set Numerical Attributes of Model
471  if (lst.contains(headers[5])){
472  index = lst.indexOf(headers[5]);
473 
474  if (index+1 < lst.size()){
475  index++;
476 
477 
478  double tempValue;
479  if (!stringToDouble(lst[index], tempValue)){
480  std::cerr << "Numerical attribute couldn't be converted to numerical value: " << lst[index] << std::endl;
481  return false;
482  }
483  range[0]=tempValue;
484 
485  }
486  else{
487  std::cerr << "Couldn't parse " << headers[5] << " value from \"MODEL INFORMATION\" section." << std::endl;
488  return false;
489  }
490  }
491  else if (lst.contains(headers[6]) && lst.contains(headers[7])){
492  index = lst.indexOf(headers[6]);
493 
494  if (index+1<lst.size()){
495  index++;
496 
497  double tempValue;
498  if (!stringToDouble(lst[index], tempValue)){
499  std::cerr << "Numerical attribute couldn't be converted to numerical value: " << lst[index] << std::endl;
500  return false;
501  }
502  range[1]=tempValue;
503  second=true;
504  }
505  else{
506  std::cerr << "Couldn't parse " << headers[6] << " value from \"MODEL INFORMATION\" section." << std::endl;
507  return false;
508  }
509 
510 
511  index = lst.indexOf(headers[7]);
512 
513  if (index+1<lst.size()){
514  index++;
515 
516 
517  double tempValue;
518  if (!stringToDouble(lst[index], tempValue)){
519  std::cerr << "Numerical attribute couldn't be converted to numerical value: " << lst[index] << std::endl;
520  return false;
521  }
522  range[0]=tempValue;
523  first=true;
524  }
525  else{
526  std::cerr << "Couldn't parse " << headers[6] << " value from \"MODEL INFORMATION\" section." << std::endl;
527  return false;
528  }
529 
530  if (first && second){
531  attribTwo=true;
532  }
533  else{
534  std::cerr << "Unable to parse both UPPER and LOWER" << std::endl;
535  return false;
536  }
537  }
538  else if (lst.contains(headers[6])){
539  index = lst.indexOf(headers[6]);
540 
541  if (index+1<lst.size()){
542  index++;
543 
544  double tempValue;
545  if (!stringToDouble(lst[index], tempValue)){
546  std::cerr << "Numerical attribute couldn't be converted to numerical value: " << lst[index] << std::endl;
547  return false;
548  }
549  range[0]=tempValue;
550 
551 
552  }
553  else{
554  std::cerr << "Couldn't parse " << headers[6] << " value from \"MODEL INFORMATION\" section." << std::endl;
555  return false;
556  }
557  }
558  else if (lst.contains(headers[7])){
559  index = lst.indexOf(headers[6]);
560 
561  if (index+1<lst.size()){
562  index++;
563 
564  double tempValue;
565  if (!stringToDouble(lst[index], tempValue)){
566  std::cerr << "Numerical attribute couldn't be converted to numerical value: " << lst[index] << std::endl;
567  return false;
568  }
569  range[0]=tempValue;
570  }
571  else{
572  std::cerr << "Couldn't parse " << headers[7] << " value from \"MODEL INFORMATION\" section." << std::endl;
573  return false;
574  }
575  }
576 
577  return true;
578  }
579 
580 
581  bool model::_parseTracks(std::string& txt){
582  stringList lst;
583  lst.splitString(txt, "\n");
584  for(size_t i=0;i<lst.size();i++){
585  track* trk=new(std::nothrow) track();
586 
587  if (trk==NULL){
588  std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
589  exit(1);
590  }
591 
592  if (trk->parse(lst[i])){
593  trk->setIndex(trcks.size());
594  trcks.push_back(trk);
595  }
596  else {
597  std::string info = "Couldn't parse new track line. Please check formatting of : " + lst[i];
598  std::cerr << info << std::endl;
599  exit(1);
600 
601  //errorInfo(sCantParseLine, info.c_str());
602 
603  //std::cerr << "Couldn't parse new track line. Please check formatting of : " << lst[i] << std::endl;
604  //exit(1);
605  }
606 
607  }
608  return true;
609  }
610 
611  bool model::_parseAmbiguous(std::string& txt){
612  stringList lst;
613  lst.splitString(txt, "\n");
614  for(size_t i=0;i<lst.size();i++){
615  stringList ln;
616  ln.splitString(lst[i],":");
617  track* trk = getTrack(ln[0]);
618  if (trk!=NULL){
619  if (!trk->parseAmbiguous(ln[1])){
620  std::cerr << "Couldn't parse the Ambiguous section for " << ln[0] << std::endl;
621  return false;
622  }
623  }
624  else{
625  std::string info = "Ambiguous Characters Section\nSupplied track name doesn't correspond to any previously parsed tracks.\nPlease check the formatting and names.\n Unfound Name: " + ln[0];
626  std::cerr << info << std::endl;
627  return false;
628  }
629 
630  }
631  //lst.print();
632  return true;
633  }
634 
635 
636  bool model::_parseScaling(std::string& txt){
637 
638  if (scaling==NULL){
639  scaling = new(std::nothrow) weights;
640 
641  if (scaling==NULL){
642  std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
643  exit(1);
644  }
645  }
646 
647  stringList lst;
648  lst.splitND(txt, "SCALE:");
649  for(size_t i=0;i<lst.size();i++){
650  weight* wt = new(std::nothrow) weight;
651 
652  if (wt==NULL){
653  std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
654  exit(1);
655  }
656 
657  if (!wt->parse(lst[i])){
658  return false;
659  }
660 
661 
662  scaling->addWeight(wt);
663  }
664  //lst.print();
665 
666  return true;
667  }
668 
669 
670  bool model::_parseTemplates(std::string& txt){
671  if (!txt.empty()){
672  templatedStates = new(std::nothrow) templates();
673 
674  if (templatedStates==NULL){
675  std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
676  exit(1);
677  }
678 
679  if (!templatedStates->parse(txt)){
680  std::cerr << "Unable to parse Templates for the model" << std::endl;
681  return false;
682  }
683  }
684 
685  return true;
686  }
687 
688  bool model::_parseStates(std::string& txt, StateFuncs* funcs){
689  //1. split sections and identify any template sections
690  //2. get state names list
691  //3. create and parse states
692 
693  stringList stats;
694  _splitStates(txt,stats);
695 
696 
697  stringList NameList;
698  _getOrderedStateNames(stats,NameList);
699 
700  for(size_t iter=0;iter<stats.size();iter++){
701  state* st = new(std::nothrow) state;
702 
703  if (st==NULL){
704  std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
705  exit(1);
706  }
707 
708  if (!st->parse(stats[iter],NameList,trcks,scaling, funcs)){
709  delete st;
710  return false;
711  }
712 
713 
714  if (st->getName() == "INIT"){
715  initial=st;
716  stateByName[st->getName()]=st;
717  }
718  else{
719  states.push_back(st);
720  stateByName[st->getName()]=st;
721  }
722  }
723 
724 
725  //Post process states to create final state with only transitions from filled out.
726  finalize();
727 
728 
729  return true;
730  }
731 
732 
733  //!Add state to model
734  //! \param st Pointer to state
736  states.push_back(st);
737  stateByName[st->getName()]=st;
738  return;
739  };
740 
741 
742  //Split states into individual state strings
743  bool model::_splitStates(std::string& txt ,stringList& sts){
744 
745  size_t start=0;
746  size_t end=0;
747 
748  while(start!=std::string::npos){
749  end=txt.find("STATE:",start+1);
750 
751  std::string st = txt.substr(start,end-start);
752 
753  clear_whitespace(st, "#");
754 
755  if (st.find("TEMPLATE:")!=std::string::npos){
756 
757  stringList tmpd;
758 
759  if (!_processTemplateState(st,tmpd)){ //! Get templated states using the defined parameters
760  std::cerr << "Unable to process template information" << std::endl;
761  return false;
762  }
763 
764  for(size_t i=0;i<tmpd.size();i++){
765  sts.push_back(tmpd[i]);
766  }
767  }
768  else{
769  sts.push_back(st);
770  }
771 
772  start=txt.find("STATE:",end);
773 
774  }
775  return true;
776  }
777 
778  bool model::_processTemplateState(std::string& txt, stringList& lst){
779  if (templatedStates==NULL){
780  std::cerr << "No template provided for State Definitions. Please provide the template in the model or when calling HMM class" << std::endl;
781  exit(1);
782 
783  //errorInfo(sMissingTemplate, "No template provided for State Definitions. Please provide the template in the model or when calling HMM class");
784  }
785  std::string templateName;
786  std::string templateIdentifier;
787  std::map<std::string,std::string> parameters;
788 
789  lst.splitString(txt,"\n");
790 
791  //Process Template Name and Identifier
792  clear_whitespace(lst[0], " \n");
793  stringList nmid;
794  nmid.splitString(lst[0],":\t");
795  nmid.print();
796 
797  //Extracting TEMPLATE NAME
798  if (nmid.contains("TEMPLATE")){
799  size_t index=nmid.indexOf("TEMPLATE");
800  index++;
801  templateName=nmid[index];
802  }
803  else{
804  std::cerr << "State Template definition doesn't contain \"TEMPLATE:\" keyword in first line of State definition of the template. Please check formatting\n";
805  return false;
806  }
807 
808  //Extracting TEMPLATE IDENTIFIER
809  if (nmid.contains("IDENTIFIER")){
810  size_t index=nmid.indexOf("IDENTIFIER");
811  index++;
812  templateIdentifier=nmid[index];
813  }
814  else{
815  std::cerr << "State Template definition doesn't contain \"IDENTIFIER:\" keyword in first line of State definition of the template. Please check formatting\n";
816  return false;
817  }
818 
819  //Process Template Variables
820  std::string lastTag;
821  for(size_t i=1;i<lst.size();i++){
822  std::string key;
823  std::string value;
824  getKeyValue(lst[i],key,value);
825  if (key.empty()){
826  parameters[lastTag]+="\n" + value;
827  }
828  else{
829  lastTag=key;
830  parameters[lastTag]=value;
831  }
832  }
833 
834 
835  //Get filled out model template
836  std::string filledTemplate = templatedStates->getTemplate(templateName, templateIdentifier, parameters);
837 
838  //Split filled out template into individual states
839  stringList sts;
840 
841  if (!_splitStates(filledTemplate, sts)){ //Split the templated states
842  std::cerr << "Unable to split templated states\n" << sts.stringify() << std::endl;
843  return false;
844  }
845 
846  return true;
847  }
848 
849 
851  for(size_t i=0;i<states.size();i++){
852  size_t nameHeader=states[i].find("NAME:");
853  size_t nameLineEnding=states[i].find_first_of("\n",nameHeader);
854  std::string name = states[i].substr(nameHeader+5,nameLineEnding-(nameHeader+5));
855  clear_whitespace(name, " \t\n");
856  if (names.containsExact(name)){
857  std::cerr << "State with name of: " << name << " is defined twice in the model\n";
858  return false;
859  }
860  else{
861  names.push_back(name);
862  }
863  }
864  return true;
865  }
866 
867 
868  //!Get pointer to state by state name
869  //!\param txt String name of state
870  //!\return pointer to state if it exists;
871  //!\return NULL if state doesn't exist in model
872  state* model::getState(const std::string& txt){
873  if (stateByName.count(txt)){
874  return stateByName[txt];
875  }
876  else {
877  return NULL;
878  }
879  }
880 
881 
882  //!Finalize the model before performing decoding
883  //!Sets transitions, checks labels, Determines if model is basic or requires intermittent tracebacks
885  if (!finalized){
886  //Add States To Transitions
887  std::set<std::string> labels;
888  std::set<std::string> gff;
889  std::set<std::string> name;
890 
891  explicit_duration_states = new(std::nothrow) std::vector<bool>(states.size(),false);
892 
893  //Create temporary hash of states for layout
894  for(size_t i=0;i<states.size();i++){
895  labels.insert(states[i]->getLabel());
896  gff.insert(states[i]->getGFF());
897  gff.insert(states[i]->getName());
898  }
899 
900  for (size_t i=0; i < states.size() ; ++i){
901  states[i]->setIter(i);
902  }
903 
904 
905 
906  //Add states To and From transition
907 
908  for(size_t i=0;i<states.size();i++){
909  states[i]->checkLabels(labels,gff,name);
911 
912  }
913 
915 
916 
917  //Now that we've seen all the states in the model
918  //We need to fix the States transitions vector transi, so that the state
919  //iterator correlates to the position within the vector
920  for(size_t i=0;i<states.size();i++){
921  states[i]->_finalizeTransitions(stateByName);
922  }
924 
925  //Check to see if model is basic model
926  //Meaning that the model doesn't call outside functions or perform
927  //Tracebacks for explicit duration.
928  //If explicit duration exist then we'll keep track of which states
929  //they are in explicit_duration_states
930 // for(size_t i=0;i<states.size();i++){
931 // std::vector<transition*>* transitions = states[i]->getTransitions();
932 // for(size_t trans=0;trans<transitions->size();trans++){
933 // if ((*transitions)[trans] == NULL){
934 // continue;
935 // }
936 //
937 // if ((*transitions)[trans]->FunctionDefined()){
938 // basicModel=false;
939 // break;
940 // }
941 // else if ((*transitions)[trans]->getTransitionType()!=STANDARD || (*transitions)[trans]->getTransitionType()!=LEXICAL){
942 //
943 // if ((*transitions)[trans]->getTransitionType() == DURATION){
944 // (*explicit_duration_states)[i]=true;
945 // }
946 //
947 // basicModel=false;
948 // break;
949 // }
950 // }
951 // }
952 
953  checkBasicModel();
955  checkTopology();
956 
957 
958  //Assign StateInfo
959  for(size_t i=0; i < states.size();i++){
960  std::string& st_name = states[i]->getName();
961 
962  info.stateIterByName[st_name]=i;
963  info.stateIterByLabel[st_name].push_back(i);
964  info.stateIterByGff[st_name].push_back(i);
965  }
966 
967 
968  finalized=true;
969  }
970 
971  return;
972 
973  }
974 
976  basicModel = true;
981 
982  complex_emission_states = new(std::nothrow) std::vector<bool>(state_size(),false);
983  complex_transition_states = new(std::nothrow) std::vector<bool>(state_size(),false);
984 
985  for(size_t st = 0 ; st<state_size(); ++st){
986  (*complex_emission_states)[st] = states[st]->hasComplexEmission();
987  (*complex_transition_states)[st]= states[st]->hasComplexTransition();
988 
990  basicModel = false;
991  }
992  }
993 
994  return;
995  }
996 
997 
1000  explicit_duration_states = NULL;
1001  explicit_duration_states = new(std::nothrow) std::vector<bool>(state_size(),false);
1002 
1003  for(size_t i=0;i<states.size();i++){
1004 
1005  std::vector<transition*>* transitions = states[i]->getTransitions();
1006 
1007  for(size_t trans=0;trans<transitions->size();trans++){
1008  if ((*transitions)[trans]== NULL){
1009  continue;
1010  }
1011 
1012  if ((*transitions)[trans]->getTransitionType() == DURATION){
1013  (*explicit_duration_states)[i]=true;
1014  }
1015  }
1016  }
1017 
1018  return;
1019  }
1020 
1022  std::vector<transition*>* trans;
1023 
1024  //Process Initial State
1025  trans = st->getTransitions();
1026  for(size_t i=0;i<trans->size();i++){
1027  state* temp;
1028  temp=this->getState((*trans)[i]->getName());
1029  if (temp){
1030  st->addToState(temp); //Also add the ptr to state vector::to
1031  (*trans)[i]->setState(temp);
1032  if (st!=initial){
1033  temp->addFromState(st);
1034  }
1035  }
1036  }
1037 
1038  if (st->endi){
1039  ending->addFromState(st);
1040  }
1041  }
1042 
1043 
1044  //!Get pointer to track by track name
1045  //!\param name track name
1046  //!\return pointer to track
1047  //!\return NULL if track with given name doesn't exist in the model
1048  track* model::getTrack(const std::string& name){
1049  return trcks.getTrack(name);
1050  }
1051 
1052 
1053  //!Get string representation of model
1054  //!\return std::string
1055  std::string model::stringify(){
1056  std::string model;
1057  std::string lnSep(50,'=');
1058  model+="#STOCHHMM MODEL FILE\n\n";
1059 
1060  model+=_stringifyHeader();
1061  model+=_stringifyTracks();
1062  model+=_stringifyScaling();
1063  model+=_stringifyStates();
1064  return model;
1065  }
1066 
1068  std::string headerString;
1069  std::string lnSep(50,'=');
1070  headerString+="MODEL INFORMATION\n" + lnSep + "\n";
1071  std::string headers[] = {"MODEL_NAME", "MODEL_DESCRIPTION", "MODEL_CREATION_DATE","MODEL_CREATION_COMMAND", "MODEL_AUTHOR","MODEL_NUM_ATTRIB","MODEL_NUM_ATTRIB_UPPER","MODEL_NUM_ATTRIB_LOWER"};
1072  std::string* head[]={&name, &desc, &date, &command,&author};
1073 
1074  for(int i=0;i<5;i++){
1075  if (!(*head[i]).empty()){
1076  headerString+=headers[i] + ":\t" + (*head[i]) + "\n";
1077  }
1078  }
1079 
1080 
1081  if (attribTwo){
1082  headerString+= headers[7] + ":\t" + double_to_string(range[1]) + "\n";
1083  headerString+= headers[8] + ":\t" + double_to_string(range[0]) + "\n";
1084  }
1085  headerString+="\n";
1086 
1087  return headerString;
1088  }
1089 
1091  std::string trackString;
1092  trackString+=trcks.stringify();
1093  trackString+="\n";
1094  return trackString;
1095  }
1096 
1098  std::string scaleString;
1099  if (scaling){
1100  scaleString+=scaling->stringify();
1101  scaleString+="\n";
1102  }
1103 
1104  return scaleString;
1105  }
1106 
1108  std::string stateString;
1109  std::string lnSep(50,'#');
1110  stateString+="STATE DEFINITIONS\n" + lnSep + "\n";
1111 
1112  if (initial){
1113  stateString+=initial->stringify();
1114  }
1115 
1116  stateString+= lnSep + "\n";
1117 
1118  for(size_t i=0;i<states.size();i++){
1119  if (states[i]){
1120  stateString+=states[i]->stringify();
1121  stateString+=lnSep + "\n";
1122  }
1123  }
1124  stateString+= "//END";
1125  return stateString;
1126  }
1127 
1128 
1129  //!Print the string representation to stdout
1131  std::cout << stringify() << std::endl;
1132  return;
1133  }
1134 
1135 
1136  //To improve we could group into those connected groups
1137  //Or we could allow user to define groups and clusters;
1138  //!Create graphvis file for the state of the model
1139  //!\param filepath complete path to use for graphviz file
1140  //!\param q0 TRUE will draw the initial state
1141 // void model::writeGraphViz(std::string filepath,bool q0){
1142 // std::ofstream gv (filepath.c_str());
1143 // if (gv.is_open()){
1144 // std::string modelName=name;
1145 // replaceChar(modelName, ' ', '_');
1146 //
1147 // gv << "digraph " << modelName << " {\n";
1148 // gv << "\tratio=LR\n";
1149 // gv << "\tsize=\"8,5\"\n";
1150 // gv << "\tnode [shape = circle];\n";
1151 //
1152 //// std::bitset<STATE_MAX>* temp;
1153 ////
1154 //// if (q0){
1155 //// temp=initial->getTo();
1156 //// for(size_t temp_iter=0; temp_iter < temp->size(); temp_iter++){
1157 //// //gv << "\tINIT -> " << (*temp)[temp_iter]->getName() << ";\n";
1158 //// }
1159 //// }
1160 ////
1161 ////
1162 //// for(size_t state_iter=0; state_iter < states.size(); state_iter++){
1163 //// temp = states[state_iter]->getTo();
1164 //// std::string stName=states[state_iter]->getName();
1165 ////
1166 //// if (stName.find_first_of("0123456789")==0){
1167 //// stName.insert(0,"_");
1168 //// }
1169 ////
1170 ////
1171 //// for(size_t temp_iter=0; temp_iter < temp->size(); temp_iter++){
1172 //// //std::string tempName=(*temp)[temp_iter]->getName();
1173 ////
1174 //// if (tempName.find_first_of("0123456789")==0){
1175 //// tempName.insert(0,"_");
1176 //// }
1177 ////
1178 //// gv << "\t" << stName << " -> " << tempName << ";\n";
1179 //// }
1180 ////
1181 ////
1182 //// if (q0 && states[state_iter]->endi){
1183 //// gv << "\t" << stName << " -> END;\n";
1184 //// }
1185 //// }
1186 //
1187 // gv << "}\n";
1188 // }
1189 // gv.close();
1190 //
1191 // }
1192 
1193  //!Get pointer to weight by name of weight
1194  //! \param name Name of weight
1195  //! \return pointer to weight
1196  weight* model::getScalingFactor(std::string& name){
1197  if (scaling==NULL){
1198  return NULL;
1199  }
1200  else{
1201  return (*scaling)[name];
1202  }
1203  }
1204 
1205 
1206  //!Get Distance from the model attrib values to user defined value
1207  double model::getDistanceToAttrib(double val){
1208  double mid=range[0];
1209  if (attribTwo){
1210  mid+=(range[1]-range[0])/2;
1211  }
1212 
1213  return abs(mid-val);
1214  }
1215 
1216 
1217 
1219 
1220  std::vector<bool> states_visited (states.size(),false);
1221  std::vector<uint16_t> visited;
1222 
1223  bool ending_defined(false);
1224 
1225  _checkTopology(initial, visited);
1226 
1227  while (visited.size()>0){
1228  uint16_t st_iter = visited.back();
1229  visited.pop_back();
1230 
1231  if (!states_visited[st_iter]){
1232  std::vector<uint16_t> tmp_visited;
1233  _checkTopology(states[st_iter],tmp_visited);
1234  size_t num_visited = tmp_visited.size();
1235 
1236  //Check orphaned
1237  if (num_visited == 0 ){
1238  //No transitions
1239  //std::cerr << "Warning: State: " << states[st_iter]->getName() << " has no transitions defined\n";
1240  }
1241  else if (num_visited == 1 && tmp_visited[0] == st_iter){
1242  //Orphaned
1243  if(states[st_iter]->getEnding() == NULL){
1244  std::cerr << "State: " << states[st_iter]->getName() << " is an orphaned state that has only transition to itself\n";
1245  }
1246 // else{
1247 // std::cerr << "State: " << states[st_iter]->getName() << " may be an orphaned state that only has transitions to itself and END state.\n";
1248 // }
1249  }
1250 
1251  for(size_t i=0; i < tmp_visited.size(); i++){
1252  if (!states_visited[tmp_visited[i]]){
1253  visited.push_back(tmp_visited[i]);
1254  }
1255  }
1256 
1257  states_visited[st_iter] = true;
1258  }
1259  }
1260 
1261  //Check for defined ending
1262  for(size_t i=0; i< states.size() ; i++){
1263  if ( states[i]->getEnding() != NULL){
1264  ending_defined = true;
1265  break;
1266  }
1267  }
1268 
1269  if (!ending_defined){
1270  std::cerr << "No END state defined in the model\n";
1271  }
1272 
1273  for(size_t i=0; i< states_visited.size(); i++){
1274  if (!states_visited[i]){
1275  std::cerr << "State: " << states[i]->getName() << " doesn't have valid model topology\n\
1276  Please check the model transitions\n";
1277  return false;
1278  }
1279  }
1280  return true;
1281  }
1282 
1283  void model::_checkTopology(state* st, std::vector<uint16_t>& visited){
1284  //Follow transitions to see if every state is visited
1285  for(size_t i = 0 ; i < st->transi->size() ; i++){
1286  if (st->transi->at(i) != NULL){
1287  visited.push_back(st->transi->at(i)->getState()->getIterator());
1288  }
1289 
1290  }
1291  return;
1292  }
1293 
1294 
1295  void print_vec (std::vector<std::vector<double> > &x){
1296  size_t sze=x.size();
1297  size_t internal_sze=x[0].size();
1298 
1299  for(size_t j=0;j<sze;j++){
1300  //cout << j << "\t";
1301  for(size_t i=0;i<internal_sze;i++){
1302  std::cout << x[j][i] << '\t' ;
1303  }
1304  std::cout << std::endl;
1305  }
1306  }
1307 
1308 }