StochHMM  v0.34
Flexible Hidden Markov Model C++ Library and Application
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
hmm.h
Go to the documentation of this file.
1 //hmm.h
2 //Copyright (c) 2007-2012 Paul C Lott
3 //University of California, Davis
4 //Genome and Biomedical Sciences Facility
5 //UC Davis Genome Center
6 //Ian Korf Lab
7 //Website: www.korflab.ucdavis.edu
8 //Email: lottpaul@gmail.com
9 //
10 //Permission is hereby granted, free of charge, to any person obtaining a copy of
11 //this software and associated documentation files (the "Software"), to deal in
12 //the Software without restriction, including without limitation the rights to
13 //use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
14 //the Software, and to permit persons to whom the Software is furnished to do so,
15 //subject to the following conditions:
16 //
17 //The above copyright notice and this permission notice shall be included in all
18 //copies or substantial portions of the Software.
19 //
20 //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 //IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
22 //FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
23 //COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
24 //IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
25 //CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 // TODO: add survival function distribution option
27 // TODO: Test internal distribution output options
28 
29 #ifndef HMM_H
30 #define HMM_H
31 
32 
33 #include <iostream>
34 #include <string>
35 #include <vector>
36 #include <sstream>
37 #include <fstream>
38 #include <map>
39 #include <list>
40 #include <set>
41 #include <stdlib.h>
42 #include <bitset>
43 
44 #include "state.h"
45 #include "track.h"
46 #include "text.h"
47 #include "stochTypes.h"
48 #include "weight.h"
49 //#include "transInfoParse.h" // Transitions Information Parsing
50 #include "modelTemplate.h"
51 #include "stateInfo.h"
52 namespace StochHMM{
53 
54 
55 
56 
57  /*! \class model
58  Hidden Markov Model Class
59  model class combines the States, and model information together in a single unit.
60  This includes the states(emissions, transitions), initial and ending states, track
61  information(alphabet and ambiguous character definitions).
62 
63  Provides functions to import the model from a text file
64 
65  The model is used by the trellis class to evaluate sequences.
66 
67 
68  */
69  class model {
70  public:
71  model();
72 
73  // model(std::string&,StateFuncs*); //! Construct model from model file
74  // model(std::string&,std::string&,StateFuncs*); //!Construct model from model file and template file
75  // model(std::string&,StateFuncs*,templates*,weights*);
76 
77  //ACCESSOR FUNCTIONS
78 
79  //Model Information
80 
81  //!Get the Name of the Model
82  inline std::string& getName(){return name;} // Get name of model
83 
84  //! Get Description of model
85  inline std::string& getDescription(){return desc;}
86 
87  //! Get Date of model
88  inline std::string& getDate(){return date;}
89 
90  //!Get Creation Command of model
91  inline std::string& getCommand(){return command;}
92 
93  //! Get Author of model
94  inline std::string& getAuthor(){return author;}
95  //inline float getLowerRange(){return range[0];};
96  //inline float getUpperRange(){return range[1];};
97 
98 
99 
100  //-----------State Information-------------/
101 
102  //!Get the number of states that are defined in the model
103  inline size_t state_size(){return states.size();}
104 
105  //!Get the name of the state at index
106  //! \param iter Index of state
107  inline std::string& getStateName(size_t iter){
108  if (iter>= states.size()){
109  std::cerr << "Attempting to access State Name which is out of range\n";
110  exit(2);
111  }
112  return states[iter]->getName();
113  };
114 
115  //!Get the Label of the state at index
116  //! \param iter Index of state
117  inline std::string& getStateLabel(size_t iter){
118  if (iter>= states.size()){
119  std::cerr << "Attempting to access State Label which is out of range\n";
120  exit(2);
121  }
122  return states[iter]->getLabel();
123  }
124 
125  //!Get the GFF Tag of the state at index
126  //! \param iter Index of state
127  inline std::string& getStateGFF(size_t iter) {
128  if (iter>= states.size()){
129  std::cerr << "Attempting to access State GFF which is out of range\n";
130  exit(2);
131  }
132  return states[iter]->getGFF();
133  }
134 
135  //!Get pointer to the state at index
136  //! \param iter Index of state
137  //! \return ptr_state Pointer to state
138  inline state* getState(size_t iter){
139  if (iter>= states.size()){
140  return NULL;
141  }
142  return states[iter];
143  }
144 
145  //!Get pointer to state by the name
146  //! \param const std::string Name of state
147  //! \return ptr_state Pointer to state
148  state* getState(const std::string&);
149 
150 
151  //!Get state by using iterator value
152  inline state* operator[](size_t iter){
153  if (iter>= states.size()){
154  return NULL;
155  }
156  return states[iter];
157  }
158 
159  //!Get vector of states that state at index transitions to
160  inline std::bitset<STATE_MAX>* getStateXTo(size_t iter){
161  if (iter>= states.size()){
162  return NULL;
163  }
164  return &(states[iter]->to);
165  }
166 
167  //!Get vector of states that the initial state transitions to
168  inline std::bitset<STATE_MAX>* getInitialTo(){return &(initial->to);}
169 
170  //!Get vector of states that transfer to the state at index
171  inline std::bitset<STATE_MAX>* getStateXFrom(size_t iter){
172  if (iter>= states.size()){
173  return NULL;
174  }
175  return &(states[iter]->from);
176  }
177 
178  //!Get list of states that transition to the ending state
179  inline std::bitset<STATE_MAX>* getEndingFrom(){return &(ending->from);}
180 
181  inline stateInfo* getStateInfo(){return &info;}
182 
183  //--------- Initial and Ending States
184 
185  //!Get pointer to the initial state
186  inline state* getInitial(){return initial;}
187 
188  //!Get pointer to the ending state
189  inline state* getEnding(){return ending;}
190 
191 
192  //--------- Scaling Factors
193  //!Get Scaling factor defined in model by name
194  //! \param std::string Name of Scaling or Weight defined in model
195  //! \return ptr to weight
196  weight* getScalingFactor(std::string&);
197 
198 
199  //--------- Attrib
200  //!Get distance to value
201  //!This is used when selecting from multiple models
202  //!User can set an attribute value. Then when evaluating
203  //!an attribute of sequence they can see which model is closest
204  //!and choose that model
205  double getDistanceToAttrib(double);
206 
207 
208  //---------- Track Information
209 
210  //!Get the number of tracks defined in the model
211  inline size_t track_size(){return trcks.size();}
212 
213  //!Get pointer to track at the index
214  //!\param iter Index of track to get
215  //! \return if iter is within range then return pointer to track
216  //! \return else return NULL
217  inline track* getTrack(size_t iter){
218  if (iter >= trcks.size()){
219  return NULL;
220  }
221  return trcks[iter];
222  }
223 
224  //!Get pointer to track based on Name associated with the track
225  //! \param const std::string Name associated with Track
226  //! \return if name is found returns pointer to the track.
227  //! \return if name is not found return NULL
228  track* getTrack(const std::string&);
229 
230  //!Get index iterator of the track with a particular name
231  //!\param txt Name of track to get index for
232  //!\return size_t index of track with name
233  inline size_t getTrackIter(const std::string& txt){return trcks.indexOf(txt);}
234 
235  //!Get pointer to the tracks of the model
236  //!\return pointer to tracks defined in model
237  inline tracks* getTracks(){return &trcks;}
238 
239  //!Check to see if model is a basic HMM
240  //!\return false if model contains explicit duration transition, or user-defined functions for emission/transitions
241  inline bool isBasic(){return basicModel;}
242 
243 
244 
245 
246  //---------------- Printing or getting String representation of Model
247 
248  //! Print model by std::cout
249  void print();
250 
251  //! Get text representation of the model
252  //! \return std::string of model.
253  std::string stringify();
254 
255  //void writeGraphViz(std::string);
256  //! Write a simple GraphViz graph
257  //! Formatting is very basic and function may disappear.
258  //void writeGraphViz(std::string,bool);
259 
260 
261 
262  //MUTATORS
263  //!Import and Parse the model from text file
264  //!\param std::string Filename
265  //!\param StateFuncs ptr Pointer to StateFuncs; if no State Functions
266  //! (Univariate, Multivariate, Emission Functions, Transition Functions) are
267  //! described then you can use NULL
268  //! \return true if import was successful
269  bool import(std::string&,StateFuncs*);
270 
271  //!Import and Parse the model from text file
272  //! \param std::string Filename
273  //! \return true if import was successful
274  bool import(std::string&);
275 
276  //!Import and Parse the model from text file
277  //! \param std::string Filename
278  //! \param StateFuncs ptr Pointer to StateFunctions
279  //! \param templates ptr Pointer to Templated State template
280  //! \param weight ptr Pointer to weighting factors
281  //! \return true if import was successful
282  bool import(std::string&, StateFuncs*, templates*, weights*);
283 
284 
285  //!Import and parse the model from std::string
286  //! \sa import(std::string&)
287  bool importFromString(std::string&);
288 
289  //!Import and parse the model from std::string
290  bool importFromString(std::string&,StateFuncs*);
291 
292  //!Import and parse the model from std::string
293  bool importFromString(std::string&, StateFuncs*, templates*, weights*);
294 
295  //!Parse the model from std::string
296  //!This is used by import functions to parse the model
297  bool parse(const std::string&, StateFuncs*, templates*, weights*);
298 
299  //!Parse the model from std::string
300  bool parse(std::string&,std::string&);
301 
302 
303  //-------------- Set Model Data
304 
305  //!Set the name of the model
306  inline void setName(std::string& txt){name=txt;};
307 
308  //! Set the model desc
309  inline void setDesc(std::string& txt){desc=txt;};
310 
311  //! Set the model creation date
312  inline void setDate(std::string& txt){date=txt;};
313 
314  //! Set the model creation command
315  inline void setCommand(std::string& txt){command=txt;};
316 
317  //! Set the model author
318  inline void setAuthor(std::string& txt){author=txt;};
319 
320  //! Set the attribute of the model
321  //! If model is going to be chosen from different values, then you can assign value
322  inline void setNumericalAttrib(float value){range[0]=value;attribTwo=false;};
323 
324  //! Set the upper range of the attribute
325  inline void setUpperRange(float& value){range[1]=value;attribTwo=true;};
326 
327  //! Set the lower range of the attribute
328  inline void setLowerRange(float& value){range[0]=value;attribTwo=true;};
329 
330  //! Add track to the model
331  inline void addTrack(track* trk){trcks.push_back(trk);};
332 
333  void addState(state*);
334 
335  //! Set the initial state pointer
336  //! \param st pointer to initial state
337  inline void setInit(state* st){initial=st;};
338 
339  //! Set teh ending state pointer
340  inline void setEnd(state* st){ending=st;};
341  //inline void addWeight(std::string& txt,weight* wt){scaling[txt]=wt;};
342 
343 
344  //----------------- Finalize and Check Final Model
345 
346  //!Finalizes model references from and to states
347  //!Each model must be finalized before being used to decode
348  //!Check the Functions and Labels of the States
349  void finalize();
350 
351  //!Check model topology
352  //!Iterates through all states to check to see if there are any:
353  //! 1. Orphaned States
354  //! 2. Dead end States
355  //! 3. Uncompleted States
356  bool checkTopology();
357 
358  //Get a vector<bool> of states that are explicit duration states
359  inline std::vector<bool>* get_explicit(){return explicit_duration_states;}
360 
363  return true;
364  }
365  return false;
366  }
367 
368  private:
369  //!Flag set to tell whether the transitions bitsets have been set for each
370  //!state. Model is also checked for correct order of states
371  bool finalized;
372 
373 
374  //!Flag for whether model contains anything other than simple transitions and emissions
375  //!If False then the model either contains additional function or emissions
377 
378  std::string name; //! Model Name
379  std::string desc; //! Model Description
380  std::string date; //! Model Creation Date
381  std::string command; //! Model Creation Command
382  std::string author; //! Model Author
383  float range[2]; //! Model Attrib Values
384  bool attribTwo; //! Two attrib Values
385 
386  tracks trcks; //! Tracks defined by model (Contains alphabet and ambiguous character definitions
387 
388  std::vector<state*> states; //! All the states contained in the model
389 
390  std::map<std::string,state*> stateByName; //Ptr to state stored by State name;
392 
393 
394  state* initial; //!Initial state q0
395  state* ending; //!Ending state
396 
397  weights* scaling; //! Weights or scaling factors associated with the model
398 
399  //std::map<std::string,weight*> scaling;
400  templates* templatedStates; //!Templated states
401 
402  std::vector<bool>* explicit_duration_states; //! States that are explicit duration states
403 
404  std::vector<bool>* complex_transition_states; //! States that have functions associated with transitions
405  std::vector<bool>* complex_emission_states; //! States that have functions associated with emissions
406 
407  bool _parseHeader(std::string&); //! Function to parse header of the model from text file
408  bool _parseTracks(std::string&); //! Parse Tracks definitions from text file
409  bool _parseAmbiguous(std::string&); //! Parse Ambiguous definitions from text file
410  bool _parseScaling(std::string&); //! Parse Scaling definitions from text file
411  bool _parseTemplates(std::string&); //! Parse Templated States definitions from text file
412 
413  bool _parseStates(std::string&,StateFuncs*); //!Parse state from text file
414  bool _splitStates(std::string&,stringList&); //!Split the state definitions into individual states from text file
415  bool _getOrderedStateNames(stringList&,stringList&); //! Gets list of states names from model
416  bool _processTemplateState(std::string&, stringList&); //! Adds templated states to using template
417 
418  std::string _stringifyHeader(); //!Converts Header information from model to string representation found in text file
419  std::string _stringifyTracks(); //!Converts Tracks information from model to string representation found in text file
420  std::string _stringifyAmbig(); //!Converts Ambiguous Character information from model to text string
421  std::string _stringifyScaling();//!Converts Scaling definitions from model to text string
422  std::string _stringifyStates(); //!Converts States definitions from model to text string
423 
424 
425  void _addStateToFromTransition(state*); //!Processes each state and defines the to-state and from-state definitions for use
426  //!in banding the trellis decoding functions
427 
428  void checkBasicModel(); //!Checks to see if the model has basic transitions and emissions(no addtl functions)
429  void checkExplicitDurationStates(); //!Checks to see which states are explicit duration states
430  void _checkTopology(state* st, std::vector<uint16_t>& visited); //!Checks to see that all states are connected and there
431 
432 
433  };
434 
435 
436  //----------------------------------------------------------------------------//
437  //! models is a class to store multiple models. This gives StochHMM the ability to
438  //! load multiple models, then select the model that is appropriate for the sequence
439  //! based on a user-defined attribute.
440 
441  //! Stores multiple HMM models and contains the get functions for specific models
442  //!
443  //!
444  //----------------------------------------------------------------------------//
445  class models{
446  public:
447  //CONSTRUCTOR
448 
449 
450  //ACCESSOR
451 
452  //! Get model located at index
453  //! \param iter Index iterator for model
454  //! \return pointer to model
455  inline model* operator[](size_t iter){
456  if (iter>hmms.size()-1){
457  return NULL;
458  }
459 
460  return hmms[iter];
461  };
462 
463  //!Get the number of model
464  //! \return size_t
465  inline size_t size(){return hmms.size();};
466 
467  //FIXME: implement getModelByAttrib
468  //model* getModelByAttrib(float);
469 
470  model* getModel(size_t);
471 
472  //MUTATOR
473  void importModels(std::string&,StateFuncs*);
474  void addModel(model*);
475 
476  private:
477  std::vector<model*> hmms;
480  };
481 
482 
483  //!Print 2D vector to std::cout
484  void print_vec (std::vector<std::vector<double> >&);
485 
486  // void markov_length_distribution(model*);
487  //
488  // void markov_generate_sequence(model*);
489 }
490 #endif /*HMM_H*/