StochHMM  v0.34
Flexible Hidden Markov Model C++ Library and Application
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
emm.h
Go to the documentation of this file.
1 //emm.h
2  //Copyright (c) 2007-2012 Paul C Lott
3  //University of California, Davis
4  //Genome and Biomedical Sciences Facility
5  //UC Davis Genome Center
6  //Ian Korf Lab
7  //Website: www.korflab.ucdavis.edu
8  //Email: lottpaul@gmail.com
9  //
10  //Permission is hereby granted, free of charge, to any person obtaining a copy of
11  //this software and associated documentation files (the "Software"), to deal in
12  //the Software without restriction, including without limitation the rights to
13  //use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
14  //the Software, and to permit persons to whom the Software is furnished to do so,
15  //subject to the following conditions:
16  //
17  //The above copyright notice and this permission notice shall be included in all
18  //copies or substantial portions of the Software.
19  //
20  //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  //IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
22  //FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
23  //COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
24  //IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
25  //CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 
27 #ifndef EMM_H
28 #define EMM_H
29 
30 
31 #include <iostream>
32 #include <fstream>
33 #include <string>
34 #include <vector>
35 #include <math.h>
36 #include "track.h"
37 #include "index.h"
38 #include "externalFuncs.h"
39 #include "weight.h"
40 #include "sequences.h"
41 #include "lexicalTable.h"
42 #include <stdlib.h>
43 namespace StochHMM{
44 
45 
46  /*! Emissions for model
47  Contains the emission definition. Each emission contains the probability, the log(p(x)), and counts.
48  Counts are used for calculating lower order emissions from higher order. This is only applicable
49  at the beginning of the sequence.
50  Each emission depends on some track or function, an emission can have multiple tracks.
51  Or in other words output a single character from each track it is associated with.
52  Tracks can be either alphabetic or real-number valued.
53  Emissions can also call an external function that is user defined.
54  If ambiguity is defined in the alphabet, the emission score can be defined as such
55  If ambiguity is not defined the returned value will be -INFINITY
56  */
57 
58  class emm{
59  public:
60  emm(); //!Constructs an empty emission
61  emm(std::string&); //!Constructs an emission from its string definition
62 
63  ~emm();
64 
65  friend class state; //state may access the private members of emm
66  friend class model; //model may access the private members of emm
67 
68  //MUTATORS
69  bool parse(std::string&, tracks&, weights*, StateFuncs* ); //!Parse an emission definition; defined outside this header (bool presumably signals success -- TODO confirm)
70  bool parse(std::string& txt,track* trk); //!Overload taking the definition text and a single track pointer; defined outside this header
71 
72  //!Flag the emission as a real-number emission (sets real_number to true)
73  inline void setRealNumber(){real_number=true;};
74 
75  //!Flag the emission to use the complement (1-P) of the given value (sets complement to true)
76  inline void setComplement(){complement=true;};
77 
79 
80  //ACCESSORS
81 
82  bool isReal(); //!Defined outside this header; presumably reports the real_number flag -- TODO confirm
83 
84  //!Check to see if emission will return the complement (1-P) value of emission
85  inline bool isComplement(){return complement;};
86 
87  double get_emission(sequences& , size_t ); //!Emission value for a set of sequences (size_t is presumably the position -- TODO confirm)
88  double get_emission(sequence&, size_t); //!Overload for a single sequence
89 
90  //! Get the external Functions defined for the emission
91  //! \return externalFuncs*
93  //NOTE(review): the declaration documented just above (listing line 92) is absent from this extracted listing
94  //! Print the string representation of the emission to stdout
95  inline void print(){std::cout << stringify()<<std::endl;};
96 
97  std::string stringify(); //!String representation of the emission; this is what print() writes
98 
99  inline lexicalTable* getTables(){return &scores;}; //!Returns the address of the scores member
100  inline bool isSimple(){ //!True when function is false and tagFunc is NULL
101  if (!function && tagFunc==NULL){return true;}
102  return false;
103  }
104 
105  inline bool isComplex(){ //!True when function is set or tagFunc is non-null (logical inverse of isSimple)
106  if (function || tagFunc){return true;}
107  return false;
108  }
109 
110  private:
111 
112  //size_t track_size;
117  //NOTE(review): listing lines 113-116 are missing here; real_number, complement, scores and tagFunc (used by the inline methods above) have no visible declaration and are presumably on omitted lines
119 
120  //Lexical Scoring Tables
122 
123  //Lexical Function Only
124  bool function; //Tested by isSimple() and isComplex()
126 
127  //Continuous Univariate Distribution
129  std::string pdfName; //Name of the univariate distribution function (presumably looked up via StateFuncs -- TODO confirm)
130 
131  //Parameters for Univariate and Multivariate Distributions
132  std::vector<double>* dist_parameters;
133 
134 
135  //TODO: Implement Continuous Multivariate Distributions
136  //Continuous Multivariate Distribution
137  multiPdfFunc* multiPdf; //Pointer to multivariate function
138  std::string multiPdfName; //Name of function according to StateFuncs
139  size_t number_of_tracks; //Number of tracks in multi-emission
140  std::vector<track*>* trcks; //Tracks used
141  std::vector<size_t>* track_indices; //Indices of tracks used
142  std::vector<double>* pass_values; //Array to pass values to multivariate function
143 
144  //TODO: Implement the external Function capabilities
146 
147  //Private Methods
148  bool _processTags(std::string&, tracks&, weights*, StateFuncs*); //Private helper with the same parameter types as the four-argument parse(); presumably handles the tag portion of a definition -- TODO confirm
149 
150  };
151 
152 }
153 #endif /*EMM_H*/
154 
155