StochHMM  v0.34
Flexible Hidden Markov Model C++ Library and Application
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
lexicalTable.h
Go to the documentation of this file.
1 //
2 // lexicalTable.h
3 // StochHMM
4 //
5 // Created by Paul Lott on 4/2/12.
6 // Copyright (c) 2012 University of California, Davis. All rights reserved.
7 //
8 
9 #ifndef StochHMM_Lexical_h
10 #define StochHMM_Lexical_h
11 #include <string>
12 #include <vector>
13 #include <math.h>
14 #include <ctype.h>
15 #include <algorithm>
16 #include <stdint.h>
17 #include <stdlib.h>
18 #include "track.h"
19 #include "index.h"
20 #include "externalFuncs.h"
21 #include "weight.h"
22 #include "sequences.h"
23 #include "stochTypes.h"
24 //#include "simpleTable.h"
25 
26 namespace StochHMM{
27 
28  //! \class lexicalTable
29  //! \brief Lexical table stores the log2 probabilities for both emissions and lexical transitions
30  //!
31  class lexicalTable{
32  private:
33 
34 
35  public:
36 
37  lexicalTable();
38 
39  ~lexicalTable();
40 
41  double getValue(sequences&, size_t);
42  double getValue(sequence& , size_t);
43 
44  //!Initialize the final emission table with ambiguous characters
45  //Creates the log_emission simpleTable
47  double getReducedOrder(sequences& seqs, size_t position);
48 
49  double getReducedOrder(sequence& seq, size_t position);
50 
51  std::vector<std::vector<double> >* getCountsTable();
52  std::vector<std::vector<double> >* getProbabilityTable();
53  std::vector<std::vector<double> >* getLogProbabilityTable();
54 
55  void createTable(int rows, int columns, int pseudocount, valueType typ);
56 
57  void addTrack(track*,int);
58  void assignTable(std::vector<std::vector<double> >*, valueType);
59 
60  //!Set how the emission will deal with unknown alphabet
61  //! \param type enum UnknownCharScoringType
63 
64  //!Set a given score to be returned for unknownCharScoringType
65  inline void setUnkScore(double val){unknownDefinedScore=val;};
66 
67  //!Get pointer to track at index position of emission
68  //!\param iter Index iterator of position
69  //!\return track* Track in emission
70  inline track* getTrack(size_t iter){return trcks[iter];};
71 
72  //!Get the number of tracks defined in emission
73  //!\return size_t
74  inline size_t trackSize(){return trcks.size();};
75 
76  //!Get Orders of lexical emission will use for all tracks
77  //!\return std::vector<int>
78  inline std::vector<uint8_t>& getOrder(){return order;};
79  inline uint8_t getOrder(size_t i){return order[i];}
80 
81  //! Get Log(prob) emission table
82  //! \return std::vector<std::vector<double> >
83  inline std::vector<std::vector<double> >& getLogEmm(){return *logProb;}
84 
85  //! Get the alphabet sizes for all tracks used in emission
86  //! \return std::vector<int>
87  inline std::vector<uint8_t>& getAlphaSize(){return alphabets;}
88  inline uint8_t getAlphaSize(size_t i){return alphabets[i];}
89  inline size_t getNumberOfAlphabets(){return alphabets.size();}
90 
92  inline double getAmbDefinedScore(){return unknownDefinedScore;}
93 
94  //!Increment counts
95  inline void incrementCounts(size_t word_index, size_t char_index) { if (counts != NULL) (*counts)[word_index][char_index]++; }
96 
97  //!Increment counts by double
98  inline void incrementCountsDouble(size_t word_index, size_t char_index, double val) { if (counts != NULL) (*counts)[word_index][char_index]+= val; }
99 
100  std::string stringify();
101 
102  std::string stringifyAmbig();
103 
104  void print();
105 
106  private:
107  unknownCharScoringType unknownScoreType; //! What type of score to use with unknown
108  double unknownDefinedScore; //!Undefined character score
109 
111  std::vector<track*> trcks; //Pointer to tracks of interest
112  std::vector<uint8_t> alphabets; //alphabet sizes for each emission
113  std::vector<uint8_t> max_unambiguous;
114  std::vector<uint8_t> order; //Orders for each emission
115  uint8_t max_order;
116 
117  size_t y_dim;
118  size_t* x_subarray;
119  size_t* y_subarray;
120  std::vector<std::vector<double> >* prob; //p(x)
121  std::vector<std::vector<double> >* counts; //counts
122  std::vector<std::vector<double> >* logProb; //log2(P(x))
123 
124 
125  size_t array_size;
126  size_t dimensions;
127  std::vector<size_t> subarray_value; //Values used to decompose index into sequenece AAA(A)B(B)
128  std::vector<size_t> subarray_sequence;
129  std::vector<size_t> subarray_position;
130 
131  std::vector<size_t> decompose_values; //Values used to compose index from sequences AAAB(AB)
132  std::vector<size_t> decompose_sequence;
133 
134  std::vector<double>* log_emission;
135  std::vector<std::vector<double>* > low_order_emissions;
136  std::vector<std::vector<std::pair<size_t,size_t>* > >low_order_info;
137 
140  size_t convertIndex(size_t,size_t);
141 
142  void decompose(size_t row, size_t column, std::vector<uint8_t>& letters);
143  void decompose(size_t index, std::vector<uint8_t>& letters);
144 
145  void transferValues(std::vector<bool>& transferred);
146  size_t calculateArrayIndex(std::vector<uint8_t>& kmer);
147  void expand_ambiguous(std::vector<uint8_t>& letters, std::vector<double>& expanded);
148  std::vector<std::vector<uint8_t> >* expand_ambiguous(std::vector<std::vector<uint8_t> >* words, size_t letter);
149  size_t calculateIndexFromDecomposed(std::vector<uint8_t>& word);
150  double getAmbiguousScore(std::vector<uint8_t>& letters);
151  };
152 
153 
154 
155 
156 }
157 
158 
159 #endif