StochHMM  v0.34
Flexible Hidden Markov Model C++ Library and Application
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
track.h
Go to the documentation of this file.
1 //track.h
2 //Copyright (c) 2007-2012 Paul C Lott
3 //University of California, Davis
4 //Genome and Biomedical Sciences Facility
5 //UC Davis Genome Center
6 //Ian Korf Lab
7 //Website: www.korflab.ucdavis.edu
8 //Email: lottpaul@gmail.com
9 //
10 //Permission is hereby granted, free of charge, to any person obtaining a copy of
11 //this software and associated documentation files (the "Software"), to deal in
12 //the Software without restriction, including without limitation the rights to
13 //use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
14 //the Software, and to permit persons to whom the Software is furnished to do so,
15 //subject to the following conditions:
16 //
17 //The above copyright notice and this permission notice shall be included in all
18 //copies or substantial portions of the Software.
19 //
20 //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 //IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
22 //FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
23 //COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
24 //IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
25 //CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 
27 #ifndef TRACK_H
28 #define TRACK_H
29 
30 #include <vector>
31 #include <string>
32 #include <map>
33 #include <iostream>
34 #include <fstream>
35 #include <sstream>
36 #include "text.h"
37 #include "stochTypes.h"
38 #include "userFunctions.h"
39 #include "stochMath.h"
40 #include <limits>
41 #include <stdlib.h>
42 #include <stdint.h>
43 
44 #ifndef SIZE_MAX
45 #define SIZE_MAX ((size_t)-1)
46 #endif
47 
48 namespace StochHMM{
49 
50 
51  class track;
52  class tracks;
53 
54 
55  /*! \class ambigCharacter
56  \brief Define the ambiguous characters symbol and index number for digitizing ambiguous characters in the sequence
57  For example in DNA N = [ACGT] = [0,1,2,3]
58 
59  */
61  public:
62  ambigCharacter(track*, std::string&, std::vector<std::string>& ); //track, ambiguous character, unambiguous characters
63 
64 
65  //! Get Ambiguous String/Alphabet character symbol
66  //! \return std::string Symbol of character/word
67  inline std::string getSymbol(){return symbol;};
68 
69 
70  //FIXME: should be vector of shorts unless I expand from 256 to larger int
71  //! Get the characters that the characters defines.
72  //! For example in DNA N = [ACGT] = [0,1,2,3]
73  //! \return std::vector<int> Digitized value of characters that are represented by the given symbol
74  inline std::vector<size_t>& getDef(){return setDefinition;};
75 
76  private:
77  std::string symbol; //Ambiguous character definition
78  std::vector<size_t> setDefinition; //Set of letters by digital value that ambiguous character defines
79  };
80 
81 
82  //! \class track
83  //! Defines types of data (real-value, text-sequence) used in the model
84  //! and the alphabet that a text-sequence uses. Tracks are used to digitize
85  //! the sequence before decoding in HMM
86  class track {
87  public:
88  track();
89 
90  //TODO: Complete definition of constructor
91  track(TrackFuncs*);
92  track(std::vector<std::string>&);
93 
94  friend class state;
95  friend class model;
96  friend class tracks;
97 
98  //MUTATOR
99  bool parse(std::string&);
100  bool parseAmbiguous(std::string&);
101 
102  //!Set the name of the track
103  //! \param nm Name of the track
104  inline void setName(std::string& nm){name=nm;};
105 
106  //!Set the Description of the track
107  //! \param desc Description of track
108  inline void setDescription(std::string& desc){description=desc;};
109 
110  //!Set the integer index value of the track in tracks
111  //! \param indx User defined index value;
112  inline void setIndex(size_t indx){
114  trackIndex=indx;
115  return;
116  }
117  std::cerr << "Track index: " << indx << " is OUT_OF_RANGE\n";
118  exit(1);
119  }
120 
121 
122  //!Set the alphabet type of the track (Real or alphanum)
123  //! \param typ enum trackType
124  inline void setAlphaType(trackType typ){alpha_type=typ;};
125 
126 
127  bool addAlphabetChar(std::string&);
128  bool addAlphabetChar(const char *);
129  bool addAlphabetChar(std::vector<std::string>&);
130  bool addAlphabetChar(size_t chSize, const char * characters[]);
131  bool addAlphabetChar(std::string& character, std::string& complement);
132  bool addAlphabetChar(size_t chSize, const char* characters[], const char* complements[]);
133  bool addAlphabetChar(std::vector<std::string>& characters , std::vector<std::string>& complements);
134 
135  void addComplement(std::string&, std::string&);
136  void addComplement(const char *, const char *);
137  bool addComplement(std::vector<std::string>& characters, std::vector<std::string>& complements);
138 
139  //! Set ambiguous character flag to true
140  //! This will allow ambiguous characters to be processed in sequence
141  //! Without this flag, only strict track characters or values are allowed
142  inline void setAmbiguous(){ambiguous=true; return;};
143  void addAmbiguous(std::string&,std::vector<std::string>&);
144 
145 
146  //ACCESSOR
147 
148  //! Get the name of the track
149  //! \return std::string Name of the track
150  inline std::string getName(){return name;};
151 
152  //! Get the description of the track
153  //! \return std::string Description of the track
154  inline std::string getDescription(){return description;};
155 
156  //! Get the index of the track
157  //! \return int Index of the track
158  inline size_t getIndex(){
160  return trackIndex;
161  }
162  std::cerr << "Track Index is not set in track. Set the track index with setIndex(size_t indx) before calling.\n";
163  exit(1);
164  };
165 
166  //! Get the alphabet type of the track
167  //! \return trackType Alphabet type of the track
168  //! \sa enum trackType
170 
171  //! Get the number of characters defined in the track
172  //! \return size_t Number of characters/words defined in the track
173  inline size_t getAlphaSize(){return alphabet.size();};
174 
175  //! Get alphabet size including ambiguous characters
176  inline size_t getTotalAlphabetSize(){return symbolIndices.size();}
177 
178  //! Get the size of the largest alphabet word
179  //! \return size_t
180  inline size_t getAlphaMax(){return maxSize;};
181 
182 
183  std::string getAlpha(int);
184  std::string getAlpha(size_t);
185 
186 
187  uint8_t symbolIndex(const std::string&);
188  uint8_t symbolIndex(unsigned char);
189 
190  uint8_t getComplementIndex(uint8_t val);
191  uint8_t getComplementIndex(std::string&);
192 
193 
194  std::string getComplementSymbol(std::string& character);
195  std::string getComplementSymbol(uint8_t value);
196 
197  inline bool isComplementDefined(){return complementSet;}
198 
199 
200  //! Check to see if ambiguous flag is set for the track
201  //! \return true if ambiguous flag is set to handle ambig. characters
202  //! \return false if not set
203  inline bool isAmbiguousSet(){return ambiguous;};
204 
205 
206  //! Check to see if the track is AlphaNum type and not a REAL Track
207  //! \return true if it is AlphaNum type
208  //! \return false if it is a REAL type
209  inline bool isAlpha(){if (alpha_type == ALPHA_NUM){return true;} else {return false;}};
210 
211  //! Get the number of ambiguous characters that are defined
212  //! \return size_t Number of ambiguous characters/words defined
213  inline size_t getAmbiguousSize(){return ambiguousSymbols.size();};
214 
215  std::string getAmbiguousCharacter(size_t);
216 
217 
218  //! Get the indices of characters that an ambiguous character represents
219  //! \return std::vector<int>
220  inline std::vector<size_t>& getAmbiguousSet(uint8_t val){return ambiguousSymbols[(val-max_unambiguous)-1].getDef();}
221 
222  inline std::vector<size_t>& getUnambiguousSet(){ return unambiguous; }
223 
224 
225  std::string stringify();
226  std::string stringifyAmbig();
227 
228  //! Print the string representation of the track to stdout
229  inline void print(){ std::cout << stringify() << std::endl;}
230 
231  std::string convertIndexToWord(size_t,size_t);
232  void convertIndexToDigital(size_t,size_t,uint8_t*);
233 
234 
235  //!Check if the TrackFunction is defined for this track
236  //!\return true if the track has a trackFunc defined
238 
239  //! Get name of TrackFunc defined for track
240  //! \return std::string Name of trackFunc defined
241  inline std::string getTrackFunction(){return trackFunction;}
242 
243  //! Get name of Track to use for trackFunc
244  inline std::string getTrackToUse(){return trackToUse;}
245 
246  inline uint8_t getMaxUnambiguous(){return max_unambiguous;}
247  inline uint8_t getMaxAmbiguous(){return max_ambiguous;}
248 
249  private:
250  std::string name; /* Track Name */
251  std::string description; /* Track Desc */
252  size_t trackIndex; /*track number*/
253 
254  trackType alpha_type; /* Track Type 1=string, 2=real_number 0=uninitialized*/
255 
256  //! Track Functions for defining Real Number Tracks
259 
260  std::string trackToUse;
261  std::string trackFunction;
262 
263  std::vector<std::string> alphabet; //Contains the corresponding symbol,letter, word that is referenced in the seq by index
264  std::map<uint8_t,uint8_t> complementAlphabet;
265 
266  size_t maxSize; //Maximum size of the alphabet words
267 
269  uint8_t max_ambiguous;
270  std::vector<size_t> unambiguous;
271 
272  bool ambiguous; //Are ambiguous characters set
273  int defaultAmbiguous; //Default ambiguous character
274 
275  //Contains the ambiguous characters defined by user corresponding to position in the
276  //array. Where index 0=-1, 1=-2... so on.
277  std::vector<ambigCharacter> ambiguousSymbols;
278 
279  std::map<std::string,uint8_t> symbolIndices;
280  //std::map<char,uint8_t>* charIndices;
281  std::vector<uint8_t>* charIndices;
282 
283  void _splitAmbiguousList(std::vector<std::pair<std::string ,std::vector<std::string> > >&, const std::string&);
284  };
285 
286 
287  class tracks{
288  public:
289 
290  //MUTATOR
291  void push_back(track*);
292 
293  //ACCESSOR
294  size_t indexOf(const std::string&);
295  size_t size(){return trks.size();};
296  track* getTrack(const std::string&);
297  bool isTrackDefined(const std::string&);
298  track* operator[](size_t i){return trks[i];};
299 
300  void print();
301  std::string stringify();
302 
303  private:
304  std::vector<track*> trks;
305  std::map<std::string,size_t> index;
306  };
307 
308 
309 
310 
311 }
312 #endif /*TRACK_H*/