StochHMM  v0.34
Flexible Hidden Markov Model C++ Library and Application
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
traceback_path.h
Go to the documentation of this file.
1 ///traceback_path.h
2 
3  //Copyright (c) 2007-2012 Paul C Lott
4  //University of California, Davis
5  //Genome and Biomedical Sciences Facility
6  //UC Davis Genome Center
7  //Ian Korf Lab
8  //Website: www.korflab.ucdavis.edu
9  //Email: lottpaul@gmail.com
10  //
11  //Permission is hereby granted, free of charge, to any person obtaining a copy of
12  //this software and associated documentation files (the "Software"), to deal in
13  //the Software without restriction, including without limitation the rights to
14  //use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
15  //the Software, and to permit persons to whom the Software is furnished to do so,
16  //subject to the following conditions:
17  //
18  //The above copyright notice and this permission notice shall be included in all
19  //copies or substantial portions of the Software.
20  //
21  //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22  //IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
23  //FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
24  //COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
25  //IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
26  //CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 
28 #ifndef TRACEBACK_PATH_H
29 #define TRACEBACK_PATH_H
30 
31 #include <vector>
32 #include <string>
33 #include <iostream>
34 #include <math.h>
35 #include <fstream>
36 #include <algorithm>
37 #include <sstream>
38 #include <stdint.h>
39 #include <stdlib.h>
40 #include "text.h"
41 #include "options.h"
42 #include "hmm.h"
43 #include "stochMath.h"
44 namespace StochHMM{
45 
46  //! \struct gff_feature
47  //! Plain record whose nine fields correspond to the nine columns of a single GFF line. It declares no constructor, so the size_t and char fields have no default value.
48  struct gff_feature{
49  std::string seqname; //Column 1: sequence name
50  std::string source; //Column 2: source
51  std::string feature; //Column 3: feature
52  size_t start; //Column 4: start position (unsigned)
53  size_t end; //Column 5: end position (unsigned)
54  char score; //Column 6: held as a single char, so a numeric score cannot be stored here. NOTE(review): presumably carries the '.' placeholder -- confirm against the code that fills it
55  char strand; //Column 7: strand, one character
56  char frame; //Column 8: frame, one character
57  std::string attribute; //Column 9: attribute text
58  };
59 
60 
61  //! Perform traceback of traceback table
62  //! Stores one traceback path for a sequence
64  public:
66 
67  //! Add a state to the traceback. The state is passed as an int (by the wording of val(), presumably a state index -- confirm in the definition).
68  void push_back(int);
69 
70  //! Erase the traceback. Takes no arguments and returns nothing.
71  void clear();
72 
73  //! Get the size of the traceback. Declared const, so it can be called on const objects.
74  //! \return size of the traceback as size_t
75  size_t size() const;
76 
77  //!Returns the state index at a given position (it) within the traceback sequence
78  inline int val(size_t it){
79  if (it>=trace_path.size()){
80  std::cerr << "Out of Range index\n ";
81  exit(2);
82  }
83  return trace_path[it];
84  }
85 
86  //!Get the model used for the decoding
87  //! \return model
88  inline model* getModel() const {return hmm;};
89  inline void setModel(model* mdl){hmm=mdl;};
90 
91 
92  //! Print the path to a file stream. The stream is taken by non-const reference (std::ofstream&).
93  void fprint_path(std::ofstream&);
94 
95  //! Get traceback as vector of state labels
96  //! The single (unnamed) parameter is an output: a std::vector<std::string> passed by reference that receives the labels.
97  void label(std::vector<std::string>&);
98 
99  //! Get traceback as string of state labels. The single (unnamed) parameter is the std::string, passed by reference, that receives them.
100  void label(std::string&);
101 
102 
103  //! Get traceback as vector of gff_features
104  //! First parameter (output): reference to the vector of gff_feature to receive the features.
105  //! Second parameter (input): name of the sequence to be used in the GFF. It is declared as a non-const std::string&, so callers must pass an lvalue.
106  void gff(std::vector<gff_feature>&,std::string&);
107 
108  //! Get names of traceback path
109  //! The single (unnamed) parameter is an output: a std::vector<std::string> passed by reference.
110  void name(std::vector<std::string>&);
111 
112  //! Get the path as a std::vector<int>
113  //! The single (unnamed) parameter is an output: a std::vector<int>, passed by reference, that represents the path.
114  void path(std::vector<int>&);
115 
116  //! Print the traceback path as path to stdout using cout
117  //! Path numbers correspond to state index in model. Declared const.
118  void print_path() const ;
119 
120  //! Print the traceback path as state labels
121  //! (one label per state of the path; exact output format is set by the definition, which is not in this header). Declared const.
122  void print_label() const ;
123 
124  //! Outputs the gff formatted output for the traceback to stdout
125  //! Allows user to provide additional information, that may be
126  //! pertinent to stochastic tracebacks. The parameters are unnamed in this declaration; the names below follow their order.
127  //!\param[in] sequence_name Name of sequence used (std::string, passed by value)
128  //!\param[in] score score to use in the GFF output (double)
129  //!\param[in] ranking Rank of traceback (int)
130  //!\param[in] times Number of times that traceback occurred (int)
131  //!\param[in] posterior Posterior probability score (double)
132  void print_gff(std::string,double,int,int,double) const ;
133 
134  //! Outputs the gff formatted output for the traceback. Takes one std::string by value (presumably the sequence name, as in the five-argument overload -- confirm in the definition).
135  void print_gff(std::string) const ;
136 
137  //!Get the score that is associated with the traceback
138  inline double getScore(){
139  return score;
140  }
141 
142  //!Set the score for the traceback;
143  inline void setScore(double scr){
144  score = scr;
145  return;
146  };
147 
148  //double path_prob (const HMM&, sequence&); //Need to rewrite function
149  inline int operator[](size_t val) const {return trace_path[val];};
150  bool operator== (const traceback_path&) const; //!< Equality comparison with another traceback_path; the comparison rule lives in the definition, which is not in this header
151  bool operator< (const traceback_path&) const; //!< Less-than comparison; std::map<traceback_path,int> (used by multiTraceback) orders its keys through this operator via its default comparator
152  bool operator> (const traceback_path&) const; //!< Greater-than comparison with another traceback_path
153  bool operator<= (const traceback_path&) const; //!< Less-than-or-equal comparison with another traceback_path
154  bool operator>= (const traceback_path&) const; //!< Greater-than-or-equal comparison with another traceback_path
155  private:
157  std::vector<int> trace_path; //!< The traceback itself: one int per position, read by val() and operator[]
158  double score; //!< Score associated with the traceback, read by getScore() and written by setScore()
159  };
160 
161  //Rows are the sequence positions
162  //Columns are the states (NOTE(review): presumably indexed as table[position][state] -- confirm against the code that fills it)
163  //! \typedef heatTable
164  //! Table (vector of vectors of int) for heat map data generated from multiple tracebacks
165  typedef std::vector<std::vector<int> > heatTable;
166 
167  //! \class multiTraceback
168  //! Contains multiple tracebacks. Stores them in a sorted, unique list (sorted by number of occurrences).
170  public:
171  multiTraceback(); //Default constructor
172  ~multiTraceback(); //Destructor
173 
174  //Iterate through the map. Every function below returns void, so unlike the STL begin()/end() none of them hands back an iterator. NOTE(review): presumably they move an internal cursor -- confirm in the definitions.
175  void begin();
176  void end();
177  void operator++();
178  void operator--();
179  void operator=(size_t); //Takes a size_t and returns void, so this is not a copy assignment. NOTE(review): presumably positions the cursor at the given index -- confirm in the definition.
180 
181 
182  void print_path(); //Print as path; output format is set by the definition, not shown in this header
183  void print_label(); //Print as labels; output format is set by the definition, not shown in this header
184  void print_gff(std::string&); //Print as GFF; takes one std::string by non-const reference (presumably the sequence name -- confirm), so callers must pass an lvalue
185  void print_hits(); //NOTE(review): name suggests it prints hit counts -- confirm in the definition
186 
187 
188  //Access the data at a point
190  int counts(); //Returns an int. NOTE(review): presumably the occurrence count stored as the int value of the map entry -- confirm
191  traceback_path operator[](size_t); //Returns a traceback_path by value (a copy) for the given size_t index
192 
193 
194  //Assign: takes one traceback_path by non-const reference. NOTE(review): presumably inserts it into the map or bumps its count -- confirm in the definition.
195  void assign(traceback_path&);
196 
197  //Finalize multiTraceback (sort and set up iterators). NOTE(review): presumably must be called after the last assign() and before indexed access -- confirm.
198  void finalize();
199 
200  inline void clear(){paths.clear();return;};
201  inline size_t size(){return paths.size();};
202 
204 
205  private:
207  size_t maxSize; //NOTE(review): meaning is not visible in this header -- confirm in the definitions
208  std::vector<std::map<traceback_path,int>::iterator> pathAccess; //Vector of map iterators (presumably pointing into paths, to allow indexed access -- confirm); such iterators become invalid when the referenced map entry is erased
209  std::map<traceback_path,int> paths; //One entry per distinct traceback_path (keys are ordered with traceback_path::operator<), each mapped to an int
211  };
212 
213  bool sortTBVec(std::map<traceback_path,int>::iterator, std::map<traceback_path,int>::iterator); //Takes two map iterators by value and returns bool. NOTE(review): presumably the comparison predicate used to sort a vector of such iterators -- confirm in the definition
214 }
215 #endif /*TRACEBACK_PATH_H*/
216 
217 
218