StochHMM  v0.34
Flexible Hidden Markov Model C++ Library and Application
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
sequences.h
Go to the documentation of this file.
1 //
2 // sequences.h
3 //Copyright (c) 2007-2012 Paul C Lott
4 //University of California, Davis
5 //Genome and Biomedical Sciences Facility
6 //UC Davis Genome Center
7 //Ian Korf Lab
8 //Website: www.korflab.ucdavis.edu
9 //Email: lottpaul@gmail.com
10 //
11 //Permission is hereby granted, free of charge, to any person obtaining a copy of
12 //this software and associated documentation files (the "Software"), to deal in
13 //the Software without restriction, including without limitation the rights to
14 //use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
15 //the Software, and to permit persons to whom the Software is furnished to do so,
16 //subject to the following conditions:
17 //
18 //The above copyright notice and this permission notice shall be included in all
19 //copies or substantial portions of the Software.
20 //
21 //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 //IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
23 //FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
24 //COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
25 //IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
26 //CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 #ifndef StochHMM_sequences_h
28 #define StochHMM_sequences_h
29 
30 #include <vector>
31 #include <string>
32 #include <iostream>
33 #include <math.h>
34 #include <fstream>
35 #include <stdlib.h>
36 #include "text.h"
37 #include "track.h"
38 #include "externDefinitions.h"
39 #include "sequence.h"
40 
41 namespace StochHMM {
42  //TODO: Need to fix so it can handle unrelated sequences
43  //Adding sequences error prone if index isn't set;
44 
45 
46  //! \class sequences
47  //! Container that holds the track sequence(s)
48  //! Each sequence has to be the same length
49  //! Created to pass multiple sequence tracks of different datasets to the HMM
50  //! If you need multiple unrelated sequences, use std::vector<sequence> instead.
51  class sequences{
52  public:
53  //Constructors
54  sequences();
55  sequences(size_t sz);
56  sequences(tracks* tr);
57 
58  //Copy Constructor
59  sequences(const sequences&);
60 
61 
63 
64  ~sequences();
65 
66 
67  //ACCESSOR
68 
69  //Sequence and Sequence Information
70  double realValue(int , size_t); // Get value of Real track(i) in jth position
71  double realValue(size_t, size_t);
72  short seqValue( int , size_t); // Get digitized value for sequence track(i) in jth position
73 
74  sequence* getSeq(size_t);// Return sequence for Track (i)
75 
76  //!Get the attribute value for a particular sequence
77  //! \param iter Sequence to get the attribute from
78  //! \return double value of the attribute set for the sequence
79  inline double getAttrib(size_t iter){
80  if (iter<num_of_sequences && seq[iter]!=NULL){
81  return seq[iter]->getAttrib(); //Returns attrib value for Track (i)
82  }
83  std::cerr << "No sequence defined at iterator " << iter << std::endl;
84  exit(1);
85  }
86 
87  //!Get the header for the first sequence
88  //! \return std::string& The header for the first sequence in sequences
89  //!
90  inline std::string& getHeader(){
91  if (num_of_sequences>0 && seq[0]!=NULL){
92  return seq[0]->header;
93  }
94  std::cerr << "No sequence defined." << std::endl;
95  exit(1);
96  }
97 
98  //TODO: fix if iter is not defined
99  //!Get the header for the ith sequence
100  //! \param iter size_t iterator for ith sequence
101  //! \return std::string& The header for the ith sequence
102  inline std::string& getHeader(size_t iter){
103  if (iter<num_of_sequences){
104  if (seq[iter]!=NULL){
105  return seq[iter]->header;
106  }
107  }
108  std::cerr << "No sequence defined at iterator " << iter << std::endl;
109  exit(1);
110  }
111 
112  //TODO: need to fix so returns a reference to the sequence.
113  //!Get the undigitized ith sequence from sequences
114  //! \param iter size_t iterator for ith sequence
115  //! \return std::string of undigitized sequence at ith position
116  inline std::string* getUndigitized(size_t iter){
117  if (iter>seq.size()){
118  std::cerr << "getUndigitized(size_t iter) called where iter is out of range\n";
119  return NULL;
120  }
121  else{
122  return seq[iter]->getUndigitized();
123  }
124  };
125 
126  // Sizes
127 
128  //!Get the number of sequence type in sequences
129  //! \return size_t value of size
130  inline size_t size(){return num_of_sequences;} //Get number of sequences
131 
132  //!Get the length of the ith sequence
133  //! \param iter size_t iterator
134  //! \return size_t value of length of sequence at iter
135  inline size_t getLength(size_t iter){
136  if (iter<num_of_sequences && seq[iter]!=NULL){
137  return seq[iter]->getLength(); //Get lenght of sequence in position (i)
138  }
139  return 0;
140  }
141 
142  //!Get the length of sequences in general
143  //!All of the sequence(s) should be the same length
144  //!\return size_t value of length of all sequences
145  inline size_t getLength(){
146  return length;
147 // if (related_sequences){
148 // return length; // Get length of related sequence
149 // }
150 // return 0;
151  };
152 
153 
154  bool exDefDefined(size_t); // Is External definition set for state(i)
155  bool exDefDefined(size_t,size_t);// Is External definitiion set for state (i) and position (j);
156  double getWeight(size_t,size_t); //! Get Weight value for state at position
157  bool exDefDefined();//! Is External definition defined
158 
159  //!Print the string representation of digitized sequencs to the stdout
160  inline void print(){std::cout<< stringify() << std::endl;}; //Print sequences to stdout
161 
162  std::string stringify(); //! Get string of sequences
163  std::string undigitize(); //! Get sequence based on alphabet
164 
165  //MUTATOR
166 
167  //inline void addSeq(sequence* sq){seq.push_back(sq);}; //Add sequence to sequences
168 
169  void addSeq(sequence* sq);
170 
171  //! Add sequence in the track position
172  //! \param
173  void addSeq(sequence*,size_t);
174 
175  //! Add sequence for track
176  void addSeq(sequence*,track*);
177 
178  //! Set Length of sequences
179  void setLength(size_t len);
180 
181  //! Set external definition
182  //! \param ex External definition to assign to the sequences
183  inline void setExDef(ExDefSequence* ex){external=ex;};
184 
185  inline bool isSameSize(){
186  return same_length;
187  }
188 
189  sequence& operator[](size_t index){return *seq[index];}
190 
191  void getFastas(const std::string& , track*);
192 
193  private:
194  //EXTERNAL DEFINITIONS
196 
197  std::vector<sequence*> seq;
198 
199  size_t length; //Length of the sequence(s)
201 
204  };
205 
206 
207 }
208 
209 #endif