StochHMM  v0.34
Flexible Hidden Markov Model C++ Library and Application
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
sequences.cpp
Go to the documentation of this file.
1 //
2 // sequences.cpp
3 //Copyright (c) 2007-2012 Paul C Lott
4 //University of California, Davis
5 //Genome and Biomedical Sciences Facility
6 //UC Davis Genome Center
7 //Ian Korf Lab
8 //Website: www.korflab.ucdavis.edu
9 //Email: lottpaul@gmail.com
10 //
11 //Permission is hereby granted, free of charge, to any person obtaining a copy of
12 //this software and associated documentation files (the "Software"), to deal in
13 //the Software without restriction, including without limitation the rights to
14 //use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
15 //the Software, and to permit persons to whom the Software is furnished to do so,
16 //subject to the following conditions:
17 //
18 //The above copyright notice and this permission notice shall be included in all
19 //copies or substantial portions of the Software.
20 //
21 //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 //IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
23 //FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
24 //COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
25 //IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
26 //CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 
28 #include "sequences.h"
29 
30 
31 namespace StochHMM {
32 
33 
34 
36  length=-1;
37  external=NULL;
38  related_sequences=false;
40  same_length=true;
41  }
42 
43 
44  //!Create an empty sequences
45  //! \param sz Number of sequences
46  sequences::sequences(size_t sz):seq(sz,NULL){
47  length=-1;
48  external=NULL;
49  related_sequences=true;
51  same_length=true;
52  }
53 
54 
55  //!Create a sequences data t
56  //! \param sz Number of sequences
57  sequences::sequences(tracks* tr):seq(tr->size(),NULL){
58  length=-1;
59  external=NULL;
60  related_sequences=true;
61  num_of_sequences=tr->size();
62  same_length=true;
63  }
64 
65 
67  external = (rhs.external==NULL) ? NULL : new(std::nothrow) ExDefSequence(*rhs.external);
68 
69  if (rhs.external != NULL && external==NULL){
70  std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
71  exit(1);
72  }
73 
74  length = rhs.length;
78 
79  for(size_t i=0;i<seq.size();i++){
80  sequence* temp=NULL;
81  if (rhs.seq[i]!=NULL){
82  temp = new(std::nothrow) sequence(*rhs.seq[i]);
83 
84  if (temp == NULL){
85  std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
86  exit(1);
87  }
88  }
89  seq.push_back(temp);
90  }
91 
92  }
93 
94 
95  //!Destroy sequences
97  for(size_t i=0;i<seq.size();i++){
98  delete seq[i];
99  seq[i]=NULL;
100  }
101  delete external;
102  external = NULL;
103  }
104 
105 
106  //! Assignment Operator
108  external = (rhs.external==NULL) ? NULL : new(std::nothrow) ExDefSequence(*rhs.external);
109 
110  if (rhs.external != NULL && external==NULL){
111  std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
112  exit(1);
113  }
114 
115  length = rhs.length;
119 
120  for(size_t i=0;i<seq.size();i++){
121  sequence* temp=NULL;
122  if (rhs.seq[i]!=NULL){
123  temp = new(std::nothrow) sequence(*rhs.seq[i]);
124 
125  if (temp == NULL){
126  std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
127  exit(1);
128  }
129  }
130  seq.push_back(temp);
131  }
132 
133  return *this;
134  }
135 
136 
137  //!Get the value from a real Number sequence for track trck at position
138  //! \param trck Sequence track to use
139  //! \param position Position in sequence to get value from
140  //! \return double value of the real sequence at the position
141  double sequences::realValue(int trck,size_t position){
142  if (seq[trck]->realSeq){
143  return seq[trck]->realValue(position);
144  }
145  return -INFINITY;
146 
147  }
148 
149  //!Get the value from a real Number sequence for track trck at position
150  //! \param trck Sequence track to use
151  //! \param position Position in sequence to get value from
152  //! \return double value of the real sequence at the position
153  double sequences::realValue(size_t trck,size_t position){
154  if (seq[trck]->realSeq){
155  return seq[trck]->realValue(position);
156  }
157  return -INFINITY;
158 
159  }
160 
161  //TODO: fix if the sequence isn't a alpha sequence
162  //!Get the digitized value from the sequence at trck at position
163  //! \param trck Sequence track to use
164  //! \param position Position in sequence to get the value from
165  //! \return short digitized value of the sequence based on track type
166  short sequences::seqValue(int trck, size_t position){
167  return seq[trck]->seqValue(position);
168  }
169 
170  //!Get pointer to ith sequence from sequences
171  //! \param iter Iterator to use for extracting sequence;
172  //! \return sequence* pointer to sequence
173  //! \return NULL if no sequence exists at iter
175  if(iter<seq.size()){
176  return seq[iter];
177  }
178 
179  return NULL;
180  }
181 
182  //!Get std:string representation of all the digitized sequence(s) in sequences
183  //! \return std::string representation of all string (digitized)
184  std::string sequences::stringify(){
185  std::string tmp;
186  for(size_t i=0; i<size(); i++){
187 
188 
189  if (seq[i]==NULL){
190  tmp+= ">TRACK: " + int_to_string(i) + ":\t" ;
191  tmp+= "<<EMPTY>>\n" ;
192  }
193  else{
194  track* trk = seq[i]->getTrack();
195  tmp+= ">" + trk->getName();
196  tmp+= seq[i]->stringify();
197  }
198  }
199  return tmp;
200  }
201 
202  //!Get std::string representation of all the undigitized sequence(s) in sequences
203  //! \return std::string representation of all string (digitized)
204  std::string sequences::undigitize(){
205  std::string output;
206  for(size_t i=0;i<size();i++){
207  if (seq[i]==NULL){
208  output+= ">TRACK: " + int_to_string(i) + ":\t" ;
209  output+= "<<EMPTY>>\n" ;
210  }
211  else{
212  track* trk = seq[i]->getTrack();
213  output+= ">" + trk->getName();
214  output+= seq[i]->undigitize();
215  }
216  }
217  return output;
218  }
219 
220  //!Check to see if ther is an external definition defined for a certain position within the sequences
221  //! \param pos Position within the sequence
222  //! \return True if there is an external definition defined for the position
223  bool sequences::exDefDefined(size_t pos){
224  if (external && external->defs[pos]){
225  return true;
226  }
227  else{
228  return false;
229  }
230  }
231 
232  //!Check to see if there is an external definition defined for a certain state at a certain position
233  //! \param pos Position within the sequence
234  //! \return True if there is an external definition fo the the state and position
235  bool sequences::exDefDefined(size_t pos, size_t stateIter){
236  if (external && external->defs[pos]){
237  if (external->defined(stateIter)){
238  return true;
239  }
240  }
241  return false;
242  }
243 
244 
245  //!Check to see if there are any external definitions defined
246  //! \return true if there are external definitions defined
248  return external;
249  }
250 
251  //!Get the weight for the state at a certain position in the sequence
252  //! \param position Position with the sequence
253  //! \param stateIter integer iterator of the state
254  double sequences::getWeight(size_t position, size_t stateIter){
255  return external->getWeight(position, stateIter);
256  }
257 
258  //!Add a sequence to the sequences
259  //!Sequence is added to a certain position based on the track used by the sequence
260 
261  //! \param sq Pointer to sequence
263  if (related_sequences){
264  track* trk = sq->getTrack();
265  seq[trk->getIndex()]=sq;
266  setLength(sq->getLength());
267  return;
268  }
269  else{
270  seq.push_back(sq);
271  setLength(sq->getLength());
273  }
274 
275  }
276 
277  //!Add a sequence to the sequences at a certain position
278  //!Sequence is added to a certain position based on the iterator
279  //! \param sq Pointer to sequence
280  //! \param iter Position in sequences to add sequence
281  void sequences::addSeq(sequence* sq, size_t iter){
282  if (seq.size()>iter){ //If position already exists just add it
283  seq[iter]=sq;
284  }
285  else{ //If the position doesn't exist extend the vector, then add
286  seq.resize(iter+1,NULL);
287  seq[iter]=sq;
288  }
289 
290  setLength(sq->getLength());
291 
292  if (!related_sequences){
293  num_of_sequences=iter;
294  }
295 
296  return;
297  }
298 
299  //!Add a sequence to the sequences given a certain track
300  //!Sequence is added to a certain position based on the track
301  //! \param sq Pointer to sequence
302  //! \param tr Track to use when adding sequence
304  if (tr!=NULL){
305  if (related_sequences){
306  size_t index = tr->getIndex();
307  addSeq(sq,index);
308  }
309  else{
310  addSeq(sq);
311  }
312  }
313  else{
314  if (related_sequences){
315  std::cerr << "Track is undefined for related sequences. Unable to add related sequences if track is not defined\n";
316  exit(1);
317  }
318  else{
319  addSeq(sq);
320  }
321  }
322  return;
323  }
324 
325  //! Set the length of the sequence(s) in sequence
326  //! Because all the sequences should be the same size
327  //! If there size differs when adding a sequence
328  //! \exception sDifferentSizeSequences thrown if the sizes differ
329  void sequences::setLength(size_t len){
331  length=len;
332  }
333 
334  if (len==length){
335  return;
336  }
337  else{
338  same_length=false;
339  if (related_sequences){
340  std::cerr << "Sequences have different lengths. Sequences should all have the same length because they are suppose to be related (from different datasets). For multiple unrelated sequence types, use a different structure.\n";
341  exit(20);
342  }
343  }
344  return;
345  }
346 
347  void sequences::getFastas(const std::string& filename, track* tr){
348  std::ifstream file;
349  file.open(filename.c_str());
350  if (!file.is_open()){
351  std::cerr << "Couln't open the file:" << filename << "\n";
352  exit(2);
353  }
354  while(!file.eof()){
355  sequence* sq = new(std::nothrow) sequence(false);
356 
357  if (sq==NULL){
358  std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
359  exit(1);
360  }
361  bool success = sq->getFasta(file, tr);
362 
363  if (!success){
364  delete sq;
365  }
366  addSeq(sq,tr);
367  }
368  return;
369  }
370 
371 }