StochHMM  v0.34
Flexible Hidden Markov Model C++ Library and Application
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
sequenceStream.cpp
Go to the documentation of this file.
1 //
2 // sequenceStream.cpp
3 // StochHMM
4 //
5 // Created by Ken Yu on 7/17/12.
6 // Copyright 2012 University of California, Davis. All rights reserved.
7 //
8 
9 #include "sequenceStream.h"
10 
11 namespace StochHMM {
13  bufferSize=0;
14  retainSize=0;
15  readingFile = false;
16  }
17 
18  sequenceStream::sequenceStream(bool realTrack): sequence(realTrack){
19  bufferSize=0;
20  retainSize=0;
21  readingFile = false;
22  }
23 
24  sequenceStream::sequenceStream(std::vector<double>*vec, track* tr ): sequence(vec, tr){
25  bufferSize=0;
26  retainSize=0;
27  readingFile = false;
28  }
29 
30  sequenceStream::sequenceStream(char* seq, track* tr ): sequence(seq, tr){
31  bufferSize=0;
32  retainSize=0;
33  readingFile = false;
34  }
35 
36  sequenceStream::sequenceStream(std::string& sq, track* tr ): sequence(sq, tr){
37  bufferSize=0;
38  retainSize=0;
39  readingFile = false;
40  }
41 
42  sequenceStream::sequenceStream(size_t buff, size_t ret): sequence(){
43  bufferSize=buff;
44  retainSize=ret;
45  readingFile = false;
46  }
47 
48  sequenceStream::sequenceStream(size_t buff, size_t ret, bool realTrack): sequence(realTrack){
49  bufferSize=buff;
50  retainSize=ret;
51  readingFile = false;
52  }
53 
54  sequenceStream::sequenceStream(size_t buff, size_t ret, std::vector<double>*vec, track* tr ): sequence(vec, tr){
55  bufferSize=buff;
56  retainSize=ret;
57  readingFile = false;
58  }
59 
60  sequenceStream::sequenceStream(size_t buff, size_t ret, char* seq, track* tr ): sequence(seq, tr){
61  bufferSize=buff;
62  retainSize=ret;
63  readingFile = false;
64  }
65 
66  sequenceStream::sequenceStream(size_t buff, size_t ret, std::string& sq, track* tr ): sequence(sq, tr){
67  bufferSize=buff;
68  retainSize=ret;
69  readingFile = false;
70  }
71 
72 // sequenceStream::~sequenceStream(){
73 //
74 // }
75 
76 // sequenceStream::sequenceStream(std::ifstream& file, size_t buff, size_t retain) {
77 // bufferSize=buff;
78 // retainSize=retain;
79 // }
80 
81  bool sequenceStream::getFasta(std::ifstream &file, track* trk) {
82 
83  resetSeq();
84  seqtrk=trk;
85 
86  if (readingFile == false) {
87  //Find next header mark
88  while(file.peek() != '>'){
89  std::string temp;
90  getline(file,temp,'\n');
91 
92  if (!file.good()){
93  return false;
94  }
95  }
96  getline(file,header,'\n');
97  //std::cout << header << std::endl;
98  readingFile = true;
99  }
100 
101 
102  bool success;
103 
104  size_t fillBuffer=0;
105 
106  //Sequence always begins with whatever was retained
107  fillBuffer+=retain.size();
109  retain.clear();
110 
111  //The following clears up the previousSeq when the "getline" size
112  //is 2 or more times the buffer size
113  fillBuffer+=previousSeq.size();
114  if (fillBuffer >= bufferSize) {
115  size_t overhanging, charsToKeep;
116  overhanging = fillBuffer - bufferSize;
117  if (overhanging == 0) {
119  previousSeq.clear();
120  }
121  else{
122  charsToKeep = previousSeq.size() - overhanging;
123  undigitized+=previousSeq.substr(0,charsToKeep);
124  std::string temp = previousSeq;
125  previousSeq.clear();
126  previousSeq+=temp.substr(charsToKeep);
127  }
128 
129  retain+=undigitized.substr(undigitized.size()-retainSize);
130 
131  //
132  //std::cout << "1: " << undigitized << std::endl;
133  //
134  success = _digitize();
135 
136  length=seq->size();
137 
138  return success;
139  }
140 
141  //The remaining line sequence (if any) after the last buffer filled up goes to the next buffer
143  previousSeq.clear();
144 
145  //For the case where the buffer filled right before the next header or eof;
146  //the "nl_peek" portion inside the loop would be skipped, and when overhanging!=0,
147  //there is still some sequence to deal with in the final line
148  char nl_peek = file.peek();
149  if (nl_peek =='>' || nl_peek == EOF) {
150  if (undigitized.size() >= 1) {
151  //
152  // std::cout << "2: " << undigitized << std::endl;
153  //
154  if (!_digitize()) {
155  std::cerr << "sequence was not digitized" << std::endl;
156  }
157  }
158 
159  success = false;
160  readingFile = false;
161  length=seq->size();
162  return success;
163  }
164 
165  std::string line;
166 
167  while(getline(file,line,'\n')) {
168  fillBuffer+=line.size();
169 
170  //When the buffer fills up, the entire line may fill it up
171  //or some of the line is left overhanging
172  if (fillBuffer >= bufferSize) {
173  size_t overhanging, charsToKeep;
174  overhanging = fillBuffer - bufferSize;
175  if (overhanging == 0) {
176  undigitized+=line;
177  }
178  else{
179  //Overhanging is stored in previousSeq and accounted for in the next calling
180  //of this function
181  charsToKeep = line.size() - overhanging;
182  undigitized+=line.substr(0,charsToKeep);
183  previousSeq+=line.substr(charsToKeep);
184 
185  }
186 
187  retain+=undigitized.substr(undigitized.size()-retainSize);
188 
189  //
190  // std::cout << "3: " << undigitized << std::endl;
191  //
192 
193  success = _digitize();
194 
195  break;
196  }
197 
198  undigitized+=line;
199 
200  //For the case where the buffer has not filled up but the file has reached the next header or eof
201  char nl_peek = file.peek();
202  if (nl_peek =='>' || nl_peek == EOF) {
203  //
204  // std::cout << "4: " << undigitized << std::endl;
205  //
206  if (!_digitize()) {
207  std::cerr << "sequence was not digitized" << std::endl;
208  }
209  success = false;
210  readingFile = false;
211  break;
212  }
213 
214  }
215 
216  length = seq->size();
217  return success;
218  }
219 
221  if (realSeq) {
222  real=new(std::nothrow) std::vector<double>;
223 
224  if (real==NULL){
225  std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
226  exit(1);
227  }
228 
229  seq=NULL;
230  }
231  else{
232  real=NULL;
233  seq=new(std::nothrow) std::vector<short>;
234 
235  if (seq==NULL){
236  std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
237  exit(1);
238  }
239  }
240  }
241 
242 }