StochHMM  v0.34
Flexible Hidden Markov Model C++ Library and Application
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
seqTracks.h
Go to the documentation of this file.
1 //
2 // seqTracks.h
3 //Copyright (c) 2007-2012 Paul C Lott
4 //University of California, Davis
5 //Genome and Biomedical Sciences Facility
6 //UC Davis Genome Center
7 //Ian Korf Lab
8 //Website: www.korflab.ucdavis.edu
9 //Email: lottpaul@gmail.com
10 //
11 //Permission is hereby granted, free of charge, to any person obtaining a copy of
12 //this software and associated documentation files (the "Software"), to deal in
13 //the Software without restriction, including without limitation the rights to
14 //use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
15 //the Software, and to permit persons to whom the Software is furnished to do so,
16 //subject to the following conditions:
17 //
18 //The above copyright notice and this permission notice shall be included in all
19 //copies or substantial portions of the Software.
20 //
21 //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 //IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
23 //FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
24 //COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
25 //IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
26 //CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 
28 #ifndef SEQTRACK_H
29 #define SEQTRACK_H
30 
31 #include <string>
32 #include <vector>
33 #include <fstream>
34 #include <map>
35 #include <queue>
36 #include "text.h"
37 #include "traceback_path.h"
38 #include "hmm.h"
39 #include "sequences.h"
40 #include <stdlib.h>
41 
42 
43 
44 
45 namespace StochHMM{
46 
47 
48 #ifdef THREADS
    //Declared extern here, so the definitions live in another translation unit.
    //Only visible when THREADS is defined.
    //NOTE(review): names suggest these signal thread exit; <pthread.h> is not
    //included directly by this header - presumably pulled in by a project header, confirm.
49  extern pthread_cond_t exit_thread_flag_cv;
50  extern pthread_mutex_t exit_thread_flag_mutex;
51 #endif
52 
53  //!\file seqTracks.h
54  //! Contains functions to import FASTA/FASTQ sequences from files and select the applicable model to deal with that sequence.
55  //! It was set up to generate a seqJob for each sequence, then select a model, and then allow the programmer to thread the decoding algorithm.
56 
57  //!\enum SeqFileFormat
58  //!File format of the sequences
59  //!Currently only FASTA and FASTQ formats are supported
61 
62  //!\enum SeqFilesType
63  //!Sequence files have single track or multiple track sequences per file
64  //!SINGLE means that only a single sequence is required (using single track)
65  //!MULTI means that multiple sequences are required from multiple tracks
67 
68 
69 
70  class seqJob; //Forward declaration: seqTracks (declared first) uses seqJob* before seqJob is defined further down
71 
72 
73 
74  //!\struct ppTrack
75  //!Stores what track is determined by a Track Function
76  //!trackNumber is the index referencing the derived track
77  //!trackToUse is the track used to generate the derived track
78  //!Example: if the function is SIDD, the track used to generate the SIDD track
79  //!would be a DNA sequence track.
80  struct ppTrack{
81  size_t trackNumber; //index of the derived track
82  size_t trackToUse; //index of the track used to generate the derived track
    //NOTE(review): listing line 83 is absent from this rendering; a further member
    //may have been elided by the documentation generator - check the real header.
84  };
85 
86 
87  /*! \class seqTracks
88  \brief SeqTracks are used to integrate the information provided by the model with the sequences that are being imported
89  */
90  class seqTracks{
    //NOTE(review): this listing has gaps in its line numbering (182, 185-186, 189, 191, 209),
    //and attribModelFunc, trackFunctions and numImportJobs (assigned by the inline setters
    //below) are not declared in the visible text. Consult the real header for the full member list.
91  public:
92 
93  //Constructor
94  seqTracks();
95 
96 
97  //Single Model, Single Seq File
98  seqTracks(model&, std::string&);
99  seqTracks(model&, std::string&, SeqFileFormat);
100  seqTracks(model&, std::string&, SeqFileFormat, TrackFuncs*);
101 
102  //Single Model, Multiple Seq Files
103  seqTracks(model&, std::vector<std::string>&, SeqFileFormat, SeqFilesType);
104  seqTracks(model&, std::vector<std::string>&, SeqFileFormat, SeqFilesType, TrackFuncs*);
105 
106  //Multiple Models, Single Seq File
107  seqTracks(models&,std::string&, SeqFileFormat, pt2Attrib*);
108  seqTracks(models&,std::string&, SeqFileFormat, pt2Attrib*, TrackFuncs*);
109 
110 
111  //Multiple Models, Multiple Seq Files
112  seqTracks(models&,std::vector<std::string>&, SeqFileFormat, SeqFilesType, pt2Attrib*);
113  seqTracks(models&,std::vector<std::string>&, SeqFileFormat, SeqFilesType, pt2Attrib*, TrackFuncs*);
114 
115 
116  //Destructor
    //NOTE(review): the class holds raw pointers (filehandles, jobQueue entries) and declares a
    //destructor but no copy constructor/assignment in the visible text; whether copying is
    //safe depends on what the destructor releases - confirm in the implementation file.
117  ~seqTracks();
118 
119  //MUTATORS
    //The loadSeqs overloads mirror the constructor overloads above (same model/file combinations).
120 
121  //////////////// Single Model , Single Sequence File ////////////////////////
122  bool loadSeqs(model&, std::string&);
123  bool loadSeqs(model&, std::string&, SeqFileFormat); // <-Main Function
124  bool loadSeqs(model&, std::string&, SeqFileFormat, TrackFuncs*); // <-Main Function
125 
126 
127  //////////////// Single Model , Multiple Sequence File //////////////////////
128  bool loadSeqs(model&, std::vector<std::string>&, SeqFileFormat, SeqFilesType);
129  bool loadSeqs(model&, std::vector<std::string>&, SeqFileFormat, SeqFilesType, TrackFuncs*); // <- Main Function
130 
131 
132  //////////////// Multiple Models , Single Sequence File //////////////////////
133  bool loadSeqs(models&, std::string&, SeqFileFormat, pt2Attrib*, TrackFuncs*); // <-Main Function
134  bool loadSeqs(models&, std::string&, SeqFileFormat); //only allowed if pt2Attrib is set, else error
135 
136 
137  //////////////// Multiple Models , Multiple Sequence File ////////////////////
138 
139 
140  //// Main Function ////
141  bool loadSeqs(models&, std::vector<std::string>&, SeqFileFormat, SeqFilesType, pt2Attrib*, TrackFuncs*); // <-Main Function
142 
143 
144  //// Facade Functions ////
145  bool loadSeqs(models&, std::vector<std::string>&, SeqFileFormat, SeqFilesType); //only allowed if pt2Attrib is set, else error
146  bool loadSeqs(models&, std::vector<std::string>&, SeqFileFormat, SeqFilesType, pt2Attrib*);
147 
148 
149  //!Sets the function used to evaluate which model to use with a particular sequence
150  inline void setAttribFunc(pt2Attrib* func){attribModelFunc=func;}
151 
152  //!Sets a track function that will be evaluated to generate a necessary track for the model
153  inline void setTrackFunc(TrackFuncs* func){trackFunctions=func;}
    //!Stores value in numImportJobs
    //NOTE(review): presumably the number of jobs imported per batch - confirm against importJobs()
154  inline void setNumImportJobs(size_t value){numImportJobs=value;}
155 
156  void setTrackFilename(std::string&,std::string&);
157 
158  //ACCESSORS
159  bool getNext();
160  bool importJobs();
161 
162  seqJob* getJob();
163 
164  sequence* getFasta(int);
165  sequence* getFastq(int);
166  sequence* getReal(int);
167 
    //!Returns the number of entries currently in jobQueue
168  size_t size(void){return jobQueue.size();}
    //!Returns trackCount
169  inline size_t getTrackCount(){return trackCount;}
170 
171  void print();
172  void getInformation();
    //!Returns seqFilenames.size() divided by importTracks.size(), computed in float
    //NOTE(review): importTracks.size() is not checked for zero; with an empty importTracks
    //the result is inf or NaN rather than a usable count - confirm callers never hit this.
173  float remainingSeqs(){
174  float val = (float) seqFilenames.size() / (float) importTracks.size();
175  //std::cout << seqFilenames.size() <<"\t" << importTracks.size() <<"\t" << val << std::endl;
176  return val;}
177 
178  private:
179 
180  std::vector<std::ifstream*> filehandles; //input file streams (raw pointers)
181  std::vector<std::string> seqFilenames; //input filenames
183  bool good;
184 
187 
188 
190 
192 
193  std::vector<std::pair<int,trackType> > importTracks; //divisor used by remainingSeqs()
194  std::vector<ppTrack> postprocessTracks; //ppTrack entries (derived track index + source track index)
195 
196 
197  models* hmms; //Models (multiple-model overloads take models&)
198  model* hmm; //Model (single-model overloads take model&)
199  tracks* modelTracks; //Tracks defined by models
200 
201  SeqFileFormat seqFormat; //File format (Fasta or FastQ);
202  SeqFilesType fileType; //File Type (Single File or Multiple Files);
203 
204 
205  //bool fastq;
206  //bool multiFile;
207 
208  size_t trackCount; //returned by getTrackCount()
210 
211 
212  //Threading Variables
213  std::queue<seqJob*> jobQueue; //used to be trcks; its size is what size() reports
214  size_t jobs; //Counts of # of jobs waiting
215  int exit_thread; //set to 0 if file stream is EOF
216 
217 
218  //External Definition import function for Sequence
219  ExDefSequence* getExDef(int,int);
220 
221  bool _loadFASTA(std::string&, SeqFilesType);
222  bool _loadFASTA(std::vector<std::string>&, SeqFilesType);
223 
224  bool _loadFASTQ(std::string&, SeqFilesType);
225  bool _loadFASTQ(std::vector<std::string>&, SeqFilesType);
226 
227 
228  bool _initImportTrackInfo(void);
229  void _reset();
230  void _init();
231  bool _open();
232  bool _close();
233  };
234 
235 
236 
237 
238 
239 
240  //!\class seqJob
241  //!Stores the model and sequence information for each job
242  class seqJob{ //Could make a derivative of sequences
    //NOTE(review): funcEvaluated, decodingPerformed and path are used by the inline accessors
    //below but are not declared in the visible text; listing lines 287, 289 and 291-292 are
    //missing from this rendering, so they were probably elided. Check the real header.
243  public:
244  //Constructor
245  seqJob(size_t);
246 
247  //Destructor
248  ~seqJob();
249 
    //seqTracks is granted access to the private members below
250  friend class seqTracks;
251 
252 
253  //MUTATORS
254  void evaluateFunctions();
255 
256 
257  //ACCESSORS
    //!Returns set->getLength(); set is dereferenced without a null check
258  inline size_t size(){return set->getLength();};
259 
    //!Returns the stored model pointer (may be null; printModel() checks for that)
260  inline model* getModel(){return hmm;};
    //!Returns the stored sequences pointer
261  inline sequences* getSeqs(){return set;};
262 
    //!Returns set->getHeader()
263  inline std::string getHeader(){return set->getHeader();};
264 
    //!Returns the funcEvaluated flag
265  inline bool evaluated(){return funcEvaluated;};
266 
267 
    //!Prints the model via hmm->print(); does nothing when hmm is null
268  inline void printModel(){if(hmm) hmm->print();};
269 
    //!Prints the sequences via set->print()
270  inline void printSeq(){set->print();};
271 
    //!Returns path only when decodingPerformed is true; otherwise returns NULL
272  inline traceback_path* getPath(){if (decodingPerformed) return path;else return NULL;};
273 
    //!Returns attrib
274  double getSeqAttrib(){return attrib;};
275 
    //!Returns a copy of seqFilename[iter]; iter is not bounds-checked (operator[])
276  inline std::string getSeqFilename(size_t iter){return seqFilename[iter];};
    //!Appends filename to seqFilename (despite the "set" name, existing entries are kept)
277  inline void setSeqFilename(std::string& filename){seqFilename.push_back(filename); return;};
    //!Writes each stored filename to std::cout, one per line
    //NOTE(review): <iostream> is not among this header's direct includes; std::cout
    //presumably arrives through one of the project headers - confirm.
278  inline void printFilenames(){for(size_t i=0;i<seqFilename.size();i++){ std::cout << seqFilename[i] << std::endl;}};
279 
280  private:
281  model* hmm; //model for this job; may be null (see printModel)
282  sequences* set; //sequences for this job
283 
284  std::vector< std::string> seqFilename; //filenames recorded via setSeqFilename()
285 
286  double attrib; //value returned by getSeqAttrib()
288 
290 
293 
294  };
295 
296 
297 }
298 #endif /*SEQTRACK_H*/