StochHMM
v0.34
Flexible Hidden Markov Model C++ Library and Application
Main Page
Modules
Namespaces
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
seqTracks.h
Go to the documentation of this file.
1
//
2
// seqTracks.h
3
//Copyright (c) 2007-2012 Paul C Lott
4
//University of California, Davis
5
//Genome and Biomedical Sciences Facility
6
//UC Davis Genome Center
7
//Ian Korf Lab
8
//Website: www.korflab.ucdavis.edu
9
//Email: lottpaul@gmail.com
10
//
11
//Permission is hereby granted, free of charge, to any person obtaining a copy of
12
//this software and associated documentation files (the "Software"), to deal in
13
//the Software without restriction, including without limitation the rights to
14
//use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
15
//the Software, and to permit persons to whom the Software is furnished to do so,
16
//subject to the following conditions:
17
//
18
//The above copyright notice and this permission notice shall be included in all
19
//copies or substantial portions of the Software.
20
//
21
//THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22
//IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
23
//FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
24
//COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
25
//IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
26
//CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
28
#ifndef SEQTRACK_H
29
#define SEQTRACK_H
30
31
#include <string>
32
#include <vector>
33
#include <fstream>
34
#include <map>
35
#include <queue>
36
#include "
text.h
"
37
#include "
traceback_path.h
"
38
#include "
hmm.h
"
39
#include "
sequences.h
"
40
#include <stdlib.h>
41
42
43
44
45
namespace
StochHMM{
46
47
48
#ifdef THREADS
49
extern
pthread_cond_t exit_thread_flag_cv;
50
extern
pthread_mutex_t exit_thread_flag_mutex;
51
#endif
52
53
//!\file seqTracks.h
54
//! Contains functions to import FASTA/FASTQ sequences from files and select the applicable model to deal with that sequence.
55
//! It was set up to generate a seqJob for each sequence, then select a model, and then allow the programmer to thread the decoding algorithm.
56
57
//!\enum SeqFileFormat
58
//!File format of the sequences
59
//!Currently only FASTA and FASTQ formats are supported
60
enum SeqFileFormat {
    FASTA,  //!< Sequence file is in FASTA format
    FASTQ   //!< Sequence file is in FASTQ format
};
61
62
//!\enum SeqFilesType
63
//!Sequence files have single track or multiple track sequences per file
64
//!SINGLE_TRACK means that only a single sequence is required (using a single track)
65
//!MULTI_TRACK means that multiple sequences are required from multiple tracks
66
enum SeqFilesType {
    SINGLE_TRACK,  //!< Only a single sequence is required (single track)
    MULTI_TRACK    //!< Multiple sequences are required, from multiple tracks
};
67
68
69
70
class
seqJob;
71
72
73
74
//!\struct ppTrack
75
//!Stores what track is determined by a Track Function
76
//!trackNumber is the index that references the derived track
77
//!trackToUse is the track to use to generate the derived track
78
//!Example: Function would be SIDD, the track we would use to get the sidd track
79
//!would be a DNA sequence track.
80
struct
ppTrack
{
81
size_t
trackNumber
;
82
size_t
trackToUse
;
83
StochHMM::pt2TrackFunc
*
func
;
84
};
85
86
87
/*! \class seqTracks
88
\brief SeqTracks are used to integrate the information provided by the model with the sequences that are being imported
89
*/
90
class
seqTracks
{
91
public
:
92
93
//Constructor
94
seqTracks
();
95
96
97
//Single Model, Single Seq File
98
seqTracks
(
model
&, std::string&);
99
seqTracks
(
model
&, std::string&,
SeqFileFormat
);
100
seqTracks
(
model
&, std::string&,
SeqFileFormat
,
TrackFuncs
*);
101
102
//Single Model, Multiple Seq File)
103
seqTracks
(
model
&, std::vector<std::string>&,
SeqFileFormat
,
SeqFilesType
);
104
seqTracks
(
model
&, std::vector<std::string>&,
SeqFileFormat
,
SeqFilesType
,
TrackFuncs
*);
105
106
//Multiple Models, Single Seq File
107
seqTracks
(
models
&,std::string&,
SeqFileFormat
,
pt2Attrib
*);
108
seqTracks
(
models
&,std::string&,
SeqFileFormat
,
pt2Attrib
*,
TrackFuncs
*);
109
110
111
//Multiple Models, Multiple Seq Files
112
seqTracks
(
models
&,std::vector<std::string>&,
SeqFileFormat
,
SeqFilesType
,
pt2Attrib
*);
113
seqTracks
(
models
&,std::vector<std::string>&,
SeqFileFormat
,
SeqFilesType
,
pt2Attrib
*,
TrackFuncs
*);
114
115
116
//Destructor
117
~seqTracks
();
118
119
//MUTATORS
120
121
//////////////// Single Model , Single Sequence File ////////////////////////
122
bool
loadSeqs
(
model
&, std::string&);
123
bool
loadSeqs
(
model
&, std::string&,
SeqFileFormat
);
// <-Main Function
124
bool
loadSeqs
(
model
&, std::string&,
SeqFileFormat
,
TrackFuncs
*);
// <-Main Function
125
126
127
//////////////// Single Model , Multiple Sequence File //////////////////////
128
bool
loadSeqs
(
model
&, std::vector<std::string>&,
SeqFileFormat
,
SeqFilesType
);
129
bool
loadSeqs
(
model
&, std::vector<std::string>&,
SeqFileFormat
,
SeqFilesType
,
TrackFuncs
*);
// <- Main Function
130
131
132
//////////////// Multiple Models , Single Sequence File //////////////////////
133
bool
loadSeqs
(
models
&, std::string&,
SeqFileFormat
,
pt2Attrib
*,
TrackFuncs
*);
// <-Main Function
134
bool
loadSeqs
(
models
&, std::string&,
SeqFileFormat
);
//only allow if pt2Attrib is set else error
135
136
137
//////////////// Multiple Models , Multiple Sequence File ////////////////////
138
139
140
//// Main Function ////
141
bool
loadSeqs
(
models
&, std::vector<std::string>&,
SeqFileFormat
,
SeqFilesType
,
pt2Attrib
*,
TrackFuncs
*);
// <-Main Function
142
143
144
//// Facade Functions ////
145
bool
loadSeqs
(
models
&, std::vector<std::string>&,
SeqFileFormat
,
SeqFilesType
);
//only allow if pt2Attrib is set else error
146
bool
loadSeqs
(
models
&, std::vector<std::string>&,
SeqFileFormat
,
SeqFilesType
,
pt2Attrib
*);
147
148
149
//!Sets the function to evaluate the which model to use with a particular sequence
150
inline
void
setAttribFunc
(
pt2Attrib
* func){
attribModelFunc
=func;}
151
152
//!Sets a trackfunction that will be evaluated to generate a necessary track for the model
153
inline
void
setTrackFunc
(
TrackFuncs
* func){
trackFunctions
=func;}
154
inline
void
setNumImportJobs
(
size_t
value){
numImportJobs
=value;}
155
156
void
setTrackFilename
(std::string&,std::string&);
157
158
//ACCESSORS
159
bool
getNext
();
160
bool
importJobs
();
161
162
seqJob
*
getJob
();
163
164
sequence
*
getFasta
(
int
);
165
sequence
*
getFastq
(
int
);
166
sequence
*
getReal
(
int
);
167
168
size_t
size
(
void
){
return
jobQueue
.size();}
169
inline
size_t
getTrackCount
(){
return
trackCount
;}
170
171
void
print
();
172
void
getInformation
();
173
float
remainingSeqs
(){
174
float
val = (float)
seqFilenames
.size() / (float)
importTracks
.size();
175
//std::cout << seqFilenames.size() <<"\t" << importTracks.size() <<"\t" << val << std::endl;
176
return
val;}
177
178
private
:
179
180
std::vector<std::ifstream*>
filehandles
;
//input file stream
181
std::vector<std::string>
seqFilenames
;
//input filenames
182
size_t
numImportJobs
;
183
bool
good
;
184
185
TrackFuncs
*
trackFunctions
;
186
pt2Attrib
*
attribModelFunc
;
187
188
189
int
TrackToUseForAttrib
;
190
191
stateInfo
*
info
;
192
193
std::vector<std::pair<int,trackType> >
importTracks
;
194
std::vector<ppTrack>
postprocessTracks
;
195
196
197
models
*
hmms
;
//Models
198
model
*
hmm
;
//Models
199
tracks
*
modelTracks
;
//Tracks defined by models
200
201
SeqFileFormat
seqFormat
;
//File format (Fasta or FastQ);
202
SeqFilesType
fileType
;
//File Type (Single File or Multiple Files);
203
204
205
//bool fastq;
206
//bool multiFile;
207
208
size_t
trackCount
;
209
stringList
input
;
210
211
212
//Threading Variables
213
std::queue<seqJob*>
jobQueue
;
//used to be trcks
214
size_t
jobs
;
//Counts of # of jobs waiting
215
int
exit_thread
;
//set to 0 if file stream is EOF
216
217
218
//External Definition import function for Sequence
219
ExDefSequence
*
getExDef
(
int
,
int
);
220
221
bool
_loadFASTA
(std::string&,
SeqFilesType
);
222
bool
_loadFASTA
(std::vector<std::string>&,
SeqFilesType
);
223
224
bool
_loadFASTQ
(std::string&,
SeqFilesType
);
225
bool
_loadFASTQ
(std::vector<std::string>&,
SeqFilesType
);
226
227
228
bool
_initImportTrackInfo
(
void
);
229
void
_reset
();
230
void
_init
();
231
bool
_open
();
232
bool
_close
();
233
};
234
235
236
237
238
239
240
//!\class seqJob
241
//!Stores the model and sequence information for each job
242
class
seqJob
{
//Could make a derivative of sequences
243
public
:
244
//Constructor
245
seqJob
(
size_t
);
246
247
//Destructor
248
~seqJob
();
249
250
friend
class
seqTracks
;
251
252
253
//MUTATORS
254
void
evaluateFunctions
();
255
256
257
//ACCESSORS
258
inline
size_t
size
(){
return
set
->getLength();};
259
260
inline
model
*
getModel
(){
return
hmm
;};
261
inline
sequences
*
getSeqs
(){
return
set
;};
262
263
inline
std::string
getHeader
(){
return
set
->getHeader();};
264
265
inline
bool
evaluated
(){
return
funcEvaluated
;};
266
267
268
inline
void
printModel
(){
if
(
hmm
)
hmm
->
print
();};
269
270
inline
void
printSeq
(){
set
->print();};
271
272
inline
traceback_path
*
getPath
(){
if
(
decodingPerformed
)
return
path
;
else
return
NULL;};
273
274
double
getSeqAttrib
(){
return
attrib
;};
275
276
inline
std::string
getSeqFilename
(
size_t
iter){
return
seqFilename
[iter];};
277
inline
void
setSeqFilename
(std::string& filename){
seqFilename
.push_back(filename);
return
;};
278
inline
void
printFilenames
(){
for
(
size_t
i=0;i<
seqFilename
.size();i++){ std::cout <<
seqFilename
[i] << std::endl;}};
279
280
private
:
281
model
*
hmm
;
282
sequences
*
set
;
283
284
std::vector< std::string>
seqFilename
;
285
286
double
attrib
;
287
bool
funcEvaluated
;
288
289
TrackFuncs
*
functions
;
290
291
bool
decodingPerformed
;
292
traceback_path
*
path
;
293
294
};
295
296
297
}
298
#endif
/*SEQTRACK_H*/
Generated on Tue Jul 30 2013 13:23:11 for StochHMM by
1.8.1