StochHMM
v0.34
Flexible Hidden Markov Model C++ Library and Application
Main Page
Modules
Namespaces
Classes
Files
File List
File Members
•
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
seqTracks.h
Go to the documentation of this file.
1
//
2
// seqTracks.h
3
//Copyright (c) 2007-2012 Paul C Lott
4
//University of California, Davis
5
//Genome and Biomedical Sciences Facility
6
//UC Davis Genome Center
7
//Ian Korf Lab
8
//Website: www.korflab.ucdavis.edu
9
//Email: lottpaul@gmail.com
10
//
11
//Permission is hereby granted, free of charge, to any person obtaining a copy of
12
//this software and associated documentation files (the "Software"), to deal in
13
//the Software without restriction, including without limitation the rights to
14
//use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
15
//the Software, and to permit persons to whom the Software is furnished to do so,
16
//subject to the following conditions:
17
//
18
//The above copyright notice and this permission notice shall be included in all
19
//copies or substantial portions of the Software.
20
//
21
//THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22
//IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
23
//FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
24
//COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
25
//IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
26
//CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
28
#ifndef SEQTRACK_H
29
#define SEQTRACK_H
30
31
#include <string>
32
#include <vector>
33
#include <fstream>
34
#include <map>
35
#include <queue>
36
#include "
text.h
"
37
#include "
traceback_path.h
"
38
#include "
hmm.h
"
39
#include "
sequences.h
"
40
#include <stdlib.h>
41
42
43
44
45
namespace
StochHMM{
46
47
48
#ifdef THREADS
    // Only declared when the build defines THREADS. These are extern
    // declarations; the definitions are not part of this header.
    // NOTE(review): presumably used to signal/guard worker-thread exit
    // (see seqTracks::exit_thread) -- confirm against the defining source file.
    extern pthread_cond_t  exit_thread_flag_cv;
    extern pthread_mutex_t exit_thread_flag_mutex;
#endif
52
53
//!\file seqTracks.h
54
//! Contains functions to import FASTA/FASTQ sequences from files and select the applicable model to deal with that sequence.
55
//! It was set up to generate a seqJob for each sequence, then select a model, and then allow the programmer to thread the decoding algorithm.
56
57
//!\enum SeqFileFormat
58
//!File format of the sequences
59
//!Currently only FASTA and FASTQ formats are supported
60
enum SeqFileFormat {
    FASTA = 0,  //!< Sequence file is in FASTA format
    FASTQ = 1   //!< Sequence file is in FASTQ format
};
61
62
//!\enum SeqFilesType
63
//!Sequence files have single track or multiple track sequences per file
64
//!SINGLE means that only a single sequence is required (using single track)
65
//!MULTI means that multiple sequences are required from multiple tracks
66
enum SeqFilesType {
    SINGLE_TRACK = 0,  //!< Only a single sequence is required (single track)
    MULTI_TRACK  = 1   //!< Multiple sequences are required, from multiple tracks
};
67
68
69
70
class
seqJob;
71
72
73
74
//!\struct ppTrack
75
//!Stores what track is determined by a Track Function
76
//!trackNumber is the index that references the derived track
77
//!trackToUse is the track to use to generate the derived track
78
//!Example: Function would be SIDD, the track we would use to get the sidd track
79
//!would be a DNA sequence track.
80
struct
ppTrack
{
81
size_t
trackNumber
;
82
size_t
trackToUse
;
83
StochHMM::pt2TrackFunc
*
func
;
84
};
85
86
87
/*! \class seqTracks
88
\brief SeqTracks are used to integrate the information provided by the model with the sequences that are being imported
89
*/
90
class
seqTracks
{
91
public
:
92
93
//Constructor
94
seqTracks
();
95
96
97
//Single Model, Single Seq File
98
seqTracks
(
model
&, std::string&);
99
seqTracks
(
model
&, std::string&,
SeqFileFormat
);
100
seqTracks
(
model
&, std::string&,
SeqFileFormat
,
TrackFuncs
*);
101
102
//Single Model, Multiple Seq File)
103
seqTracks
(
model
&, std::vector<std::string>&,
SeqFileFormat
,
SeqFilesType
);
104
seqTracks
(
model
&, std::vector<std::string>&,
SeqFileFormat
,
SeqFilesType
,
TrackFuncs
*);
105
106
//Multiple Models, Single Seq File
107
seqTracks
(
models
&,std::string&,
SeqFileFormat
,
pt2Attrib
*);
108
seqTracks
(
models
&,std::string&,
SeqFileFormat
,
pt2Attrib
*,
TrackFuncs
*);
109
110
111
//Multiple Models, Multiple Seq Files
112
seqTracks
(
models
&,std::vector<std::string>&,
SeqFileFormat
,
SeqFilesType
,
pt2Attrib
*);
113
seqTracks
(
models
&,std::vector<std::string>&,
SeqFileFormat
,
SeqFilesType
,
pt2Attrib
*,
TrackFuncs
*);
114
115
116
//Destructor
117
~seqTracks
();
118
119
//MUTATORS
120
121
//////////////// Single Model , Single Sequence File ////////////////////////
122
bool
loadSeqs
(
model
&, std::string&);
123
bool
loadSeqs
(
model
&, std::string&,
SeqFileFormat
);
// <-Main Function
124
bool
loadSeqs
(
model
&, std::string&,
SeqFileFormat
,
TrackFuncs
*);
// <-Main Function
125
126
127
//////////////// Single Model , Multiple Sequence File //////////////////////
128
bool
loadSeqs
(
model
&, std::vector<std::string>&,
SeqFileFormat
,
SeqFilesType
);
129
bool
loadSeqs
(
model
&, std::vector<std::string>&,
SeqFileFormat
,
SeqFilesType
,
TrackFuncs
*);
// <- Main Function
130
131
132
//////////////// Multiple Models , Single Sequence File //////////////////////
133
bool
loadSeqs
(
models
&, std::string&,
SeqFileFormat
,
pt2Attrib
*,
TrackFuncs
*);
// <-Main Function
134
bool
loadSeqs
(
models
&, std::string&,
SeqFileFormat
);
//only allow if pt2Attrib is set else error
135
136
137
//////////////// Multiple Models , Multiple Sequence File ////////////////////
138
139
140
//// Main Function ////
141
bool
loadSeqs
(
models
&, std::vector<std::string>&,
SeqFileFormat
,
SeqFilesType
,
pt2Attrib
*,
TrackFuncs
*);
// <-Main Function
142
143
144
//// Facade Functions ////
145
bool
loadSeqs
(
models
&, std::vector<std::string>&,
SeqFileFormat
,
SeqFilesType
);
//only allow if pt2Attrib is set else error
146
bool
loadSeqs
(
models
&, std::vector<std::string>&,
SeqFileFormat
,
SeqFilesType
,
pt2Attrib
*);
147
148
149
//!Sets the function to evaluate the which model to use with a particular sequence
150
inline
void
setAttribFunc
(
pt2Attrib
* func){
attribModelFunc
=func;}
151
152
//!Sets a trackfunction that will be evaluated to generate a necessary track for the model
153
inline
void
setTrackFunc
(
TrackFuncs
* func){
trackFunctions
=func;}
154
inline
void
setNumImportJobs
(
size_t
value){
numImportJobs
=value;}
155
156
void
setTrackFilename
(std::string&,std::string&);
157
158
//ACCESSORS
159
bool
getNext
();
160
bool
importJobs
();
161
162
seqJob
*
getJob
();
163
164
sequence
*
getFasta
(
int
);
165
sequence
*
getFastq
(
int
);
166
sequence
*
getReal
(
int
);
167
168
size_t
size
(
void
){
return
jobQueue
.size();}
169
inline
size_t
getTrackCount
(){
return
trackCount
;}
170
171
void
print
();
172
void
getInformation
();
173
float
remainingSeqs
(){
174
float
val = (float)
seqFilenames
.size() / (float)
importTracks
.size();
175
//std::cout << seqFilenames.size() <<"\t" << importTracks.size() <<"\t" << val << std::endl;
176
return
val;}
177
178
private
:
179
180
std::vector<std::ifstream*>
filehandles
;
//input file stream
181
std::vector<std::string>
seqFilenames
;
//input filenames
182
size_t
numImportJobs
;
183
bool
good
;
184
185
TrackFuncs
*
trackFunctions
;
186
pt2Attrib
*
attribModelFunc
;
187
188
189
int
TrackToUseForAttrib
;
190
191
stateInfo
*
info
;
192
193
std::vector<std::pair<int,trackType> >
importTracks
;
194
std::vector<ppTrack>
postprocessTracks
;
195
196
197
models
*
hmms
;
//Models
198
model
*
hmm
;
//Models
199
tracks
*
modelTracks
;
//Tracks defined by models
200
201
SeqFileFormat
seqFormat
;
//File format (Fasta or FastQ);
202
SeqFilesType
fileType
;
//File Type (Single File or Multiple Files);
203
204
205
//bool fastq;
206
//bool multiFile;
207
208
size_t
trackCount
;
209
stringList
input
;
210
211
212
//Threading Variables
213
std::queue<seqJob*>
jobQueue
;
//used to be trcks
214
size_t
jobs
;
//Counts of # of jobs waiting
215
int
exit_thread
;
//set to 0 if file stream is EOF
216
217
218
//External Definition import function for Sequence
219
ExDefSequence
*
getExDef
(
int
,
int
);
220
221
bool
_loadFASTA
(std::string&,
SeqFilesType
);
222
bool
_loadFASTA
(std::vector<std::string>&,
SeqFilesType
);
223
224
bool
_loadFASTQ
(std::string&,
SeqFilesType
);
225
bool
_loadFASTQ
(std::vector<std::string>&,
SeqFilesType
);
226
227
228
bool
_initImportTrackInfo
(
void
);
229
void
_reset
();
230
void
_init
();
231
bool
_open
();
232
bool
_close
();
233
};
234
235
236
237
238
239
240
//!\class seqJob
241
//!Stores the model and sequence information for each job
242
class
seqJob
{
//Could make a derivative of sequences
243
public
:
244
//Constructor
245
seqJob
(
size_t
);
246
247
//Destructor
248
~seqJob
();
249
250
friend
class
seqTracks
;
251
252
253
//MUTATORS
254
void
evaluateFunctions
();
255
256
257
//ACCESSORS
258
inline
size_t
size
(){
return
set
->getLength();};
259
260
inline
model
*
getModel
(){
return
hmm
;};
261
inline
sequences
*
getSeqs
(){
return
set
;};
262
263
inline
std::string
getHeader
(){
return
set
->getHeader();};
264
265
inline
bool
evaluated
(){
return
funcEvaluated
;};
266
267
268
inline
void
printModel
(){
if
(
hmm
)
hmm
->
print
();};
269
270
inline
void
printSeq
(){
set
->print();};
271
272
inline
traceback_path
*
getPath
(){
if
(
decodingPerformed
)
return
path
;
else
return
NULL;};
273
274
double
getSeqAttrib
(){
return
attrib
;};
275
276
inline
std::string
getSeqFilename
(
size_t
iter){
return
seqFilename
[iter];};
277
inline
void
setSeqFilename
(std::string& filename){
seqFilename
.push_back(filename);
return
;};
278
inline
void
printFilenames
(){
for
(
size_t
i=0;i<
seqFilename
.size();i++){ std::cout <<
seqFilename
[i] << std::endl;}};
279
280
private
:
281
model
*
hmm
;
282
sequences
*
set
;
283
284
std::vector< std::string>
seqFilename
;
285
286
double
attrib
;
287
bool
funcEvaluated
;
288
289
TrackFuncs
*
functions
;
290
291
bool
decodingPerformed
;
292
traceback_path
*
path
;
293
294
};
295
296
297
}
298
#endif
/*SEQTRACK_H*/
Generated on Tue Jul 30 2013 13:23:11 for StochHMM by
1.8.1