StochHMM
v0.34
Flexible Hidden Markov Model C++ Library and Application
Main Page
Modules
Namespaces
Classes
Files
File List
File Members
All
Classes
Namespaces
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Macros
Groups
Pages
sequence.h
Go to the documentation of this file.
1
//
2
// sequence.h
3
//Copyright (c) 2007-2012 Paul C Lott
4
//University of California, Davis
5
//Genome and Biomedical Sciences Facility
6
//UC Davis Genome Center
7
//Ian Korf Lab
8
//Website: www.korflab.ucdavis.edu
9
//Email: lottpaul@gmail.com
10
//
11
//Permission is hereby granted, free of charge, to any person obtaining a copy of
12
//this software and associated documentation files (the "Software"), to deal in
13
//the Software without restriction, including without limitation the rights to
14
//use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
15
//the Software, and to permit persons to whom the Software is furnished to do so,
16
//subject to the following conditions:
17
//
18
//The above copyright notice and this permission notice shall be included in all
19
//copies or substantial portions of the Software.
20
//
21
//THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22
//IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
23
//FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
24
//COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
25
//IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
26
//CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
28
#ifndef SEQUENCE_H
29
#define SEQUENCE_H
30
31
#include <vector>
32
#include <stdlib.h>
33
#include <string>
34
#include <iostream>
35
#include <math.h>
36
#include <fstream>
37
#include <algorithm>
38
#include "
text.h
"
39
#include "
track.h
"
40
#include "
stateInfo.h
"
41
#include "
externDefinitions.h
"
42
#include "
index.h
"
43
44
//! \file
45
46
namespace
StochHMM{
47
//! \class sequence
48
//! Contains individual sequence information and functions to deal with importing and digitizing the sequence
49
//! Sequence can be either real numbers (double values) or sequence(characters or words) discrete values
50
//! class sequence supports 255 discrete values.
51
class
sequence
{
52
public
:
53
54
//Constructors
55
56
sequence
();
57
sequence
(
bool
);
//True if Real number track, False if alpha
58
//sequence(trackType);
59
sequence
(std::vector<double>*,
track
*);
60
sequence
(std::string&,
track
*);
61
sequence
(
char
* ,
track
*);
62
63
~sequence
();
64
65
//Copy Constructors
66
sequence
(
const
sequence
&);
67
sequence
&
operator=
(
const
sequence
&);
68
69
friend
class
sequences
;
70
friend
class
sequenceStream
;
71
72
//ACCESSOR
73
74
//!Get reference to undigitized sequence
75
//!If sequence hasn't been undigitized then it will undigitize it and
76
//!store the result. (Only undigitizes the sequence once, then passes
77
//!reference to undigitized sequence)
78
inline
std::string*
getUndigitized
(){
79
if
(!
undigitized
.empty() ||
seq
->empty()){
80
return
&
undigitized
;
81
}
82
else
{
83
undigitized
=
undigitize
();
84
return
&
undigitized
;
85
}
86
}
87
88
//!Get the size of the sequence
89
inline
size_t
getLength
(){
return
length
;};
//Returns length of sequence
90
91
//!Get the attribute value for the sequence
92
//!Selection of model may use this value to determine which model to use
93
//! \sa setAttrib
94
inline
double
getAttrib
(){
return
attrib
;};
//Returns the Attribute value for the sequence
95
96
//!Get pointer to ExDefSequence for the sequence
97
//! \return ExDefSequence*
98
inline
ExDefSequence
*
getExDef
(){
return
external
;};
99
100
//!Check to see if exDef is defined for the sequence
101
//! \return true if ExDefSequence is defined for sequence
102
//! \return false if no External definition exists for sequence
103
inline
bool
exDefDefined
(){
if
(
external
){
return
true
;}
return
false
;};
104
105
double
realValue
(
size_t
);
// Returns Sequence Value at position
106
uint8_t
seqValue
(
size_t
);
// Returns Digitized Value at position
107
//char charValue(size_t); // Returns Alpha Character Value at position
108
109
//!Get the size of the sequence
110
//! \return size_t size of the sequence
111
inline
size_t
size
(){
if
(
realSeq
){
return
real
->size();}
else
{
return
seq
->size();}};
// Returns size of sequence
112
113
//! Get the pointer to the track that is defined for the sequence;
114
//! \return pointer to track
115
inline
track
*
getTrack
(){
return
seqtrk
;};
116
117
inline
void
setTrack
(
track
* tr){
118
seqtrk
= tr;
119
return
;
120
}
121
122
123
//! Print the string represntation of the sequence to stdout
124
//! Prints the digitized version
125
inline
void
print
(){std::cout <<
stringify
() << std::endl;};
//Print sequence to stdout
126
std::string
stringify
();
// Get sequence as string
127
128
129
//! Undigitize the sequence
130
//! If the sequence has not been digitized then it will return directly
131
//! If the sequence has been digitized then it will undigitize it and return it
132
//! \return character or word sequence from fasta
133
std::string
undigitize
();
134
135
//MUTATOR
136
//!Set the sequence attribute value
137
//!\param attr Value of attributes for sequence;
138
inline
void
setAttrib
(
double
attr){
attrib
=attr;};
//!Set the attribute value
139
140
//!Set the header of the sequence
141
//!\param head Header of the sequence
142
inline
void
setHeader
(std::string& head){
header
=head;};
143
144
void
setSeq
(std::string&,
track
*);
145
void
setRealSeq
(std::vector<double>*,
track
*);
146
147
inline
bool
getFasta
(std::ifstream& file){
return
getFasta
(file,NULL,NULL);}
148
inline
bool
getFasta
(std::ifstream& file,
track
* trk){
return
getFasta
(file,trk,NULL);}
149
bool
getFasta
(std::ifstream&,
track
*,
stateInfo
*);
150
151
152
bool
getMaskedFasta
(std::ifstream&,
track
*);
153
bool
getFastq
(std::ifstream&,
track
*);
154
155
inline
bool
getReal
(std::ifstream& file){
return
getReal
(file,NULL,NULL);}
156
inline
bool
getReal
(std::ifstream& file,
track
* trk){
return
getReal
(file,trk,NULL);}
157
bool
getReal
(std::ifstream&,
track
*,
stateInfo
*);
158
159
int
getMaxMask
(){
return
max_mask
;}
160
int
getMask
(
size_t
);
161
162
std::string
getSymbol
(
size_t
)
const
;
163
164
void
get_index
(
size_t
position,
int
order, std::pair<Index, Index>& word_index);
165
166
167
//! Returns the header of the sequence as a std::string
168
inline
std::string
getHeader
() {
return
header
; }
169
170
bool
reverseComplement
();
171
bool
complement
();
172
bool
reverse
();
173
174
//!Converts sequence digital based on track alphabet
175
bool
digitize
();
176
177
//! Shuffles the sequence using std::random_shuffle
178
void
shuffle
();
179
180
inline
std::vector<uint8_t>*
getDigitalSeq
(){
return
seq
;}
181
182
inline
uint8_t
operator[]
(
size_t
index){
return
(*
seq
)[index];}
183
184
185
//!Empty Sequence
186
void
clear
();
187
188
189
//void getNext (std::ifstream&, track*);
190
191
192
//bool _checkSequence(); //!Check the sequence adheres to the track alphabet
193
194
private
:
195
bool
realSeq
;
//If Real number sequence
196
std::string
header
;
// Header from the sequence
197
198
double
attrib
;
//Attribute value (Could be %GC or whatever user defines)
199
size_t
length
;
//Lenght of the Sequence
200
201
track
*
seqtrk
;
//Ptr to track describing alphabet and type
202
203
ExDefSequence
*
external
;
//External definitions
204
//Stores defined states for given sequence
205
206
// FIXME:: DIGITIZED SEQUENCES STORED AS SHORT. NEED TO STANDARDIZE BOTH TRACK AND SEQUENCE CLASS (Track stores as (int) but sequence stores as short.
207
std::vector<uint8_t>*
seq
;
// Digitized Sequence
208
std::vector<double>*
real
;
// Real Number Sequence
209
std::vector<int>*
mask
;
//Stores State masking information for training
210
int
max_mask
;
//Maximum mask number
211
212
213
std::string
undigitized
;
//Undigitized sequence
214
215
bool
_digitize
();
//Digitize the sequence
216
};
217
218
219
220
//!Randomly generate a sequence based on Probabilities of each character
221
//! \param freq Reference to std::vector<double> that contains frequencies of alphabet corresponding to alphabet in track
222
//! \param length Length of sequence to generate
223
//! \param tr Pointer to StochHMM::track where alphabet and ambiguous characters are defined
224
sequence
random_sequence
(std::vector<double>& freq,
size_t
length,
track
* tr);
225
// sequence random_sequence(emm*);
226
// sequence translate();
227
228
}
229
#endif
/*SEQUENCE_H*/
Generated on Tue Jul 30 2013 13:23:11 for StochHMM by
1.8.1