StochHMM
v0.34
Flexible Hidden Markov Model C++ Library and Application
|
#include <sequence.h>
Public Member Functions | |
sequence () | |
Create a sequence datatype. | |
sequence (bool) | |
sequence (std::vector< double > *, track *) | |
Create a sequence type. | |
sequence (std::string &, track *) | |
sequence (char *, track *) | |
~sequence () | |
Destroy sequence type. | |
sequence (const sequence &) | |
sequence & | operator= (const sequence &) |
std::string * | getUndigitized () |
size_t | getLength () |
Get the size of the sequence. | |
double | getAttrib () |
ExDefSequence * | getExDef () |
bool | exDefDefined () |
double | realValue (size_t) |
uint8_t | seqValue (size_t) |
size_t | size () |
track * | getTrack () |
void | setTrack (track *tr) |
void | print () |
std::string | stringify () |
std::string | undigitize () |
void | setAttrib (double attr) |
void | setHeader (std::string &head) |
Set the header of the sequence. | |
void | setSeq (std::string &, track *) |
void | setRealSeq (std::vector< double > *, track *) |
bool | getFasta (std::ifstream &file) |
bool | getFasta (std::ifstream &file, track *trk) |
bool | getFasta (std::ifstream &, track *, stateInfo *) |
bool | getMaskedFasta (std::ifstream &, track *) |
bool | getFastq (std::ifstream &, track *) |
bool | getReal (std::ifstream &file) |
bool | getReal (std::ifstream &file, track *trk) |
bool | getReal (std::ifstream &, track *, stateInfo *) |
int | getMaxMask () |
int | getMask (size_t) |
Return the mask at sequence position. | |
std::string | getSymbol (size_t) const |
void | get_index (size_t position, int order, std::pair< Index, Index > &word_index) |
std::string | getHeader () |
Returns the header of the sequence as a std::string. | |
bool | reverseComplement () |
bool | complement () |
bool | reverse () |
bool | digitize () |
Converts the sequence to digital form based on the track alphabet. | |
void | shuffle () |
Shuffles the sequence using std::random_shuffle. | |
std::vector< uint8_t > * | getDigitalSeq () |
uint8_t | operator[] (size_t index) |
void | clear () |
Empty Sequence. |
Private Member Functions | |
bool | _digitize () |
Digitize the sequence. |
Private Attributes | |
bool | realSeq |
std::string | header |
double | attrib |
size_t | length |
track * | seqtrk |
ExDefSequence * | external |
std::vector< uint8_t > * | seq |
std::vector< double > * | real |
std::vector< int > * | mask |
int | max_mask |
std::string | undigitized |
Friends | |
class | sequences |
class | sequenceStream |
A sequence can hold either real numbers (double values) or discrete values (characters or words). The sequence class supports 255 discrete values.
Contains individual sequence information and functions to deal with importing and digitizing the sequence.
Definition at line 51 of file sequence.h.
StochHMM::sequence::sequence | ( | ) |
StochHMM::sequence::sequence | ( | bool | realTrack | ) |
Create a sequence data type
realTrack | True if the sequence is a list of real numbers |
Definition at line 48 of file sequence.cpp.
References attrib, external, length, max_mask, real, realSeq, seq, and seqtrk.
StochHMM::sequence::sequence | ( | std::vector< double > * | vec, |
track * | tr | ||
) |
StochHMM::sequence::sequence | ( | std::string & | sq, |
track * | tr | ||
) |
Create a sequence type
sq | std::string that represents the sequence |
tr | Track to be used to digitize sequence |
Definition at line 116 of file sequence.cpp.
References _digitize(), attrib, external, length, max_mask, real, realSeq, seq, seqtrk, and undigitized.
StochHMM::sequence::sequence | ( | char * | sq, |
track * | tr | ||
) |
Create a sequence type
sq | Character string that represents the sequence |
tr | Track to be used to digitize sequence |
Definition at line 92 of file sequence.cpp.
References _digitize(), attrib, external, length, max_mask, real, realSeq, seq, seqtrk, and undigitized.
StochHMM::sequence::~sequence | ( | ) |
StochHMM::sequence::sequence | ( | const sequence & | rhs | ) |
Copy constructor for sequence
Definition at line 153 of file sequence.cpp.
References attrib, external, header, length, mask, max_mask, real, realSeq, seq, seqtrk, and undigitized.
|
private |
Digitize the sequence.
Definition at line 546 of file sequence.cpp.
References StochHMM::clear_whitespace(), StochHMM::track::getAlphaMax(), seq, seqtrk, StochHMM::stringList::size(), StochHMM::stringList::splitString(), StochHMM::track::symbolIndex(), and undigitized.
Referenced by digitize(), StochHMM::sequenceStream::getFasta(), getFasta(), getFastq(), getMaskedFasta(), getReal(), sequence(), and setSeq().
void StochHMM::sequence::clear | ( | ) |
Empty Sequence.
Definition at line 199 of file sequence.cpp.
References attrib, external, header, length, mask, max_mask, real, seq, seqtrk, and undigitized.
Referenced by getFasta(), getFastq(), getMaskedFasta(), and getReal().
bool StochHMM::sequence::complement | ( | ) |
Complements the sequence.
Definition at line 919 of file sequence.cpp.
References StochHMM::track::getAlphaMax(), StochHMM::track::getComplementIndex(), StochHMM::track::getComplementSymbol(), realSeq, seq, seqtrk, and undigitized.
Referenced by reverseComplement().
bool StochHMM::sequence::digitize | ( | ) |
Converts the sequence to digital form based on the track alphabet.
Definition at line 975 of file sequence.cpp.
References _digitize(), realSeq, seq, and undigitized.
|
inline |
Check to see if exDef is defined for the sequence
Definition at line 103 of file sequence.h.
References external.
Referenced by StochHMM::seqTracks::getNext().
void StochHMM::sequence::get_index | ( | size_t | position, |
int | order, | ||
std::pair< Index, Index > & | word_index | ||
) |
Referenced by StochHMM::Counter::_count().
|
inline |
Get the attribute value for the sequence. Model selection may use this value to determine which model to use.
Definition at line 94 of file sequence.h.
References attrib.
|
inline |
|
inline |
Get pointer to ExDefSequence for the sequence
Definition at line 98 of file sequence.h.
References external.
Referenced by StochHMM::seqTracks::getNext().
|
inline |
Definition at line 147 of file sequence.h.
References getFasta().
Referenced by getFasta(), StochHMM::sequences::getFastas(), and StochHMM::seqTracks::getNext().
|
inline |
Reimplemented in StochHMM::sequenceStream.
Definition at line 148 of file sequence.h.
References getFasta().
Referenced by getFasta().
Extract sequence from a fasta file
file | File stream |
trk | Track to use for digitizing sequence |
Definition at line 412 of file sequence.cpp.
References _digitize(), clear(), external, header, length, StochHMM::ExDefSequence::parse(), seq, seqtrk, and undigitized.
bool StochHMM::sequence::getFastq | ( | std::ifstream & | file, |
track * | trk | ||
) |
Import one fastq entry from the file. FastQ format: Line 1: starts with @; Line 2: sequence (can span multiple lines); Line 3: starts with +; Line 4: quality score (can span multiple lines).
file | File stream to file |
trk | Track to be used to digitize |
Definition at line 615 of file sequence.cpp.
References _digitize(), clear(), header, length, seq, seqtrk, sequence(), and undigitized.
Referenced by StochHMM::seqTracks::getNext().
|
inline |
Returns the header of the sequence as a std::string.
Definition at line 168 of file sequence.h.
References header.
Referenced by StochHMM::PWM::scoreSimple(), StochHMM::PWM::scoreUndefSpacer(), and StochHMM::PWM::scoreVariableSpacer().
|
inline |
Get the size of the sequence.
Definition at line 89 of file sequence.h.
References length.
Referenced by StochHMM::Counter::_count(), StochHMM::sequences::addSeq(), StochHMM::Counter::countGeneral(), StochHMM::Counter::countMask(), StochHMM::Counter::countPeriodic(), StochHMM::Counter::countPWM(), getMask(), StochHMM::PWM::scoreSimple(), StochHMM::PWM::scoreUndefSpacer(), and StochHMM::PWM::scoreVariableSpacer().
int StochHMM::sequence::getMask | ( | size_t | position | ) |
Return the mask at sequence position.
Definition at line 769 of file sequence.cpp.
References getLength(), and mask.
Referenced by StochHMM::Counter::_count().
bool StochHMM::sequence::getMaskedFasta | ( | std::ifstream & | file, |
track * | trk | ||
) |
Definition at line 475 of file sequence.cpp.
References _digitize(), clear(), StochHMM::clear_whitespace(), header, length, mask, max_mask, seq, seqtrk, StochHMM::stringList::size(), StochHMM::stringList::splitString(), StochHMM::stringList::toVecInt(), and undigitized.
|
inline |
Definition at line 159 of file sequence.h.
References max_mask.
Referenced by StochHMM::Counter::countMask().
|
inline |
Definition at line 155 of file sequence.h.
References getReal().
Referenced by StochHMM::seqTracks::getNext(), and getReal().
|
inline |
Definition at line 156 of file sequence.h.
References getReal().
Referenced by getReal().
Import one Real number sequence from the file
file | File stream to file |
trk | Track to be used to digitize |
Definition at line 705 of file sequence.cpp.
References _digitize(), clear(), external, header, length, StochHMM::ExDefSequence::parse(), real, seq, seqtrk, sequence(), StochHMM::stringList::splitString(), and StochHMM::stringList::toVecDouble().
std::string StochHMM::sequence::getSymbol | ( | size_t | pos | ) | const |
Get the symbol (alphabet character or word) for a given position of an alphanumerical sequence
pos | Position within sequence |
Definition at line 823 of file sequence.cpp.
References StochHMM::track::getAlpha(), realSeq, seq, and seqtrk.
Referenced by StochHMM::alignment::align(), and StochHMM::trellis::transitionFuncTraceback().
|
inline |
Get the pointer to the track that is defined for the sequence.
Definition at line 115 of file sequence.h.
References seqtrk.
Referenced by StochHMM::sequences::addSeq(), StochHMM::alignment::setQuery(), and StochHMM::alignment::setTarget().
|
inline |
Get reference to the undigitized sequence. If the sequence hasn't been undigitized yet, it will be undigitized and the result stored. (The sequence is only undigitized once; afterwards the reference to the stored undigitized sequence is returned.)
Definition at line 78 of file sequence.h.
References seq, undigitize(), and undigitized.
Referenced by StochHMM::emissionFuncParam::evaluate().
Definition at line 227 of file sequence.cpp.
References attrib, external, header, length, mask, max_mask, real, realSeq, seq, seqtrk, and undigitized.
|
inline |
|
inline |
Print the string representation of the sequence to stdout. Prints the digitized version.
Definition at line 125 of file sequence.h.
References stringify().
double StochHMM::sequence::realValue | ( | size_t | position | ) |
Get real value of sequence at a position
position | Position in the sequence to get the value |
Definition at line 319 of file sequence.cpp.
Referenced by StochHMM::emm::get_emission().
bool StochHMM::sequence::reverse | ( | ) |
Reverse the sequence. If a mask is defined, the mask will also be reversed.
Definition at line 880 of file sequence.cpp.
References StochHMM::track::getAlphaMax(), mask, real, realSeq, seq, seqtrk, and undigitized.
Referenced by reverseComplement().
bool StochHMM::sequence::reverseComplement | ( | ) |
Reverses and complements the sequence
Definition at line 962 of file sequence.cpp.
References complement(), and reverse().
uint8_t StochHMM::sequence::seqValue | ( | size_t | position | ) |
Get digitized sequence value at a position
position | Position in the sequence to get the value for |
Definition at line 300 of file sequence.cpp.
Referenced by StochHMM::alignment::align().
|
inline |
Set the sequence attribute value
attr | Value of the attribute for the sequence |
Definition at line 138 of file sequence.h.
References attrib.
|
inline |
Set the attribute value.
Set the header of the sequence
head | Header of the sequence |
Definition at line 142 of file sequence.h.
References header.
void StochHMM::sequence::setRealSeq | ( | std::vector< double > * | rl, |
track * | tr | ||
) |
Set the sequence from a vector of doubles
rl | Vector of doubles to be used as real number sequence |
tr | Track to be used to digitize sequence |
Definition at line 808 of file sequence.cpp.
void StochHMM::sequence::setSeq | ( | std::string & | sq, |
track * | tr | ||
) |
Set the sequence from a std::string
sq | Sequence to be used as sequence |
tr | Track to be used to digitize sequence |
Definition at line 789 of file sequence.cpp.
References _digitize(), length, realSeq, seq, seqtrk, and undigitized.
Referenced by StochHMM::random_sequence().
|
inline |
void StochHMM::sequence::shuffle | ( | ) |
Shuffles the sequence using std::random_shuffle.
Definition at line 994 of file sequence.cpp.
References real, realSeq, seq, and undigitized.
|
inline |
Get the size of the sequence
Definition at line 111 of file sequence.h.
References real, realSeq, and seq.
Referenced by StochHMM::alignment::align(), StochHMM::alignment::setMatch(), StochHMM::alignment::setMatrix(), and StochHMM::alignment::setMismatch().
std::string StochHMM::sequence::stringify | ( | ) |
Get std::string representation of the string. If the string is a real track, then it will return a string of doubles. If the string is a non-real track, then it will return a string of shorts, where the shorts are the digitized values of the sequence according to the track.
Definition at line 338 of file sequence.cpp.
References StochHMM::double_to_string(), header, StochHMM::int_to_string(), length, mask, real, realSeq, seq, and undigitized.
Referenced by print(), and undigitize().
std::string StochHMM::sequence::undigitize | ( | ) |
Undigitize the sequence. If the sequence has not been digitized, then it will be returned directly. If the sequence has been digitized, then it will be undigitized and returned.
Get the undigitized value of the string. If the string is a real track, then it will return the same as stringify(). If the string is a non-real track, it will return the undigitized sequence.
Definition at line 374 of file sequence.cpp.
References StochHMM::track::getAlpha(), StochHMM::track::getAlphaMax(), length, realSeq, seq, seqtrk, stringify(), and undigitized.
Referenced by getUndigitized().
|
friend |
Definition at line 69 of file sequence.h.
|
friend |
Definition at line 70 of file sequence.h.
|
private |
Definition at line 198 of file sequence.h.
Referenced by clear(), getAttrib(), operator=(), sequence(), and setAttrib().
|
private |
Definition at line 203 of file sequence.h.
Referenced by clear(), exDefDefined(), getExDef(), getFasta(), getReal(), operator=(), sequence(), and ~sequence().
|
private |
Definition at line 196 of file sequence.h.
Referenced by clear(), StochHMM::sequenceStream::getFasta(), getFasta(), getFastq(), getHeader(), getMaskedFasta(), getReal(), operator=(), sequence(), setHeader(), and stringify().
|
private |
Definition at line 199 of file sequence.h.
Referenced by clear(), StochHMM::sequenceStream::getFasta(), getFasta(), getFastq(), getLength(), getMaskedFasta(), getReal(), operator=(), sequence(), setRealSeq(), setSeq(), stringify(), and undigitize().
|
private |
Definition at line 209 of file sequence.h.
Referenced by clear(), getMask(), getMaskedFasta(), operator=(), reverse(), sequence(), stringify(), and ~sequence().
|
private |
Definition at line 210 of file sequence.h.
Referenced by clear(), getMaskedFasta(), getMaxMask(), operator=(), and sequence().
|
private |
Definition at line 208 of file sequence.h.
Referenced by clear(), getReal(), operator=(), realValue(), StochHMM::sequenceStream::resetSeq(), reverse(), sequence(), setRealSeq(), shuffle(), size(), stringify(), and ~sequence().
|
private |
Definition at line 195 of file sequence.h.
Referenced by complement(), digitize(), getSymbol(), operator=(), realValue(), StochHMM::sequenceStream::resetSeq(), reverse(), sequence(), seqValue(), setRealSeq(), setSeq(), shuffle(), size(), stringify(), and undigitize().
|
private |
Definition at line 207 of file sequence.h.
Referenced by _digitize(), clear(), complement(), digitize(), getDigitalSeq(), StochHMM::sequenceStream::getFasta(), getFasta(), getFastq(), getMaskedFasta(), getReal(), getSymbol(), getUndigitized(), operator=(), operator[](), StochHMM::sequenceStream::resetSeq(), reverse(), sequence(), seqValue(), setSeq(), shuffle(), size(), stringify(), undigitize(), and ~sequence().
|
private |
Definition at line 201 of file sequence.h.
Referenced by _digitize(), clear(), complement(), StochHMM::sequenceStream::getFasta(), getFasta(), getFastq(), getMaskedFasta(), getReal(), getSymbol(), getTrack(), operator=(), reverse(), sequence(), setRealSeq(), setSeq(), setTrack(), undigitize(), and ~sequence().
|
private |
Definition at line 213 of file sequence.h.
Referenced by _digitize(), clear(), complement(), digitize(), StochHMM::sequenceStream::getFasta(), getFasta(), getFastq(), getMaskedFasta(), getUndigitized(), operator=(), reverse(), sequence(), setSeq(), shuffle(), stringify(), and undigitize().