StochHMM  v0.34
Flexible Hidden Markov Model C++ Library and Application
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Public Member Functions | Private Member Functions | Private Attributes | Friends
StochHMM::sequence Class Reference

#include <sequence.h>

Inheritance diagram for StochHMM::sequence:
StochHMM::sequenceStream

List of all members.

Public Member Functions

 sequence ()
 Create a sequence datatype.
 sequence (bool)
 sequence (std::vector< double > *, track *)
 Create a sequence type.
 sequence (std::string &, track *)
 sequence (char *, track *)
 ~sequence ()
 Destroy sequence type.
 sequence (const sequence &)
sequenceoperator= (const sequence &)
std::string * getUndigitized ()
size_t getLength ()
 Get the size of the sequence.
double getAttrib ()
ExDefSequencegetExDef ()
bool exDefDefined ()
double realValue (size_t)
uint8_t seqValue (size_t)
size_t size ()
trackgetTrack ()
void setTrack (track *tr)
void print ()
std::string stringify ()
std::string undigitize ()
void setAttrib (double attr)
void setHeader (std::string &head)
 Set the header of the sequence.
void setSeq (std::string &, track *)
void setRealSeq (std::vector< double > *, track *)
bool getFasta (std::ifstream &file)
bool getFasta (std::ifstream &file, track *trk)
bool getFasta (std::ifstream &, track *, stateInfo *)
bool getMaskedFasta (std::ifstream &, track *)
bool getFastq (std::ifstream &, track *)
bool getReal (std::ifstream &file)
bool getReal (std::ifstream &file, track *trk)
bool getReal (std::ifstream &, track *, stateInfo *)
int getMaxMask ()
int getMask (size_t)
 Return the mask at sequence position.
std::string getSymbol (size_t) const
void get_index (size_t position, int order, std::pair< Index, Index > &word_index)
std::string getHeader ()
 Returns the header of the sequence as a std::string.
bool reverseComplement ()
bool complement ()
bool reverse ()
bool digitize ()
 Converts the sequence to its digital form based on the track alphabet.
void shuffle ()
 Shuffles the sequence using std::random_shuffle.
std::vector< uint8_t > * getDigitalSeq ()
uint8_t operator[] (size_t index)
void clear ()
 Empty Sequence.

Private Member Functions

bool _digitize ()
 Digitize the sequence.

Private Attributes

bool realSeq
std::string header
double attrib
size_t length
trackseqtrk
ExDefSequenceexternal
std::vector< uint8_t > * seq
std::vector< double > * real
std::vector< int > * mask
int max_mask
std::string undigitized

Friends

class sequences
class sequenceStream

Detailed Description

A sequence holds either real numbers (double values) or discrete symbols (characters or words). The sequence class supports 255 discrete values.

Contains individual sequence information and functions to deal with importing and digitizing the sequence

Definition at line 51 of file sequence.h.


Constructor & Destructor Documentation

StochHMM::sequence::sequence ( )

Create a sequence datatype.

Definition at line 34 of file sequence.cpp.

References attrib, external, length, mask, max_mask, real, realSeq, seq, and seqtrk.

Referenced by getFastq(), and getReal().

{
    // Default state: an empty, non-real sequence that holds no buffers yet.
    realSeq  = false;
    real     = NULL;
    seq      = NULL;
    mask     = NULL;
    seqtrk   = NULL;
    external = NULL;
    length   = 0;
    attrib   = -INFINITY;
    // -1 is the "no mask" value that clear() also assigns; without it
    // getMaxMask() would read an indeterminate value.
    max_mask = -1;
}
StochHMM::sequence::sequence ( bool  realTrack)

Create a sequence data type

Parameters:
realTrackTrue if the sequence is a list of real numbers

Definition at line 48 of file sequence.cpp.

References attrib, external, length, max_mask, real, realSeq, seq, and seqtrk.

:mask(NULL){
    // Allocate only the buffer that matches the requested sequence kind;
    // the other buffer pointer stays NULL.
    if (realTrack){
        real=new(std::nothrow) std::vector<double>;
        if (real==NULL){
            std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
            exit(1);
        }
        seq=NULL;
    }
    else{
        real=NULL;
        seq=new(std::nothrow) std::vector<uint8_t>;
        if (seq==NULL){
            std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
            exit(1);
        }
    }
    realSeq = realTrack;
    seqtrk = NULL;
    length = 0;
    external= NULL;
    attrib = -INFINITY;
    // Same "no mask" value that clear() assigns; previously left uninitialised.
    max_mask = -1;
}
StochHMM::sequence::sequence ( std::vector< double > *  vec,
track tr 
)

Create a sequence type.

Parameters:
vec Vector of doubles to be used as the real numbers for the sequence
trPointer to the track to be used

Definition at line 79 of file sequence.cpp.

References attrib, external, length, max_mask, real, realSeq, and seqtrk.

:mask(NULL){
    // Builds a real-valued sequence around the caller's vector; the pointer
    // itself is stored (no copy is made), and the destructor deletes `real`.
    realSeq = true;
    seqtrk = tr;
    real = vec;
    seq = NULL;          // no digitized buffer for a real-valued sequence
    external= NULL;
    attrib = -INFINITY;
    max_mask = -1;       // "no mask" value, matching clear()
    // A null vector yields an empty sequence instead of a null dereference.
    length = (vec != NULL) ? vec->size() : 0;
}
StochHMM::sequence::sequence ( std::string &  sq,
track tr 
)

Create a sequence type

Parameters:
sqstd::string string that represent sequence
trTrack to be used to digitize sequence

Definition at line 116 of file sequence.cpp.

References _digitize(), attrib, external, length, max_mask, real, realSeq, seq, seqtrk, and undigitized.

:mask(NULL){
    // Builds a discrete sequence from the text in `sq`, digitized with `tr`.
    length = 0;
    real = NULL;
    seq = new(std::nothrow) std::vector<uint8_t>;
    if (seq==NULL){
        std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
        exit(1);
    }
    external= NULL;
    attrib = -INFINITY;
    realSeq = false;
    seqtrk = tr;
    max_mask = -1;       // "no mask" value, matching clear()
    // Previously `sq` was never stored, so the object was always empty.
    // _digitize() reports the problem itself when no track is defined; in
    // that case the raw text stays in `undigitized` and seq stays empty.
    undigitized = sq;
    _digitize();
    length=seq->size();
}
StochHMM::sequence::sequence ( char *  sq,
track tr 
)

Create a sequence type

Parameters:
sqCharacter string that represent sequence
trTrack to be used to digitize sequence

Definition at line 92 of file sequence.cpp.

References _digitize(), attrib, external, length, max_mask, real, realSeq, seq, seqtrk, and undigitized.

:mask(NULL){
    // Builds a discrete sequence from the C string `sq`, digitized with `tr`.
    length = 0;
    real = NULL;
    seq = new(std::nothrow) std::vector<uint8_t>;
    if (seq==NULL){
        std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
        exit(1);
    }
    external= NULL;
    attrib = -INFINITY;
    realSeq = false;
    seqtrk = tr;
    max_mask = -1;       // "no mask" value, matching clear()
    // Previously `sq` was ignored. A null pointer is treated as empty text
    // because constructing std::string from NULL is undefined behaviour.
    if (sq != NULL){
        undigitized = sq;
    }
    // _digitize() reports the problem itself when no track is defined.
    _digitize();
    length=seq->size();
}
StochHMM::sequence::~sequence ( )

Destroy sequence type.

Definition at line 137 of file sequence.cpp.

References external, mask, real, seq, and seqtrk.

{
    // Free the three buffers this object deletes (delete on NULL is a no-op)
    // and null each pointer right after.
    delete seq;
    seq = NULL;

    delete real;
    real = NULL;

    delete mask;
    mask = NULL;

    // The track and the external definition are only detached here,
    // not deleted.
    seqtrk = NULL;
    external = NULL;
}
StochHMM::sequence::sequence ( const sequence rhs)

Copy constructor for sequence

Definition at line 153 of file sequence.cpp.

References attrib, external, header, length, mask, max_mask, real, realSeq, seq, seqtrk, and undigitized.

{
    // Scalar and string members are copied as-is.
    realSeq     = rhs.realSeq;
    header      = rhs.header;
    attrib      = rhs.attrib;
    length      = rhs.length;
    seqtrk      = rhs.seqtrk;
    external    = rhs.external; //Need copy constructor for this
    max_mask    = rhs.max_mask;
    undigitized = rhs.undigitized;

    // Each buffer rhs holds is duplicated; absent buffers stay NULL.
    seq = NULL;
    if (rhs.seq != NULL){
        seq = new(std::nothrow) std::vector<uint8_t>(*rhs.seq);
        if (seq == NULL){
            std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
            exit(1);
        }
    }

    real = NULL;
    if (rhs.real != NULL){
        real = new(std::nothrow) std::vector<double>(*rhs.real);
        if (real == NULL){
            std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
            exit(1);
        }
    }

    mask = NULL;
    if (rhs.mask != NULL){
        mask = new(std::nothrow) std::vector<int>(*rhs.mask);
        if (mask == NULL){
            std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
            exit(1);
        }
    }
}

Member Function Documentation

bool StochHMM::sequence::_digitize ( )
private

Digitize the sequence.

Definition at line 546 of file sequence.cpp.

References StochHMM::clear_whitespace(), StochHMM::track::getAlphaMax(), seq, seqtrk, StochHMM::stringList::size(), StochHMM::stringList::splitString(), StochHMM::track::symbolIndex(), and undigitized.

Referenced by digitize(), StochHMM::sequenceStream::getFasta(), getFasta(), getFastq(), getMaskedFasta(), getReal(), sequence(), and setSeq().

{
    // Converts the text in `undigitized` into symbol indices stored in `seq`,
    // using the alphabet of `seqtrk`. Returns false (and leaves everything
    // untouched) when no track is defined; otherwise returns true and
    // empties `undigitized`.
    if (seqtrk==NULL){
        std::cerr << "Can't digitize sequence without a valid track defined\n";
        return false;
    }

    // Lines are read with getline(...,'\n'), which leaves any '\r' in place;
    // strip line terminators so they are never looked up as alphabet symbols.
    clear_whitespace(undigitized, "\n\r");

    if (seqtrk->getAlphaMax()>1){
        // Multi-character alphabet: symbols are separated by space, comma or tab.
        stringList lst;
        lst.splitString(undigitized, " ,\t");
        const size_t count = lst.size();
        if (seq == NULL){
            seq = new std::vector<uint8_t>(count);
        }
        else{
            seq->assign(count,0);
        }
        for (size_t i=0;i<count;i++){
            // NOTE: ambiguous symbols are not validated here (that check is
            // disabled in the original code).
            (*seq)[i] = seqtrk->symbolIndex(lst[i]);
        }
    }
    else{
        // Single-character alphabet: every character is one symbol.
        const size_t count = undigitized.size();
        if (seq == NULL){
            seq = new std::vector<uint8_t>(count);
        }
        else{
            seq->assign(count,0);
        }
        for (size_t i=0; i<count;i++){
            (*seq)[i] = seqtrk->symbolIndex(undigitized[i]);
        }
    }

    undigitized.clear(); //Once the sequence is digitized we don't need the old sequence string
    return true;
}
void StochHMM::sequence::clear ( )

Empty Sequence.

Definition at line 199 of file sequence.cpp.

References attrib, external, header, length, mask, max_mask, real, seq, seqtrk, and undigitized.

Referenced by getFasta(), getFastq(), getMaskedFasta(), and getReal().

{
    // Resets the object to an empty sequence. The seq/real vectors are kept
    // (only emptied); mask and the external definition are freed.
    header = "";
    // Leftover raw text would otherwise be prepended to the next sequence
    // read by getFasta()/getFastq(), which append to `undigitized`.
    undigitized.clear();
    max_mask = -1;

    delete mask;            // delete on NULL is a no-op
    mask = NULL;

    if (real!=NULL){
        real->clear();
    }
    if (seq!=NULL){
        seq->clear();
    }

    // Null the pointer after deleting: previously it was left dangling, so
    // exDefDefined()/getExDef() exposed freed memory and a second clear()
    // deleted it twice.
    delete external;
    external = NULL;

    seqtrk = NULL;
    length = 0;
    attrib = -INFINITY;
}
bool StochHMM::sequence::complement ( )

Complements the sequence.

Returns:
true if complementation was successful

Definition at line 919 of file sequence.cpp.

References StochHMM::track::getAlphaMax(), StochHMM::track::getComplementIndex(), StochHMM::track::getComplementSymbol(), realSeq, seq, seqtrk, and undigitized.

Referenced by reverseComplement().

{
    // Real-valued sequences have no complement.
    if (realSeq){
        std::cerr<< "sequence::complement isn't defined for real valued sequences\n";
        return false;
    }

    // The complement table lives in the track.
    if (seqtrk==NULL){
        std::cerr << "StochHMM::track is not defined. Can't complement without defined complement in track\n";
        return false;
    }

    // Digitized form: map every index to its complement index in place.
    if (seq!=NULL){
        const size_t count = seq->size();
        for (size_t pos = 0; pos < count; pos++) {
            (*seq)[pos] = seqtrk->getComplementIndex((*seq)[pos]);
        }
        return true;
    }

    // Undigitized form: only possible for single-character alphabets.
    const size_t text_size = undigitized.size();
    if (text_size == 0){
        std::cerr << "No sequence defined\n";
        return false;
    }

    if (seqtrk->getAlphaMax() != 1){
        std::cerr << "Complement on undigitized sequence isn't defined for track alphabets that are more than one character\n";
        return false;
    }

    for (size_t pos = 0; pos < text_size; pos++){
        std::string comp = seqtrk->getComplementSymbol(undigitized[pos]);
        undigitized[pos] = comp[0];
    }
    return true;
}
bool StochHMM::sequence::digitize ( )

Converts the sequence to its digital form based on the track alphabet.

Definition at line 975 of file sequence.cpp.

References _digitize(), realSeq, seq, and undigitized.

{
    // Public entry point for digitizing. Returns true when the sequence is
    // (or already was) in digital form, false when there is nothing to convert
    // or the conversion fails.
    if (realSeq){
        // Real-valued sequences are not digitized.
        return true;
    }
    else if (undigitized.size() > 0){
        // Previously this branch returned true without converting anything.
        return _digitize();
    }
    else if (seq!=NULL){
        std::cerr << "Digitized sequence already exists\n";
        return true;
    }
    std::cerr << "No undigitized sequence exists to convert\n";
    return false;
}
bool StochHMM::sequence::exDefDefined ( )
inline

Check to see if exDef is defined for the sequence

Returns:
true if ExDefSequence is defined for sequence
false if no External definition exists for sequence

Definition at line 103 of file sequence.h.

References external.

Referenced by StochHMM::seqTracks::getNext().

{return external != NULL;}; // true exactly when an ExDefSequence pointer is set
void StochHMM::sequence::get_index ( size_t  position,
int  order,
std::pair< Index, Index > &  word_index 
)
double StochHMM::sequence::getAttrib ( )
inline

Get the attribute value for the sequence. Selection of a model may use this value to determine which model to use.

See also:
setAttrib

Definition at line 94 of file sequence.h.

References attrib.

// Plain accessor: hands back the stored attribute value unchanged.
{return attrib;}; //Returns the Attribute value for the sequence
std::vector<uint8_t>* StochHMM::sequence::getDigitalSeq ( )
inline

Definition at line 180 of file sequence.h.

References seq.

// Returns the internal pointer to the digitized symbol vector (not a copy);
// the pointer may be NULL, e.g. on a default-constructed sequence.
{return seq;}
ExDefSequence* StochHMM::sequence::getExDef ( )
inline

Get pointer to ExDefSequence for the sequence

Returns:
ExDefSequence*

Definition at line 98 of file sequence.h.

References external.

Referenced by StochHMM::seqTracks::getNext().

// Returns the stored ExDefSequence pointer as-is; it is NULL when no
// external definition has been set.
{return external;};
bool StochHMM::sequence::getFasta ( std::ifstream &  file)
inline

Definition at line 147 of file sequence.h.

References getFasta().

Referenced by getFasta(), StochHMM::sequences::getFastas(), and StochHMM::seqTracks::getNext().

// Convenience overload: forwards to the three-argument getFasta with no
// track and no stateInfo.
{return getFasta(file,NULL,NULL);}
bool StochHMM::sequence::getFasta ( std::ifstream &  file,
track trk 
)
inline

Reimplemented in StochHMM::sequenceStream.

Definition at line 148 of file sequence.h.

References getFasta().

Referenced by getFasta().

// Convenience overload: forwards to the three-argument getFasta with the
// given track and no stateInfo.
{ return getFasta(file,trk,NULL);}
bool StochHMM::sequence::getFasta ( std::ifstream &  file,
track trk,
stateInfo info 
)

Extract sequence from a fasta file

Parameters:
fileFile stream
trkTrack to use for digitizing sequence
Returns:
true if function was able to get a sequence from the file

Definition at line 412 of file sequence.cpp.

References _digitize(), clear(), external, header, length, StochHMM::ExDefSequence::parse(), seq, seqtrk, and undigitized.

{
    // Reads the next FASTA record from `file`: skips to the next '>' line,
    // stores it in `header`, accumulates the following lines and digitizes
    // them with `trk`. A line starting with '[' begins an external
    // definition, which requires `info`. Returns the result of digitizing.
    if (seq!=NULL){
        this->clear();
    }
    seqtrk=trk;
    if (!file.good()){
        return false;
    }

    //Find next header mark
    while(file.peek() != '>'){
        std::string temp;
        getline(file,temp,'\n');
        if (!file.good()){
            std::cerr << "Sequence doesn't contain a header \">\" "<< std::endl;
            return false;
        }
    }
    getline(file,header,'\n');

    bool success(false);

    //get sequence
    std::string line;
    while(getline(file,line,'\n')){
        undigitized+=line;

        // peek() returns an int. Storing it in a char breaks the EOF test
        // where char is unsigned and mistakes byte 0xFF for EOF where it is
        // signed.
        const int nl_peek=file.peek(); // see if we have new sequence header on the next line
        if (nl_peek=='>'){
            success = _digitize();
            break;
        }
        else if (nl_peek=='['){
            success = _digitize();
            if (info == NULL){
                std::cerr << "Found brackets [] in fasta sequence.\nHEADER: " << header << "\nCan't import External Definitions without stateInfo from HMM model. Pass stateInfo from model to " << __FUNCTION__ << std::endl;
                exit(2);
            }
            if (seq == NULL){
                // Digitizing failed and left no buffer to size the external
                // definition from; stop instead of dereferencing NULL.
                break;
            }
            external= new (std::nothrow) ExDefSequence(seq->size());
            if (external==NULL){
                std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
                exit(1);
            }
            external->parse(file, *info);
        }
        else if (nl_peek==EOF){
            getline(file,line,'\n');
            success = _digitize();
            break;
        }
    }

    // seq is still NULL when digitizing failed on an object that never had
    // a buffer (e.g. no track was supplied).
    length = (seq != NULL) ? seq->size() : 0;
    return success;
}
bool StochHMM::sequence::getFastq ( std::ifstream &  file,
track trk 
)

Import one FASTQ entry from the file. FASTQ format: Line 1 starts with @ (header); Line 2 is the sequence and can span multiple lines; Line 3 starts with +; Line 4 is the quality score and can span multiple lines.

Parameters:
fileFile stream to file
trk Track to be used to digitize the sequence
Returns:
true if the sequence was successfully imported

Definition at line 615 of file sequence.cpp.

References _digitize(), clear(), header, length, seq, seqtrk, sequence(), and undigitized.

Referenced by StochHMM::seqTracks::getNext().

{
    // Reads one FASTQ record: '@' header line, sequence line(s), '+' separator
    // line, quality line(s). The sequence text is digitized with `trk`; the
    // quality text is read only to find the end of the record. Returns true
    // when a record was read and digitized.
    if (seq!=NULL){
        this->clear();
    }
    seqtrk=trk;
    if (!file.good()){
        return false;
    }

    //Skip forward to the line that starts with "@"
    while(file.peek() != '@' && file.good()){
        std::string skipped;
        getline(file,skipped,'\n');
    }
    if (!file.good()){
        return false;
    }

    //Get Header (One line)
    getline(file,header,'\n');
    if (!file.good()){
        return false;
    }

    //Get sequence (Multiple Lines). Previously the lines were read but never
    //stored, so nothing was ever imported.
    std::string line;
    while(getline(file,line,'\n')){
        undigitized+=line;
        const int nl_peek=file.peek(); // int, so the EOF comparison is reliable
        if (nl_peek=='+' || nl_peek==EOF){
            break;
        }
    }
    if (!file.good()){
        return false;
    }

    //Consume the "+" separator so it is not counted as quality text.
    getline(file,line,'\n');

    //Get Quality String (Multiple Lines)
    std::string quality_string;
    while(getline(file,line,'\n')){
        quality_string+=line;
        const int nl_peek=file.peek();
        if (nl_peek==EOF){
            break;
        }
        // A quality line may itself begin with '@'; only treat '@' as the next
        // record once the quality text covers the whole sequence.
        if (nl_peek=='@' && quality_string.size() >= undigitized.size()){
            break;
        }
    }

    // Digitize last: _digitize() empties `undigitized`, which the length
    // comparison above still needs.
    const bool success = _digitize();
    length = (seq != NULL) ? seq->size() : 0;
    return success;
}
std::string StochHMM::sequence::getHeader ( )
inline

Returns the header of the sequence as a std::string.

Definition at line 168 of file sequence.h.

References header.

Referenced by StochHMM::PWM::scoreSimple(), StochHMM::PWM::scoreUndefSpacer(), and StochHMM::PWM::scoreVariableSpacer().

// Returns a copy of the stored header string.
{ return header; }
size_t StochHMM::sequence::getLength ( )
inline
int StochHMM::sequence::getMask ( size_t  position)

Return the mask at sequence position.

Definition at line 769 of file sequence.cpp.

References getLength(), and mask.

Referenced by StochHMM::Counter::_count().

{
    // Both failure cases terminate the program; only a valid lookup returns.
    if (mask == NULL){
        std::cerr << "No Mask information.\n";
        exit(1);
    }

    if (position >= getLength()){
        std::cerr << "Position exceeds sequence length.\n";
        exit(1);
    }

    return (*mask)[position];
}
bool StochHMM::sequence::getMaskedFasta ( std::ifstream &  file,
track trk 
)

Definition at line 475 of file sequence.cpp.

References _digitize(), clear(), StochHMM::clear_whitespace(), header, length, mask, max_mask, seq, seqtrk, StochHMM::stringList::size(), StochHMM::stringList::splitString(), StochHMM::stringList::toVecInt(), and undigitized.

{
    // Reads one masked FASTA record: a '>' header line, one sequence line and
    // one mask line (integers separated by space, comma or tab, one per
    // sequence position). Returns true when both the sequence and a mask of
    // matching length were imported.
    if (seq!=NULL){
        this->clear();
    }
    seqtrk=trk;
    if (!file.good()){
        return false;
    }

    //Find next header mark
    while(file.peek() != '>'){
        std::string temp;
        getline(file,temp,'\n');
        if (!file.good()){
            std::cerr << "Sequence doesn't contain a header \">\" "<< std::endl;
            return false;
        }
    }

    getline(file,header,'\n');
    if (!file.good()){
        return false;
    }

    getline(file,undigitized,'\n');
    const bool success = _digitize();
    if (seq == NULL){
        // Digitizing failed before any buffer existed (e.g. no track);
        // previously this dereferenced a NULL seq.
        length = 0;
        return false;
    }
    length=seq->size();

    if (!file.good()){
        return false;
    }

    std::string mask_string;
    getline(file,mask_string,'\n');

    //split string on space, comma, tab
    stringList lst;
    clear_whitespace(mask_string,"\n\r");
    lst.splitString(mask_string, " ,\t");

    if (lst.size() != length) {
        std::cerr << "Mask length not equal to Sequence length." << std::endl;
        return false;
    }

    //alloc mask vector (delete on NULL is a no-op)
    delete mask;
    mask=new(std::nothrow) std::vector<int>;
    if (mask==NULL){
        std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
        exit(1);
    }

    //pass vector to lst.toVecInt
    lst.toVecInt(*mask);

    // max_element on an empty range returns end(), which must not be
    // dereferenced; -1 is the "no mask" value clear() uses.
    max_mask = mask->empty() ? -1 : *std::max_element(mask->begin(),mask->end());
    return success;
}
int StochHMM::sequence::getMaxMask ( )
inline

Definition at line 159 of file sequence.h.

References max_mask.

Referenced by StochHMM::Counter::countMask().

// Returns the stored maximum mask value (getMaskedFasta() sets it to the
// largest mask entry; clear() resets it to -1).
{return max_mask;}
bool StochHMM::sequence::getReal ( std::ifstream &  file)
inline

Definition at line 155 of file sequence.h.

References getReal().

Referenced by StochHMM::seqTracks::getNext(), and getReal().

// Convenience overload: forwards to the three-argument getReal with no
// track and no stateInfo.
{return getReal(file,NULL,NULL);}
bool StochHMM::sequence::getReal ( std::ifstream &  file,
track trk 
)
inline

Definition at line 156 of file sequence.h.

References getReal().

Referenced by getReal().

// Convenience overload: forwards to the three-argument getReal with the
// given track and no stateInfo.
{ return getReal(file,trk,NULL);}
bool StochHMM::sequence::getReal ( std::ifstream &  file,
track trk,
stateInfo info 
)

Import one Real number sequence from the file

Parameters:
fileFile stream to file
trk Track to be used for the sequence
Returns:
true if the sequence was successfully imported

Definition at line 705 of file sequence.cpp.

References _digitize(), clear(), external, header, length, StochHMM::ExDefSequence::parse(), real, seq, seqtrk, sequence(), StochHMM::stringList::splitString(), and StochHMM::stringList::toVecDouble().

{
    // Reads one real-valued record: an '@' header line followed by lines of
    // numbers separated by comma, tab or space. A line starting with '['
    // begins an external definition, which requires `info`. Exits the program
    // when no header is found; otherwise returns true.
    if (real!=NULL){
        this->clear();
    }
    else{
        // A default-constructed object has no buffer; previously the
        // push_back below dereferenced NULL.
        real=new(std::nothrow) std::vector<double>;
        if (real==NULL){
            std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
            exit(1);
        }
        realSeq = true;   // values just imported are reached through realValue()
    }
    seqtrk=trk;

    //get header. good() rather than !eof(): a stream that failed without
    //reaching EOF would otherwise loop forever.
    while(file.peek() != '@' && file.good()){
        std::string temp;
        getline(file,temp,'\n');
    }
    if (!file.good()){
        std::cerr << "No header found for sequence. Header should start line with \"@\".\n";
        exit(2);
    }

    // Keep the header line; previously it was read into a local and dropped,
    // so `header` (printed in the error below) stayed empty.
    getline(file,header,'\n');

    //get sequence
    std::string line;
    stringList lst;
    while(getline(file,line,'\n')){
        lst.splitString(line,",\t ");
        std::vector<double> values (lst.toVecDouble());
        real->insert(real->end(), values.begin(), values.end());

        const int nl_peek=file.peek(); // int, so the EOF comparison is reliable
        // '@' starts the next real-number record (the header mark searched
        // for above); '>' is kept for backward compatibility.
        if (nl_peek=='>' || nl_peek=='@' || nl_peek==EOF){
            break;
        }
        if (nl_peek=='['){
            if (info == NULL){
                std::cerr << "Found brackets [] in fasta sequence.\nHEADER: " << header << "\nCan't import External Definitions without stateInfo from HMM model. Pass stateInfo from model to " << __FUNCTION__ << std::endl;
                exit(2);
            }
            // Size from `real`: `seq` is the digitized buffer and is NULL for
            // a real-valued sequence.
            external= new (std::nothrow) ExDefSequence(real->size());
            if (external==NULL){
                std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
                exit(1);
            }
            external->parse(file, *info);
        }
    }

    length=real->size();
    return true;
}
std::string StochHMM::sequence::getSymbol ( size_t  pos) const

Get the symbol (alphabet character or word) for a given position of an alphanumerical sequence

Parameters:
posPosition within sequence
Returns:
string String of the symbol
See also:
track::getAlpha(short)

Definition at line 823 of file sequence.cpp.

References StochHMM::track::getAlpha(), realSeq, seq, and seqtrk.

Referenced by StochHMM::alignment::align(), and StochHMM::trellis::transitionFuncTraceback().

{
    // Symbols exist only for discrete sequences that have a track; each
    // failure case logs and yields an empty string.
    std::string symbol;
    if (realSeq){
        std::cerr << "Can't get symbol of real values\n";
    }
    else if (seqtrk==NULL){
        std::cerr << "track is undefined for sequence\n";
    }
    else{
        symbol = seqtrk->getAlpha((*seq)[pos]);
    }
    return symbol;
}
track* StochHMM::sequence::getTrack ( )
inline

Get the pointer to the track that is defined for the sequence;

Returns:
pointer to track

Definition at line 115 of file sequence.h.

References seqtrk.

Referenced by StochHMM::sequences::addSeq(), StochHMM::alignment::setQuery(), and StochHMM::alignment::setTarget().

// Returns the stored track pointer as-is (NULL when no track is set).
{return seqtrk;};
std::string* StochHMM::sequence::getUndigitized ( )
inline

Get a pointer to the undigitized sequence. If the sequence has not been undigitized yet, it is undigitized and the result is stored. (The sequence is only undigitized once; later calls return a pointer to the stored undigitized sequence.)

Definition at line 78 of file sequence.h.

References seq, undigitize(), and undigitized.

Referenced by StochHMM::emissionFuncParam::evaluate().

{
    // Returns a pointer to the cached undigitized text. The cache is rebuilt
    // from the digitized form only when it is empty and digitized data exists;
    // previously both branches returned the (possibly empty) cache without
    // ever calling undigitize(), and a NULL seq was dereferenced.
    if (undigitized.empty() && seq != NULL && !seq->empty()){
        undigitized = undigitize();
    }
    return &undigitized;
}
sequence & StochHMM::sequence::operator= ( const sequence rhs)

Definition at line 227 of file sequence.cpp.

References attrib, external, header, length, mask, max_mask, real, realSeq, seq, seqtrk, and undigitized.

{
    // Copy assignment: frees the current buffers, then copies rhs.
    // seq/real/mask are deep-copied; `external` is copied as a pointer only.

    // Self-assignment would free our own buffers and then copy from them.
    if (this == &rhs){
        return *this;
    }

    //Clean up if necessary (delete on NULL is a no-op)
    // The copy constructor shares `external` between objects, so only free it
    // when rhs does not hold the very same pointer; otherwise the pointer
    // copied below would already be dangling.
    if (external != rhs.external){
        delete external;
    }
    external = NULL;

    delete seq;
    seq = NULL;

    delete real;
    real = NULL;    // was `seq = NULL`, which left `real` pointing at freed memory

    delete mask;
    mask = NULL;

    //Copy rhs over to this
    realSeq = rhs.realSeq;
    header = rhs.header;
    attrib = rhs.attrib;
    length = rhs.length;
    seqtrk = rhs.seqtrk;
    external= rhs.external;   // NOTE(review): shallow copy, as in the copy constructor
    max_mask= rhs.max_mask;
    undigitized=rhs.undigitized;

    if (rhs.seq!=NULL){
        seq = new(std::nothrow) std::vector<uint8_t>(*rhs.seq);
        if (seq==NULL){
            std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
            exit(1);
        }
    }

    if (rhs.real!=NULL){
        real = new(std::nothrow) std::vector<double>(*rhs.real);
        if (real==NULL){
            std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
            exit(1);
        }
    }

    if (rhs.mask!=NULL){
        mask = new(std::nothrow) std::vector<int>(*rhs.mask);
        if (mask==NULL){
            std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
            exit(1);
        }
    }

    return *this;
}
uint8_t StochHMM::sequence::operator[] ( size_t  index)
inline

Definition at line 182 of file sequence.h.

References seq.

// Unchecked access to the digitized value at `index`: dereferences seq
// directly, with no NULL or range check.
{return (*seq)[index];}
void StochHMM::sequence::print ( )
inline

Print the string representation of the sequence to stdout. Prints the digitized version.

Definition at line 125 of file sequence.h.

References stringify().

// Writes the output of stringify() followed by std::endl to std::cout.
{std::cout << stringify() << std::endl;}; //Print sequence to stdout
double StochHMM::sequence::realValue ( size_t  position)

Get real value of sequence at a position

Parameters:
positionPosition in the sequence to get the value
Returns:
real value at the given position

Definition at line 319 of file sequence.cpp.

References real, and realSeq.

Referenced by StochHMM::emm::get_emission().

{
    // Only real-valued sequences can be queried here; every failure case
    // terminates the program.
    if (!realSeq){
        std::cerr << "Invalid sequence type queried in sequence class" <<std::endl;
        exit(1);
    }

    if (real == NULL){
        std::cerr << "Values have not been imported.\n";
        exit(1);
    }

    return (*real)[position];
};
bool StochHMM::sequence::reverse ( )

Reverse the sequence; If mask is defined, the mask will also be reversed

Returns:
true if reverse was successfully performed on sequence

Definition at line 880 of file sequence.cpp.

References StochHMM::track::getAlphaMax(), mask, real, realSeq, seq, seqtrk, and undigitized.

Referenced by reverseComplement().

{
    // Reverses whichever representation the sequence currently holds.
    // Returns true when something was reversed.
    if (realSeq){
        if (real!=NULL){
            std::reverse(real->begin(), real->end());
            return true;
        }
    }
    else if (seq!=NULL){
        std::reverse(seq->begin(), seq->end());
        // Keep the mask aligned with the sequence positions.
        if (mask!=NULL){
            std::reverse(mask->begin(), mask->end());
        }
        return true;
    }

    //Handle non-digitized sequence
    if (seqtrk!=NULL){
        size_t max_size = seqtrk->getAlphaMax();
        if (max_size ==1){
            if (undigitized.size()>0){
                // Previously this reported success without reversing anything.
                std::reverse(undigitized.begin(), undigitized.end());
                return true;
            }
            else{
                std::cerr << "No sequence is defined to reverse\n";
            }
        }
        else{
            std::cerr << "Reverse on undigitized sequence isn't defined for track alphabets that are more than one character\n";
        }
    }
    return false;
}
bool StochHMM::sequence::reverseComplement ( )

Reverses and complements the sequence

Returns:
true if both reverse and complement were successful

Definition at line 962 of file sequence.cpp.

References complement(), and reverse().

{
    // Reverse first; the complement step only runs when the reversal worked.
    const bool reversed = reverse();
    if (!reversed){
        std::cerr << "Unable to perform reverseComplement on sequence because reverse failed\n";
        return false;
    }

    const bool complemented = complement();
    if (!complemented){
        std::cerr << "Unable to perform reverseComplement on sequence because complement failed\n";
        return false;
    }

    return true;
}
uint8_t StochHMM::sequence::seqValue ( size_t  position)

Get digitized sequence value at a position

Parameters:
positionPosition in the sequence to get the value for
Returns:
short integer value of the symbol at the position

Definition at line 300 of file sequence.cpp.

References realSeq, and seq.

Referenced by StochHMM::alignment::align().

{
    // Only discrete sequences can be queried here; every failure case
    // terminates the program.
    if (realSeq){
        std::cerr << "Invalid sequence type queried in sequence class" <<std::endl;
        exit(1);
    }

    if (seq == NULL){
        std::cerr << "sequence has not been digitized. \n";
        exit(1);
    }

    return (*seq)[position];
};
void StochHMM::sequence::setAttrib ( double  attr)
inline

Set the sequence attribute value

Parameters:
attrValue of attributes for sequence;

Definition at line 138 of file sequence.h.

References attrib.

// Plain setter: stores `attr` as the sequence's attribute value.
{attrib=attr;}; //!Set the attribute value
void StochHMM::sequence::setHeader ( std::string &  head)
inline

Set the sequence header.

Set the header of the sequence

Parameters:
headHeader of the sequence

Definition at line 142 of file sequence.h.

References header.

// Plain setter: copies `head` into the stored header string.
{header=head;};
void StochHMM::sequence::setRealSeq ( std::vector< double > *  rl,
track tr 
)

Set the sequence from a vector of doubles

Parameters:
rlVector of doubles to be used as real number sequence
trTrack to be used to digitize sequence

Definition at line 808 of file sequence.cpp.

References length, real, realSeq, and seqtrk.

{
    // Switch to real-valued mode and store the caller's vector pointer
    // (the pointer is kept as given; no copy is made).
    realSeq = true;
    real    = rl;
    seqtrk  = tr;
    length  = rl->size();
}
void StochHMM::sequence::setSeq ( std::string &  sq,
track tr 
)

Set the sequence from a std::string

Parameters:
sqSequence to be used as sequence
trTrack to be used to digitize sequence

Definition at line 789 of file sequence.cpp.

References _digitize(), length, realSeq, seq, seqtrk, and undigitized.

Referenced by StochHMM::random_sequence().

{
    // Replaces the sequence with the text in `sq`. With a track the text is
    // digitized immediately; without one it is kept undigitized.
    realSeq= false;
    // Previously `sq` was never stored, so there was nothing to digitize and
    // `seq->size()` below could dereference NULL.
    undigitized = sq;
    if (tr!= NULL){
        seqtrk = tr;
        _digitize();
        length = (seq != NULL) ? seq->size() : 0;
    }
    else{
        length = sq.size();
    }
    return;
}
void StochHMM::sequence::setTrack ( track tr)
inline

Definition at line 117 of file sequence.h.

References seqtrk.

{
    // Stores the track pointer; nothing else is touched.
    seqtrk = tr;
}
void StochHMM::sequence::shuffle ( )

Shuffles the sequence using std::random_shuffle.

Definition at line 994 of file sequence.cpp.

References real, realSeq, seq, and undigitized.

{
    // Shuffle whichever representation is in use: real values, digitized
    // symbols, or the raw undigitized text.
    if (realSeq){
        std::random_shuffle(real->begin(), real->end());
        return;
    }

    if (seq!=NULL){
        std::random_shuffle(seq->begin(), seq->end());
        return;
    }

    std::random_shuffle(undigitized.begin(), undigitized.end());
}
size_t StochHMM::sequence::size ( void  )
inline

Get the size of the sequence

Returns:
size_t size of the sequence

Definition at line 111 of file sequence.h.

References real, realSeq, and seq.

Referenced by StochHMM::alignment::align(), StochHMM::alignment::setMatch(), StochHMM::alignment::setMatrix(), and StochHMM::alignment::setMismatch().

{return realSeq ? real->size() : seq->size();}; // Returns size of sequence
std::string StochHMM::sequence::stringify ( )

Get the std::string representation of the sequence. If the sequence is a real track, a string of doubles is returned. If the sequence is a non-real track, a string of shorts is returned, where the shorts are the digitized values of the sequence according to the track.

Returns:
std::string String representation of the sequence

Definition at line 338 of file sequence.cpp.

References StochHMM::double_to_string(), header, StochHMM::int_to_string(), length, mask, real, realSeq, seq, and undigitized.

Referenced by print(), and undigitize().

{
    // Builds a text form of the sequence: optional header line, then the
    // values separated by spaces (doubles for real sequences, digitized
    // indices otherwise, or the raw text when nothing is digitized yet),
    // then an optional mask line, then a final newline.
    std::string output;
    if (!header.empty()){
        output+= header +"\n";
    }

    if (!seq && !realSeq){
        output+=undigitized;
    }
    // `else if`: without it an undigitized sequence (seq == NULL, length > 0,
    // as setSeq() produces when no track is given) fell through to the
    // digitized branch below and dereferenced NULL.
    else if (realSeq){
        for(size_t i=0;i<length;i++){
            output+= double_to_string((*real)[i]) + " ";
        }
    }
    else{
        for(size_t i=0;i<length;i++){
            output+= int_to_string((int)(*seq)[i]) + " ";
        }
    }

    if (mask){
        output += "\n";
        for(size_t i=0;i<length;i++){
            output+= int_to_string((int)(*mask)[i]) + " ";
        }
    }
    output+="\n";
    return output;
}
std::string StochHMM::sequence::undigitize ( )

Undigitize the sequence. If the sequence has not been digitized, the undigitized text is returned directly. If the sequence has been digitized, it is undigitized and returned.

Returns:
character or word sequence from fasta

Get the undigitized value of the sequence. If the sequence is a real track, this returns the same as stringify(). If the sequence is a non-real track, it returns the undigitized sequence.

Definition at line 374 of file sequence.cpp.

References StochHMM::track::getAlpha(), StochHMM::track::getAlphaMax(), length, realSeq, seq, seqtrk, stringify(), and undigitized.

Referenced by getUndigitized().

{
    // Real-valued sequences have no alphabet; reuse the numeric rendering.
    if (realSeq){
        return stringify();
    }

    // Nothing digitized yet: the raw text is already the answer.
    if (seq == NULL){
        return undigitized;
    }

    std::string text;
    if (seqtrk == NULL){
        std::cerr << "Track is not defined. Can't undigitize sequence without valid track\n";
        return text;
    }

    // Multi-character alphabets get a space after every symbol.
    const bool spaced = (seqtrk->getAlphaMax() != 1);
    for (size_t pos = 0; pos < length; pos++){
        text += seqtrk->getAlpha((*seq)[pos]);
        if (spaced){
            text += " ";
        }
    }
    return text;
}

Friends And Related Function Documentation

friend class sequences
friend

Definition at line 69 of file sequence.h.

friend class sequenceStream
friend

Definition at line 70 of file sequence.h.


Member Data Documentation

double StochHMM::sequence::attrib
private

Definition at line 198 of file sequence.h.

Referenced by clear(), getAttrib(), operator=(), sequence(), and setAttrib().

ExDefSequence* StochHMM::sequence::external
private

Definition at line 203 of file sequence.h.

Referenced by clear(), exDefDefined(), getExDef(), getFasta(), getReal(), operator=(), sequence(), and ~sequence().

std::string StochHMM::sequence::header
private
size_t StochHMM::sequence::length
private
std::vector<int>* StochHMM::sequence::mask
private

Definition at line 209 of file sequence.h.

Referenced by clear(), getMask(), getMaskedFasta(), operator=(), reverse(), sequence(), stringify(), and ~sequence().

int StochHMM::sequence::max_mask
private

Definition at line 210 of file sequence.h.

Referenced by clear(), getMaskedFasta(), getMaxMask(), operator=(), and sequence().

std::vector<double>* StochHMM::sequence::real
private
bool StochHMM::sequence::realSeq
private
std::vector<uint8_t>* StochHMM::sequence::seq
private
track* StochHMM::sequence::seqtrk
private
std::string StochHMM::sequence::undigitized
private

The documentation for this class was generated from the following files: