StochHMM  v0.34
Flexible Hidden Markov Model C++ Library and Application
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Public Member Functions | Private Member Functions | Private Attributes
StochHMM::lexicalTable Class Reference

Lexical table stores the log2 probabilities for both emissions and lexical transitions. More...

#include <lexicalTable.h>

List of all members.

Public Member Functions

 lexicalTable ()
 ~lexicalTable ()
double getValue (sequences &, size_t)
double getValue (sequence &, size_t)
void initialize_emission_table ()
 Initialize the final emission table with ambiguous characters.
double getReducedOrder (sequences &seqs, size_t position)
double getReducedOrder (sequence &seq, size_t position)
std::vector< std::vector
< double > > * 
getCountsTable ()
std::vector< std::vector
< double > > * 
getProbabilityTable ()
std::vector< std::vector
< double > > * 
getLogProbabilityTable ()
void createTable (int rows, int columns, int pseudocount, valueType typ)
void addTrack (track *, int)
void assignTable (std::vector< std::vector< double > > *, valueType)
void setUnkScoreType (unknownCharScoringType type)
void setUnkScore (double val)
 Set a given score to be returned for unknownCharScoringType.
track * getTrack (size_t iter)
size_t trackSize ()
std::vector< uint8_t > & getOrder ()
uint8_t getOrder (size_t i)
std::vector< std::vector
< double > > & 
getLogEmm ()
std::vector< uint8_t > & getAlphaSize ()
uint8_t getAlphaSize (size_t i)
size_t getNumberOfAlphabets ()
unknownCharScoringType getAmbScoringType ()
double getAmbDefinedScore ()
void incrementCounts (size_t word_index, size_t char_index)
 Increment counts.
void incrementCountsDouble (size_t word_index, size_t char_index, double val)
 Increment counts by double.
std::string stringify ()
std::string stringifyAmbig ()
void print ()

Private Member Functions

void init_table_dimension_values ()
void init_array_dimension_values ()
size_t convertIndex (size_t, size_t)
void decompose (size_t row, size_t column, std::vector< uint8_t > &letters)
void decompose (size_t index, std::vector< uint8_t > &letters)
void transferValues (std::vector< bool > &transferred)
size_t calculateArrayIndex (std::vector< uint8_t > &kmer)
void expand_ambiguous (std::vector< uint8_t > &letters, std::vector< double > &expanded)
std::vector< std::vector
< uint8_t > > * 
expand_ambiguous (std::vector< std::vector< uint8_t > > *words, size_t letter)
size_t calculateIndexFromDecomposed (std::vector< uint8_t > &word)
double getAmbiguousScore (std::vector< uint8_t > &letters)

Private Attributes

unknownCharScoringType unknownScoreType
 What type of score to use with unknown.
double unknownDefinedScore
 Undefined character score.
size_t number_of_tracks
std::vector< track * > trcks
std::vector< uint8_t > alphabets
std::vector< uint8_t > max_unambiguous
std::vector< uint8_t > order
uint8_t max_order
size_t y_dim
size_t * x_subarray
size_t * y_subarray
std::vector< std::vector
< double > > * 
prob
std::vector< std::vector
< double > > * 
counts
std::vector< std::vector
< double > > * 
logProb
size_t array_size
size_t dimensions
std::vector< size_t > subarray_value
std::vector< size_t > subarray_sequence
std::vector< size_t > subarray_position
std::vector< size_t > decompose_values
std::vector< size_t > decompose_sequence
std::vector< double > * log_emission
std::vector< std::vector
< double > * > 
low_order_emissions
std::vector< std::vector
< std::pair< size_t, size_t > * > > 
low_order_info

Detailed Description

Lexical table stores the log2 probabilities for both emissions and lexical transitions.

Definition at line 31 of file lexicalTable.h.


Constructor & Destructor Documentation

StochHMM::lexicalTable::lexicalTable ( )
StochHMM::lexicalTable::~lexicalTable ( )

Definition at line 32 of file lexicalTable.cpp.

References counts, log_emission, logProb, prob, x_subarray, and y_subarray.

{
    // Release every heap-allocated table owned by this object.
    // `delete` / `delete[]` on a null pointer is a no-op, so no guards are needed.
    delete logProb;
    delete prob;
    delete counts;
    delete log_emission;

    // y_subarray is allocated with array new (`new size_t[y_dim]` in
    // init_table_dimension_values()) and x_subarray is indexed as an array
    // there as well, so both must be released with delete[]; plain `delete`
    // on an array allocation is undefined behaviour.
    delete[] x_subarray;
    delete[] y_subarray;

    // Null the pointers so no stale address remains in the object.
    logProb = NULL;
    prob = NULL;
    counts = NULL;
    log_emission = NULL;
    x_subarray = NULL;
    y_subarray = NULL;
}

Member Function Documentation

void StochHMM::lexicalTable::addTrack ( track * trk,
int  orderValue 
)

Add a track to an emission

Parameters:
trk — Pointer to track
orderValue — order of emission from track

Definition at line 158 of file lexicalTable.cpp.

References alphabets, StochHMM::track::getAlphaSize(), max_order, order, and trcks.

Referenced by StochHMM::transition::_parseLexical(), StochHMM::Counter::initializeTable(), and StochHMM::emm::parse().

{
    // Register the track together with its alphabet size and emission order.
    trcks.push_back(trk);
    alphabets.push_back(trk->getAlphaSize());
    order.push_back(orderValue);

    // Keep max_order equal to the largest order supplied so far.
    if (max_order < orderValue) {
        max_order = orderValue;
    }
}
void StochHMM::lexicalTable::assignTable ( std::vector< std::vector< double > > *  temp,
valueType  emmType 
)

Set the type of counts in the emission 2D table provided by the user

Parameters:
temp — vector of vectors of doubles
emmType — Type of value (COUNTS, PROBABILITY, LOG_PROB)

Definition at line 171 of file lexicalTable.cpp.

References StochHMM::COUNTS, counts, initialize_emission_table(), StochHMM::LOG_PROB, logProb, prob, and StochHMM::PROBABILITY.

{
    // Store the caller-supplied table in the slot that matches its value type.
    // Any other valueType is ignored.
    if (emmType==COUNTS){
        counts=temp;
    }
    else if (emmType == PROBABILITY){
        prob=temp;
    }
    else if (emmType == LOG_PROB){
        logProb=temp;
        // This function's reference list names initialize_emission_table(),
        // and that function lists assignTable() as a caller, but the call was
        // absent from the listing. It is placed here because
        // initialize_emission_table() exits when logProb is NULL, so this is
        // the only branch where calling it is always safe.
        // NOTE(review): confirm placement against lexicalTable.cpp line 171.
        initialize_emission_table();
    }
}
size_t StochHMM::lexicalTable::calculateArrayIndex ( std::vector< uint8_t > &  kmer)
private

Definition at line 814 of file lexicalTable.cpp.

References dimensions, and subarray_value.

Referenced by transferValues().

{
    // Flat array offset = sum over dimensions of subarray_value[d] * kmer[d].
    size_t offset = 0;
    size_t dim = 0;
    while (dim < dimensions) {
        offset += subarray_value[dim] * kmer[dim];
        ++dim;
    }
    return offset;
}
size_t StochHMM::lexicalTable::calculateIndexFromDecomposed ( std::vector< uint8_t > &  word)
private

Definition at line 806 of file lexicalTable.cpp.

References decompose_values, and dimensions.

{
    // Flat array offset = sum over dimensions of decompose_values[d] * word[d].
    size_t offset = 0;
    for (size_t d = 0; d != dimensions; ++d) {
        const size_t weight = decompose_values[d];
        offset += weight * word[d];
    }
    return offset;
}
size_t StochHMM::lexicalTable::convertIndex ( size_t  ,
size_t   
)
private
void StochHMM::lexicalTable::createTable ( int  rows,
int  columns,
int  pseudocount,
valueType  typ 
)

Definition at line 48 of file lexicalTable.cpp.

References StochHMM::COUNTS, and counts.

Referenced by StochHMM::Counter::initializeTable().

{
    // Only COUNTS tables are built here; every other value type is a no-op.
    if (typ != COUNTS) {
        return;
    }

    // Drop any previously allocated counts table (deleting NULL is harmless).
    delete counts;

    // rows x columns table with every cell initialised to the pseudocount.
    std::vector<double> row_template(columns, pseudocount);
    counts = new(std::nothrow) std::vector<std::vector<double> >(rows, row_template);
    if (counts == NULL) {
        std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
        exit(1);
    }
}
void StochHMM::lexicalTable::decompose ( size_t  row,
size_t  column,
std::vector< uint8_t > &  letters 
)
private

Definition at line 822 of file lexicalTable.cpp.

References number_of_tracks, x_subarray, y_dim, and y_subarray.

Referenced by transferValues().

{
    // Split a (row, column) position of the 2D table into its letters and
    // append them to `letters`: first the y_dim preceding letters encoded in
    // `row`, then one emitted letter per track encoded in `column`.
    //
    // Plain integer division replaces the former floor(row/stride): the
    // quotient of two size_t values is already truncated, and routing it
    // through double only risked precision loss for very large values.

    // Decompose the row into the preceding letters
    for (size_t i = 0; i < y_dim; ++i) {
        const size_t val = row / y_subarray[i];
        row -= val * y_subarray[i];
        // Letters are stored as uint8_t; the narrowing is made explicit.
        letters.push_back(static_cast<uint8_t>(val));
    }

    // Decompose the column into the emitted letters
    for (size_t i = 0; i < number_of_tracks; ++i) {
        const size_t val = column / x_subarray[i];
        column -= val * x_subarray[i];
        letters.push_back(static_cast<uint8_t>(val));
    }
    return;
}
void StochHMM::lexicalTable::decompose ( size_t  index,
std::vector< uint8_t > &  letters 
)
private

Definition at line 841 of file lexicalTable.cpp.

References dimensions, and subarray_value.

{
    // Split a flat array index into one letter per dimension and append the
    // letters to `letters`, using subarray_value[i] as the divisor for
    // dimension i.
    //
    // Plain integer division replaces the former floor(index/stride): the
    // quotient of two size_t values is already truncated, and routing it
    // through double only risked precision loss for very large values.

    // Decompose the index into the emitted letters
    for (size_t i = 0; i < dimensions; ++i) {
        const size_t val = index / subarray_value[i];
        index -= val * subarray_value[i];
        // Letters are stored as uint8_t; the narrowing is made explicit.
        letters.push_back(static_cast<uint8_t>(val));
    }
    return;
}
void StochHMM::lexicalTable::expand_ambiguous ( std::vector< uint8_t > &  letters,
std::vector< double > &  expanded 
)
private
std::vector<std::vector<uint8_t> >* StochHMM::lexicalTable::expand_ambiguous ( std::vector< std::vector< uint8_t > > *  words,
size_t  letter 
)
private
std::vector<uint8_t>& StochHMM::lexicalTable::getAlphaSize ( )
inline

Get the alphabet sizes for all tracks used in emission

Returns:
std::vector<uint8_t>& (reference to the per-track alphabet sizes)

Definition at line 87 of file lexicalTable.h.

References alphabets.

Referenced by StochHMM::transition::_parseLexical(), and StochHMM::emm::parse().

{
    // Expose the per-track alphabet sizes by reference (no copy is made).
    return alphabets;
}
uint8_t StochHMM::lexicalTable::getAlphaSize ( size_t  i)
inline

Definition at line 88 of file lexicalTable.h.

References alphabets.

{
    // Alphabet size recorded for track i; the index is not range-checked.
    return alphabets[i];
}
double StochHMM::lexicalTable::getAmbDefinedScore ( )
inline

Definition at line 92 of file lexicalTable.h.

References unknownDefinedScore.

Referenced by StochHMM::emm::stringify(), and StochHMM::transition::stringify().

double StochHMM::lexicalTable::getAmbiguousScore ( std::vector< uint8_t > &  letters)
private

Definition at line 718 of file lexicalTable.cpp.

References StochHMM::AVERAGE_SCORE, decompose_sequence, decompose_values, dimensions, StochHMM::HIGHEST_SCORE, log_emission, StochHMM::LOWEST_SCORE, max_unambiguous, StochHMM::Index::setAmbiguous(), StochHMM::Index::size(), trcks, and unknownScoreType.

Referenced by transferValues().

{
    // Score for a word (one letter per table dimension) that contains at
    // least one ambiguous letter. The score is derived from the log_emission
    // entries of every index collected in `indices`, combined according to
    // unknownScoreType.
    //
    // NOTE(review): the published listing of this function had lost its three
    // `if (unknownScoreType == ...)` lines (its braces did not balance). They
    // are restored here from the function's reference list (AVERAGE_SCORE,
    // LOWEST_SCORE, HIGHEST_SCORE, unknownScoreType) and from the
    // avg/min/max mapping sketched in transferValues(). Confirm against
    // lexicalTable.cpp line 718.

    // Collect candidate indices dimension by dimension.
    Index indices;
    for (size_t i = 0; i < dimensions; ++i) {
        Index subtotal;
        if (letters[i] > max_unambiguous[decompose_sequence[i]]) {
            // Letter is above the track's largest unambiguous value:
            // use the track's ambiguous set for it.
            subtotal.setAmbiguous(trcks[decompose_sequence[i]]->getAmbiguousSet(letters[i]));
        }
        else {
            subtotal += letters[i];
        }
        // Scale by this dimension's weight before merging into the total.
        subtotal *= decompose_values[i];
        indices += subtotal;
    }

    if (unknownScoreType == AVERAGE_SCORE) {
        // Average in probability space: exp() each log score, take the
        // mean, then return the log of the mean.
        // NOTE(review): the class description says the table stores log2
        // values, while exp()/log() are natural-base — confirm the base.
        double temp(0);
        for (size_t i = 0; i < indices.size(); i++) {
            temp += exp((*log_emission)[indices[i]]);
        }
        temp /= indices.size();
        temp = log(temp);
        return temp;
    }
    else if (unknownScoreType == LOWEST_SCORE) {
        // Smallest log score among the candidates.
        double temp(INFINITY);
        for (size_t i = 0; i < indices.size(); i++) {
            double val = (*log_emission)[indices[i]];
            if (val < temp) {
                temp = val;
            }
        }
        return temp;
    }
    else if (unknownScoreType == HIGHEST_SCORE) {
        // Largest log score among the candidates.
        double temp(-INFINITY);
        for (size_t i = 0; i < indices.size(); i++) {
            double val = (*log_emission)[indices[i]];
            if (val > temp) {
                temp = val;
            }
        }
        return temp;
    }

    // Any other scoring type: report log(0).
    return -INFINITY;
}
unknownCharScoringType StochHMM::lexicalTable::getAmbScoringType ( )
inline

Definition at line 91 of file lexicalTable.h.

References unknownScoreType.

Referenced by StochHMM::emm::stringify(), and StochHMM::transition::stringify().

std::vector< std::vector< double > > * StochHMM::lexicalTable::getCountsTable ( )

Definition at line 69 of file lexicalTable.cpp.

References counts.

Referenced by StochHMM::transition::_parseLexical(), and StochHMM::emm::parse().

{
    // Table already exists: hand it back unchanged.
    if (counts != NULL) {
        return counts;
    }

    // First request: allocate an empty table, aborting on allocation failure.
    counts = new(std::nothrow) std::vector<std::vector<double> >;
    if (counts == NULL) {
        std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
        exit(1);
    }
    return counts;
}
std::vector<std::vector<double> >& StochHMM::lexicalTable::getLogEmm ( )
inline

Get Log(prob) emission table

Returns:
std::vector<std::vector<double> >

Definition at line 83 of file lexicalTable.h.

References logProb.

{
    // Dereferences logProb without a NULL check; logProb must already be set.
    return *logProb;
}
std::vector< std::vector< double > > * StochHMM::lexicalTable::getLogProbabilityTable ( )

Definition at line 99 of file lexicalTable.cpp.

References logProb.

Referenced by StochHMM::transition::_parseLexical(), and StochHMM::emm::parse().

{
    // Table already exists: hand it back unchanged.
    if (logProb != NULL) {
        return logProb;
    }

    // First request: allocate an empty table, aborting on allocation failure.
    logProb = new(std::nothrow) std::vector<std::vector<double> >;
    if (logProb == NULL) {
        std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
        exit(1);
    }
    return logProb;
}
size_t StochHMM::lexicalTable::getNumberOfAlphabets ( )
inline

Definition at line 89 of file lexicalTable.h.

References alphabets.

Referenced by StochHMM::transition::_parseLexical(), and StochHMM::emm::parse().

{
    // One alphabet entry is stored per added track.
    return alphabets.size();
}
std::vector<uint8_t>& StochHMM::lexicalTable::getOrder ( )
inline

Get the orders that the lexical emission will use, one per track

Returns:
std::vector<uint8_t>& (reference to the per-track orders)

Definition at line 78 of file lexicalTable.h.

References order.

Referenced by StochHMM::transition::_parseLexical(), StochHMM::emm::parse(), StochHMM::emm::stringify(), and StochHMM::transition::stringify().

{
    // Expose the per-track orders by reference (no copy is made).
    return order;
}
uint8_t StochHMM::lexicalTable::getOrder ( size_t  i)
inline

Definition at line 79 of file lexicalTable.h.

References order.

{
    // Order recorded for track i; the index is not range-checked.
    return order[i];
}
std::vector< std::vector< double > > * StochHMM::lexicalTable::getProbabilityTable ( )

Definition at line 84 of file lexicalTable.cpp.

References prob.

Referenced by StochHMM::transition::_parseLexical(), and StochHMM::emm::parse().

{
    // Table already exists: hand it back unchanged.
    if (prob != NULL) {
        return prob;
    }

    // First request: allocate an empty table, aborting on allocation failure.
    prob = new(std::nothrow) std::vector<std::vector<double> >;
    if (prob == NULL) {
        std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
        exit(1);
    }
    return prob;
}
double StochHMM::lexicalTable::getReducedOrder ( sequences & seqs,
size_t  position 
)

Definition at line 607 of file lexicalTable.cpp.

References StochHMM::AVERAGE_SCORE, dimensions, StochHMM::HIGHEST_SCORE, log_emission, StochHMM::LOWEST_SCORE, max_unambiguous, StochHMM::NO_SCORE, StochHMM::Index::setAmbiguous(), StochHMM::Index::size(), subarray_position, subarray_sequence, subarray_value, trcks, and unknownScoreType.

Referenced by getValue().

{
// Score lookup used by getValue() when max_order > pos, i.e. when fewer
// letters precede `position` than the highest-order context requires.
//
// NOTE(review): this listing is incomplete. The three `return temp; }` sites
// below close blocks whose opening lines are missing, so the conditions that
// choose between the three strategies are not visible here. The reference
// list for this function names unknownScoreType, AVERAGE_SCORE, LOWEST_SCORE,
// HIGHEST_SCORE and NO_SCORE; check lexicalTable.cpp (line 607) for the exact
// conditions before changing anything.

// Collect the candidate indices into log_emission, one dimension at a time.
Index indices;
for(size_t i=0;i<dimensions;i++){
Index subtotal;
size_t seq = subarray_sequence[i];
size_t pos = subarray_position[i];
// This dimension looks back further than `position` allows:
// use the track's whole unambiguous set instead of a concrete letter.
if (subarray_position[i] > position){
subtotal.setAmbiguous(trcks[seq]->getUnambiguousSet());
}
// Letter value is above the track's largest unambiguous value:
// use the track's ambiguous set for that letter.
else if (seqs[seq][position - pos] > max_unambiguous[seq]){
subtotal.setAmbiguous(trcks[seq]->getAmbiguousSet(seqs[seq][position-pos]));
}
// Ordinary letter: use its value directly.
else{
subtotal+=seqs[seq][position-pos];
}
// Scale by this dimension's weight before merging into the total.
subtotal *= subarray_value[i];
indices += subtotal;
}
// Strategy 1: average in probability space (exp each score, mean, then log).
double temp(0);
for(size_t i=0;i<indices.size();i++){
temp+=exp((*log_emission)[indices[i]]);
}
temp /= indices.size();
temp = log(temp);
return temp;
}
// Strategy 2: smallest score among the candidates.
double temp(INFINITY);
for(size_t i=0;i<indices.size();i++){
double val = (*log_emission)[indices[i]];
if (val < temp){
temp = val;
}
}
return temp;
}
// Strategy 3: largest score among the candidates.
double temp(-INFINITY);
for(size_t i=0;i<indices.size();i++){
double val = (*log_emission)[indices[i]];
if (val > temp){
temp = val;
}
}
return temp;
}
// No strategy applied: report log(0).
return -INFINITY;
}
double StochHMM::lexicalTable::getReducedOrder ( sequence & seq,
size_t  position 
)

Definition at line 664 of file lexicalTable.cpp.

References StochHMM::AVERAGE_SCORE, dimensions, StochHMM::HIGHEST_SCORE, log_emission, StochHMM::LOWEST_SCORE, max_unambiguous, StochHMM::NO_SCORE, StochHMM::Index::setAmbiguous(), StochHMM::Index::size(), subarray_position, subarray_sequence, subarray_value, trcks, and unknownScoreType.

{
// Single-sequence counterpart of the `sequences` overload: letters are read
// from `seq` directly, while subarray_sequence[i] still selects the track
// used for ambiguity handling.
//
// NOTE(review): this listing is incomplete. The three `return temp; }` sites
// below close blocks whose opening lines are missing, so the conditions that
// choose between the three strategies are not visible here. The reference
// list for this function names unknownScoreType, AVERAGE_SCORE, LOWEST_SCORE,
// HIGHEST_SCORE and NO_SCORE; check lexicalTable.cpp (line 664) for the exact
// conditions before changing anything.

// Collect the candidate indices into log_emission, one dimension at a time.
Index indices;
for(size_t i=0;i<dimensions;i++){
Index subtotal;
size_t sq = subarray_sequence[i];
size_t pos = subarray_position[i];
// This dimension looks back further than `position` allows:
// use the track's whole unambiguous set instead of a concrete letter.
if (subarray_position[i] > position){
subtotal.setAmbiguous(trcks[sq]->getUnambiguousSet());
}
// Letter value is above the track's largest unambiguous value:
// use the track's ambiguous set for that letter.
else if (seq[position - pos] > max_unambiguous[sq]){
subtotal.setAmbiguous(trcks[sq]->getAmbiguousSet(seq[position-pos]));
}
// Ordinary letter: use its value directly.
else{
subtotal+=seq[position-pos];
}
// Scale by this dimension's weight before merging into the total.
subtotal *= subarray_value[i];
indices += subtotal;
}
// Strategy 1: average in probability space (exp each score, mean, then log).
double temp(0);
for(size_t i=0;i<indices.size();i++){
temp+=exp((*log_emission)[indices[i]]);
}
temp /= indices.size();
temp = log(temp);
return temp;
}
// Strategy 2: smallest score among the candidates.
double temp(INFINITY);
for(size_t i=0;i<indices.size();i++){
double val = (*log_emission)[indices[i]];
if (val < temp){
temp = val;
}
}
return temp;
}
// Strategy 3: largest score among the candidates.
double temp(-INFINITY);
for(size_t i=0;i<indices.size();i++){
double val = (*log_emission)[indices[i]];
if (val > temp){
temp = val;
}
}
return temp;
}
// No strategy applied: report log(0).
return -INFINITY;
}
track* StochHMM::lexicalTable::getTrack ( size_t  iter)
inline

Get pointer to track at index position of emission

Parameters:
iter — Index iterator of position
Returns:
track* Track in emission

Definition at line 70 of file lexicalTable.h.

References trcks.

Referenced by StochHMM::emm::stringify(), and StochHMM::transition::stringify().

{
    // Track pointer stored at position iter; the index is not range-checked.
    return trcks[iter];
}
double StochHMM::lexicalTable::getValue ( sequences & seqs,
size_t  pos 
)

Definition at line 113 of file lexicalTable.cpp.

References array_size, dimensions, getReducedOrder(), log_emission, max_order, subarray_position, subarray_sequence, and subarray_value.

Referenced by StochHMM::emm::get_emission(), and StochHMM::transition::getTransition().

{
    // Look up the log score for position `pos` of the given sequences.

    // Not enough preceding letters for the highest-order context:
    // delegate to the reduced-order calculation.
    if (max_order>pos){
        return getReducedOrder(seqs, pos);
    }

    // Flatten the letters of all dimensions into one index: each dimension
    // contributes (letter at its track/offset) * (its weight).
    size_t index(seqs[subarray_sequence[0]][pos - subarray_position[0]] * subarray_value[0]);
    for(size_t i=1;i<dimensions;i++){
        index += seqs[subarray_sequence[i]][pos - subarray_position[i]] * subarray_value[i];
    }

    // log_emission is created with array_size entries, so valid indices are
    // [0, array_size). The previous `>` test let index == array_size through
    // and read one element past the end.
    if (index >= array_size){
        std::cerr << "Index is out of range of lookup table in lexicalTable" << std::endl;
        exit(2);
    }
    return (*log_emission)[index];
}
double StochHMM::lexicalTable::getValue ( sequence & seq,
size_t  pos 
)

Definition at line 134 of file lexicalTable.cpp.

References array_size, dimensions, getReducedOrder(), log_emission, max_order, subarray_position, and subarray_value.

{
    // Look up the log score for position `pos` of a single sequence.

    // Not enough preceding letters for the highest-order context:
    // delegate to the reduced-order calculation.
    if (max_order>pos){
        return getReducedOrder(seq, pos);
    }

    // Flatten the letters of all dimensions into one index: each dimension
    // contributes (letter at its offset) * (its weight).
    size_t index(seq[pos - subarray_position[0]] * subarray_value[0]);
    for(size_t i=1;i<dimensions;i++){
        index += seq[pos - subarray_position[i]] * subarray_value[i];
    }

    // log_emission is created with array_size entries, so valid indices are
    // [0, array_size). The previous `>` test let index == array_size through
    // and read one element past the end.
    if (index >= array_size){
        std::cerr << "Index is out of range of lookup table in lexicalTable" << std::endl;
        exit(2);
    }
    return (*log_emission)[index];
}
void StochHMM::lexicalTable::incrementCounts ( size_t  word_index,
size_t  char_index 
)
inline

Increment counts.

Definition at line 95 of file lexicalTable.h.

References counts.

{
    // Nothing to update until a counts table has been created.
    if (counts == NULL) {
        return;
    }
    // Add one to the selected cell; the indices are not range-checked.
    ++(*counts)[word_index][char_index];
}
void StochHMM::lexicalTable::incrementCountsDouble ( size_t  word_index,
size_t  char_index,
double  val 
)
inline

Increment counts by double.

Definition at line 98 of file lexicalTable.h.

References counts.

{
    // Nothing to update until a counts table has been created.
    if (counts == NULL) {
        return;
    }
    // Add val to the selected cell; the indices are not range-checked.
    std::vector<double>& word_row = (*counts)[word_index];
    word_row[char_index] += val;
}
void StochHMM::lexicalTable::init_array_dimension_values ( )
private

Definition at line 452 of file lexicalTable.cpp.

References array_size, decompose_sequence, decompose_values, dimensions, StochHMM::integerPower(), number_of_tracks, order, subarray_position, subarray_sequence, subarray_value, trcks, and y_dim.

Referenced by initialize_emission_table().

{
// Computes the bookkeeping vectors for the flat emission array:
// subarray_sequence / subarray_position / subarray_value (used by getValue()
// and getReducedOrder()) and decompose_values / decompose_sequence (used by
// getAmbiguousScore()).
//
// NOTE(review): the listing does not show where array_size and dimensions are
// initialised, nor where subarray_sequence, subarray_value, decompose_values
// and decompose_sequence are sized before being indexed. The reference list
// also names y_dim, which is not used in the lines shown, so some lines are
// probably missing. Check lexicalTable.cpp (line 452) before editing.

//Calculate total size of emission table needed with ambiguous characters
std::vector<size_t> complete_alphabet_size;
size_t current_dim(0);
for(size_t i=0;i<number_of_tracks;i++){
//Get alphabet size and store it
size_t alpha_size = trcks[i]->getTotalAlphabetSize();
complete_alphabet_size.push_back(alpha_size);
// A track of order k contributes k+1 dimensions of alpha_size letters each.
array_size*=integerPower(alpha_size, (size_t) order[i]+1);
// Record which track each of those k+1 dimensions belongs to.
for(size_t j=0;j<=order[i];++j){
subarray_sequence[current_dim]=i;
current_dim++;
}
}
//Calculate the Sequence positions
// For each track the offsets run order[i], order[i]-1, ..., 0; getValue()
// subtracts them from the current position, so 0 is the current letter.
for (size_t i=0;i<number_of_tracks;i++){
for (size_t j=0;j<=order[i];++j){
subarray_position.push_back(order[i]-j);
}
}
//Compute the decomposing values
//Used to convert from index to word
// Layout: order[i] entries per track first, then one further entry per track.
std::vector<size_t> decompose_index;
for(size_t i=0;i<number_of_tracks;++i){
for(size_t j=0;j<order[i];++j){
decompose_index.push_back(complete_alphabet_size[i]);
}
}
for(size_t i=0;i<number_of_tracks;++i){
decompose_index.push_back(complete_alphabet_size[i]);
}
//Calculate subarray values
// decompose_values[i] is multiplied by the alphabet sizes of all later
// dimensions. NOTE(review): this relies on decompose_values[i] starting at 1,
// which is not visible in this listing.
for(size_t i=0;i<dimensions;++i){
for(size_t j=i+1;j<dimensions;++j){
decompose_values[i]*=decompose_index[j];
}
}
//Rearrange decompose values for subarray values;
//Final values in subarray_value will correspond to sequence AAA(A)B(B).
//Where A is 3rd order and B is 1st order;
size_t array_index(0);
size_t index(0);
for(size_t i=0;i<number_of_tracks;++i){
for(size_t j=0;j<order[i];++j){
subarray_value[array_index] = decompose_values[index];
array_index++;
index++;
}
// NOTE(review): in the layout built above, the trailing per-track entries
// start at index dimensions-number_of_tracks and increase with i, so `+i`
// looks like the intended offset rather than `-i`. The two agree only for
// i == 0 (single track). Verify with a multi-track model before changing.
subarray_value[array_index] = decompose_values[dimensions-number_of_tracks-i];
array_index++;
}
//Finalize decompose_sequence
// Same layout as decompose_index, but holding the track number per dimension.
std::vector<size_t> temp;
for(size_t i=0;i<number_of_tracks;++i){
for(size_t j=0;j<order[i];++j){
temp.push_back(i);
}
}
for(size_t i=0;i<number_of_tracks;++i){
temp.push_back(i);
}
for(size_t i=0;i<dimensions;++i){
decompose_sequence[i]=temp[i];
}
return;
}
void StochHMM::lexicalTable::init_table_dimension_values ( )
private

Definition at line 408 of file lexicalTable.cpp.

References alphabets, number_of_tracks, order, StochHMM::sumVector(), trcks, x_subarray, y_dim, and y_subarray.

Referenced by initialize_emission_table().

{
// Computes x_subarray and y_subarray, the per-position divisors that
// decompose(row, column, letters) uses to split a row/column of the 2D table
// into letters.
//
// NOTE(review): the listing does not show x_subarray being allocated or
// number_of_tracks / y_dim being set, although the reference list names
// sumVector() and trcks, neither of which appears below. Some lines are
// probably missing; check lexicalTable.cpp (line 408) before editing.

//Calculate subarray dimensions for logProb and new log_emission table (includes ambiguous character)
// Array allocation: must be released with delete[].
y_subarray = new size_t[y_dim];
//Calculate Old subarray x_dimension values
// x_subarray[i] = product of the alphabet sizes of all tracks after i.
for(size_t i=0;i<number_of_tracks;++i){
size_t value(1);
for(size_t j=i+1;j<number_of_tracks;++j){
value*=alphabets[j];
}
x_subarray[i]=value;
}
//Calculate Old subarray y_dimension values
// index holds one alphabet size per preceding letter: order[t] copies of
// alphabets[t] for each track t. order[0] and alphabets[0] are read without
// a check, so at least one track must have been added.
std::vector<size_t> index(order[0],alphabets[0]);
for(size_t i=1;i<order.size();i++){
for(size_t j=0;j<order[i];j++){
index.push_back(alphabets[i]);
}
}
// y_subarray[i] = product of the entries of index after position i.
for (size_t i=0;i<y_dim;i++){
size_t val = 1;
for(size_t j=i+1;j<y_dim;j++){
val*=index[j];
}
y_subarray[i]=val;
}
return;
}
void StochHMM::lexicalTable::initialize_emission_table ( )

Initialize the final emission table with ambiguous characters.

Definition at line 856 of file lexicalTable.cpp.

References array_size, StochHMM::DEFINED_SCORE, init_array_dimension_values(), init_table_dimension_values(), log_emission, logProb, max_unambiguous, number_of_tracks, transferValues(), trcks, and unknownDefinedScore.

Referenced by assignTable(), and StochHMM::emm::parse().

{
// Builds log_emission, the flat emission array that also covers ambiguous
// characters, from the logProb table.
//
// NOTE(review): this listing is incomplete. The `}` before `else{` has no
// visible opener, and the calls to init_table_dimension_values() and
// init_array_dimension_values() named in the reference list are absent.
// Check lexicalTable.cpp (line 856) for the missing lines and their order
// before editing.

// logProb is the source of the values; refuse to run without it.
if (logProb == NULL){
std::cerr << "Cannot initialize emission table until after the tables have been assigned";
exit(2);
}
// Record each track's largest unambiguous letter value.
for(size_t i = 0; i < number_of_tracks ; ++i){
max_unambiguous.push_back(trcks[i]->getMaxUnambiguous());
}
// Branch whose condition is missing from the listing (the reference list
// mentions DEFINED_SCORE): every cell starts at unknownDefinedScore.
log_emission = new std::vector<double> (array_size,unknownDefinedScore);
}
// Otherwise every cell starts at log(0).
else{
log_emission = new std::vector<double> (array_size,-INFINITY);
}
//Transfer values to emission_table
// `transferred` marks which cells were filled from logProb.
std::vector<bool> transferred (array_size,false);
transferValues(transferred);
}
void StochHMM::lexicalTable::print ( )

Definition at line 64 of file lexicalTable.cpp.

References stringify().

{
    // Write the stringified table to standard output, then newline and flush.
    const std::string rendered = stringify();
    std::cout << rendered << std::endl;
}
void StochHMM::lexicalTable::setUnkScore ( double  val)
inline

Set a given score to be returned for unknownCharScoringType.

Definition at line 65 of file lexicalTable.h.

References unknownDefinedScore.

Referenced by StochHMM::transition::_parseLexical(), and StochHMM::emm::parse().

void StochHMM::lexicalTable::setUnkScoreType ( unknownCharScoringType  type)
inline

Set how the emission will deal with unknown alphabet

Parameters:
type — enum unknownCharScoringType

Definition at line 62 of file lexicalTable.h.

References unknownScoreType.

Referenced by StochHMM::transition::_parseLexical(), and StochHMM::emm::parse().

std::string StochHMM::lexicalTable::stringify ( )

Definition at line 297 of file lexicalTable.cpp.

References counts, StochHMM::double_to_string(), logProb, order, StochHMM::POWER, prob, and trcks.

Referenced by print(), StochHMM::emm::stringify(), and StochHMM::transition::stringify().

{
// Renders the 2D table as tab-separated text: one "@"-prefixed column header
// line, then one line per table row (with an "@"-prefixed row header when the
// table has more than one row).
// The table printed is logProb if set, otherwise prob, otherwise counts.
// Exits with status 1 when no track has been added; returns "" (after a
// message on stderr) when none of the three tables exists.
std::string tbl("");
size_t tracks_size = trcks.size();
if(tracks_size==0){
std::cerr << "Can't print out table without track and order being set for lexicalTable\n";
exit(1);
}
//Output Column Headers
// columns = product of all track alphabet sizes.
size_t columns(1);
std::vector<size_t> alphaSizes;
//alphaSizes.push_back(0);
for(size_t i = 0;i<trcks.size();i++){
size_t alphaSz = trcks[i]->getAlphaSize();
columns*=alphaSz;
alphaSizes.push_back(alphaSz);
}
// Reversed so that alphaSizes[tr] below is the size of a later track.
reverse(alphaSizes.begin(),alphaSizes.end());
std::string colHeader("@");
for(size_t i = 0;i<columns;i++){
size_t indexValue = i;
for(size_t tr=0;tr<trcks.size();tr++){
// Letters of different tracks are separated by "|".
if (tr>0){
colHeader+= "|";
}
size_t val(0);
// Every track except the last takes the quotient; the last takes what is left.
if (tr<trcks.size()-1){
val= floor(indexValue/alphaSizes[tr]);
indexValue-=val*alphaSizes[tr];
}
else{
val = indexValue;
}
colHeader+=trcks[tr]->convertIndexToWord(val, 1);
}
colHeader+="\t";
}
tbl+=colHeader + "\n";
// Pick the table to print: logProb, then prob, then counts.
std::vector<std::vector<double> >* temp;
if (logProb!=NULL){
temp=logProb;
}
else if (prob!=NULL){
temp=prob;
}
else if (counts!=NULL){
temp=counts;
}
else{
std::cerr << "No table is defined\n";
return "";
}
//TODO: Fix row header for other orders
// A single-row table gets no row headers.
bool rowHeader = (temp->size()>1) ? true : false;
for(size_t i=0;i<temp->size();i++){
std::string header("");
if (rowHeader){
size_t indexValue = i;
for(size_t tr=0;tr<trcks.size();tr++){
if (tr>0 && order[tr]>0){
header+= "|";
}
size_t val(0);
if (tr<trcks.size()-1){
// Divisor comes from the POWER lookup table, indexed by the next track's
// order and alphabet size minus one.
double pwr = POWER[order[tr+1]][trcks[tr+1]->getAlphaSize()-1];
val= floor(indexValue/pwr);
indexValue-=val*pwr;
}
else{
val = indexValue;
}
header+=trcks[tr]->convertIndexToWord(val, order[tr]);
}
tbl+="@" + header + "\t";
}
// Row values, tab-separated, followed by a newline.
for(size_t j=0;j<(*temp)[i].size();j++){
if (j>0){
tbl+="\t";
}
tbl+=double_to_string((*temp)[i][j]);
}
tbl+="\n";
}
return tbl;
}
std::string StochHMM::lexicalTable::stringifyAmbig ( )

Definition at line 185 of file lexicalTable.cpp.

References trcks.

{
// Builds the "@"-prefixed column header line for the table including
// ambiguous characters and returns it followed by a newline.
// Only the header is produced: the code that would emit the rows is
// commented out below.
// Exits with status 1 when no track has been added.
std::string tbl("");
size_t tracks_size = trcks.size();
if(tracks_size==0){
std::cerr << "Can't print out table without track and order being set for lexicalTable\n";
exit(1);
}
//Calculate the Number of Column Headers and get alphabet for each track
//This is the complete unambiguous and ambiguous
std::vector<std::vector<std::string> > complete_alphabet(tracks_size, std::vector<std::string>());
size_t columns(1);
std::vector<size_t> alphaSizes;
//Calculate the columns size
for(size_t i = 0;i<trcks.size();i++){
// Tracks with an ambiguous set use getMaxAmbiguous()+1 as their alphabet
// size; all others use the plain alphabet size.
size_t alphaSz = (trcks[i]->isAmbiguousSet()) ? trcks[i]->getMaxAmbiguous()+1 : trcks[i]->getAlphaSize();
columns*=alphaSz;
alphaSizes.push_back(alphaSz);
//Get complete alphabet for each track
for(size_t j=0; j < alphaSz; ++j){
complete_alphabet[i].push_back(trcks[i]->getAlpha(j));
}
}
// Reversed so that alphaSizes[tr] below is the size of a later track.
reverse(alphaSizes.begin(),alphaSizes.end());
std::string colHeader("@");
//Compose column heading
for(size_t i = 0; i < columns; ++i){
size_t indexValue = i;
for(size_t tr=0;tr<trcks.size();tr++){
// Letters of different tracks are separated by "|".
if (tr>0){
colHeader+= "|";
}
size_t val(0);
// Every track except the last takes the quotient; the last takes what is left.
if (tr<trcks.size()-1){
val= floor(indexValue/alphaSizes[tr]);
indexValue-=val*alphaSizes[tr];
}
else{
val = indexValue;
}
colHeader+=complete_alphabet[tr][val];
}
colHeader+="\t";
}
tbl+=colHeader + "\n";
// Disabled: debug dump of log_emission and the row-output loop.
// for (size_t i=0; i< log_emission->size();i++){
// std::cout << (*log_emission)[i] << std::endl;
// }
// bool rowHeader = (temp->size()>1) ? true : false;
//
// for(size_t i=0;i<temp->size();i++){
// std::string header("");
//
// if (rowHeader){
// size_t indexValue = i;
//
// for(size_t tr=0;tr<trcks.size();tr++){
//
// if (tr>0 && order[tr]>0){
// header+= "|";
// }
//
//
// size_t val(0);
// if (tr<trcks.size()-1){
// double pwr = POWER[order[tr+1]][trcks[tr+1]->getAlphaSize()-1];
// val= floor(indexValue/pwr);
// indexValue-=val*pwr;
// }
// else{
// val = indexValue;
// }
//
// header+=trcks[tr]->convertIndexToWord(val, order[tr]);
// }
// tbl+="@" + header + "\t";
//
// }
//
// for(size_t j=0;j<(*temp)[i].size();j++){
// if (j>0){
// tbl+="\t";
// }
// tbl+=double_to_string((*temp)[i][j]);
// }
// tbl+="\n";
// }
return tbl;
}
size_t StochHMM::lexicalTable::trackSize ( )
inline

Get the number of tracks defined in emission

Returns:
size_t

Definition at line 74 of file lexicalTable.h.

References trcks.

Referenced by StochHMM::emm::stringify(), and StochHMM::transition::stringify().

{
    // Number of track pointers registered through addTrack().
    return trcks.size();
}
void StochHMM::lexicalTable::transferValues ( std::vector< bool > &  transferred)
private

Definition at line 543 of file lexicalTable.cpp.

References calculateArrayIndex(), decompose(), StochHMM::DEFINED_SCORE, getAmbiguousScore(), logProb, StochHMM::NO_SCORE, unknownDefinedScore, and unknownScoreType.

Referenced by initialize_emission_table().

{
    // Fills log_emission in two passes:
    //   1. copy every logProb cell to its position in the flat array and mark
    //      that position in `transferred`;
    //   2. for every position not filled in pass 1, assign a score according
    //      to unknownScoreType.
    //
    // NOTE(review): the published listing had lost the line
    // `if (unknownScoreType == DEFINED_SCORE){` (an `else if` with no
    // preceding `if`, unbalanced braces). It is restored here from the
    // function's reference list, which names DEFINED_SCORE, NO_SCORE,
    // unknownDefinedScore and unknownScoreType. Confirm against
    // lexicalTable.cpp line 543.

    // Transfer unambiguous scores
    for (size_t row = 0; row < logProb->size(); ++row) {
        for (size_t column = 0; column < (*logProb)[row].size(); ++column) {
            // (row, column) -> letters -> flat index.
            std::vector<uint8_t> alphabet;
            decompose(row, column, alphabet);
            size_t index = calculateArrayIndex(alphabet);
            (*log_emission)[index] = (*logProb)[row][column];
            transferred[index] = true;
        }
    }

    // Compute all ambiguous scores
    for (size_t i = 0; i < transferred.size(); i++) {
        // Already filled from logProb.
        if (transferred[i]) {
            continue;
        }

        if (unknownScoreType == DEFINED_SCORE) {
            // Fixed user-defined score for every untransferred position.
            (*log_emission)[i] = unknownDefinedScore;
            continue;
        }
        else if (unknownScoreType == NO_SCORE) {
            // Leave the value the array was initialised with.
            continue;
        }

        // Get the letters for index
        std::vector<uint8_t> letters;
        decompose(i, letters);

        // Assign the value according to the score type; the choice between
        // average, lowest and highest is made inside getAmbiguousScore().
        (*log_emission)[i] = getAmbiguousScore(letters);
    }
    return;
}

Member Data Documentation

std::vector<uint8_t> StochHMM::lexicalTable::alphabets
private
size_t StochHMM::lexicalTable::array_size
private
std::vector<std::vector<double> >* StochHMM::lexicalTable::counts
private
std::vector<size_t> StochHMM::lexicalTable::decompose_sequence
private

Definition at line 132 of file lexicalTable.h.

Referenced by getAmbiguousScore(), and init_array_dimension_values().

std::vector<size_t> StochHMM::lexicalTable::decompose_values
private
size_t StochHMM::lexicalTable::dimensions
private
std::vector<double>* StochHMM::lexicalTable::log_emission
private
std::vector<std::vector<double> >* StochHMM::lexicalTable::logProb
private
std::vector<std::vector<double>* > StochHMM::lexicalTable::low_order_emissions
private

Definition at line 135 of file lexicalTable.h.

std::vector<std::vector<std::pair<size_t,size_t>* > > StochHMM::lexicalTable::low_order_info
private

Definition at line 136 of file lexicalTable.h.

uint8_t StochHMM::lexicalTable::max_order
private

Definition at line 115 of file lexicalTable.h.

Referenced by addTrack(), getValue(), and lexicalTable().

std::vector<uint8_t> StochHMM::lexicalTable::max_unambiguous
private

Definition at line 113 of file lexicalTable.h.

Referenced by getAmbiguousScore(), getReducedOrder(), and initialize_emission_table().

size_t StochHMM::lexicalTable::number_of_tracks
private

Number of tracks used by this table.

Definition at line 110 of file lexicalTable.h.

Referenced by decompose(), init_array_dimension_values(), init_table_dimension_values(), and initialize_emission_table().

std::vector<uint8_t> StochHMM::lexicalTable::order
private
std::vector<std::vector<double> >* StochHMM::lexicalTable::prob
private
std::vector<size_t> StochHMM::lexicalTable::subarray_position
private

Definition at line 129 of file lexicalTable.h.

Referenced by getReducedOrder(), getValue(), and init_array_dimension_values().

std::vector<size_t> StochHMM::lexicalTable::subarray_sequence
private

Definition at line 128 of file lexicalTable.h.

Referenced by getReducedOrder(), getValue(), and init_array_dimension_values().

std::vector<size_t> StochHMM::lexicalTable::subarray_value
private
std::vector<track*> StochHMM::lexicalTable::trcks
private
double StochHMM::lexicalTable::unknownDefinedScore
private

Undefined character score.

Definition at line 108 of file lexicalTable.h.

Referenced by getAmbDefinedScore(), initialize_emission_table(), lexicalTable(), setUnkScore(), and transferValues().

unknownCharScoringType StochHMM::lexicalTable::unknownScoreType
private

What type of score to use with unknown.
size_t* StochHMM::lexicalTable::x_subarray
private
size_t StochHMM::lexicalTable::y_dim
private
size_t* StochHMM::lexicalTable::y_subarray
private

The documentation for this class was generated from the following files: