StochHMM  v0.34
Flexible Hidden Markov Model C++ Library and Application
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
externDefinitions.cpp
Go to the documentation of this file.
1 //
2 // externDefinitions.cpp
3 //Copyright (c) 2007-2012 Paul C Lott
4 //University of California, Davis
5 //Genome and Biomedical Sciences Facility
6 //UC Davis Genome Center
7 //Ian Korf Lab
8 //Website: www.korflab.ucdavis.edu
9 //Email: lottpaul@gmail.com
10 //
11 //Permission is hereby granted, free of charge, to any person obtaining a copy of
12 //this software and associated documentation files (the "Software"), to deal in
13 //the Software without restriction, including without limitation the rights to
14 //use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
15 //the Software, and to permit persons to whom the Software is furnished to do so,
16 //subject to the following conditions:
17 //
18 //The above copyright notice and this permission notice shall be included in all
19 //copies or substantial portions of the Software.
20 //
21 //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 //IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
23 //FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
24 //COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
25 //IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
26 //CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 
28 #include "externDefinitions.h"
29 
30 
31 namespace StochHMM{
32 
33  //!Copy constructor for ExDefSequence
34 // ExDefSequence::ExDefSequence(const ExDefSequence& rhs){
35 // defs = rhs.defs;
36 // }
37 
38 // ExDefSequence& ExDefSequence::operator=(const ExDefSequence& rhs){
39 // for(size_t i=0; i < defs.size() ;++i){
40 // if (defs.defined(i)){
41 // defs[i] = new (std::nothrow)
42 // }
43 // }
44 // return *this;
45 // }
46 
47 
48  //!Print the External definitions to stdout
50  for(size_t i = 0;i<defs.size();i++){
51  if (defs.defined(i)){
52  std::cout << i << "\t" << defs[i]->stringify() << std::endl;
53  }
54  }
55  return;
56  }
57 
58  //!Defined if there is a external definition defined for the position
59  //! \param position Position of the sequence to check for external definition
60  bool ExDefSequence::defined(size_t position){
61 
62  if (defs.defined(position)){
63  return true;
64  }
65  else{
66  return false;
67  }
68  }
69 
70  //!External definitions are either absolute or weighted. Checks to see if the
71  //!external definition at position in sequence is absolute
72  //! \param position Position in teh sequence
73  //! \return true if the external definition is absolute
74  bool ExDefSequence::isAbsolute(size_t position){
75  if (defs.defined(position) && defs[position]->isAbsolute()){
76  return true;
77  }
78 
79  return false;
80  }
81 
82  //!Get the absolute state defined for the position in the sequence
83  //!\param position Position of the sequence
84  //!\return integer indice to state
85  size_t ExDefSequence::getAbsState(size_t position){
86 
87  if(defs.defined(position) && defs[position]->isAbsolute()){
88  return defs[position]->getState();
89  }
90  else{
91  std::cerr << "Calling getAbsState on weighted state" << std::endl;
92  return -1;
93  }
94  }
95 
96  //!Check to see if the external definition at the position is weighted and not absolute
97  //! \param position Position in the sequence
98  //! \return true if the external definition is weighted
99  //
100  bool ExDefSequence::isWeighted(size_t position){
101  if (defs.defined(position)){
102  return !defs[position]->isAbsolute();
103  }
104  return false;
105  }
106 
107  //! Get the weight for the external definition at a position in the sequence
108  //! \param position Position in the sequence
109  //! \param stateIter Index of state to get weight
110  //! \return double value of weight to apply to state at the position
111  double ExDefSequence::getWeight(size_t position, size_t stateIter){
112  if (defs.defined(position)){
113  return defs[position]->getWeight(stateIter);
114  }
115  else{
116  return 0.0;
117  }
118  }
119 
120 
121  //!Create an ExDef type
122  //!Default: absolute=false, weightedState = -2
124  absolute=false;
125  weightedState=-2;
126  //st=NULL;
127  }
128 
129  //!Create a weightDef type
130  weightDef::weightDef(size_t state_size):ExDef(), weights(state_size){
131  absolute=false;
133  //weights.assign(state_size, 0);
134  //st=NULL;
135  }
136 
137  //!Assign a weight to a particular state
138  //! \param stateIter integer Iterator to the state
139  //! \param logValue Log value of weight to apply
140  void weightDef::assignWeight(size_t stateIter, double logValue){
141  if (!weights.defined(stateIter)){
142  weights[stateIter] = logValue;
143  }
144  else{
145  weights[stateIter]+=logValue;
146  }
147 
148  return;
149  }
150 
151 
152  //!Get the string representation of the ExDef
153  // \return std::string representation of the ExDef
154  std::string ExDef::stringify(){
155 
156  std::string output;
157  output+="STATE: ";
158  output+= int_to_string(weightedState) + "\n";
159  return output;
160  }
161 
162  //! Get the string representation of the weightDef
163  // \return std::string representation of the weightDef
164  std::string weightDef::stringify(){
165  std::string test;
166 
167  test+="STATES: ";
168 
169  for(size_t i=0;i<weights.size();i++){
170  if (weights[i]!=0.0){
171  test+= "\t" + int_to_string(i) + "\t" + double_to_string(weights[i]) + "\n";
172  }
173  }
174  return test;
175  }
176 
177  //TODO: Change return so it will return false if not able to parse from file
178 
179  //! Parses the ExDefSequence from a file stream
180  //! \param file File stream to be used to parse the External definitions from
181  //! \return true if parsing was successful
182  bool ExDefSequence::parse(std::ifstream& file,stateInfo& info){
183  //use getDefs to parse the lines
184  //and create external definition
185 
186  stringList ln;
187 
188  std::string comment_char="#";
189  std::string ws=" []";
190  std::string split="\t:";
191 
192  while(ln.fromDef(file, ws, split)){
193 
194  if (ln[0].compare("EXDEF")==0){
195 
196  if (ln[1].compare("ABSOLUTE")==0){
197  _parseAbsDef(ln,info);
198  }
199  else if (ln[1].compare("WEIGHTED")==0){
200  _parseWeightDef(ln,info);
201  }
202  else{
203  std::cerr << ln[0] << " not a valid external definition\n";
204  continue;
205  }
206  }
207 
208  char nl_peek=file.peek();
209  if (nl_peek!='['){
210  break;
211  }
212  }
213 
214  //defs->print();
215 
216  return true;
217  }
218 
219  //! Parse the absolute def from stringList given the stateInfo
221  bool start=false;
222  bool stop=false;
223  bool state=false;
224 
225  size_t startPosition(SIZE_MAX);
226  size_t stopPosition(SIZE_MAX);
227  std::vector<size_t> path;
228 
229  for (size_t i=2;i<ln.size();i++){
230  std::string& tag=ln[i];
231  if (ln.size()<=i+1){
232  std::cerr << "Missing additional information for Absolute Definition: TAG: " << ln[i] << std::endl;
233  return false;
234  }
235 
236  if (tag.compare("START")==0){
237 
238  int tempInt;
239  if (!stringToInt(ln[i+1], tempInt)){
240  std::cerr << "Value in External Definition START is not numeric: " << ln[i+1] << std::endl;
241  return false;
242  }
243 
244  startPosition=tempInt;
245 
246  start=true;
247  i++;
248  }
249  else if (tag.compare("END")==0){
250 
251  int tempInt;
252  if (!stringToInt(ln[i+1], tempInt)){
253  std::cerr << "Value in External Definition END is not numeric: " << ln[i+1] << std::endl;
254  return false;
255  }
256 
257 
258  stopPosition=tempInt;
259  stop=true;
260  i++;
261  }
262  else if (tag.compare("TRACE")==0){
263  std::string& trace=ln[i+1];
264  std::vector<std::string> traces;
265  split_line(traces, trace);
266 
267  //Check trace size;
268  size_t expectedSize=stopPosition-startPosition+1;
269 
270  if (traces.size()!=expectedSize){
271  std::cerr << "Expected external definition trace size: " << expectedSize << "\n but got " << trace << std::endl;
272  return false;
273  }
274  path.assign(traces.size(),-2);
275 
276  for(size_t k=0;k<traces.size();k++){
277  if (info.stateIterByName.count(traces[k])){
278  path[k]=info.stateIterByName[traces[k]];
279  }
280  else{
281  std::cerr << "External definition Trace state name: " << traces[k] << " doesn't exist in the model" << std::endl;
282  return false;
283  }
284  }
285  state=true;
286  i++;
287  }
288  else{
289  std::cerr << "Invalid tag found in sequence external definition: " << ln[i] <<std::endl;
290  }
291  }
292 
293  //If everything is defined correctly then return true
294  if (start && stop && state){
295  for(size_t i=startPosition-1;i<stopPosition;i++){
296  size_t state=path[i-(startPosition-1)];
297  if (defs.defined(i)){
298  if (!defs[i]->absolute){
299  std::cerr << "Absolute overlaps weighted state definition" << std::endl;
300 
301  }
302  else if (state!=defs[i]->getState()){
303  std::cerr << "Two absolute paths defined for " << i+1 << " position in sequence" <<std::endl;
304  }
305  }
306  else{
307  defs[i] = new (std::nothrow) ExDef;
308  defs[i]->setState(state);
309  }
310  }
311  return true;
312  }
313 
314  return false;
315  }
316 
317  //! Parse the weightDef from stringList given the stateInfo
319  bool start(false);
320  bool stop(false);
321  bool state(false);
322  bool value(false);
323  bool valType(false);
324 
325  size_t startPosition(0);
326  size_t stopPosition(0);
327 
328  std::set<size_t> definedStates;
329  std::set<size_t>::iterator setIterator;
330  double val(0);
331 
332 
333  for (size_t line_iter=2; line_iter < ln.size();line_iter++){
334  std::string& tag=ln[line_iter];
335 
336  if (ln.size()<=line_iter+1){
337  std::cerr << "External Definition for Weighted Def is missing values: " << ln[line_iter] << std::endl;
338  return false;
339  }
340 
341  if (tag.compare("START")==0){
342 
343 
344  int tempInt;
345  if (!stringToInt(ln[line_iter+1], tempInt)){
346  std::cerr << "Value in External Definition START is not numeric: " << ln[line_iter+1] << std::endl;
347  return false;
348  }
349 
350 
351  startPosition=tempInt;
352  start=true;
353  line_iter++;
354  }
355  else if (tag.compare("END")==0){
356 
357  size_t tempInt;
358  if (!stringToInt(ln[line_iter+1], tempInt)){
359  std::cerr << "Value in External Definition END is not numeric: " << ln[line_iter+1] << std::endl;
360  return false;
361  }
362 
363 
364  stopPosition=tempInt;
365  stop=true;
366  line_iter++;
367  }
368  else if (tag.compare("STATE_NAME")==0){
369  definedStates.insert(info.stateIterByName[ln[line_iter+1]]);
370  state=true;
371  line_iter++;
372  }
373  else if (tag.compare("STATE_LABEL")==0){
374  std::vector<size_t>& temp=info.stateIterByLabel[ln[line_iter+1]];
375  for(size_t temp_iter=0; temp_iter < temp.size();temp_iter++){
376  definedStates.insert(temp[temp_iter]);
377  }
378  state=true;
379  line_iter++;
380  }
381  else if (tag.compare("STATE_GFF")==0){
382  std::vector<size_t>& temp = info.stateIterByGff[ln[line_iter+1]];
383  for(size_t temp_iter=0; temp_iter < temp.size(); temp_iter++){
384  definedStates.insert(temp[temp_iter]);
385  }
386  state=true;
387  line_iter++;
388  }
389  else if (tag.compare("VALUE")==0){
390 
391  double tempValue;
392  if(!stringToDouble(ln[line_iter+1], tempValue)){
393  std::cerr << "VALUE couldn't be converted to numerical value: " << ln[line_iter+1] << std::endl;
394  }
395 
396  val=tempValue;
397  value=true;
398  line_iter++;
399  }
400  else if (tag.compare("VALUE_TYPE")==0){
401  std::string &type=ln[line_iter+1];
402  if (type.compare("P(X)")==0){
403  val=log(val);
404  valType=true;
405  }
406  else if (type.compare("LOG")==0){
407  valType=true;
408  }
409  else{
410  valType=false;
411  }
412  line_iter++;
413  }
414  else{
415  std::cerr << "Invalid tag found in Sequence external definition: " << ln[line_iter] << std::endl;
416  }
417  }
418 
419  //Check to see that there are valid states defined in the set
420  if (definedStates.size()>0){
421  state=true;
422  }
423  else{
424  std::cerr << "No valid states defined by External definition" <<std::endl;
425  state=false;
426  }
427 
428  //If everything is define correctly then return true
429  if (start && stop && state && value && valType){
430  for (size_t position=startPosition-1; position < stopPosition; position++){
431  //Already have a defined external def at position
432  if (defs.defined(position)){
433  if (defs[position]->absolute){
434  std::cerr << "Can't add weight to absolute external definition" << std::endl;
435  return false;
436  }
437  }
438  else{
439  defs[position] = new(std::nothrow) weightDef(info.stateIterByName.size());
440 
441  if (defs[position]==NULL){
442  std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
443  exit(1);
444  }
445  }
446 
447  // Add states and values to definition
448  for (setIterator=definedStates.begin();setIterator!=definedStates.end();setIterator++){
449  defs[position]->assignWeight(*setIterator, val);
450  }
451  }
452  return true;
453  }
454  return false;
455  }
456 
457 }