StochHMM  v0.34
Flexible Hidden Markov Model C++ Library and Application
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
weight.cpp
Go to the documentation of this file.
1 //
2 // weight.cpp
3 //Copyright (c) 2007-2012 Paul C Lott
4 //University of California, Davis
5 //Genome and Biomedical Sciences Facility
6 //UC Davis Genome Center
7 //Ian Korf Lab
8 //Website: www.korflab.ucdavis.edu
9 //Email: lottpaul@gmail.com
10 //
11 //Permission is hereby granted, free of charge, to any person obtaining a copy of
12 //this software and associated documentation files (the "Software"), to deal in
13 //the Software without restriction, including without limitation the rights to
14 //use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
15 //the Software, and to permit persons to whom the Software is furnished to do so,
16 //subject to the following conditions:
17 //
18 //The above copyright notice and this permission notice shall be included in all
19 //copies or substantial portions of the Software.
20 //
21 //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 //IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
23 //FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
24 //COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
25 //IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
26 //CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 
28 #include "weight.h"
29 namespace StochHMM{
30 
31 
33  absolute=false;
34  absoluteValue=-INFINITY;
35  maxValue=NULL;
36  minValue=NULL;
37  vals=NULL;
38  }
39 
41  delete maxValue;
42  delete minValue;
43  delete vals;
44  maxValue=NULL;
45  minValue=NULL;
46  vals=NULL;
47  return;
48  }
49 
50  void weight::setMaxWeight(double& x, double& y){
51  if (maxValue!=NULL){
52  delete maxValue;
53  }
54 
55  maxValue=new(std::nothrow) std::pair<double,double>;
56  if (maxValue==NULL){
57  std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
58  exit(1);
59  }
60  maxValue->first=x;
61  maxValue->second=y;
62  return;
63  }
64 
65  void weight::setMinWeight(double& x, double& y){
66  if (minValue!=NULL){
67  delete minValue;
68  }
69 
70  minValue=new(std::nothrow) std::pair<double,double>;
71  if (minValue==NULL){
72  std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
73  exit(1);
74  }
75  minValue->first=x;
76  minValue->second=y;
77  return;
78  }
79 
80  void weight::setWeights(std::vector<double>& xVals, std::vector<double>& yVals){
81  if (maxValue==NULL && minValue==NULL){
82  setWeights(xVals, yVals, NULL, NULL);
83  }
84  else if (maxValue==NULL){
85  setWeights(xVals, yVals, minValue, NULL);
86  }
87  else if (minValue==NULL){
88  setWeights(xVals, yVals, NULL, maxValue);
89  }
90  else{
91  setWeights(xVals,yVals,minValue,maxValue);
92  }
93  }
94 
95  //Set weights given the coordinates
96  void weight::setWeights( std::vector<double>& xVals, std::vector<double>& yVals, std::pair<double,double>* xMin, std::pair<double,double>* xMax ){
97 
98  if (vals!=NULL){
99  delete vals;
100  }
101 
102  vals=new(std::nothrow) std::vector<std::pair<double,double> >;
103  if (vals==NULL){
104  std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
105  exit(1);
106  }
107 
108  if (xVals.size()!=yVals.size()){
109  std::cerr << "X-Y values are not the same size." <<std::endl;
110  return;
111  }
112 
113  for(size_t i=0;i<xVals.size();i++){
114  std::pair<double,double> xy (xVals[i],yVals[i]);
115  vals->push_back(xy);
116  }
117  return;
118  }
119 
120 
121  bool compXval(const std::pair<double,double>& a, const std::pair<double,double>& b){
122  return (a.first<b.first);
123  }
124 
125 
126  //Given a score get the weighted score
127  double weight::getWeightedScore(double score){
128  //Find score that flanks or is equal to the score we supply
129  //If there is one equal to then we return it, otherwise
130  //we get the two values and interpolate the score linearly
131  //unless we are on the ends then we extrapolate
132 
133  if (absolute){
134  return score + absoluteValue;
135  }
136  else if (maxValue!=NULL && score > maxValue->first){
137  return maxValue->second;
138  }
139  else if (maxValue!=NULL && score < minValue->first){
140  return minValue->second;
141  }
142  else{
143  std::vector<std::pair<double,double> >::iterator it;
144 
145  std::pair<double,double> temp (score,0);
146 
147  it=lower_bound(vals->begin(),vals->end(),temp ,compXval);
148 
149  //std::cout << (*it).first << "\t" << exp((*it).first) <<std::endl;
150 
151  if ((*it).first==score){
152  return (*it).second;
153  }
154  else if (it==vals->begin()){ //extrapolate based on beginning
155  return extrapolate((*it),(*(it+1)),score);
156  }
157  else if (it==vals->end()){ //extrapolate based on end
158  return extrapolate((*(it-2)), (*(it-1)),score);
159  }
160  else{
161  return interpolate((*(it-1)),(*it),score);
162  }
163  }
164  }
165 
166  std::string weight::stringify(){
167  std::string scaleString;
168 
169  //Process Values and scaled values;
170  scaleString+= "\tVALUE:\tLOG\t[";
171  for(size_t i=0;i<vals->size();i++){
172  if (i>0){
173  scaleString+= ",";
174  }
175  scaleString+= double_to_string((*vals)[i].first);
176  }
177  scaleString+= "]\n";
178 
179  scaleString+= "\tSCALED:\tLOG\t[";
180  for(size_t i=0;i<vals->size();i++){
181  if (i>0){
182  scaleString+= ",";
183  }
184  scaleString+= double_to_string((*vals)[i].second);
185  }
186  scaleString+="]\n";
187 
188  if (minValue!=NULL){
189  scaleString+= "\tMIN_VALUE:\tLOG\t" + double_to_string(minValue->first) + "\n";
190  scaleString+= "\tMIN_SCALED:\tLOG\t" + double_to_string(minValue->second) + "\n";
191  }
192 
193  if (maxValue!=NULL){
194  scaleString+= "\tMAX_VALUE:\tLOG\t" + double_to_string(maxValue->first) + "\n";
195  scaleString+= "\tMAX_SCALED:\tLOG\t" + double_to_string(maxValue->second) + "\n";
196  }
197 
198  return scaleString;
199  }
200 
201  bool weight::parse(const std::string& txt){
202  stringList lst;
203  lst.splitString(txt,"\n");
204 
205  bool table=false;
206  std::vector<double> xVal;
207  std::vector<double> yVal;
208 
209  bool min=false;
210  double minXVal(0);
211  double minYVal(0);
212 
213  bool max=false;
214  double maxXVal(0);
215  double maxYVal(0);
216 
217  for(size_t i=0;i<lst.size();i++){
218  stringList line;
219  clear_whitespace(lst[i],":[]\n ");
220  line.splitString(lst[i],"\t,");
221  if (line.size()==0){
222  std::cerr << "Unable to parse weight line: " << lst[i] << std::endl;
223  return false;
224  }
225 
226  if (line[0].compare("SCALE")==0){
227  if (line.size()==2){
228  name=line[1];
229  }
230  else{
231  std::cerr << "Weight Scale is missing a name" << std::endl;
232  return false;
233  }
234  }
235  else if (line[0].compare("ABSOLUTE")==0){
236  if (line.size()<2){
237  std::cerr << "Weight Absolute value is missing " << std::endl;
238  return false;
239  }
240 
241 
242  double val;
243  if (!stringToDouble(line[1], val)){
244  std::cerr << "Weighted Absoolute value not numeric: " << line[1] << std::endl;
245  return false;
246  }
247 
248  setAbsolute(val);
249  return true;
250  }
251  else if (line[0].compare("VALUE")==0){
252 
253  if (line.size()<3){
254  std::cerr << "Weight VALUE line is missing values" << std::endl;
255  return false;
256  }
257 
258  bool px=line[1].compare("P(X)")==0;
259  for(size_t i=2;i<line.size();i++){
260 
261  double tempValue;
262  if (!stringToDouble(line[i], tempValue)){
263  std::cerr << "Weighted VALUE not numeric: " << line[i] << std::endl;
264  return false;
265  }
266 
267  if (px){
268  xVal.push_back(log(tempValue));
269  }
270  else{
271  xVal.push_back(tempValue);
272  }
273  }
274  table=true;
275  }
276  else if (line[0].compare("SCALED")==0){
277 
278  if (line.size()<3){
279  std::cerr << "Weight SCALED line is missing values" << std::endl;
280  return false;
281  }
282 
283  bool px = line[1].compare("P(X)")==0;
284  for(size_t i=2;i<line.size();i++){
285 
286  double tempValue;
287  if (!stringToDouble(line[i], tempValue)){
288  std::cerr << "Weighted SCALED value not numeric: " << line[i] << std::endl;
289  return false;
290  }
291 
292  if (px){
293  yVal.push_back(log(tempValue));
294  }
295  else{
296  yVal.push_back(tempValue);
297  }
298  }
299  table=true;
300  }
301  else if (line[0].compare("MIN_VALUE")==0){
302 
303  if (line.size()<3){
304  std::cerr << "Weight MIN_VALUE line is missing values" << std::endl;
305  return false;
306  }
307 
308  double tempValue;
309  if (!stringToDouble(line[2], tempValue)){
310  std::cerr << "Weighted VALUE not numeric: " << line[i] << std::endl;
311  return false;
312  }
313 
314 
315  if (line[1].compare("P(X)")==0){
316  minXVal=log(tempValue);
317  }
318  else{
319  minXVal=tempValue;
320  }
321  min=true;
322  }
323  else if (line[0].compare("MIN_SCALED")==0){
324 
325  if (line.size()<3){
326  std::cerr << "Weight MIN_SCALED line is missing values" << std::endl;
327  return false;
328  }
329 
330  double tempValue;
331  if (!stringToDouble(line[2], tempValue)){
332  std::cerr << "Weighted MIN_SCALED value not numeric: " << line[i] << std::endl;
333  return false;
334  }
335 
336 
337 
338  if (line[1].compare("P(X)")==0){
339  minYVal=log(tempValue);
340 
341  }
342  else{
343  minYVal=tempValue;
344 
345  }
346  min=true;
347  }
348  else if (line[0].compare("MAX_VALUE")==0){
349 
350 
351  if (line.size()<3){
352  std::cerr << "Weight MAX_VALUE line is missing values" << std::endl;
353  return false;
354  }
355 
356  double tempValue;
357  if (!stringToDouble(line[2], tempValue)){
358  std::cerr << "Weighted MAX_VALUE value not numeric: " << line[i] << std::endl;
359  return false;
360  }
361 
362 
363 
364  if (line[1].compare("P(X)")==0){
365  maxXVal=log(tempValue);
366  }
367  else{
368  maxXVal=tempValue;
369  }
370  max=true;
371  }
372  else if (line[0].compare("MAX_SCALED")==0){
373 
374  if (line.size()<3){
375  std::cerr << "Weight MAX_SCALED line is missing values" << std::endl;
376  return false;
377  }
378 
379  double tempValue;
380  if (!stringToDouble(line[2], tempValue)){
381  std::cerr << "Weighted MAX_SCALED value not numeric: " << line[i] << std::endl;
382  return false;
383  }
384 
385 
386  if (line[1].compare("P(X)")==0){
387  maxYVal=log(tempValue);
388 
389  }
390  else{
391  maxYVal=tempValue;
392  }
393  max=true;
394  }
395  }
396 
397  if (table && (yVal.size()==xVal.size())){
398  setWeights(xVal, yVal);
399  }
400  else{
401  std::cerr << "Scaling Values entries don't match up. They are of different sizes" <<std::endl;
402  return false;
403  }
404 
405  if (min){
406  setMinWeight(minXVal, minYVal);
407  }
408 
409  if (max){
410  setMaxWeight(maxXVal, maxYVal);
411  }
412 
413  return true;
414  }
415 
416 
417 
419 
420  std::string& name=wt->getName();
421  if (!wts.count(name)){
422  wts[name]=wt;
423  return;
424  }
425  else{
426  std::cerr << "Weight of name: " << name << " already exists and is duplicated. Please remove duplicates.\n";
427  exit(1);
428  }
429  }
430 
431  weight* weights::operator[](std::string& name){
432  if (wts.count(name)){
433  return wts[name];
434  }
435  else{
436  return NULL;
437  }
438  }
439 
441  std::cout << stringify() << std::endl;
442  }
443 
444  std::string weights::stringify(){
445  std::string weightString;
446  std::string lnSep(50,'=');
447 
448  weightString+= "SCALING FUNCTIONS\n" + lnSep + "\n";
449  std::map<std::string,weight*>::iterator it;
450  for(it=wts.begin();it!=wts.end();it++){
451  weightString+="SCALE:\t" + (*it).first + "\n";
452  weightString+=(*it).second->stringify();
453  }
454 
455  weightString+="\n";
456  return weightString;
457 
458  }
459 
460 
461 
462 }