StochHMM  v0.34
Flexible Hidden Markov Model C++ Library and Application
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
stochMath.cpp
Go to the documentation of this file.
1 //
2 // stochMath.cpp
3 
4 //Copyright (c) 2007-2012 Paul C Lott
5 //University of California, Davis
6 //Genome and Biomedical Sciences Facility
7 //UC Davis Genome Center
8 //Ian Korf Lab
9 //Website: www.korflab.ucdavis.edu
10 //Email: lottpaul@gmail.com
11 //
12 //Permission is hereby granted, free of charge, to any person obtaining a copy of
13 //this software and associated documentation files (the "Software"), to deal in
14 //the Software without restriction, including without limitation the rights to
15 //use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
16 //the Software, and to permit persons to whom the Software is furnished to do so,
17 //subject to the following conditions:
18 //
19 //The above copyright notice and this permission notice shall be included in all
20 //copies or substantial portions of the Software.
21 //
22 //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23 //IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
24 //FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
25 //COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
26 //IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
27 //CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28 
29 #include "stochMath.h"
30 namespace StochHMM{
31 
32 
33  void generateUnknownIndices(std::vector<size_t>& results, size_t alphabetSize, size_t order ,size_t value){
34  results.assign(alphabetSize,0);
35  for (size_t i = 0; i < (size_t) alphabetSize; i++){
36  results[i]= (size_t)value + i * (size_t) POWER[order-1][alphabetSize-1];
37  }
38  return;
39  }
40 
41 
42  //Linear interpolation of y value given two pair<x,y> and x value
43  double interpolate(std::pair<double,double>& a, std::pair<double,double>& b, double& cx){
44  //std::cout << a.first << "\t" << a.second <<std::endl;
45  //std::cout << b.first << "\t" << b.second <<std::endl;
46 
47  return a.second+(b.second-a.second)*((cx-a.first)/(b.first-a.first));
48  }
49 
50  //Linear extrapolation of y value given two pair<x,y> and x value
51  double extrapolate(std::pair<double,double>& a, std::pair<double,double>& b, double& cx){
52  return a.second+((cx-a.first)/(b.first-a.first))*(b.second-a.second);
53  }
54 
55  //Shannon's entropy
56  float entropy (std::vector<float> &set){
57  float entropy=0;
58  for(size_t i=0;i<set.size();i++){
59  entropy+=set[i]*(log(set[i])/log(2));
60  }
61  return entropy*-1;
62  }
63 
64  //Shannon's entropy
65  double entropy (std::vector<double> &set){
66  double entropy=0;
67  for(size_t i=0;i<set.size();i++){
68  entropy+=set[i]*(log(set[i])/log(2));
69  }
70  return entropy*-1;
71  }
72 
73  //Shannon's Relative Entropy (Kullback-Liebler Convergence)
74  //Normalized for A->B and B->A
75  float rel_entropy (std::vector<float> &set, std::vector<float> &set2){
76  float rel_entropy=0;
77  if (set.size()!=set2.size()){
78  std::cerr << "Distributions aren't the same size";
79  }
80 
81  for(size_t i=0;i<set.size();i++){
82  rel_entropy+=0.5*(set[i]* (log (set[i]/set2[i]) /log(2) )+set2[i]*(log(set2[i]/set[i])/log(2)));
83  }
84  return rel_entropy;
85  }
86 
87  //Shannon's Relative Entropy (Kullback-Liebler Convergence)
88  //Normalized for A->B and B->A
89  double rel_entropy (std::vector<double> &set, std::vector<double> &set2){
90  double rel_entropy=0;
91  if (set.size()!=set2.size()){
92  std::cerr << "Distributions aren't the same size";
93  }
94 
95  for(size_t i=0;i<set.size();i++){
96  rel_entropy+=0.5*(set[i]* (log (set[i]/set2[i]) /log(2) )+set2[i]*(log(set2[i]/set[i])/log(2)));
97  }
98  return rel_entropy;
99  }
100 
101 
102  /////////////////////////////////////////// Integration & Summation //////////////////////////////////////////////
103  //Need to adapt to take up to three paramenter and pass them to the function.
104  //If one parameter is given then only pass one to function and so on.
105 
106  //Fix: variable handed to function to include only vector<double>
107  double _integrate (double (*funct)(double, std::vector<double>&),double upper, double lower,std::vector<double>& param){
108  double mid=(lower+upper)/2.0;
109  double h3=fabs(upper-lower)/6.0;
110  return h3*(funct(lower,param)+4*funct(mid,param)+funct(upper,param));
111  }
112 
113  double integrate (double (*funct)(double,std::vector<double>&),double lower, double upper ,std::vector<double>& param, double max_error=0.000001, double sum=0){
114  double mid=(upper+lower)/2.0;
115  double left=_integrate(funct,lower, mid, param);
116  double right=_integrate(funct,mid,upper, param);
117 
118 
119  if (fabs(left+right-sum)<=15*max_error){
120  return left+right +(left+right-sum)/15;
121  }
122 
123 
124 
125 
126  return integrate(funct,lower,mid,param,max_error/2,left) + integrate(funct,mid,upper,param,max_error/2,right);
127  }
128 
129 
130 
131  // Adaptive Simpson's numerical integration method
132  double simpson (double (*funct)(double,double,double),double alpha, double beta, double lower, double upper){
133  double mid=(lower+upper)/2.0;
134  double h3=fabs(upper-lower)/6.0;
135  return h3*(funct(lower,alpha,beta)+4*funct(mid,alpha,beta)+funct(upper,alpha,beta));
136  }
137 
138  double adapt_simpson (double (*funct)(double,double,double),double alpha, double beta, double lower, double upper, double max_error, double sum){
139  double mid=(upper+lower)/2.0;
140  double left=simpson(funct,alpha,beta,lower, mid);
141 
142  double right=simpson(funct,alpha,beta,mid,upper);
143  if (fabs(left+right-sum)<=15*max_error){
144  return left+right +(left+right-sum)/15;
145  }
146  return adapt_simpson(funct,alpha,beta,lower,mid,max_error/2,left) + adapt_simpson(funct,alpha,beta,mid,upper,max_error/2,right);
147  }
148 
149  double summation (double (*funct)(int,std::vector<double>), int lower, int upper, std::vector<double> param){
150  double sum=0;
151  for(int i=lower;i<=upper;i++){
152  sum+=funct(i,param);
153  }
154  return sum;
155  }
156 
157  /////////////////////////////////////// FUNCTIONS //////////////////////////////////////////////
158 
159  double igamma_upper (double s, double x){
160  return tgamma(s)-igamma_lower(s,x);
161  }
162 
163 
164  //incomplete lower gamma (a,x)
165  ///http://wwwC/.rskey.org/CMS/index.php/the-library/11
166  double igamma_lower (double a, double x){
167  double constant = pow(x,a) * exp(-1 * x);
168  double sum =0;
169  for (int n=0;n<60;n++){
170  double num = pow(x,(double)n);
171  double denom=a;
172  for(int m=1;m<=n;m++){
173  denom*=a+m;
174  }
175  num/=denom;
176  sum+=num;
177  }
178  return constant * sum;
179  }
180 
181 
182  double rgammap(double s, double x){
183  return igamma_lower(s,x)/tgamma(s);
184  }
185 
186 
187  double rgammaq(double s, double x){
188  return igamma_upper(s,x)/tgamma(s);
189  }
190 
191  //Beta(a,b) = Beta function
192  double beta(double a, double b){
193  double value=(tgamma(a)*tgamma(b))/tgamma(a+b);
194  return value;
195  }
196 
197  //Incomplete beta function B(x,a,b)
198  double ibeta(double x, double a, double b){
199  return betaPDF(x,a,b) * exp(log(tgamma(a)) + log(tgamma(b)) - log(tgamma(a+b)));
200  }
201 
202 
203  //!Beta probability distribution function
204  //!param x Value 0<x<1
205  //!param a Shape parameter a>0
206  //!param b Shape parameter b>0
207  float betaPDF(float x, float a, float b){
208  float constant = 1/beta(a,b);
209  constant*=pow(x,a-1) * pow(1-x,b-1);
210  return constant;
211  }
212 
213 
214  //
215  ////Incomplete beta function B(x,a,b)
216  //double ibeta(double x, double a, double b){
217  // vector<double> parameters(2,0.0);
218  // parameters[0]=a;
219  // parameters[1]=b;
220  // return (integrate(_ibeta,0.0,x,parameters));
221  //}
222  //
223  //double _ibeta(double x, vector<double>& parameters){
224  // double a=parameters[0];
225  // double b=parameters[1];
226  // return (pow(x,a-1)*pow(1-x,b-1));
227  //}
228 
229 
230  //Regularized incomplete beta Ix(a,b)
231  double ribeta(double x, double a, double b){
232  return ibeta(x,a,b)/beta(a,b);
233  }
234 
235  //Returns stirlings approximation of floor(X!)
236  double factorial(double x){
237  double f=sqrt((2*x+(1/3))*M_PI)*pow(x,x)*exp(-1*x);
238  return floor(f);
239  }
240 
241  //Calculate the binomial coefficient
242  double bin_coef (double n, double r){
243  double c=0;
244  for(int i=r+1;i<=n;i++) {c+=log(i);}
245  for(int j=1;j<=(n-r);j++) {c-=log(j);}
246  return exp(c);
247  }
248 
249  int bin_coef (int n, int r){
250  float c=0;
251  for(int i=r+1;i<=n;i++) {c+=log(i);}
252  for(int j=1;j<=(n-r);j++) {c-=log(j);}
253  return exp(c);
254  }
255 
256 
257 }