StochHMM  v0.34
Flexible Hidden Markov Model C++ Library and Application
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
pwm.h
Go to the documentation of this file.
1 //
2 // pwm.h
3 //Copyright (c) 2007-2012 Paul C Lott
4 //University of California, Davis
5 //Genome and Biomedical Sciences Facility
6 //UC Davis Genome Center
7 //Ian Korf Lab
8 //Website: www.korflab.ucdavis.edu
9 //Email: lottpaul@gmail.com
10 //
11 //Permission is hereby granted, free of charge, to any person obtaining a copy of
12 //this software and associated documentation files (the "Software"), to deal in
13 //the Software without restriction, including without limitation the rights to
14 //use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
15 //the Software, and to permit persons to whom the Software is furnished to do so,
16 //subject to the following conditions:
17 //
18 //The above copyright notice and this permission notice shall be included in all
19 //copies or substantial portions of the Software.
20 //
21 //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 //IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
23 //FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
24 //COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
25 //IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
26 //CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 
28 #ifndef StochHMM_pwm_cpp
29 #define StochHMM_pwm_cpp
30 
31 #include <vector>
32 #include <math.h>
33 #include <algorithm>
34 #include <vector>
35 #include <list>
36 #include <bitset>
37 #include "track.h"
38 #include "sequences.h"
39 #include "stochTypes.h"
40 #include "emm.h"
41 namespace StochHMM{
42 
43 
44 
45 
46  class matrixPosition;
47 
48  //Position Weight Matrix (PWM) class
49  //Before using we need to calculate the pValues for the matrix
    //
    //NOTE(review): this listing's embedded line numbers jump from 89 to 92 and
    //from 93 to 98, so some declarations of this class are not visible here.
    //In particular trk, simpleThreshold and currentThreshold (assigned by the
    //inline setters below) have no visible declaration - confirm against the
    //original pwm.h before relying on this text.
50  class PWM{
51  public:
52  PWM();
    //Two overloads that take the file argument as a std::string or a C string.
    //Presumably they read a matrix definition from that file - confirm in the implementation.
53  void import(std::string& file);
54  void import(const char* file);
    //Takes the matrix as text and returns bool (presumably true on success - confirm).
55  bool parse(const std::string& matrix);
56 
    //Scoring entry points that take a `sequences` pointer. What is scored and
    //where the result is stored is defined in the implementation, not here.
57  void score(sequences* seqs);
58  void scoreSimple(sequences* seqs);
59  void scoreUndefSpacer(sequences* seqs);
60  void scoreVariableSpacer(sequences* seqs);
61 
    //The same four entry points overloaded for a single `sequence` pointer.
62  void score(sequence* seq);
63  void scoreSimple(sequence* seq);
64  void scoreUndefSpacer(sequence* seq);
65  void scoreVariableSpacer(sequence* seq);
66 
    //Stores the given emm pointer in bgWeight (the pointer is kept, nothing is copied).
67  inline void setBackground(emm* bg){bgWeight = bg;}
    //Stores the given track pointer in trk (the pointer is kept, nothing is copied).
68  inline void setTrack(track* tr){trk = tr;}
    //Stores the threshold value in simpleThreshold.
69  inline void setSimpleThreshold(float thresh){simpleThreshold = thresh;};
70 
    //Stores the pointer itself in currentThreshold, not the value it points to,
    //so the pointed-to float must stay alive while it is used.
71  inline void setCurrentThreshold(float* thresh){currentThreshold = thresh;}
    //Returns the track pointer last stored in trk.
72  inline track* getTrack(){return trk;}
73 
    //stringify() returns a std::string and print() returns nothing; presumably a
    //text rendering of the matrix and its output - confirm in the implementation.
74  std::string stringify();
75  void print();
76  private:
77 
    //Private helpers that each take a text fragment by reference and return bool.
    //Presumably the per-section steps of parse() - confirm in the implementation.
78  bool _parseTrack(std::string& txt);
79  bool _parseAmbiguous(std::string& txt);
80  bool _parsePositions(std::string& txt);
81  bool _parseThreshold(std::string& txt);
82  bool _parseBackground(std::string& txt);
83  bool _parseSpacer(std::string& txt);
84  bool _splitPositions(std::string& txt ,stringList& sts);
85  bool _getOrderedPositionNames(stringList& states, stringList& names);
86  void _finalizeTransitions();
    //Overloaded for `sequences` and `sequence`; takes a position and a running
    //sum and returns a float. Exact meaning is defined in the implementation.
87  float calculateBack(sequences *seqs, size_t position, float sum);
88  float calculateBack(sequence *seq, size_t position, float sum);
89 
92 
    //Presumably selects the simple scoring mode (no spacer) - TODO confirm.
93  bool simple;
98 
    //Matrix positions held as raw pointers. No destructor is visible in this
    //listing, so who frees them is not shown here - TODO confirm ownership.
99  std::vector<matrixPosition*> weightMatrix;
    //Maps a name (std::string) to a size_t; presumably an index into weightMatrix - confirm.
100  std::map<std::string,size_t> positionNames;
101 
    //Separate front/back position lists plus heap-allocated score bookkeeping.
    //NOTE(review): the bitset is fixed at 1024 bits; check how positions beyond that are handled.
102  std::vector<matrixPosition*> frontWeightMatrix;
103  std::vector<matrixPosition*> backWeightMatrix;
104  std::list<float>* backScores;
105  std::bitset<1024>* backScored;
106 
    //Spacer-related state; presumably used by the scoreUndefSpacer /
    //scoreVariableSpacer paths - confirm in the implementation.
107  std::vector<size_t> undefSpacerSizes;
108  std::vector<matrixPosition*> variableSpacerMatrix;
109  std::bitset<1024>* variableTransition;
110  std::string frontWeightName;
111  std::string backWeightName;
112  size_t min_spacer;
113  size_t max_spacer;
114 
115  emm* bgWeight; //Background weight (pointer assigned by setBackground)
116  };
117 
118 
119  /*! \class matrixPosition
120  Stores weight information for a position in the position weight matrix
121  */
    //NOTE(review): the embedded line numbers jump from 121 to 123 here. The
    //missing line is presumably the `class matrixPosition{` header (the name is
    //forward-declared earlier in this file and used by the constructor and
    //destructor below) - restore it from the original pwm.h before compiling.
123  public:
124  matrixPosition();
125  ~matrixPosition();
    //Takes the position text, a track pointer and a list of names; returns bool
    //(presumably true on success - confirm in the implementation).
126  bool parse(std::string& txt, track* trk, stringList& names);
    //Overloaded for `sequences` and `sequence`; the size_t is presumably the
    //position within the sequence - confirm in the implementation.
127  float getEmissionValue(sequences*, size_t);
128  float getEmissionValue(sequence*, size_t);
    //Returns the stored positionMatrix pointer.
129  inline emm* getEmission(){return positionMatrix;};
    //Appends the given emm pointer to transitions.
130  inline void addTransition(emm* trans){transitions.push_back(trans);}
    //Returns a non-const reference to transition_names, so callers can modify it.
131  inline std::vector<std::string>& getTransitionNames(){return transition_names;}
    //Assigns threshold only; this inline body does not modify thresholdSet.
132  inline void setThreshold(float thresh){threshold = thresh;}
    //Returns the address of the threshold member, i.e. a pointer into this object.
133  inline float* getThresholdPtr(){return &threshold;}
    //Returns the current threshold value.
134  inline float getThreshold(){return threshold;}
    //Returns the number of entries in transitions.
135  inline size_t transitionsSize(){return transitions.size();}
    //Returns the thresholdSet flag.
136  inline bool isThresholdSet(){return thresholdSet;}
    //Returns a std::string; presumably a text rendering of this position - confirm.
137  std::string stringify();
138 
139  private:
140  emm* positionMatrix; //Weight information for position
141  bool thresholdSet; //True if a threshold is set for this position(global threshold)
142  float threshold; //Global threshold for position
143  std::vector<emm*> transitions; //The next weights after this position (Transitions)
144  std::vector<std::string> transition_names; //Next weights after this position
145  std::string name; //Name assigned to this position
146  };
147 
148 
149 
150 
151 }
152 #endif