StochHMM  v0.34
Flexible Hidden Markov Model C++ Library and Application
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
pwm.cpp
Go to the documentation of this file.
1 //
2 // pwm.cpp
3 //Copyright (c) 2007-2012 Paul C Lott
4 //University of California, Davis
5 //Genome and Biomedical Sciences Facility
6 //UC Davis Genome Center
7 //Ian Korf Lab
8 //Website: www.korflab.ucdavis.edu
9 //Email: lottpaul@gmail.com
10 //
11 //Permission is hereby granted, free of charge, to any person obtaining a copy of
12 //this software and associated documentation files (the "Software"), to deal in
13 //the Software without restriction, including without limitation the rights to
14 //use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
15 //the Software, and to permit persons to whom the Software is furnished to do so,
16 //subject to the following conditions:
17 //
18 //The above copyright notice and this permission notice shall be included in all
19 //copies or substantial portions of the Software.
20 //
21 //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 //IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
23 //FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
24 //COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
25 //IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
26 //CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 
28 #include "pwm.h"
29 namespace StochHMM{
30 
31  //! Constructor for PWM class
34  simple = true;
35  simpleThreshold = -INFINITY;
37  bgWeight = NULL;
39  max_spacer = 0;
40  return;
41  }
42 
43  /*! Import a Position Weight Matrices file
44  Imports file and parses the file position weight matrix file
45  \param[in] std::string file
46  */
47  void PWM::import(std::string& file){
48  std::string modelString=slurpFile(file);
49  parse(modelString);
50  }
51 
52  /*! Import a Position Weight Matrices file
53  Imports file and parses the file position weight matrix file
54  \param[in] const char* file
55  */
56  void PWM::import(const char* file){
57  std::string fl(file);
58  import(fl);
59  }
60 
61  /*! Parses a Position Weight Matrices string
62  Parses string containing position weight matrix definitions
63  \param[in] std::string file
64  */
65  bool PWM::parse(const std::string& matrix){
66  size_t thresh = matrix.find("THRESHOLD DEFINITION");
67  size_t track = matrix.find("TRACK SYMBOL DEFINITIONS");
68  size_t ambig = matrix.find("AMBIGUOUS SYMBOL DEFINITIONS");
69  size_t pwm = matrix.find("POSITION WEIGHT DEFINITIONS");
70  size_t back = matrix.find("BACKGROUND DEFINITION");
71  size_t space = matrix.find("SPACER DEFINITIONS");
72  size_t blank;
73  size_t nlChar;
74 
75  if (thresh != std::string::npos){
76  blank=matrix.find("\n\n",thresh);
77 
78  size_t nlCharEq = matrix.rfind("####\n",blank);
79  size_t nlCharNum= matrix.rfind("====\n",blank);
80  //Check for optional dividing line
81  if (nlCharEq!=std::string::npos){
82  nlChar=nlCharEq+5;
83  }
84  else if (nlCharNum!=std::string::npos){
85  nlChar=nlCharNum+5;
86  }
87  else{ //No divider line
88  nlChar=matrix.find("\n",thresh);
89  nlChar++;
90  }
91 
92 
93  std::string thr (matrix.substr(nlChar,blank-nlChar));
94 
95  if (!_parseThreshold(thr)){
96  return false;
97  }
98 
99  }
100 
101  if (track != std::string::npos){
102  blank=matrix.find("\n\n",track);
103 
104  size_t nlCharEq = matrix.rfind("####\n",blank);
105  size_t nlCharNum= matrix.rfind("====\n",blank);
106  //Check for optional dividing line
107  if (nlCharEq!=std::string::npos){
108  nlChar=nlCharEq+5;
109  }
110  else if (nlCharNum!=std::string::npos){
111  nlChar=nlCharNum+5;
112  }
113  else{ //No divider line
114  nlChar=matrix.find("\n",track);
115  nlChar++;
116  }
117 
118 
119  std::string trck (matrix.substr(nlChar,blank-nlChar));
120 
121  if (!_parseTrack(trck)){
122  return false;
123  }
124 
125  }
126  else{
127  std::cerr << "Required section: TRACK SYMBOL DEFINITIONS missing from the model" << std::endl;
128  return false;
129  }
130 
131  if (ambig != std::string::npos){
132  blank=matrix.find("\n\n",ambig);
133 
134  size_t nlCharEq = matrix.rfind("####\n",blank);
135  size_t nlCharNum= matrix.rfind("====\n",blank);
136  //Check for optional dividing line
137  if (nlCharEq!=std::string::npos){
138  nlChar=nlCharEq+5;
139  }
140  else if (nlCharNum!=std::string::npos){
141  nlChar=nlCharNum+5;
142  }
143  else{ //No divider line
144  nlChar=matrix.find("\n",ambig);
145  nlChar++;
146  }
147 
148  std::string amb(matrix.substr(nlChar,blank-nlChar));
149 
150  if (!_parseAmbiguous(amb)){
151  return false;
152  }
153 
154  }
155 
156  if (back!= std::string::npos){
157  blank=matrix.find("\n\n",back);
158 
159  size_t nlCharEq = matrix.rfind("####\n",blank);
160  size_t nlCharNum= matrix.rfind("====\n",blank);
161  //Check for optional dividing line
162  if (nlCharEq!=std::string::npos){
163  nlChar=nlCharEq+5;
164  }
165  else if (nlCharNum!=std::string::npos){
166  nlChar=nlCharNum+5;
167  }
168  else{ //No divider line
169  nlChar=matrix.find("\n",back);
170  nlChar++;
171  }
172 
173  std::string background(matrix.substr(nlChar,blank-nlChar));
174 
175  if (!_parseBackground(background)){
176  return false;
177  }
178  }
179 
180  //Parse the positions
181  if (pwm != std::string::npos){
182  std::string positions = matrix.substr(pwm);
183  _parsePositions(positions);
184  }
185 
186 
187  //Parse the Spacer Information
188  if (space != std::string::npos){
189  blank=matrix.find("\n\n",space);
190 
191  size_t nlCharEq = matrix.rfind("####\n",blank);
192  size_t nlCharNum= matrix.rfind("====\n",blank);
193  //Check for optional dividing line
194  if (nlCharEq!=std::string::npos){
195  nlChar=nlCharEq+5;
196  }
197  else if (nlCharNum!=std::string::npos){
198  nlChar=nlCharNum+5;
199  }
200  else{ //No divider line
201  nlChar=matrix.find("\n",space);
202  nlChar++;
203  }
204 
205  std::string spacer(matrix.substr(nlChar,blank-nlChar));
206 
207  if (!_parseSpacer(spacer)){
208  return false;
209  }
210  }
211 
213 
214  return true;
215  }
216 
218  positionMatrix = NULL;
219  thresholdSet = false;
220  threshold = -INFINITY;
221  }
222 
224  delete positionMatrix;
225  positionMatrix = NULL;
226  }
227 
228 
229  //!Parses the emission for each position from a string
230  //! \param txt String representation of emissions
231  //! \param names stringList of all state names defined in the model
232  //! \param trks Tracks defined in the model
233  //! \param wts Weight defined of the model
234  //! \param funcs StateFunction defined for the model
235  bool matrixPosition::parse(std::string& txt, track* trk, stringList& names){
236 
237  size_t nameHeader = txt.find("NAME:");
238  size_t transHeader = txt.find("TRANSITION:");
239  size_t thresholdHeader = txt.find("THRESHOLD:");
240  //size_t emmHeader = txt.find("EMISSION:");
241  size_t end = txt.find("//END");
242 
243  if (end != std::string::npos){
244  txt = txt.substr(0,end);
245  }
246 
247  //Parse Name
248  if (nameHeader != std::string::npos){
249  size_t endline = txt.find("\n",nameHeader);
250  std::string temp_name = txt.substr(nameHeader,endline-nameHeader);
251  stringList tmp;
252  tmp.splitString(temp_name, ":");
253  clear_whitespace(tmp[1], "\t\n ");
254  name = tmp[1];
255  }
256  else{
257  std::cerr << "Position is missing NAME definition\n"<< txt << std::endl;
258  exit(2);
259  }
260 
261 
262  //Parse Transitions
263  if (transHeader != std::string::npos){
264  std::string temp;
265  size_t endline = txt.find("\n",transHeader);
266  temp = txt.substr(transHeader, endline - transHeader);
267  stringList transi;
268  transi.splitString(temp,":,");
269  for(size_t i = 1; i < transi.size(); i++){
270  clear_whitespace(transi[i], " \t\n");
271  transition_names.push_back(transi[i]);
272  }
273  }
274 
275 
276  //Parse Thresholds
277  if (thresholdHeader != std::string::npos){
278  size_t endline = txt.find("\n",thresholdHeader);
279  std::string temp = txt.substr(thresholdHeader, endline-thresholdHeader);
280  stringList tmp;
281  tmp.splitString(temp, ":");
282  clear_whitespace(tmp[1], "\t\n ");
283  threshold = atof(tmp[1].c_str());
284  thresholdSet = true;
285  }
286 
287 
288  stringList lst;
289  lst.splitND(txt,"EMISSION:");
290 
291  emm* temp_emm = new(std::nothrow) emm;
292 
293  if (temp_emm==NULL){
294  std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
295  exit(1);
296  }
297 
298  if (!temp_emm->parse(lst[0],trk)){
299  return false;
300  }
301 
302  positionMatrix = temp_emm;
303 
304  return true;
305  }
306 
307 
308  std::string PWM::stringify(){
309  std::string pwm;
310  std::string split("#################################\n");
311  pwm += "#POSITION WEIGHT MATRIX\n\n";
312 
313  if ( simpleThreshold!= -INFINITY){
314  pwm +="<THRESHOLD DEFINITION>\n";
315  pwm += split;
317  pwm += "\n\n";
318  }
319 
320  pwm +="<TRACK SYMBOL DEFINITIONS>\n";
321  pwm += split;
322  pwm += trk->stringify();
323  pwm += "\n";
324 
325 
326  if (bgWeight != NULL){
327  pwm += "<BACKGROUND DEFINITION>\n";
328  pwm += split;
329  pwm += bgWeight->stringify();
330  }
331 
332  if (undefinedSpacer){
333  pwm += "<SPACER DEFINITIONS>\n";
334  pwm += split;
335  pwm += frontWeightName + "-SPACER-" + backWeightName + ":";
336  pwm += join(undefSpacerSizes, ',');
337  pwm += "\n\n";
338  }
339 
340 
341  pwm +="<POSITION WEIGHT DEFINITIONS>\n";
342  pwm += split;
343 
344 
345  for(size_t i=0; i< weightMatrix.size(); i++){
346  pwm += weightMatrix[i]->stringify();
347  pwm += split;
348  }
349  pwm += "//END";
350  return pwm;
351  }
352 
354  std::string mat;
355  mat = "NAME:\t" + name + "\n";
356 
357  if (thresholdSet){
358  mat+= "THRESHOLD:\t" + double_to_string(threshold) + "\n";
359  }
360 
361  if (transition_names.size() > 0){
362  mat += "TRANSITIONS:" + join(transition_names, ',') + "\n";
363  }
364 
365  mat += positionMatrix->stringify();
366  return mat;
367  }
368 
369 
370  void PWM::score(sequences* seqs){
371  if (simple){
372  scoreSimple(seqs);
373  }
374  else if (undefinedSpacer){
375  scoreUndefSpacer(seqs);
376  }
377  else if (variableSpacer){
378  scoreVariableSpacer(seqs);
379  }
380  return;
381  }
382 
383  void PWM::score(sequence* seq){
384  if (simple){
385  scoreSimple(seq);
386  }
387  else if (undefinedSpacer){
388  scoreUndefSpacer(seq);
389  }
390  else if (variableSpacer){
391  scoreVariableSpacer(seq);
392  }
393  return;
394  }
395 
396 
397  float matrixPosition::getEmissionValue(sequences* seqs, size_t pos){
398  return positionMatrix->get_emission(*seqs, pos);
399  }
400 
402  return positionMatrix->get_emission(*seq, pos);
403  }
404 
405 
406 
408  size_t numberSeqs = seqs->size();
409  for (size_t i=0;i < numberSeqs ;i++){
410  sequence* sq = seqs->getSeq(i);
411  std::cout << sq->getHeader() << std::endl;
412  scoreSimple(sq);
413  }
414  return;
415  }
416 
418  size_t seq_size = seq->getLength();
419  size_t motif_size = weightMatrix.size();
420 
421  if (seq_size < motif_size){
422  return;
423  }
424 
425  float score(0);
426  for(size_t position = 0; position <= seq_size - motif_size; position++){
427  score = 0;
429 
430  for (size_t motif_pos = 0; motif_pos < motif_size; motif_pos ++){
431  score += weightMatrix[motif_pos]->getEmissionValue(seq,position+motif_pos);
432 
433  if (weightMatrix[motif_pos]->isThresholdSet()){
434  currentThreshold = weightMatrix[motif_pos]->getThresholdPtr();
435  }
436 
437 
438  if (score <= *currentThreshold){
439  break;
440  }
441 
442  }
443 
444  if (score <= *currentThreshold){
445  continue;
446  }
447 
448  std::cout << position << "\t" << score << "\n";
449  }
450  }
451 
453  size_t numberSeqs = seqs->size();
454  for (size_t i=0;i < numberSeqs ;i++){
455  sequence* sq = seqs->getSeq(i);
456  std::cout << sq->getHeader() << std::endl;
457  scoreUndefSpacer(sq);
458  }
459  return;
460  }
461 
462 
464  size_t seq_size = seq->getLength();
465  size_t motif_size = weightMatrix.size();
466  size_t front_size = frontWeightMatrix.size();
467  size_t back_size = backWeightMatrix.size();
468 
469  if (seq_size < motif_size + max_spacer){
470  return;
471  }
472 
473  float front_score(0);
474  float back_score(0);
475  float sumScore(0);
476  size_t spacerSize(0);
477  for(size_t position = 0; position < seq_size - (motif_size + max_spacer); position++){
478  front_score = 0;
479  back_score = 0;
480  sumScore = 0;
481  //Calculate the Front motif score
482  for (size_t front_pos = 0; front_pos < front_size; front_pos++){
483  front_score += frontWeightMatrix[front_pos]->getEmissionValue(seq,position+front_pos);
484  if (front_score <= *currentThreshold){
485  break;
486  }
487  }
488 
489  if (front_score <= *currentThreshold){
490  continue;
491  }
492 
493  //Calculate the back scores
494  for(size_t sp = 0; sp < undefSpacerSizes.size(); sp++){
495  spacerSize = undefSpacerSizes[sp];
496 
497  //Check to see if back_motif was previously calculated.
498  //If it was then get score and assign score.
499 
500  //If not then calculate
501  size_t back_pos(0);
502  for(; back_pos < back_size; back_pos++){
503  back_score += backWeightMatrix[back_pos]->getEmissionValue(seq, position+front_size+spacerSize+back_pos);
504  sumScore = front_score+back_score;
505  if (front_score+back_score <= *currentThreshold){
506  break;
507  }
508  }
509 
510  //TODO: Assign score to list
511  //Assign score of backward
512  if (back_pos == back_size){
513  //std::cout << "We've made it through" << std::endl;
514  }
515 
516  if (sumScore >= *currentThreshold){
517  std::cout << position << "\t" <<spacerSize << "\t" << sumScore << "\n";
518  }
519  }
520  }
521  return;
522  }
523 
524 
526  size_t numberSeqs = seqs->size();
527  for (size_t i=0;i < numberSeqs ;i++){
528  sequence* sq = seqs->getSeq(i);
529  std::cout << sq->getHeader() << std::endl;
531  }
532  return;
533  }
534 
536  size_t seq_size = seq->getLength();
537  size_t motif_size = weightMatrix.size();
538  size_t front_size = frontWeightMatrix.size();
539  //size_t back_size = backWeightMatrix.size();
540 
541  if (seq_size < motif_size + max_spacer){
542  return;
543  }
544 
545  float score(0);
546  float sumScore(0);
547  for(size_t position = 0; position < seq_size - (motif_size + max_spacer); position++){
548  score = 0;
549  sumScore = 0;
550  //Calculate the Front motif score
551  for (size_t front_pos = 0; front_pos < front_size; front_pos++){
552  score += frontWeightMatrix[front_pos]->getEmissionValue(seq,position+front_pos);
553  if (score <= *currentThreshold){
554  break;
555  }
556  }
557 
558  if (score <= *currentThreshold){
559  continue;
560  }
561 
562  //Calculate the back scores
563  for(size_t sp = 0; sp < variableSpacerMatrix.size(); sp++){
564 
565  //Check to see if back_motif was previously calculated.
566  //If it was then get score and assign score.
567 
568  //If not then calculate
569 
570  score+=variableSpacerMatrix[sp]->getEmissionValue(seq, position+front_size+sp);
571 
572  if ((*variableTransition)[sp]){
573  sumScore = calculateBack(seq, position+front_size+sp+1, score);
574  if (sumScore >= *currentThreshold){
575  std::cout << position << "\t" << (sp+1) << "\t" << sumScore << "\n";
576  }
577  }
578 
579 
580  }
581  }
582  return;
583  }
584 
585 
586  float PWM::calculateBack(sequences *seqs, size_t position, float sum){
587  float score = sum;
588  for (size_t pos = 0; pos < backWeightMatrix.size(); pos++){
589  sum += backWeightMatrix[pos]->getEmissionValue(seqs, position+pos);
590  if (sum < *currentThreshold){
591  return -INFINITY;
592  }
593  }
594  return score;
595  }
596 
597 
598 
599  float PWM::calculateBack(sequence *seq, size_t position, float sum){
600  float score = sum;
601  for (size_t pos = 0; pos < backWeightMatrix.size(); pos++){
602  sum += backWeightMatrix[pos]->getEmissionValue(seq, position+pos);
603  if (sum < *currentThreshold){
604  return -INFINITY;
605  }
606  }
607  return score;
608  }
609 
610 
611 
612  bool PWM::_parseSpacer(std::string& txt){
613  undefinedSpacer = true;
614  simple = false;
615  variableSpacer = false;
616 
617  stringList lst;
618  lst.splitString(txt,":");
619 
620 
621  //Split header to see where to insert spacer
622  stringList order;
623  order.splitString(lst[0],"-");
624 
625  frontWeightName = order[0];
626  backWeightName = order[2];
627 
628  //Push Frontside Weights into Matrix
629  if (positionNames.count(frontWeightName) == 1){
630  size_t frontIndex = positionNames[frontWeightName];
631  for(size_t i=0;i <= frontIndex; i++){
632  frontWeightMatrix.push_back(weightMatrix[i]);
633  }
634  }
635  else{
636  std::cerr << "Couldn't find " << frontWeightName << " in the POSITION WEIGHT DEFINITIONS" << std::endl;
637  exit(2);
638  }
639 
640  //Push backside weights into Matrix
641  if (positionNames.count(backWeightName) == 1){
642  size_t backIndex = positionNames[backWeightName];
643  for(size_t i=backIndex;i < weightMatrix.size() ; i++){
644  backWeightMatrix.push_back(weightMatrix[i]);
645  }
646  }
647  else{
648  std::cerr << "Couldn't find " << backWeightName << " in the POSITION WEIGHT DEFINITIONS" << std::endl;
649  exit(2);
650  }
651 
652  //Parse Spacer Sizes
653  stringList spc;
654  spc.splitString(lst[1],",");
655  for(size_t i=0;i<spc.size();i++){
656  clear_whitespace(spc[i], " \t\n");
657  size_t val = atoi(spc[i].c_str());
658  if (val > max_spacer){
659  max_spacer = val;
660  }
661 
662  if (val < min_spacer){
663  min_spacer = val;
664  }
665 
666  undefSpacerSizes.push_back(val);
667  }
668 
669  return true;
670  }
671 
672 
673  bool PWM::_parseThreshold(std::string& txt){
674  simpleThreshold = atof(txt.c_str());
675  return true;
676  }
677 
678  bool PWM::_parseBackground(std::string& txt){
679  emm* temp_emm = new(std::nothrow) emm;
680 
681  if (temp_emm==NULL){
682  std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
683  exit(1);
684  }
685 
686  if (!temp_emm->parse(txt,trk)){
687  return false;
688  }
689 
690  bgWeight = temp_emm;
691 
692  return true;
693  }
694 
695 
696 
698 
699  //Assign emission transitions
700  for(size_t i=0; i < weightMatrix.size(); i++){
701  std::vector<std::string>& tmp= weightMatrix[i]->getTransitionNames();
702  for (size_t j = 0; j < tmp.size(); j++) {
703  size_t itera = positionNames[tmp[j]];
704  weightMatrix[i]->addTransition(weightMatrix[itera]->getEmission());
705  }
706 
707  if (tmp.size() > 1){
708  simple = false;
709  variableSpacer = true;
710  undefinedSpacer = false;
711  }
712  }
713 
714 
715  //TODO: DETERMINE CORE REGIONS AND THEN ADAPT SCORING TO THOSE
716  if (variableSpacer){
717 
718  //Determine Front Core
719  //Check the Transitions and determine core regions
720  size_t firstMultiTrans(SIZE_MAX);
721  size_t backWeightStart(SIZE_MAX);
722  for(size_t i=0 ; i < weightMatrix.size(); i++){
723  if (weightMatrix[i]->transitionsSize() > 1){
724  firstMultiTrans = i;
725  std::vector<std::string>& tmp = weightMatrix[i]->getTransitionNames();
726  for( size_t j = 0; j < tmp.size(); j++){
727  size_t indx = positionNames[tmp[j]];
728 
729  if (indx > i+1){
730  backWeightStart = indx;
731  break;
732  }
733  }
734  break;
735  }
736  else{
737  frontWeightMatrix.push_back(weightMatrix[i]);
738  }
739  }
740 
741  //Determine Spacer and transitions
742  variableTransition = new (std::nothrow) std::bitset<1024>;
743  for(size_t i=firstMultiTrans; i< backWeightStart; i++){
744  variableSpacerMatrix.push_back(weightMatrix[i]);
745  if (weightMatrix[i]->transitionsSize() > 1){
746  (*variableTransition)[i]=1;
747  }
748  }
749 
750 
751  //Determine Back Core
752  for(size_t i = backWeightStart; i < weightMatrix.size() ; i++){
753  backWeightMatrix.push_back(weightMatrix[i]);
754  }
755  }
756  return;
757  }
758 
759 
760  bool PWM::_parseTrack(std::string& txt){
761  stringList lst;
762  lst.splitString(txt, "\n");
763 
764  trk=new(std::nothrow) track();
765 
766  if (trk==NULL){
767  std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
768  exit(1);
769  }
770 
771  if (trk->parse(lst[0])){
772  trk->setIndex(0);
773  }
774  else {
775  std::string info = "Couldn't parse new track line. Please check formatting of : " + lst[0];
776  std::cerr << info << std::endl;
777  exit(1);
778  }
779 
780  return true;
781  }
782 
783  bool PWM::_parseAmbiguous(std::string& txt){
784  stringList lst;
785  lst.splitString(txt, "\n");
786  for(size_t i=0;i<lst.size();i++){
787  stringList ln;
788  ln.splitString(lst[i],":\t ");
789 
790  if (trk!=NULL){
791  if (!trk->parseAmbiguous(ln[1])){
792  std::cerr << "Couldn't parse the Ambiguous section for " << ln[0] << std::endl;
793  return false;
794  }
795  }
796  else{
797  std::string info = "Ambiguous Characters Section\nSupplied track name doesn't correspond to any previously parsed tracks.\nPlease check the formatting and names.\n Unfound Name: " + ln[0];
798 
799  std::cerr << info << std::endl;
800  return false;
801  }
802 
803  }
804  return true;
805  }
806 
807  bool PWM::_parsePositions(std::string& txt){
808  //3. create and parse states
809 
810  stringList positions;
811  _splitPositions(txt,positions);
812 
813  stringList names;
814  _getOrderedPositionNames(positions, names);
815  for(size_t i=0;i<names.size();i++){
816  positionNames[names[i]] = i ;
817  }
818 
819  for(size_t iter=1;iter<positions.size();iter++){
820 
821  matrixPosition* temp = new (std::nothrow) matrixPosition;
822  temp->parse(positions[iter], trk, names);
823  weightMatrix.push_back(temp);
824 
825  //std::cout << positions[iter] << std::endl;
826 
827  }
828 
829  return true;
830  }
831 
832  //Split states into individual state strings
833  bool PWM::_splitPositions(std::string& txt ,stringList& sts){
834 
835  size_t start=0;
836  size_t end=0;
837 
838  while(start!=std::string::npos){
839  end=txt.find("NAME:",start+1);
840 
841  std::string st = txt.substr(start,end-start);
842 
843  clear_whitespace(st, "#><");
844 
845  sts.push_back(st);
846 
847  start=txt.find("NAME:",end);
848 
849  }
850  return true;
851  }
852 
853 
854  //Get all Name in order
856 
857  for(size_t i=0;i<pos.size();i++){
858  size_t nameHeader=pos[i].find("NAME:");
859  if (nameHeader == std::string::npos){
860  continue;
861  }
862  size_t nameLineEnding=pos[i].find_first_of("\n",nameHeader);
863  std::string name = pos[i].substr(nameHeader+5,nameLineEnding-(nameHeader+5));
864  clear_whitespace(name, " \t\n");
865  if (names.contains(name)){
866  std::cerr << "Position with name of: " << name << " is defined twice in the model\n";
867  return false;
868  }
869  else{
870  names.push_back(name);
871  }
872  }
873  return true;
874  }
875 
876 
877 
878 
879 
880 }