StochHMM  v0.34
Flexible Hidden Markov Model C++ Library and Application
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
text.cpp
Go to the documentation of this file.
1 //text.cpp
2  //Copyright (c) 2007-2012 Paul C Lott
3  //University of California, Davis
4  //Genome and Biomedical Sciences Facility
5  //UC Davis Genome Center
6  //Ian Korf Lab
7  //Website: www.korflab.ucdavis.edu
8  //Email: lottpaul@gmail.com
9  //
10  //Permission is hereby granted, free of charge, to any person obtaining a copy of
11  //this software and associated documentation files (the "Software"), to deal in
12  //the Software without restriction, including without limitation the rights to
13  //use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
14  //the Software, and to permit persons to whom the Software is furnished to do so,
15  //subject to the following conditions:
16  //
17  //The above copyright notice and this permission notice shall be included in all
18  //copies or substantial portions of the Software.
19  //
20  //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  //IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
22  //FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
23  //COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
24  //IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
25  //CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 
27 #include "text.h"
28 namespace StochHMM{
29 
30  //! Create a new stringList
31  //! Remove whitespace set to true by default.
33  removeWS=true;
34  return;
35  }
36 
37  //! Create a new StringList
38  //! \param txt String to be split
39  //! \param ws Whitespace characters to remove from string before splitting
40  //! \param del Delimiter characters to use to split string
41  //! \param remove True will remove whitespace, False will leave whitespace
42 
43  stringList::stringList(std::string& txt, std::string& ws, std::string& del,bool remove):removeWS(remove), whitespace(ws),delimiters(del)
44  {
45  splitString(txt);
46  return;
47  }
48 
49  //! Searches for the string and returns bool if found or not
50  //! \param txt String to search stringList
51  bool stringList::contains(const std::string& txt){
52  for (size_t i=0;i<line.size();i++){
53  if (line[i].find(txt) != std::string::npos){
54  return true;
55  }
56  }
57  return false;
58  }
59 
60  bool stringList::containsExact(const std::string& txt){
61  for (size_t i=0;i<line.size();i++){
62  if (line[i].compare(txt) == 0){
63  return true;
64  }
65  }
66  return false;
67  }
68 
69  //! Searches the stringList for matching string and returns the index position of first match
70  //! \param txt String to search stringList for
71  size_t stringList::indexOf(const std::string&txt) {
72  for (size_t i=0;i<line.size();i++){
73  if (line[i].find(txt) != std::string::npos){
74  return i;
75  }
76  }
77  return SIZE_MAX;
78  }
79 
80  //! Searches the stringList for matching string and returns the index position of a given string from the starting position
81  //! \param txt String to search stringList for
82  //! \param pos Position to start search from
83  size_t stringList::indexOf(const std::string&txt,size_t pos){
84  for (size_t i=pos;i<line.size();i++){
85  if (line[i].find(txt) != std::string::npos){
86  return i;
87  }
88  }
89  return SIZE_MAX;
90  }
91 
92  //! Removes Comments, Removes Whitespace, and splits the string
93  //! Whitespace and Split delimiters are required to be previously set in stringList
94  void stringList::processString(std::string& txt){
95  line.clear();
96  comment.clear();
97 
98  //Extract Comments
99  comment=parseComment(txt, '#');
100 
101  //Remove Whitespace
102  if (removeWS){
104  }
105 
106  splitString(txt);
107 
108  return;
109  }
110 
111  //! Split string using delimiter
112  //! Splits the string up according to character delimiters defined in stringList.
113  //! The char delimiters are deleted from the returned value. Delimiters allows you supply multiple
114  //! delimiters in a single string.
115  //! \param txt String to be split
116  void stringList::splitString(const std::string& txt){
117 
118  line.clear();
119  comment.clear();
120 
121  size_t found = SIZE_MAX;
122  size_t initial= SIZE_MAX;
123  do {
124  found++;
125  initial=found;
126 
127  found=txt.find_first_of(delimiters.c_str(),found);
128  std::string st=txt.substr(initial,found-initial);
129 
130  if (st.size()>0){
131  line.push_back(st);
132  }
133 
134  } while (found!=std::string::npos);
135 
136  return;
137  }
138 
139  //! Split string using delimiter
140  //! Splits the string up according to character delimiters supplied in the delimiter string.
141  //! The char delimiters are deleted from the returned value. Delimiters allows you supply multiple
142  //! delimiters in a single string.
143  //! \param txt String to be split
144  //! \param del Delimiters to use to split evaluated as single characters, not whole string
145  void stringList::splitString(const std::string& txt, const std::string& del){
146  delimiters=del;
147  splitString(txt);
148  return;
149  }
150 
151  void stringList::splitString(const std::string& txt,size_t charSize){
152  for(size_t i=0;i<txt.size();i+=charSize){
153  line.push_back(txt.substr(i,charSize));
154  }
155  return;
156  }
157 
158  //! Non-destructive string split
159  //! Splits the string up using an additional string as the delimiter. Unlike
160  //! stringSplit the delimiter isn't deleted from the returned value;
161  //! \param txt String to be split
162  //! \param del Delimiter string to use to split
163  void stringList::splitND(const std::string& txt,const std::string& del){
164  line.clear();
165  comment.clear();
166  size_t initial=txt.find(del);
167 
168  do {
169  size_t found=txt.find(del,initial+1);
170  std::string st=txt.substr(initial,found-initial);
171  initial=found;
172  if (st.size()>0){
173  line.push_back(st);
174  }
175 
176  } while (initial!=std::string::npos);
177 
178  return;
179  }
180 
181 
182  //!Remove leading whitespace
183  //!Removes the leading whitespace from the sequence
184  //! \param ws String of whitespace characters to remove
185  void stringList::removeLWS(const std::string& ws){
186  for(size_t i=0;i<this->size();i++){
187  removeLeadingWS((*this)[i],ws);
188  }
189  return;
190  }
191 
192 
193  //! Remove whitespace characters '\\t' and then splits string by delimiters ':' or '\\n'
194  //! \param txt String to be split
195  bool stringList::fromTxt(std::string& txt){
196  setWhitespace("\t");
197  setDelimiters(":\n");
198  processString(txt);
199 
200  if (size()>0){
201  return true;
202  }
203  else{
204  return false;
205  }
206  }
207 
208  //! Remove whitespace characters '\\t' and then splits string by delimiters ':' or '\\n'
209  //! Uses std::istream::getline(...,\\n) and splits string
210  //! \param in std::istream reference
211  bool stringList::fromTxt(std::istream& in){
212  std::string temp;
213  getline(in, temp, '\n');
214  return fromTxt(temp);
215  }
216 
217 
218  //! Remove whitespace characters '\t' and then splits string by delimiters ':', '\n', comma or space
219  //! \param txt String to be split
220  bool stringList::fromTrack(std::string& txt){
221  setWhitespace("\t");
222  setDelimiters(":, \n");
223  processString(txt);
224 
225  if (size()>0){
226  return true;
227  }
228 
229  return false;
230  }
231  //! Remove whitespace characters '\t' and then splits string by delimiters ':', '\n', comma or space
232  //! \param in std::istream reference
233  bool stringList::fromTrack(std::istream& in){
234  std::string temp;
235  getline(in, temp, '\n');
236  return fromTrack(temp);
237  }
238 
239  //! Remove whitespace characters '\t','\n' or space and then splits string by delimiters ':' or comma
240  //! \param txt String to be split
241  bool stringList::fromNext(std::string& txt){
242  setWhitespace("\t\n ");
243  setDelimiters(":,");
244  processString(txt);
245 
246  if (size()>0){
247  return true;
248  }
249 
250  return false;
251  }
252 
253  //! Remove whitespace characters '\t','\n' or space and then splits string by delimiters ':' or comma
254  //! \param txt std::istream reference
255  bool stringList::fromNext(std::istream& in){
256  std::string temp;
257  getline(in, temp, '\n');
258  return fromNext(temp);
259  }
260 
261 
262  //! Remove whitespace characters '\t' and then splits string by delimiters ':' or '\n'
263  //! \param txt String to be split
264  bool stringList::fromAlpha(const std::string& txt,size_t alpha){
265  line.clear();
266  comment.clear();
267 
268  if (foundAlphaDelimiter(txt)){
269  splitString(txt,";, \t");
270  }
271  else{
272  splitString(txt,alpha);
273  }
274 
275  if (this->size()>0){
276  return true;
277  }
278 
279  return false;
280  }
281 
282  //! Returns true if text delimiters ":,[space]\\t" are found in the string
283  //! \param txt String used to find delimiter
284  bool stringList::foundAlphaDelimiter(const std::string& txt){
285  for(size_t i=0;i<txt.size();i++){
286  switch(txt[i]){
287  case ':':
288  return true;
289  case ',':
290  return true;
291  case ' ':
292  return true;
293  case '\t':
294  return true;
295  default:
296  break;
297  }
298 
299  }
300  return false;
301  }
302 
303 
304 
305  //! Remove whitespace characters '\t' and then splits string by delimiters ':' or '\n'
306  //! \param in std::istream stream
307  //! \param alpha Size of alphabet
308  bool stringList::fromAlpha(std::istream& in, size_t alpha){
309  std::string temp;
310  getline(in, temp, '\n');
311  return fromAlpha(temp,alpha);
312  }
313 
314 
315  //! Splits a definition from model file
316  //! \param txt string to split
317  //! \param ws Whitespace characters to use
318  //! \param del Delimiters characters to use
319  bool stringList::fromDef(std::string& txt, std::string& ws, std::string& del){
320  whitespace=ws;
321  delimiters = del;
322  processString(txt);
323 
324  if (size()>0){
325  return true;
326  }
327  else{
328  return false;
329  }
330  }
331 
332 
333  //! Splits a definition from model file
334  //! \param in std::istream to get line from
335  //! \param ws Whitespace characters to use
336  //! \param del Delimiters characters to use
337  bool stringList::fromDef(std::istream& in, std::string& ws, std::string& del){
338  whitespace=ws;
339  delimiters = del;
340  std::string temp;
341  getline(in, temp, '\n');
342  return fromDef(temp, ws, del);
343  }
344 
345 
346  std::string stringList::pop_ith(size_t pos){
347  if (pos>=line.size()){
348  return "";
349  }
350 
351  std::string temp=line[pos];
352  line.erase(line.begin()+pos);
353  return temp;
354  }
355 
356 
357  //! Returns the values in the stringList as std::vector of doubles
358  std::vector<double> stringList::toVecDouble(){
359  std::vector<double> temp;
360  for(size_t iter=0;iter<line.size();iter++){
361 
362  double val;
363 
364  stringToDouble(line[iter], val);
365  temp.push_back(val);
366  }
367  return temp;
368  }
369 
370  //! Returns the values in the stringList as std::vector of integers
371  void stringList::toVecInt(std::vector<int>& ret_val){
372 
373  for(size_t iter=0;iter<line.size();iter++){
374 
375  int val(0);
376 
377  if (!stringToInt(line[iter], val)){
378  std::cerr << "Couldn't convert " << line[iter] << " to an integer\n";
379  }
380  else{
381  ret_val.push_back(val);
382  }
383 
384  }
385 
386  return;
387  }
388 
389 
390 
391  //! Print each line to stdout
393  for(size_t i=0;i<line.size();i++){
394  std::cout << line[i] << std::endl;
395  }
396  std::cout << "#" << comment << std::endl;
397  }
398 
399 
400  //! Joins the stringList into string using "\t" and return string
401  //! \return string of stringList joined by "\\t"
402  std::string stringList::stringify(){
403  std::string output=join(line,'\t');
404  if (comment.size()>0){
405  output+="#"+ comment;
406  }
407 
408  return output;
409  }
410 
411 
412  //!Parses out the comments and stores the comment delimited by "#"
413  //!Comment can be accessed by command getComment();
415  for(size_t iter=0;iter<line.size();iter++){
416  comment+=parseComment(line[iter], '#');
417  }
418  }
419 
420 
//! Find the first comment character and return everything following it
//! The comment character and the text after it are removed from \p txt. The
//! returned comment does not include the comment character itself, so code
//! that prints it can prepend the character without doubling it.
//! \param txt String to search; truncated at the comment character when one is found
//! \param commentChar Character that starts a comment
//! \return Text after the comment character, or an empty string if there is none
std::string parseComment(std::string& txt, char commentChar){
    std::string comment;
    const size_t commentPos = txt.find_first_of(commentChar);
    if (commentPos != std::string::npos){
        // commentPos+1 <= txt.size(), so substr cannot throw here
        comment = txt.substr(commentPos + 1);
        txt.erase(commentPos);
    }
    return comment;
}
431 
//! Remove every occurrence of the given whitespace characters from a string
//! The string is compacted in place: characters not listed in \p white keep
//! their relative order.
//! \param input String to remove whitespace from
//! \param white String of whitespace characters to remove
void clear_whitespace(std::string &input,std::string white){
    size_t kept = 0;
    for (size_t idx = 0; idx < input.size(); ++idx){
        const bool isWhite = (white.find(input[idx]) != std::string::npos);
        if (!isWhite){
            input[kept] = input[idx];
            ++kept;
        }
    }
    input.resize(kept);
}
446 
//! Removes leading whitespace characters from a string
//! If the string consists only of whitespace characters it becomes empty.
//! \param txt String to strip; modified in place
//! \param ws String containing the whitespace characters to remove
void removeLeadingWS(std::string& txt,const std::string& ws){
    // find_first_not_of yields npos when every character is whitespace;
    // erase(0, npos) then empties the string.
    const size_t firstKept = txt.find_first_not_of(ws);
    txt.erase(0, firstKept);
}
460 
461 
462  //! Parses key and value from a line
463  //! Where key is delimited by <<KEY>> = Value
464  //! \param txt String to extract key value from
465  //! \param key String to assign key to
466  //! \param value String to assign value to
467  void getKeyValue(std::string& txt,std::string& key,std::string& value){
468  size_t found=txt.find("<<");
469 
470  if (found==std::string::npos){
471  removeLeadingWS(txt,"\t \n");
472  value=txt;
473  return;
474  }
475  else{
476  size_t ending=txt.find(">>");
477  if (ending!=std::string::npos){
478  key=txt.substr(found+2,ending-(found+2));
479  stringList lst;
480  lst.splitString(txt,"=");
481  removeLeadingWS(lst[1],"\t \n");
482  value= lst[1];
483  return;
484  }
485  else{
486  std::cerr << "Missing closing brackets on Key\n";
487  exit(1);
488  }
489 
490  }
491  }
492 
493 
494  //! Splits string using delimiters and return stringList
495  stringList& splitString(const std::string& txt, const std::string& delimiters){
496  static stringList lst;
497  lst.clear();
498  lst.splitString(txt,delimiters);
499  return lst;
500  }
501 
502 
//! Replace every occurrence of a character with another character in a string
//! \param txt String whose characters are replaced in place
//! \param ch Character to search the string for
//! \param replaceCh Character to replace each found \p ch with
void replaceChar(std::string& txt, char ch, char replaceCh){
    size_t found = txt.find(ch);
    while (found != std::string::npos){
        txt[found] = replaceCh;
        // Continue after the replaced position: this terminates even when
        // ch == replaceCh and avoids rescanning the start of the string.
        found = txt.find(ch, found + 1);
    }
    return;
}
515 
516  //! Converts a vector of ints into a string delimited by a character c
517  //! \param input Vector of integers to be converted
518  //! \param c Character to use as a delimiter
519  std::string join(std::vector<int> &input, char c){
520  std::string out;
521  if (input.size()==0){
522  out="";
523  return out;
524  }
525  else if (input.size()==1){
526  out=int_to_string(input[0]);
527  return out;
528  }
529  else{
530  out=int_to_string(input[0]);
531  for(size_t i=1;i<input.size();i++){
532  out+=c;
533  out+=int_to_string(input[i]);
534  }
535  return out;
536  }
537  }
538 
539  //! Converts a vector of size_t into a string delimited by a character c
540  //! \param input Vector of integers to be converted
541  //! \param c Character to use as a delimiter
542  std::string join(std::vector<size_t> &input, char c){
543  std::string out;
544  if (input.size()==0){
545  out="";
546  return out;
547  }
548  else if (input.size()==1){
549  out=int_to_string(input[0]);
550  return out;
551  }
552  else{
553  out=int_to_string(input[0]);
554  for(size_t i=1;i<input.size();i++){
555  out+=c;
556  out+=int_to_string(input[i]);
557  }
558  return out;
559  }
560  }
561 
//! Convert an integer to its decimal string form
//! \param input Integer you want to convert to string
//! \return Text produced by streaming \p input into a string stream
std::string int_to_string(int input){
    std::ostringstream buffer;
    buffer << input;
    return buffer.str();
}
570 
571 
//! Convert a size_t to its decimal string form
//! \param input Unsigned value you want to convert to string
//! \return Text produced by streaming \p input into a string stream
std::string int_to_string(size_t input){
    std::ostringstream buffer;
    buffer << input;
    return buffer.str();
}
580 
581 
582 
//! Convert a double to a string using default stream formatting
//! \param input Double you want to convert to a string
//! \return Text produced by streaming \p input into a string stream
std::string double_to_string(double input){
    std::ostringstream buffer;
    buffer << input;
    return buffer.str();
}
591 
//! Convert a float to a string using default stream formatting
//! \param input Float you want to convert to a string
//! \return Text produced by streaming \p input into a string stream
std::string double_to_string(float input){
    std::ostringstream buffer;
    buffer << input;
    return buffer.str();
}
600 
601 
//! Convert string to integer
//! The value is read with stream extraction from the start of the text.
//! \param txt Text representation of integer
//! \param val Integer to be assigned
//! \return true if the extraction succeeded, false if it failed
bool stringToInt(std::string& txt, int& val){
    std::istringstream input(txt);
    input >> val;
    return !input.fail();
}
615 
//! Convert string to size_t
//! The value is read with stream extraction from the start of the text.
//! \param txt Text representation of the unsigned integer
//! \param val size_t to be assigned
//! \return true if the extraction succeeded, false if it failed
bool stringToInt(std::string& txt, size_t& val){
    std::istringstream input(txt);
    input >> val;
    return !input.fail();
}
629 
630 
//! Convert string to double
//! The value is read with stream extraction from the start of the text.
//! \param txt Text representation of double
//! \param val Double to be assigned
//! \return true if the extraction succeeded, false if it failed
bool stringToDouble(std::string& txt, double& val){
    std::istringstream input(txt);
    input >> val;
    return !input.fail();
}
643 
644 
645 
646 
647  //! Converts a vector of shorts into a string delimited by a character c
648  //! \param input Vector of shorts to be converted
649  //! \param c Character to use as a delimiter
650  std::string join(std::vector<short> &input, char c){
651  std::string out;
652  if (input.size()==0){
653  out="";
654  return out;
655  }
656  else if (input.size()==1){
657  out=int_to_string(input[0]);
658  return out;
659  }
660  else{
661  out=int_to_string(input[0]);
662  for(size_t i=1;i<input.size();i++){
663  out+=c;
664  out+=int_to_string(input[i]);
665  }
666  return out;
667  }
668  }
669 
670  //! Converts a vector of doubles into a string delimited by a character c
671  //! \param input Vector of doubles to be converted
672  //! \param c Character to use as a delimiter
673  std::string join(std::vector<double> &input, char c){
674  std::string out;
675  if (input.size()==0){
676  out="";
677  return out;
678  }
679  else if (input.size()==1){
680  out=double_to_string(input[0]);
681  return out;
682  }
683  else{
684  out=double_to_string(input[0]);
685  for(size_t i=1;i<input.size();i++){
686  out+=c;
687  out+=double_to_string(input[i]);
688  }
689  return out;
690  }
691  }
692 
//! Converts a vector of strings into a single string delimited by a character c
//! An empty vector yields an empty string; no delimiter is added before the
//! first or after the last element.
//! \param input Vector of strings to be joined
//! \param c Character to use as a delimiter
//! \return The elements of \p input joined by \p c
std::string join(std::vector<std::string> &input, char c){
    std::string out;
    for (size_t idx = 0; idx < input.size(); ++idx){
        if (idx != 0){
            out += c;
        }
        out += input[idx];
    }
    return out;
}
711 
712 
//! Splits a line into a vector of strings using the delimiters '"', ',', space, '\\n' and '\\t'
//! Tokens are appended to \p line. The input string is consumed while splitting:
//! on return it holds only the text after the last delimiter (possibly empty).
//! \param line Vector of strings that receives the tokens
//! \param input String to be split; modified in place
void split_line(std::vector<std::string> &line,std::string &input){
    static const char* const separators = "\", \n\t";

    for (size_t cut = input.find_first_of(separators);
         cut != std::string::npos;
         cut = input.find_first_of(separators)){

        // A separator at the very front carries no token: just drop it
        if (cut == 0){
            input.erase(0,1);
            continue;
        }

        // Store the token and discard it together with its separator
        line.push_back(input.substr(0,cut));
        input.erase(0,cut+1);
    }

    // Whatever remains after the last separator is the final token
    if (!input.empty()){
        line.push_back(input);
    }
}
739 
740  //! Parse a line and extract a bracketed tag from the model file
741  //! Returns a stringList which contains the tag split using ":\\t[space]"
742  //! \param txt String to be have tag extracted from
743  stringList extractTag(std::string& txt){
744  stringList lst;
745  std::pair<size_t,size_t> tagCoord = balanced_brackets(txt,"[]");
746  if (tagCoord.first!=tagCoord.second){
747  std::string tag = txt.substr(tagCoord.first+1,tagCoord.second-tagCoord.first-1);
748  txt.erase(tagCoord.first,tagCoord.second-tagCoord.first+1);
749  lst.splitString(tag,":\t ");
750  }
751  return lst;
752  }
753 
//! Returns the positions of the first opening bracket and of its matching closing bracket
//! Nested brackets are counted, so the returned closing position balances the
//! first opening bracket found at or after \p offset.
//! \param text String to search for brackets
//! \param brackets String whose first character is the opening and second the closing bracket
//! \param offset Position at which to start searching for the opening bracket
//! \return (opening position, closing position), or (0,0) when no opening bracket
//!         is found or the brackets never balance
std::pair<size_t,size_t> balanced_brackets(const std::string& text, const std::string& brackets, size_t offset){
    const char opening = brackets[0];
    const char closing = brackets[1];
    const std::pair<size_t,size_t> unbalanced(0,0);

    const size_t start = text.find_first_of(opening, offset);
    if (start == std::string::npos){
        return unbalanced;
    }

    // One bracket is open; walk forward until the depth returns to zero
    int depth = 1;
    size_t cursor = start;
    while (depth != 0){
        cursor = text.find_first_of(brackets, cursor + 1);
        if (cursor == std::string::npos){
            return unbalanced;
        }

        if (text[cursor] == opening){
            ++depth;
        }
        else if (text[cursor] == closing){
            --depth;
        }
    }

    return std::make_pair(start, cursor);
}
795 
796 
797  //! Returns a pair of size_t values that describe the coordinates of between brackets from start of the string
798  //! \param text String to use to search for brackets
799  //! \param brackets String of length two containing opening and closing bracket
800  std::pair<size_t,size_t> balanced_brackets(const std::string& text, const std::string& brackets){
801  return balanced_brackets(text,brackets,0);
802  }
803 
804 
805 
//! Is the value of string numeric
//! A string counts as numeric when it consists solely of the characters
//! "0123456789.-eE" and contains at least one digit. This is a character-set
//! check, not a full number parse.
//! \param str String to determine if it is numeric
//! \return true if the string passes the check, false otherwise (including for an empty string)
bool isNumeric(const std::string& str){
    // Any character outside the numeric alphabet disqualifies the string
    if (str.find_first_not_of("0123456789.-eE") != std::string::npos){
        return false;
    }

    // Require a digit so that "", "-", "." or "e" are not reported as numbers
    return str.find_first_of("0123456789") != std::string::npos;
}
819 
//! Slurp a file into a string
//! If the file stream is not in a good state after opening, a message is written
//! to std::cerr and the program exits with status 1.
//! \param file Filename
//! \return string that contains the complete file
std::string slurpFile(std::string& file){
    std::ifstream in(file.c_str(),std::ifstream::in);

    if (in.good()){
        // Copy the whole stream buffer in one go
        std::stringstream contents;
        contents << in.rdbuf();
        return contents.str();
    }

    std::cerr << "File doesn't exist:" << file;
    exit(1);
}
834 
835 
836 }