// Look up the offset of every section header inside the model text `matrix`.
// Each offset is compared against std::string::npos further down to decide
// whether the corresponding section is present in the model.
// NOTE(review): the leading numbers (66, 67, ...) appear to be line numbers
// carried over from the listing this excerpt was taken from; lines between
// them are not visible here.
66 size_t thresh = matrix.find(
"THRESHOLD DEFINITION");
67 size_t track = matrix.find(
"TRACK SYMBOL DEFINITIONS");
68 size_t ambig = matrix.find(
"AMBIGUOUS SYMBOL DEFINITIONS");
69 size_t pwm = matrix.find(
"POSITION WEIGHT DEFINITIONS");
70 size_t back = matrix.find(
"BACKGROUND DEFINITION");
71 size_t space = matrix.find(
"SPACER DEFINITIONS");
// THRESHOLD section: handled only when its header was found.
75 if (thresh != std::string::npos){
// `blank` is the first "\n\n" at or after the header; it is used as the end
// of the section text.
// NOTE(review): no npos check on `blank` is visible. Assuming `matrix` is a
// std::string, an npos here makes the rfind calls search the whole text and
// makes the substr below run to the end of the text.
76 blank=matrix.find(
"\n\n",thresh);
// Search backwards from `blank` for a separator line.
// NOTE(review): the names look swapped relative to the patterns: nlCharEq
// searches for "####\n" and nlCharNum for "====\n". The backward search is
// also not bounded by `thresh`, so it can match a separator that precedes
// the header.
78 size_t nlCharEq = matrix.rfind(
"####\n",blank);
79 size_t nlCharNum= matrix.rfind(
"====\n",blank);
// The bodies of the two branches below are not visible in this excerpt.
81 if (nlCharEq!=std::string::npos){
84 else if (nlCharNum!=std::string::npos){
// First newline after the header (presumably the fallback when neither
// separator was found -- TODO confirm against the full source).
88 nlChar=matrix.find(
"\n",thresh);
// Copy the text between `nlChar` and `blank` as the threshold section.
93 std::string thr (matrix.substr(nlChar,blank-nlChar));
// TRACK SYMBOL section: handled only when its header was found.
101 if (track != std::string::npos){
// End of the section = first "\n\n" at or after the header.
// NOTE(review): no npos check on `blank` is visible in this excerpt.
102 blank=matrix.find(
"\n\n",track);
// Backward search from `blank` for a separator line.
// NOTE(review): nlCharEq looks for "####\n" and nlCharNum for "====\n", which
// reads as swapped naming; the search is not limited to text after `track`.
104 size_t nlCharEq = matrix.rfind(
"####\n",blank);
105 size_t nlCharNum= matrix.rfind(
"====\n",blank);
// Branch bodies are not visible in this excerpt.
107 if (nlCharEq!=std::string::npos){
110 else if (nlCharNum!=std::string::npos){
// First newline after the header (presumably the fallback start of the
// section body -- TODO confirm).
114 nlChar=matrix.find(
"\n",track);
// Copy the text between `nlChar` and `blank` as the track section.
119 std::string trck (matrix.substr(nlChar,blank-nlChar));
// Error report for a missing track section; the message calls the section
// required. Presumably this sits in the else branch of the `track` check
// above -- the surrounding lines are not visible.
127 std::cerr <<
"Required section: TRACK SYMBOL DEFINITIONS missing from the model" << std::endl;
// AMBIGUOUS SYMBOL section: handled only when its header was found.
131 if (ambig != std::string::npos){
// End of the section = first "\n\n" at or after the header.
// NOTE(review): no npos check on `blank` is visible in this excerpt.
132 blank=matrix.find(
"\n\n",ambig);
// Backward search from `blank` for a separator line.
// NOTE(review): nlCharEq looks for "####\n" and nlCharNum for "====\n", which
// reads as swapped naming; the search is not limited to text after `ambig`.
134 size_t nlCharEq = matrix.rfind(
"####\n",blank);
135 size_t nlCharNum= matrix.rfind(
"====\n",blank);
// Branch bodies are not visible in this excerpt.
137 if (nlCharEq!=std::string::npos){
140 else if (nlCharNum!=std::string::npos){
// First newline after the header (presumably the fallback start of the
// section body -- TODO confirm).
144 nlChar=matrix.find(
"\n",ambig);
// Copy the text between `nlChar` and `blank` as the ambiguous-symbol section.
148 std::string amb(matrix.substr(nlChar,blank-nlChar));
// BACKGROUND section: handled only when its header was found.
156 if (back!= std::string::npos){
// End of the section = first "\n\n" at or after the header.
// NOTE(review): no npos check on `blank` is visible in this excerpt.
157 blank=matrix.find(
"\n\n",back);
// Backward search from `blank` for a separator line.
// NOTE(review): nlCharEq looks for "####\n" and nlCharNum for "====\n", which
// reads as swapped naming; the search is not limited to text after `back`.
159 size_t nlCharEq = matrix.rfind(
"####\n",blank);
160 size_t nlCharNum= matrix.rfind(
"====\n",blank);
// Branch bodies are not visible in this excerpt.
162 if (nlCharEq!=std::string::npos){
165 else if (nlCharNum!=std::string::npos){
// First newline after the header (presumably the fallback start of the
// section body -- TODO confirm).
169 nlChar=matrix.find(
"\n",back);
// Copy the text between `nlChar` and `blank` as the background section.
173 std::string background(matrix.substr(nlChar,blank-nlChar));
// POSITION WEIGHT section: unlike the sections above, no end marker is
// searched for; everything from the header to the end of `matrix` is taken.
181 if (pwm != std::string::npos){
182 std::string positions = matrix.substr(pwm);
// SPACER section: handled only when its header was found.
188 if (space != std::string::npos){
// End of the section = first "\n\n" at or after the header.
// NOTE(review): no npos check on `blank` is visible in this excerpt.
189 blank=matrix.find(
"\n\n",space);
// Backward search from `blank` for a separator line.
// NOTE(review): nlCharEq looks for "####\n" and nlCharNum for "====\n", which
// reads as swapped naming; the search is not limited to text after `space`.
191 size_t nlCharEq = matrix.rfind(
"####\n",blank);
192 size_t nlCharNum= matrix.rfind(
"====\n",blank);
// Branch bodies are not visible in this excerpt.
194 if (nlCharEq!=std::string::npos){
197 else if (nlCharNum!=std::string::npos){
// First newline after the header (presumably the fallback start of the
// section body -- TODO confirm).
201 nlChar=matrix.find(
"\n",space);
// Copy the text between `nlChar` and `blank` as the spacer section.
205 std::string spacer(matrix.substr(nlChar,blank-nlChar));
// Parse one position definition held in `txt`.
// Locate the per-position field labels and the end marker.
237 size_t nameHeader = txt.find(
"NAME:");
238 size_t transHeader = txt.find(
"TRANSITION:");
239 size_t thresholdHeader = txt.find(
"THRESHOLD:");
241 size_t end = txt.find(
"//END");
// Drop the end marker and everything after it.
// NOTE(review): the three header offsets above were computed before this
// truncation. If a label only occurs after "//END", its offset lies past the
// shortened text and (assuming `txt` is a std::string) the substr calls below
// would throw std::out_of_range -- confirm whether that input is possible.
243 if (end != std::string::npos){
244 txt = txt.substr(0,end);
// NAME field: take the text from the label to the end of its line. The
// substring starts at the label itself, so it still contains "NAME:".
248 if (nameHeader != std::string::npos){
249 size_t endline = txt.find(
"\n",nameHeader);
250 std::string temp_name = txt.substr(nameHeader,endline-nameHeader);
// Error report that also dumps the offending position text (presumably the
// else branch of the NAME check -- surrounding lines are not visible).
257 std::cerr <<
"Position is missing NAME definition\n"<< txt << std::endl;
// TRANSITION field: take the text from the label to the end of its line.
263 if (transHeader != std::string::npos){
265 size_t endline = txt.find(
"\n",transHeader);
266 temp = txt.substr(transHeader, endline - transHeader);
// Iterates `transi` from index 1, i.e. element 0 is skipped (presumably the
// label token; the declaration and filling of `transi` are not visible).
269 for(
size_t i = 1; i < transi.
size(); i++){
// THRESHOLD field: take the text from the label to the end of its line.
277 if (thresholdHeader != std::string::npos){
278 size_t endline = txt.find(
"\n",thresholdHeader);
279 std::string temp = txt.substr(thresholdHeader, endline-thresholdHeader);
// Allocate an emission object with the non-throwing form of new, so failure
// is reported through a null pointer rather than an exception.
291 emm* temp_emm =
new(std::nothrow)
emm;
// Out-of-memory report.
// NOTE(review): the message has no separator between "File" and the file
// name, nor between the file name and "Line:".
294 std::cerr <<
"OUT OF MEMORY\nFile" << __FILE__ <<
"Line:\t"<< __LINE__ << std::endl;
// Parse the emission from lst[0] with the tracks `trk`; this branch is the
// parse-failure path (its body is not visible).
298 if (!temp_emm->
parse(lst[0],trk)){
// Build a textual representation of the model by appending to `pwm` (here a
// string accumulator, unlike the offset named `pwm` earlier in the file).
// `split` is a row of '#' characters; its use is in lines not visible here.
310 std::string split(
"#################################\n");
311 pwm +=
"#POSITION WEIGHT MATRIX\n\n";
// Section headers are written in angle brackets; the text inside the
// brackets matches the header strings searched for when parsing a model.
314 pwm +=
"<THRESHOLD DEFINITION>\n";
320 pwm +=
"<TRACK SYMBOL DEFINITIONS>\n";
327 pwm +=
"<BACKGROUND DEFINITION>\n";
333 pwm +=
"<SPACER DEFINITIONS>\n";
341 pwm +=
"<POSITION WEIGHT DEFINITIONS>\n";
// Start of a single position's text: the NAME label, a tab, then the name.
355 mat =
"NAME:\t" +
name +
"\n";
// Score every sequence in `seqs` with the weight matrix, one window at a time.
408 size_t numberSeqs = seqs->
size();
409 for (
size_t i=0;i < numberSeqs ;i++){
// Print the header of `sq` (presumably the i-th sequence; its assignment is
// not visible in this excerpt).
411 std::cout << sq->
getHeader() << std::endl;
// Guard for sequences shorter than the motif (body not visible).
// NOTE(review): if seq_size and motif_size are unsigned, the loop bound
// `seq_size - motif_size` below is only safe when this guard skips the
// sequence -- confirm.
421 if (seq_size < motif_size){
// Slide the motif over the sequence; the last start position
// (seq_size - motif_size) is included.
426 for(
size_t position = 0; position <= seq_size - motif_size; position++){
// Sum the emission value of each motif column at its aligned sequence index.
430 for (
size_t motif_pos = 0; motif_pos < motif_size; motif_pos ++){
431 score +=
weightMatrix[motif_pos]->getEmissionValue(seq,position+motif_pos);
// Output line: window start, tab, score. Any condition guarding this print
// lies in lines not visible here.
448 std::cout << position <<
"\t" << score <<
"\n";
// Score every sequence in `seqs` with a front motif, a spacer and a back motif.
453 size_t numberSeqs = seqs->
size();
454 for (
size_t i=0;i < numberSeqs ;i++){
// Print the header of `sq` (presumably the i-th sequence; its assignment is
// not visible in this excerpt).
456 std::cout << sq->
getHeader() << std::endl;
473 float front_score(0);
476 size_t spacerSize(0);
// NOTE(review): if seq_size, motif_size and max_spacer are unsigned, this
// bound wraps to a huge value when the sequence is shorter than
// motif_size + max_spacer; no guard is visible in this excerpt -- confirm.
// The bound is strict (<), unlike a `<=` window loop.
477 for(
size_t position = 0; position < seq_size - (motif_size +
max_spacer); position++){
// Accumulate the front motif score for the window starting at `position`.
482 for (
size_t front_pos = 0; front_pos < front_size; front_pos++){
483 front_score +=
frontWeightMatrix[front_pos]->getEmissionValue(seq,position+front_pos);
// The loop has no init clause: back_pos keeps whatever value it was given in
// lines not visible here.
502 for(; back_pos < back_size; back_pos++){
// Back motif columns are read after the front motif and the spacer.
503 back_score +=
backWeightMatrix[back_pos]->getEmissionValue(seq, position+front_size+spacerSize+back_pos);
504 sumScore = front_score+back_score;
// True when back_pos reached back_size (presumably meaning the back loop was
// not left early -- the exit logic is not visible).
512 if (back_pos == back_size){
// Output line: window start, spacer size, combined score (tab separated).
517 std::cout << position <<
"\t" <<spacerSize <<
"\t" << sumScore <<
"\n";
// Score every sequence in `seqs`; the back part is delegated to calculateBack.
526 size_t numberSeqs = seqs->
size();
527 for (
size_t i=0;i < numberSeqs ;i++){
// Print the header of `sq` (presumably the i-th sequence; its assignment is
// not visible in this excerpt).
529 std::cout << sq->
getHeader() << std::endl;
// NOTE(review): if these sizes are unsigned, the bound wraps when the
// sequence is shorter than motif_size + max_spacer; no guard is visible.
547 for(
size_t position = 0; position < seq_size - (motif_size +
max_spacer); position++){
// Walk the front motif columns (loop body not visible).
551 for (
size_t front_pos = 0; front_pos < front_size; front_pos++){
// The back part starts front_size + sp + 1 positions after the window
// start, where `sp` comes from lines not visible here; calculateBack
// receives the current `score` and its result becomes sumScore.
573 sumScore =
calculateBack(seq, position+front_size+sp+1, score);
// Output line: window start, sp + 1, combined score (tab separated).
575 std::cout << position <<
"\t" << (sp+1) <<
"\t" << sumScore <<
"\n";
// NOTE(review): inclusive bound -- index frontIndex itself is visited.
// Loop body is not visible in this excerpt.
631 for(
size_t i=0;i <= frontIndex; i++){
// Reported when the front weight name has no matching entry in the
// POSITION WEIGHT DEFINITIONS section (the condition is not visible).
636 std::cerr <<
"Couldn't find " <<
frontWeightName <<
" in the POSITION WEIGHT DEFINITIONS" << std::endl;
// Same report for the back weight name.
648 std::cerr <<
"Couldn't find " <<
backWeightName <<
" in the POSITION WEIGHT DEFINITIONS" << std::endl;
// Convert each entry of `spc` to a number.
// NOTE(review): atoi returns int and gives 0 for non-numeric text; a negative
// value stored into size_t wraps to a very large number. No validation is
// visible in this excerpt.
655 for(
size_t i=0;i<spc.
size();i++){
657 size_t val = atoi(spc[i].c_str());
// Allocate an emission object with the non-throwing form of new, so failure
// is reported through a null pointer rather than an exception.
679 emm* temp_emm =
new(std::nothrow)
emm;
// Out-of-memory report.
// NOTE(review): the message has no separator between "File" and the file
// name, nor between the file name and "Line:".
682 std::cerr <<
"OUT OF MEMORY\nFile" << __FILE__ <<
"Line:\t"<< __LINE__ << std::endl;
// Walk the transition names of weightMatrix[i]; `tmp` is bound by reference,
// so the vector is not copied. Loop body not visible.
701 std::vector<std::string>& tmp=
weightMatrix[i]->getTransitionNames();
702 for (
size_t j = 0; j < tmp.size(); j++) {
// Second pass over the transition names of weightMatrix[i].
725 std::vector<std::string>& tmp =
weightMatrix[i]->getTransitionNames();
726 for(
size_t j = 0; j < tmp.size(); j++){
// Record `indx` as the start of the back weights (how `indx` is derived is
// not visible in this excerpt).
730 backWeightStart = indx;
// Set the variableTransition entry to 1 for indices in
// [firstMultiTrans, backWeightStart).
743 for(
size_t i=firstMultiTrans; i< backWeightStart; i++){
746 (*variableTransition)[i]=1;
// Visit the entries from backWeightStart to the end of weightMatrix
// (loop body not visible).
752 for(
size_t i = backWeightStart; i <
weightMatrix.size() ; i++){
// Out-of-memory report for another allocation that is not visible here.
767 std::cerr <<
"OUT OF MEMORY\nFile" << __FILE__ <<
"Line:\t"<< __LINE__ << std::endl;
// Error text for a track line that failed to parse; lst[0] is included so the
// user can see the offending line.
775 std::string info =
"Couldn't parse new track line. Please check formatting of : " + lst[0];
776 std::cerr << info << std::endl;
// Walk the entries of `lst` (loop body not visible; the messages below
// suggest these are lines of the ambiguous-symbol section -- TODO confirm).
786 for(
size_t i=0;i<lst.
size();i++){
// Reported when the ambiguous section entry for ln[0] could not be parsed.
792 std::cerr <<
"Couldn't parse the Ambiguous section for " << ln[0] << std::endl;
// Reported when ln[0] does not name a previously parsed track.
797 std::string info =
"Ambiguous Characters Section\nSupplied track name doesn't correspond to any previously parsed tracks.\nPlease check the formatting and names.\n Unfound Name: " + ln[0];
799 std::cerr << info << std::endl;
// Walk every entry of `names` (loop body not visible).
815 for(
size_t i=0;i<names.
size();i++){
// Parse each position text starting at index 1; element 0 is skipped
// (presumably it is not a position definition -- TODO confirm).
819 for(
size_t iter=1;iter<positions.
size();iter++){
822 temp->
parse(positions[iter],
trk, names);
// Split `txt` into chunks that each begin at a "NAME:" label.
838 while(start!=std::string::npos){
// The next label after the current one marks the end of the current chunk.
839 end=txt.find(
"NAME:",start+1);
// Assuming `txt` is a std::string: when no further label exists, `end` is
// npos and substr takes the rest of the text.
841 std::string st = txt.substr(start,end-start);
// Continue from the next label. With end == npos this find returns npos,
// which ends the loop.
847 start=txt.find(
"NAME:",end);
// Extract the name of every position text in `pos`.
857 for(
size_t i=0;i<pos.
size();i++){
858 size_t nameHeader=pos[i].find(
"NAME:");
// Entry without a NAME label (handling is not visible in this excerpt).
859 if (nameHeader == std::string::npos){
// End of the line that carries the label.
862 size_t nameLineEnding=pos[i].find_first_of(
"\n",nameHeader);
// Skip the five characters of "NAME:" and take the rest of the line.
// NOTE(review): whitespace after the colon (e.g. a tab) stays in `name`
// unless it is trimmed in lines not visible here.
863 std::string name = pos[i].substr(nameHeader+5,nameLineEnding-(nameHeader+5));
// Reported when the same position name occurs more than once (the duplicate
// check itself is not visible).
866 std::cerr <<
"Position with name of: " << name <<
" is defined twice in the model\n";