35 MOD.open(modelFile.c_str());
37 std::string info =
"Model file not found: " + modelFile;
38 std::cerr << info << std::endl;
46 getline(MOD, input,
'\n');
48 std::cout << input <<std::endl;
50 if (input.compare(
"#STOCHHMM MODEL FILE")==0){
55 std::cerr <<
"OUT OF MEMORY\nFile" << __FILE__ <<
"Line:\t"<< __LINE__ << std::endl;
60 hmms[0]->import(modelFile,funcs);
62 else if (input.compare(
"#STOCHHMM MODELS")==0){
63 std::vector<std::string> filenames;
65 while (getline(MOD,input,
'\n')){
67 sd::cout << input <<endl;
71 if (input.compare(
"")==0){
75 filenames.push_back(input);
80 for(
size_t i=0;i<filenames.size();i++){
84 std::cerr <<
"OUT OF MEMORY\nFile" << __FILE__ <<
"Line:\t"<< __LINE__ << std::endl;
91 for(
size_t i=0;i<filenames.size();i++){
92 hmms[i]->import(filenames[i],funcs);
97 std::cerr <<
"Header for " << modelFile <<
"does not indicate that it is a StochHMM model file.\n";
98 std::cerr <<
"Header should be: #STOCHHMM MODEL FILE \n" <<
"Header given: " << input <<std::endl;
145 if (iter<this->
size()) {
160 std::cerr <<
"OUT OF MEMORY\nFile" << __FILE__ <<
"Line:\t"<< __LINE__ << std::endl;
202 std::string modelString=
slurpFile(modelFile);
203 return parse(modelString,funcs,NULL,NULL);
208 std::string modelString=
slurpFile(modelFile);
209 return parse(modelString, funcs, tmpls, scl);
213 std::string modelString=
slurpFile(modelFile);
214 return parse(modelString,NULL,NULL,NULL);
219 return parse(modelString,funcs,NULL,NULL);
224 return parse(modelString, funcs, tmpls, scl);
229 return parse(modelString,NULL,NULL,NULL);
242 size_t header = model.find(
"MODEL INFORMATION");
243 size_t track = model.find(
"TRACK SYMBOL DEFINITIONS");
244 size_t ambig = model.find(
"AMBIGUOUS SYMBOL DEFINITIONS");
245 size_t templ = model.find(
"TEMPLATED STATES");
246 size_t scale = model.find(
"SCALING FUNCTIONS");
247 size_t st = model.find(
"STATE DEFINITIONS");
252 if (header!=std::string::npos){
254 blank=model.find(
"\n\n",header);
256 size_t nlCharEq = model.rfind(
"####\n",blank);
257 size_t nlCharNum= model.rfind(
"====\n",blank);
259 if (nlCharEq!=std::string::npos){
262 else if (nlCharNum!=std::string::npos){
266 nlChar=model.find(
"\n",header);
270 std::string head = model.substr(nlChar,blank-nlChar);
278 if (track!=std::string::npos){
280 blank=model.find(
"\n\n",track);
283 size_t nlCharEq = model.rfind(
"####\n",blank);
284 size_t nlCharNum= model.rfind(
"====\n",blank);
286 if (nlCharEq!=std::string::npos){
289 else if (nlCharNum!=std::string::npos){
293 nlChar=model.find(
"\n",track);
298 std::string trck (model.substr(nlChar,blank-nlChar));
305 std::cerr <<
"Required section: TRACK SYMBOL DEFINITIONS missing from the model" << std::endl;
314 if (ambig!=std::string::npos){
315 blank=model.find(
"\n\n",ambig);
317 size_t nlCharEq = model.rfind(
"####\n",blank);
318 size_t nlCharNum= model.rfind(
"====\n",blank);
320 if (nlCharEq!=std::string::npos){
323 else if (nlCharNum!=std::string::npos){
327 nlChar=model.find(
"\n",ambig);
331 std::string amb(model.substr(nlChar,blank-nlChar));
335 std::cerr <<
"Model Line:\t" << amb << std::endl;
343 if (scale!=std::string::npos){
344 blank=model.find(
"\n\n",scale);
347 size_t nlCharEq = model.rfind(
"####\n",blank);
348 size_t nlCharNum= model.rfind(
"====\n",blank);
350 if (nlCharEq!=std::string::npos){
353 else if (nlCharNum!=std::string::npos){
357 nlChar=model.find(
"\n",scale);
363 std::string scaleTxt = model.substr(nlChar,blank-nlChar);
373 if (templ!=std::string::npos){
374 blank=model.find(
"\n\n",templ);
377 size_t nlCharEq = model.rfind(
"####\n",blank);
378 size_t nlCharNum= model.rfind(
"====\n",blank);
380 if (nlCharEq!=std::string::npos){
383 else if (nlCharNum!=std::string::npos){
387 nlChar=model.find(
"\n",templ);
391 std::string tempTxt = model.substr(nlChar,blank-nlChar);
400 if (st!=std::string::npos){
401 size_t blankNum = model.find(
"####\n",st);
402 size_t blankEq = model.find(
"====\n",st);
404 blank=model.find(
"####\n",st);
407 if (blankEq!=std::string::npos){
410 else if (blankNum!=std::string::npos){
414 blank=model.find(
"\n",st);
419 nlChar=model.find(
"\n//END");
420 if (nlChar==std::string::npos){
421 nlChar=model.size()-1;
424 std::string stateTxt= model.substr(blank,nlChar-blank);
431 std::cerr <<
"Required sections <STATE DEFINITIONS> missing from the model" << std::endl;
448 std::string headers[] = {
"NAME",
"DESCRIPTION",
"CREATION_DATE",
"CREATION_COMMAND",
"AUTHOR",
"NUM_ATTRIB",
"UPPER",
"LOWER"};
455 for(
int i=0;i<5;i++){
457 index = lst.
indexOf(headers[i]);
458 if (index+1 < lst.
size()){
460 (*head[i])=lst[index];
464 std::cerr <<
"Couldn't parse " << headers[i] <<
" from \"MODEL INFORMATION\" section." << std::endl;
472 index = lst.
indexOf(headers[5]);
474 if (index+1 < lst.
size()){
480 std::cerr <<
"Numerical attribute couldn't be converted to numerical value: " << lst[index] << std::endl;
487 std::cerr <<
"Couldn't parse " << headers[5] <<
" value from \"MODEL INFORMATION\" section." << std::endl;
492 index = lst.
indexOf(headers[6]);
494 if (index+1<lst.
size()){
499 std::cerr <<
"Numerical attribute couldn't be converted to numerical value: " << lst[index] << std::endl;
506 std::cerr <<
"Couldn't parse " << headers[6] <<
" value from \"MODEL INFORMATION\" section." << std::endl;
511 index = lst.
indexOf(headers[7]);
513 if (index+1<lst.
size()){
519 std::cerr <<
"Numerical attribute couldn't be converted to numerical value: " << lst[index] << std::endl;
526 std::cerr <<
"Couldn't parse " << headers[6] <<
" value from \"MODEL INFORMATION\" section." << std::endl;
530 if (first && second){
534 std::cerr <<
"Unable to parse both UPPER and LOWER" << std::endl;
539 index = lst.
indexOf(headers[6]);
541 if (index+1<lst.
size()){
546 std::cerr <<
"Numerical attribute couldn't be converted to numerical value: " << lst[index] << std::endl;
554 std::cerr <<
"Couldn't parse " << headers[6] <<
" value from \"MODEL INFORMATION\" section." << std::endl;
559 index = lst.
indexOf(headers[6]);
561 if (index+1<lst.
size()){
566 std::cerr <<
"Numerical attribute couldn't be converted to numerical value: " << lst[index] << std::endl;
572 std::cerr <<
"Couldn't parse " << headers[7] <<
" value from \"MODEL INFORMATION\" section." << std::endl;
584 for(
size_t i=0;i<lst.
size();i++){
588 std::cerr <<
"OUT OF MEMORY\nFile" << __FILE__ <<
"Line:\t"<< __LINE__ << std::endl;
592 if (trk->
parse(lst[i])){
597 std::string
info =
"Couldn't parse new track line. Please check formatting of : " + lst[i];
598 std::cerr << info << std::endl;
614 for(
size_t i=0;i<lst.
size();i++){
620 std::cerr <<
"Couldn't parse the Ambiguous section for " << ln[0] << std::endl;
625 std::string
info =
"Ambiguous Characters Section\nSupplied track name doesn't correspond to any previously parsed tracks.\nPlease check the formatting and names.\n Unfound Name: " + ln[0];
626 std::cerr << info << std::endl;
642 std::cerr <<
"OUT OF MEMORY\nFile" << __FILE__ <<
"Line:\t"<< __LINE__ << std::endl;
649 for(
size_t i=0;i<lst.
size();i++){
653 std::cerr <<
"OUT OF MEMORY\nFile" << __FILE__ <<
"Line:\t"<< __LINE__ << std::endl;
657 if (!wt->
parse(lst[i])){
675 std::cerr <<
"OUT OF MEMORY\nFile" << __FILE__ <<
"Line:\t"<< __LINE__ << std::endl;
680 std::cerr <<
"Unable to parse Templates for the model" << std::endl;
700 for(
size_t iter=0;iter<stats.
size();iter++){
704 std::cerr <<
"OUT OF MEMORY\nFile" << __FILE__ <<
"Line:\t"<< __LINE__ << std::endl;
748 while(start!=std::string::npos){
749 end=txt.find(
"STATE:",start+1);
751 std::string st = txt.substr(start,end-start);
755 if (st.find(
"TEMPLATE:")!=std::string::npos){
760 std::cerr <<
"Unable to process template information" << std::endl;
764 for(
size_t i=0;i<tmpd.
size();i++){
772 start=txt.find(
"STATE:",end);
780 std::cerr <<
"No template provided for State Definitions. Please provide the template in the model or when calling HMM class" << std::endl;
785 std::string templateName;
786 std::string templateIdentifier;
787 std::map<std::string,std::string> parameters;
799 size_t index=nmid.
indexOf(
"TEMPLATE");
801 templateName=nmid[index];
804 std::cerr <<
"State Template definition doesn't contain \"TEMPLATE:\" keyword in first line of State definition of the template. Please check formatting\n";
810 size_t index=nmid.
indexOf(
"IDENTIFIER");
812 templateIdentifier=nmid[index];
815 std::cerr <<
"State Template definition doesn't contain \"IDENTIFIER:\" keyword in first line of State definition of the template. Please check formatting\n";
821 for(
size_t i=1;i<lst.
size();i++){
826 parameters[lastTag]+=
"\n" + value;
830 parameters[lastTag]=value;
842 std::cerr <<
"Unable to split templated states\n" << sts.
stringify() << std::endl;
851 for(
size_t i=0;i<states.
size();i++){
852 size_t nameHeader=states[i].find(
"NAME:");
853 size_t nameLineEnding=states[i].find_first_of(
"\n",nameHeader);
854 std::string
name = states[i].substr(nameHeader+5,nameLineEnding-(nameHeader+5));
857 std::cerr <<
"State with name of: " << name <<
" is defined twice in the model\n";
887 std::set<std::string> labels;
888 std::set<std::string> gff;
889 std::set<std::string>
name;
894 for(
size_t i=0;i<
states.size();i++){
895 labels.insert(
states[i]->getLabel());
896 gff.insert(
states[i]->getGFF());
900 for (
size_t i=0; i <
states.size() ; ++i){
908 for(
size_t i=0;i<
states.size();i++){
909 states[i]->checkLabels(labels,gff,name);
920 for(
size_t i=0;i<
states.size();i++){
959 for(
size_t i=0; i <
states.size();i++){
960 std::string& st_name =
states[i]->getName();
986 (*complex_emission_states)[st] =
states[st]->hasComplexEmission();
987 (*complex_transition_states)[st]=
states[st]->hasComplexTransition();
1003 for(
size_t i=0;i<
states.size();i++){
1005 std::vector<transition*>* transitions =
states[i]->getTransitions();
1007 for(
size_t trans=0;trans<transitions->size();trans++){
1008 if ((*transitions)[trans]== NULL){
1012 if ((*transitions)[trans]->getTransitionType() ==
DURATION){
1013 (*explicit_duration_states)[i]=
true;
1022 std::vector<transition*>* trans;
1026 for(
size_t i=0;i<trans->size();i++){
1031 (*trans)[i]->setState(temp);
1057 std::string lnSep(50,
'=');
1058 model+=
"#STOCHHMM MODEL FILE\n\n";
1068 std::string headerString;
1069 std::string lnSep(50,
'=');
1070 headerString+=
"MODEL INFORMATION\n" + lnSep +
"\n";
1071 std::string headers[] = {
"MODEL_NAME",
"MODEL_DESCRIPTION",
"MODEL_CREATION_DATE",
"MODEL_CREATION_COMMAND",
"MODEL_AUTHOR",
"MODEL_NUM_ATTRIB",
"MODEL_NUM_ATTRIB_UPPER",
"MODEL_NUM_ATTRIB_LOWER"};
1074 for(
int i=0;i<5;i++){
1075 if (!(*head[i]).empty()){
1076 headerString+=headers[i] +
":\t" + (*head[i]) +
"\n";
1087 return headerString;
1091 std::string trackString;
1098 std::string scaleString;
1108 std::string stateString;
1109 std::string lnSep(50,
'#');
1110 stateString+=
"STATE DEFINITIONS\n" + lnSep +
"\n";
1116 stateString+= lnSep +
"\n";
1118 for(
size_t i=0;i<
states.size();i++){
1120 stateString+=
states[i]->stringify();
1121 stateString+=lnSep +
"\n";
1124 stateString+=
"//END";
1208 double mid=
range[0];
1213 return abs(mid-val);
1220 std::vector<bool> states_visited (
states.size(),
false);
1221 std::vector<uint16_t> visited;
1223 bool ending_defined(
false);
1227 while (visited.size()>0){
1228 uint16_t st_iter = visited.back();
1231 if (!states_visited[st_iter]){
1232 std::vector<uint16_t> tmp_visited;
1234 size_t num_visited = tmp_visited.size();
1237 if (num_visited == 0 ){
1241 else if (num_visited == 1 && tmp_visited[0] == st_iter){
1244 std::cerr <<
"State: " <<
states[st_iter]->getName() <<
" is an orphaned state that has only transition to itself\n";
1251 for(
size_t i=0; i < tmp_visited.size(); i++){
1252 if (!states_visited[tmp_visited[i]]){
1253 visited.push_back(tmp_visited[i]);
1257 states_visited[st_iter] =
true;
1262 for(
size_t i=0; i<
states.size() ; i++){
1264 ending_defined =
true;
1269 if (!ending_defined){
1270 std::cerr <<
"No END state defined in the model\n";
1273 for(
size_t i=0; i< states_visited.size(); i++){
1274 if (!states_visited[i]){
1275 std::cerr <<
"State: " <<
states[i]->getName() <<
" doesn't have valid model topology\n\
1276 Please check the model transitions\n";
1285 for(
size_t i = 0 ; i < st->
transi->size() ; i++){
1286 if (st->
transi->at(i) != NULL){
1287 visited.push_back(st->
transi->at(i)->getState()->getIterator());
1296 size_t sze=x.size();
1297 size_t internal_sze=x[0].size();
1299 for(
size_t j=0;j<sze;j++){
1301 for(
size_t i=0;i<internal_sze;i++){
1302 std::cout << x[j][i] <<
'\t' ;
1304 std::cout << std::endl;