StochHMM  v0.34
Flexible Hidden Markov Model C++ Library and Application
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
output.cpp
Go to the documentation of this file.
1 //Copyright (c) 2007-2012 Paul C Lott
2 //University of California, Davis
3 //Genome and Biomedical Sciences Facility
4 //UC Davis Genome Center
5 //Ian Korf Lab
6 //Website: www.korflab.ucdavis.edu
7 //Email: lottpaul@gmail.com
8 //
9 //Permission is hereby granted, free of charge, to any person obtaining a copy of
10 //this software and associated documentation files (the "Software"), to deal in
11 //the Software without restriction, including without limitation the rights to
12 //use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
13 //the Software, and to permit persons to whom the Software is furnished to do so,
14 //subject to the following conditions:
15 //
16 //The above copyright notice and this permission notice shall be included in all
17 //copies or substantial portions of the Software.
18 //
19 //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 //IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
21 //FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
22 //COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
23 //IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24 //CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 
26 ///*
27 // * output.cpp
28 // * StochHMM3
29 // *
30 // * Created by Quidage on 8/27/08.
31 // * Copyright 2008 University of California, Davis. All rights reserved.
32 // *
33 // */
34 //
35 //#include "output.h"
36 //namespace StochHMM{
37 //
38 ////Takes trellis once all filled and outputs the tracebacks as label
39 //
40 //
41 //void map_label (trellis &scores, const HMM &model, options &opt){
42 // int seq_size=scores.trell[0].size();
43 // string modl=opt.sopt("-model");
44 // string seq=opt.sopt("-seq");
45 // string lbl=opt.sopt("-label");
46 //
47 // //Reassign stdout to file if output filename is defined
48 // streambuf* sbuf=cout.rdbuf();
49 // ofstream file;
50 //
51 // if (lbl.compare("")!=0){
52 // file.open(lbl.c_str());
53 // cout.rdbuf(file.rdbuf());
54 // }
55 //
56 //
57 // string line (50,'#');
58 //
59 // map<string,string> label;
60 // for(int state=0;state<model.states.size();state++){
61 // //cout << model.state[state].label <<endl;
62 // //cout << model.state[state].name <<endl;
63 // label[model.states[state].label]+=model.states[state].name;
64 // }
65 //
66 // cout << "#Model: " << modl <<endl;
67 // cout << "#Sequence: " << seq << endl;
68 // cout << "#Algorithm: Viterbi" <<endl;
69 // cout << "#Output: Label" << endl;
70 // cout << "#Label Key: ";
71 //
72 // map<string,string>::iterator itera;
73 // for (itera=label.begin(); itera!=label.end(); itera++){
74 // cout << "(" << (*itera).first << "=" << (*itera).second << "),";
75 // }
76 //
77 // cout <<endl;
78 //
79 // traceback_path (trellis::*funct)();
80 //
81 // if (opt.getopt("-stoch","viterbi")){
82 // funct=&trellis::trace_stoch_viterbi;
83 // }
84 // else if (opt.getopt("-stoch","forward")){
85 // funct=&trellis::trace_stoch_forward;
86 // }
87 //
88 //
89 // map<traceback_path,int> paths;
90 // int max=0;
91 //
92 // double posterior=scores.trell[0][seq_size-1].forw;
93 // for(unsigned int r=1;r<model.states.size(); r++){
94 // posterior+=log(1+exp(scores.trell[r][seq_size-1].forw-posterior));
95 // }
96 //
97 // for(int i=0;i<opt.iopt("-repetitions");i++){
98 // traceback_path theta=(scores.*funct)();
99 // paths[theta]++;
100 // max = (paths[theta]>max) ? paths[theta] : max;
101 // }
102 //
103 //
104 // //print top n paths type vector<int>
105 // int sum=0;
106 // int i=0;
107 //
108 // map<traceback_path, int>::iterator it;
109 // //int ranking=1;
110 //
111 // for (int k=max;k>0;k--){
112 // for( it=paths.begin(); it!=paths.end();it++){
113 // if ((*it).second==k){
114 // cout << line << endl;
115 //
116 // cout << "Path occured " << k << " times:" <<endl;
117 // traceback_path final=(*it).first;
118 //
119 // //Determine Path probability
120 // //double path_probi=final.path_prob(model,seq);
121 //
122 // //if (path_probi==-INFINITY){
123 // // continue;
124 // //}
125 // //cout << "Path probability:\texp(" << path_probi<<")"<<endl;
126 // //double post=100*exp(path_probi-posterior);
127 //
128 // sum+=k;
129 //
130 // final.print_label(model);
131 //
132 //
133 // i++;
134 // if (i>=opt.iopt("-report") || sum==opt.iopt("-repetitions")){
135 // goto LAST_PRINT;
136 // }
137 // }
138 // }
139 // };
140 //LAST_PRINT:
141 // cout <<endl;
142 //
143 // //Re-assign the cout to stdout
144 // if (lbl.compare("")!=0){
145 // cout.rdbuf(sbuf);
146 // file.close();
147 // }
148 //}
149 //
150 //
151 ////Mapping stochastic traceback
152 //void map_path (trellis &scores, const HMM &model, options &opt){
153 // int seq_size=scores.trell[0].size();
154 // string modl=opt.sopt("-model");
155 // string seq=opt.sopt("-seq");
156 // string path=opt.sopt("-path");
157 //
158 //
159 // streambuf* sbuf=cout.rdbuf();
160 // ofstream file;
161 //
162 //
163 // if (path.compare("")!=0){
164 // file.open(path.c_str());
165 // cout.rdbuf(file.rdbuf());
166 // }
167 //
168 // string line (50,'#');
169 //
170 // cout << "#Model: " << modl <<endl;
171 // cout << "#Sequence: " << seq<< endl;
172 // cout << "#Algorithm: Viterbi" <<endl;
173 // cout << "#Output: Path" << endl;
174 // cout << "#Path Key: ";
175 //
176 // for (int i=0; i<model.state_name.size()-1; i++){
177 // cout << "(" << i << "=" << model.state_name[i] << "),";
178 // }
179 //
180 // cout <<endl;
181 //
182 //
183 // traceback_path (trellis::*funct)();
184 //
185 // if (opt.getopt("-stoch","viterbi")){
186 // funct=&trellis::trace_stoch_viterbi;
187 // }
188 // else {
189 // funct=&trellis::trace_stoch_forward;
190 // }
191 //
192 // map<traceback_path,int> paths;
193 // int max=0;
194 //
195 //
196 //
197 // double posterior=scores.trell[0][seq_size-1].forw;
198 // for(unsigned int r=1;r<model.states.size(); r++){
199 // posterior+=log(1+exp(scores.trell[r][seq_size-1].forw-posterior));
200 // }
201 // //cout << "Posterior:\texp(" << posterior <<")"<<endl;
202 //
203 // //cout << "Viterbi:\texp("<< scores.trell[scores.vit_end][sequence.size()-1].viti <<")"<< endl<<endl;
204 //
205 // for(int i=0;i<opt.iopt("-repetitions");i++){
206 // traceback_path theta=(scores.*funct)();
207 // paths[theta]++;
208 // max = (paths[theta]>max) ? paths[theta] : max;
209 // }
210 //
211 //
212 // //print top n paths type vector<int>
213 // int sum=0;
214 // int i=0;
215 //
216 // map<traceback_path, int>::iterator it;
217 // //int ranking=1;
218 //
219 // for (int k=max;k>0;k--){
220 // for( it=paths.begin(); it!=paths.end();it++){
221 // if ((*it).second==k){
222 // cout << line << endl;
223 //
224 // cout << "Path occured " << k << " times:" <<endl;
225 // traceback_path final=(*it).first;
226 //
227 // //Determine Path probability
228 // //double path_probi=final.path_prob(model,seq);
229 //
230 // //if (path_probi==-INFINITY){
231 // // continue;
232 // //}
233 // //cout << "Path probability:\texp(" << path_probi<<")"<<endl; //Need to rewrite function for path probability
234 // //double post=100*exp(path_probi-posterior);
235 //
236 // sum+=k;
237 //
238 // final.print_path();
239 //
240 // /*
241 // if (opt.gff_flag==1){
242 // final.print_gff(model,opt.sequence,path_probi,ranking, k,posterior);
243 // ranking++;
244 // }
245 // */
246 //
247 // i++;
248 // if (i>=opt.iopt("-report") || sum==opt.iopt("-repetitions")){
249 // goto LAST_PRINT;
250 // }
251 // }
252 // }
253 // };
254 //LAST_PRINT:
255 // cout <<endl;
256 // if (path.compare("")!=0){
257 // cout.rdbuf(sbuf);
258 // file.close();
259 // }
260 //}
261 //
262 //
263 ////NEED to account for posterior based traceback in map_GFF
264 ////Mapping stochastic viterbi-based traceback
265 //void map_gff (trellis &scores, const HMM &model, options& opt){
266 // int seq_size=scores.trell[0].size();
267 // string modl=opt.sopt("-model");
268 // string seq=opt.sopt("-seq");
269 // string gff=opt.sopt("-gff");
270 //
271 // streambuf* sbuf=cout.rdbuf();
272 // ofstream file;
273 //
274 // if (gff.compare("")!=0){
275 // file.open(gff.c_str());
276 // cout.rdbuf(file.rdbuf());
277 // }
278 //
279 // string line (50,'#');
280 // int repetitions=opt.iopt("-repetitions");
281 //
282 // map<vector<char>,int> paths;
283 // map<char,string> label_key;
284 //
285 // for(int i=0;i<model.states.size();i++){
286 // if (!model.states[i].desc.empty()){
287 // label_key[model.states[i].label[0]]=model.states[i].desc;
288 // //cout << i << "\t" << model.state[i].label[0] << "\t" << model.state[i].desc <<endl;
289 // }
290 // }
291 //
292 // int max=0;
293 //
294 // //Calculate total posterior probability
295 //
296 // double posterior=scores.trell[0][seq_size-1].forw;
297 // for(unsigned int r=1;r<model.states.size(); r++){
298 // posterior+=log(1+exp(scores.trell[r][seq_size-1].forw-posterior));
299 // }
300 //
301 // //Perform repetitions by tracingback trellis
302 // for(int i=0;i<repetitions;i++){
303 // vector<char> theta=scores.trace_stoch_viterbi_char();
304 // paths[theta]++;
305 // max = (paths[theta]>max) ? paths[theta] : max;
306 // }
307 //
308 // //print top n paths type: GFF
309 // int sum=0;
310 // int i=0;
311 //
312 // map<vector<char>, int>::iterator it;
313 // int ranking=1;
314 //
315 // for (int k=max;k>0;k--){
316 // for( it=paths.begin(); it!=paths.end();it++){
317 // if ((*it).second==k){
318 //
319 // //print_gff(model,(*it).first,opt.sequence,strand,path_probi,ranking, k,posterior);
320 // convert_label(label_key,(*it).first,opt,ranking,k,posterior);
321 // ranking++;
322 // i++;
323 // if (sum==repetitions){
324 // goto LAST_PRINT;
325 // }
326 // }
327 // }
328 // };
329 //LAST_PRINT:
330 // cout <<endl;
331 //
332 // if (gff.compare("")!=0){
333 // cout.rdbuf(sbuf);
334 // file.close();
335 // }
336 //}
337 //
338 //
339 //
340 //void convert_label( map<char,string> label_key, vector<char> path, options opt, int ranking,int times,double posterior){
341 // string sequence_name=opt.sopt("-seq");
342 // char current_label;
343 // double start=0;
344 // int path_size=path.size();
345 //
346 // for(int k=path_size-1;k>=0;k--){
347 //
348 // char new_label=path[k];
349 //
350 // if (label_key.count(new_label)== 0){
351 // if (start>0){
352 // //cout << current_label << endl;
353 // cout << sequence_name << "\tStochHMM\t" << label_key[current_label] <<"\t"<< start << "\t" << path_size-(k+1) << "\t.\t+\t.\tRank:"<<ranking<<",Counts:" << times<<",Posterior:"<<posterior<<endl;
354 // start=0;
355 // current_label=new_label;
356 // }
357 // else {
358 // continue;
359 // }
360 // }
361 // else {
362 // if(k==0){
363 // //cout << current_label << endl;
364 // cout << sequence_name << "\tStochHMM\t" << label_key[current_label] <<"\t"<< start << "\t" << path_size << "\t.\t+\t.\tRank:"<<ranking<<",Counts:" << times<<",Posterior:"<<posterior<<endl;
365 // }
366 // else if (start==0){
367 // start=path_size-k;
368 // current_label=new_label;
369 // }
370 // else if (new_label==current_label){
371 // continue;
372 // }
373 // else {
374 // //cout << current_label << endl;
375 // cout << sequence_name << "\tStochHMM\t" << label_key[current_label] <<"\t"<< start << "\t" << path_size-(k+1) << "\t.\t+\t.\tRank:"<<ranking<<",Counts:" << times<<",Posterior:"<<posterior<<endl;
376 // start=path_size-k;
377 // current_label=new_label;
378 // }
379 // }
380 // }
381 //}
382 //
383 //
384 ////Outputs the traceback state hit file for heat map.
385 ////Need to integrate with previous map functions
386 ////void map_heat (trellis &scores, const HMM &model, sequence &seq, int repetitions, int top_N, int flag, options &opt, string strand){
387 //void map_heat (trellis &scores, const HMM &model, options &opt){
388 //
389 //
390 // int states=scores.trell.size();
391 // //int seq_length=model.tracks.size();
392 // int seq_length=scores.trell[0].size();
393 //
394 // map<int,int> st;
395 // vector<map<int,int> > cell (seq_length,st);
396 // vector<vector< map<int,int> > > output (model.states.size(),cell);
397 //
398 // traceback_path (trellis::*funct)();
399 //
400 // if (opt.getopt("-stoch","viterbi")){
401 // funct=&trellis::trace_stoch_viterbi;
402 // }
403 // else {
404 // funct=&trellis::trace_stoch_forward;
405 // }
406 //
407 //
408 // //cout << "Heatmap" << opt.heatmap <<endl;
409 //
410 // streambuf* sbuf=cout.rdbuf();
411 // ofstream file;
412 // file.open(opt.sopt("-heat").c_str());
413 // cout.rdbuf(file.rdbuf());
414 //
415 // for(int i=0;i<opt.iopt("-repetitions");i++){
416 // traceback_path theta= (scores.*funct)();
417 // for(int j=theta.size()-1;j>=0;j--){
418 // if (j==0){
419 // output[theta.val(j)][j][-1]++;
420 // }
421 // else{
422 // output[theta.val(j)][j][theta.val(j-1)]++;
423 // }
424 // //cout << theta[j] << endl;
425 // }
426 // }
427 //
428 // cout << "STATE KEY:\t(-1,END),";
429 // for(int k=0;k<model.states.size();k++){
430 // cout << "(" << k << "," << model.states[k].name << ")";
431 // if (k+1<states){
432 // cout << ",";
433 // }
434 // }
435 // cout <<endl;
436 //
437 // /*
438 // cout << "SEQUENCE:\t";
439 //
440 // for(int j=0;j<seq.size();j++){
441 // cout << model.alpha[seq.val(j)];
442 // }
443 // cout <<endl;
444 // */
445 //
446 //
447 // cout << "HEADER KEY:\tSTATE,SEQUENCE,(PREV_STATE,TRACEBACK_COUNTS)" << endl;
448 //
449 // /* //Sequence first
450 // for (int j=seq_length-1;j>=0;j--){
451 // for (int i=0;i<output.size();i++){
452 // map<int,int>::iterator it;
453 // for(it=output[i][j].begin();it!=output[i][j].end();it++){
454 // int position=seq_length-1-j;
455 // cout <<position<< "," << i <<","<< it->first <<"," << it->second << endl;
456 // }
457 // }
458 // }
459 // */
460 //
461 // //State first
462 // for (int i=0;i<output.size();i++){
463 // for (int j=output[i].size()-1;j>=0;j--){
464 // map<int,int>::iterator it;
465 // if (output[i][j].size()>0){
466 // int position=seq_length-1-j;
467 // cout << i << "," << position;
468 // }
469 // for(it=output[i][j].begin();it!=output[i][j].end();it++){
470 // cout <<",("<< it->first <<"," << it->second << ")";
471 // }
472 // if (output[i][j].size()>0){
473 // cout << endl;
474 // }
475 //
476 // }
477 // }
478 // cout.rdbuf(sbuf);
479 // file.close();
480 //}
481 //}