StochHMM  v0.34
Flexible Hidden Markov Model C++ Library and Application
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
seqTracks.cpp
Go to the documentation of this file.
1 //
2 // seqTracks.cpp
3 //Copyright (c) 2007-2012 Paul C Lott
4 //University of California, Davis
5 //Genome and Biomedical Sciences Facility
6 //UC Davis Genome Center
7 //Ian Korf Lab
8 //Website: www.korflab.ucdavis.edu
9 //Email: lottpaul@gmail.com
10 //
11 //Permission is hereby granted, free of charge, to any person obtaining a copy of
12 //this software and associated documentation files (the "Software"), to deal in
13 //the Software without restriction, including without limitation the rights to
14 //use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
15 //the Software, and to permit persons to whom the Software is furnished to do so,
16 //subject to the following conditions:
17 //
18 //The above copyright notice and this permission notice shall be included in all
19 //copies or substantial portions of the Software.
20 //
21 //THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 //IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
23 //FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
24 //COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
25 //IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
26 //CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 
28 
29 
30 #include "seqTracks.h"
31 namespace StochHMM{
32 
33  //!Create an empty initialized seqTrack
35  _init();
36  return;
37  }
38 
39  //Single Model, Single Seq File
40 
41  //!Create, initialize, and start loading sequence file
42  //!\param mdl Single model
43  //!\param filename Sequence File filename
44  //!\param format Sequence file Format (FASTA or FASTQ)
45  seqTracks::seqTracks(model& mdl , std::string& filename){
46  _init();
47 
48  loadSeqs(mdl, filename, FASTA, NULL);
49 
50  return;
51  }
52 
53 
54  //!Create, initialize, and start loading sequence file
55  //!\param mdl Single model
56  //!\param filename Sequence File filename
57  //!\param format Sequence file Format (FASTA or FASTQ)
58  seqTracks::seqTracks(model& mdl , std::string& filename, SeqFileFormat format){
59  _init();
60 
61  loadSeqs(mdl, filename, format, NULL);
62 
63  return;
64  }
65 
66  //!Create, initialize, and start loading sequence file
67  //!\param mdl Single model
68  //!\param filename Sequence File filename
69  //!\param format Sequence file Format (FASTA or FASTQ)
70  //!\param trFunc TrackFunc* functions to create tracks based on imported sequences
71  seqTracks::seqTracks(model& mdl , std::string& filename , SeqFileFormat format, TrackFuncs* trFunc){
72  _init();
73 
74  loadSeqs(mdl, filename,format, trFunc);
75 
76  return;
77  }
78 
79 
80 
81  //Single Model, Multiple Seq File
82 
83  //!Create, initialize, and start loading sequence file
84  //!\param mdl Single model
85  //!\param filenames List of Sequence File filenames
86  //!\param format Sequence file Format (FASTA or FASTQ)
87  //!\param type Single track per file (SINGLE) or multiple tracks per file (MULTI)
88  seqTracks::seqTracks(model& mdl , std::vector<std::string>& filenames, SeqFileFormat format, SeqFilesType type){
89  _init();
90 
91  loadSeqs(mdl, filenames, format, type, NULL);
92 
93  return;
94  }
95 
96 
97  //!Create, initialize, and start loading sequence file
98  //!\param mdl Single Model
99  //!\param filenames List of Sequence File filenames
100  //!\param format Sequence file Format (FASTA or FASTQ)
101  //!\param type Single track per file (SINGLE) or multiple tracks per file (MULTI)
102  //!\param trFunc TrackFunc* functions to create tracks based on imported sequences
103  seqTracks::seqTracks(model& mdl , std::vector<std::string>& filenames, SeqFileFormat format, SeqFilesType type, TrackFuncs* trFunc){
104  _init();
105 
106 
107  loadSeqs(mdl, filenames, format, type, trFunc);
108 
109  return;
110  }
111 
112 
113  //Multiple Models, Single Seq File
114 
115 
116  //!Create, initialize, and start loading sequence file
117  //!\param mdls Multiple models
118  //!\param filename Sequence File filename
119  //!\param format Sequence file Format (FASTA or FASTQ)
120  //!\param attribFunc pt2Attrib function to choose model based on sequence attributes
121  seqTracks::seqTracks(models& mdls , std::string& filename , SeqFileFormat format, pt2Attrib* attribFunc){
122  _init();
123 
124  loadSeqs(mdls,filename,format,attribFunc, NULL);
125 
126  return;
127  }
128 
129 
130  //!Create, initialize, and start loading sequence file
131  //!\param mdls Multiple models
132  //!\param filename Sequence File filename
133  //!\param format Sequence file Format (FASTA or FASTQ)
134  //!\param attribFunc pt2Attrib function to choose model based on sequence attributes
135  //!\param trFunc TrackFunc* functions to create tracks based on imported sequences
136  seqTracks::seqTracks(models& mdls , std::string& filename , SeqFileFormat format, pt2Attrib* attribFunc, TrackFuncs* trFunc){
137  _init();
138 
139  loadSeqs(mdls,filename,format,attribFunc,trFunc);
140 
141  return;
142 
143  }
144 
145  //Multiple Models, Multiple Seq Files
146 
147  //!Create, initialize, and start loading sequence file
148  //!\param mdls Multiple models
149  //!\param filenames List of Sequence File filenames
150  //!\param format Sequence file Format (FASTA or FASTQ)
151  //!\param type Single track per file (SINGLE) or multiple tracks per file (MULTI)
152  //!\param attribFunc pt2Attrib function to choose model based on sequence attributes
153  seqTracks::seqTracks(models& mdls , std::vector<std::string>& filenames, SeqFileFormat format, SeqFilesType type, pt2Attrib* attribFunc){
154  _init();
155 
156  loadSeqs(mdls, filenames, format, type, attribFunc, NULL);
157 
158  return;
159  }
160 
161 
162  //!Create, initialize, and start loading sequence file
163  //!\param mdls Multiple models
164  //!\param filenames List of Sequence File filenames
165  //!\param format Sequence file Format (FASTA or FASTQ)
166  //!\param type Single track per file (SINGLE) or multiple tracks per file (MULTI)
167  //!\param attribFunc pt2Attrib function to choose model based on sequence attributes
168  //!\param trFunc TrackFunc* functions to create tracks based on imported sequences
169  seqTracks::seqTracks(models& mdls , std::vector<std::string>& filenames, SeqFileFormat format, SeqFilesType type, pt2Attrib* attribFunc, TrackFuncs* trFunc){
170  _init();
171 
172  loadSeqs(mdls, filenames, format, type, attribFunc, trFunc);
173 
174  return;
175  }
176 
177 
178 
179  //!Destroy seqTracks
181  for(size_t i=0;i<filehandles.size();i++){
182  if (filehandles[i]!=NULL){
183  filehandles[i]->close();
184  }
185  }
186 
187  hmms=NULL;
188  trackFunctions = NULL;
189  attribModelFunc = NULL;
190 
191  while(!jobQueue.empty()){
192  seqJob* element=jobQueue.front();
193  jobQueue.pop();
194  delete element;
195  element = NULL;
196  }
197 
198  return;
199  }
200 
201 
203  numImportJobs=1;
204  jobs=0;
205 
206  hmms = NULL;
207  hmm = NULL;
208  trackFunctions = NULL;
209  attribModelFunc = NULL;
210 
211 
212  seqFormat = FASTA; //Set default file format to fasta
214 
215  good=false;
216 
217  //Make seqTrack thread-safe
218 #ifdef THREADS
219  pthread_mutex_init(&exit_thread_flag_mutex, NULL);
220  pthread_cond_init(&exit_thread_flag_cv, NULL);
221 #endif
222  exit_thread=1;
223  }
224 
225  //Reset the Queue, Files, and Import Tracks
226  //Does not reset track Functions or Attribute model selection functions
228  for(size_t i=0;i<filehandles.size();i++){
229  delete filehandles[i];
230  }
231 
232  filehandles.clear();
233  seqFilenames.clear();
234 
235  hmms = NULL;
236  hmm = NULL;
237  seqFormat = FASTA;
239 
240  good = false;
241 
242  exit_thread = 1;
243 
244  importTracks.clear();
245  }
246 
247 
248 
249 
250  ///////////////////////////////// Importing Fasta files //////////////////////////////////
251 
252 
253  ///////////////////////////////////////////////////////////////////////////////
254  //////////////// Single Model , Single Sequence File ////////////////////////
255  ///////////////////////////////////////////////////////////////////////////////
256 
257 
258  //! Load the fasta sequence file
259  //! \param mod Model to be used
260  //! \param seqFile Fasta sequence filename
261  bool seqTracks::loadSeqs(model& mod, std::string& seqFile){
262  return loadSeqs(mod,seqFile,FASTA,NULL);
263  }
264 
265 
266  //! Load the fasta sequence file
267  //! \param mod Model to be used
268  //! \param seqFile Fasta sequence filename
269  bool seqTracks::loadSeqs(model& mod, std::string& seqFile, SeqFileFormat format){
270  return loadSeqs(mod,seqFile,format,NULL);
271  }
272 
273 
274  //! Load the fasta sequence file
275  //! \param mod Model to be used
276  //! \param seqFile Sequence filename
277  bool seqTracks::loadSeqs(model& mod, std::string& seqFile, SeqFileFormat format, TrackFuncs* trFuncs){
278 
279  if (filehandles.size()>0){
280  _reset();
281  }
282 
283  hmm = &mod;
284  seqFormat = format;
285  seqFilenames.push_back(seqFile);
286 
287  //Assign valid Track Functions
288  if (trFuncs!=NULL){
289  trackFunctions = trFuncs;
290  }
291 
292  //Get State Information and Determine # of tracks to import
293  info = mod.getStateInfo();
296 
297  _open();
298 
299  //Fill Job Queue
300  importJobs();
301 
302  return true;
303  }
304 
305 
306 
307  ///////////////////////////////////////////////////////////////////////////////
308  /////////////////// Single Model , Multiple Sequence File ///////////////////
309  ///////////////////////////////////////////////////////////////////////////////
310 
311  //! Load the fasta sequence files
312  //! \param mod Model to be used
313  //! \param seqFile Fasta sequence filenames
314  //! \param type SINGLE track per file or MULTI tracks per file
315  bool seqTracks::loadSeqs(model& mod, std::vector<std::string>& seqFiles, SeqFileFormat format, SeqFilesType type){
316  return loadSeqs(mod, seqFiles, format,type,NULL);
317  }
318 
319 
320  //! Load the fasta sequence files
321  //! \param mod Model to be used
322  //! \param seqFile Fasta sequence filenames
323  //! \param type SINGLE track per file or MULTI tracks per file
324  //! \param trFuncs Track Functions to create tracks using imported seqs
325  bool seqTracks::loadSeqs(model& mod, std::vector<std::string>& seqFiles, SeqFileFormat format, SeqFilesType type, TrackFuncs* trFuncs){
326  if (filehandles.size()>0 || importTracks.size()>0){
327  _reset();
328  }
329 
330  hmm = &mod;
331  seqFormat = format;
332  fileType = type;
333  seqFilenames = seqFiles;
334 
335 
336  //Assign valid Track Functions
337  if (trFuncs!=NULL){
338  trackFunctions = trFuncs;
339  }
340 
341  //Get State Information and Determine # of tracks to import
342  info = mod.getStateInfo();
344 
345 
346  size_t tracksToImport = importTracks.size();
347  if (fileType == SINGLE_TRACK && tracksToImport>1){
348  size_t sequenceFiles = seqFiles.size();
349 
350  if (tracksToImport!=sequenceFiles){
351  std::cerr << "Number of tracks to import and sequenced don't match. # Files == # Tracks to import " << std::endl;
352  _reset();
353  return false;
354  }
355  }
356 
357  //Open File
358  _open();
359 
360 // for(size_t i = 0; i<seqFiles.size();i++){
361 // std::ifstream *SEQ= new std::ifstream;
362 // filehandles.push_back(SEQ);
363 //
364 // filehandles[i]->open(seqFiles[i].c_str());
365 //
366 // if (!filehandles[i]->is_open()){
367 // std::cerr << "Can't open sequence file: " << seqFiles[i] << std::endl;
368 // return false;
369 // }
370 //
371 // if (filehandles[i]->good()){
372 // good = true;
373 // }
374 // else{
375 // std::cerr << "Can't read from file: " << seqFiles[i] << std::endl;
376 //
377 // if (fileType == SINGLE_TRACK && importTracks.size()>1){
378 // std::cerr << "Failed import of " << seqFiles[i] << " causes there to be a missing track in sequence data." << std::endl;
379 // return false;
380 // }
381 // else{
382 // std::cerr << "Skipped processing of " << seqFiles[i] << "." << std::endl;
383 // }
384 // }
385 // }
386 
387  //Fill Job Queue
388  importJobs();
389 
390  return true;
391  }
392 
393 
394  ///////////////////////////////////////////////////////////////////////////////
395  //////////////// Multiple Models , Single Sequence File //////////////////////
396  ///////////////////////////////////////////////////////////////////////////////
397 
398 
399  //! Load the fasta sequence file
400  //! \param mModels Models to be used
401  //! \param seqFile Sequence filename
402  //! \param format Sequence File Format
403  //! \param attribFunc Pointer to Attribute calculation function
404  //! \param trFuncs Pointer to Track Functions
405  bool seqTracks::loadSeqs(models &mModels, std::string &seqFile, SeqFileFormat format, pt2Attrib* attribFunc, TrackFuncs* trFuncs){
406 
407  if (filehandles.size()>0){
408  _reset();
409  }
410 
411  hmms = &mModels;
412  seqFormat = format;
413  seqFilenames.push_back(seqFile);
414 
415  //Assign valid Attribute Function
416  if (attribFunc!=NULL){
417  attribModelFunc = attribFunc;
418  }
419 
420  if (attribModelFunc==NULL){
421  std::cerr << "No valid Attribute calculating function" << std::endl;
422  return false;
423  }
424 
425  //Assign valid Track Functions
426  if (trFuncs!=NULL){
427  trackFunctions = trFuncs;
428  }
429 
430  //Get State Information and Determine # of tracks to import
431  info = hmms->getModel(0)->getStateInfo();
434 
435 
436  //Open File
437  _open();
438 // std::ifstream *SEQ= new std::ifstream;
439 // filehandles.push_back(SEQ);
440 //
441 // filehandles[0]->open(seqFile.c_str());
442 //
443 // if (!filehandles[0]->is_open()){
444 // std::cerr << "Can't open sequence file: " << seqFile << std::endl;
445 // return false;
446 // }
447 //
448 // if (filehandles[0]->good()){
449 // good = true;
450 // }
451 // else{
452 // std::cerr << "Can't read from file: " << seqFile << std::endl;
453 // return false;
454 // }
455 
456  //Fill Job Queue
457  importJobs();
458 
459 
460  return true;
461  }
462 
463 
464  //! Load the fasta sequence file
465  //! \param mModels Models to be used
466  //! \param seqFile Fasta sequence filename
467  bool seqTracks::loadSeqs(models &mModels, std::string &seqFile, SeqFileFormat format){
468  return loadSeqs(mModels, seqFile, format, NULL,NULL);
469  }
470 
471 
472  ///////////////////////////////////////////////////////////////////////////////
473  ////////////////// Multiple Models , Multiple Sequence File //////////////////
474  ///////////////////////////////////////////////////////////////////////////////
475 
476 
477  //! Load the fasta sequence files
478  //! \param mModels Models to be used
479  //! \param seqFiles Fasta sequence filenames
480  bool seqTracks::loadSeqs(models& mModels, std::vector<std::string>& seqFiles, SeqFileFormat format, SeqFilesType type, pt2Attrib* attribFunc, TrackFuncs* trFuncs){
481 
482  if (filehandles.size()>0){
483  _reset();
484  }
485 
486  hmms = &mModels;
487  seqFormat = format;
488  fileType = type;
489  seqFilenames = seqFiles;
490 
491 
492  //Assign valid Track Functions
493  if (trFuncs!=NULL){
494  trackFunctions = trFuncs;
495  }
496 
497  //Assign valid Attribute Function
498  if (attribFunc!=NULL){
499  attribModelFunc = attribFunc;
500  }
501 
502  if (attribModelFunc==NULL){
503  std::cerr << "No valid Attribute calculating function" << std::endl;
504  return false;
505  }
506 
507  //Get State Information and Determine # of tracks to import
508  info = hmms->getModel(0)->getStateInfo();
510 
511  size_t tracksToImport = importTracks.size();
512  if (fileType == SINGLE_TRACK && tracksToImport>1){
513  size_t sequenceFiles = seqFiles.size();
514 
515  if (tracksToImport!=sequenceFiles){
516  std::cerr << "Number of tracks to import and sequenced don't match. # Files == # Tracks to import " << std::endl;
517  return false;
518  }
519  }
520 
521  //Open File
522  _open();
523 // for(size_t i = 0; i<seqFiles.size();i++){
524 // std::ifstream *SEQ= new std::ifstream;
525 // filehandles.push_back(SEQ);
526 //
527 // filehandles[i]->open(seqFiles[i].c_str());
528 //
529 // if (!filehandles[i]->is_open()){
530 // std::cerr << "Can't open sequence file: " << seqFiles[i] << std::endl;
531 // return false;
532 // }
533 //
534 // if (filehandles[i]->good()){
535 // good = true;
536 // }
537 // else{
538 // std::cerr << "Can't read from file: " << seqFiles[i] << std::endl;
539 //
540 // if (fileType == SINGLE_TRACK && importTracks.size()>1){
541 // std::cerr << "Failed import of " << seqFiles[i] << " causes there to be a missing track in sequence data." << std::endl;
542 // return false;
543 // }
544 // else{
545 // std::cerr << "Skipped processing of " << seqFiles[i] << "." << std::endl;
546 // }
547 // }
548 // }
549 
550  //Fill Job Queue
551  importJobs();
552 
553  return true;
554  }
555 
556 
557 
558  //! Load the fasta sequence files
559  //! \param mModels Models to be used
560  //! \param seqFiles Fasta sequence filenames
561  bool seqTracks::loadSeqs(models& mModels, std::vector<std::string>& seqFiles , SeqFileFormat format, SeqFilesType type){
562  return loadSeqs(mModels, seqFiles, format, type, NULL, NULL);
563  }
564 
565 
566  //! Load the fasta sequence files
567  //! \param mModels Models to be used
568  //! \param seqFiles Fasta sequence filenames
569  bool seqTracks::loadSeqs(models& mModels, std::vector<std::string>& seqFiles, SeqFileFormat format, SeqFilesType type, pt2Attrib* attribFunc){
570  return loadSeqs(mModels, seqFiles, format, type,attribFunc,NULL);
571  }
572 
573 
574  //!Get the next sequence(s) and model from the job queue
575  //!If the queue is empty or holds fewer than numImportJobs/2 jobs (and input is still readable), refill the queue first
576  //!Else give pointer to the next job in the queue
578  seqJob *jb= NULL;
579 
580  //Check job queue and if necessary fill the job queue
581  if ((jobs==0 || (jobs < (numImportJobs/2))) && good){
582  while(good && jobs<numImportJobs){
583  getNext();
584  }
585  }
586 
587  if (!good){
588  if (fileType==SINGLE_TRACK){
589  if (seqFilenames.size()>importTracks.size()){
590  _close();
591  _open();
592  }
593  else{
594  _close();
595  }
596  }
597  else {
598  if (seqFilenames.size()>0){
599  _close();
600  _open();
601  }
602  else{
603  _close();
604  }
605  }
606  }
607 
608  if (jobs>0){
609  jb=jobQueue.front();
610  jobQueue.pop();
611  jobs--;
612  }
613 
614  return jb;
615  }
616 
617 
618 
619 
620  // TODO: fix PT2TRACKFUNC function assignment
621 
623  model* temp = NULL;
624 
625  if (hmms!=NULL){
626  temp = (*hmms)[0];
627 
628  if (temp == NULL){
629  std::cerr << "seqTracks initialization error: Model not defined at index zero of models datatype. Can't initialize seqTrack with necessary model information" << std::endl;
630  return false;
631  }
632 
633  }
634  else if (hmm!=NULL){
635  temp = hmm;
636  }
637  else{
638  std::cerr << "seqTracks initialization error: Model is not defined. Therefore, can't initiate seqTrack with necessary model inforamation" << std::endl;
639  return false;
640  }
641 
642  modelTracks = temp->getTracks();
643 
644  //Determine which tracks to import and which to get by using track functions
645  track* tempTrack;
646  trackCount= temp->track_size();
647  for(size_t i=0;i<trackCount;i++){
648  tempTrack = temp->getTrack(i);
649  if (tempTrack->isTrackFuncDefined()){
650  ppTrack tmp;
651  tmp.func = NULL;
652  tmp.trackNumber=i;
653  tmp.trackToUse=temp->getTrackIter(tempTrack->getTrackToUse());
654  std::string functionTouse=tempTrack->getTrackFunction();
655 
656  //tmp.func = funcs->getFunction(functionTouse);
657  postprocessTracks.push_back(tmp);
658  }
659  else{
660  importTracks.push_back(std::make_pair(i,tempTrack->getAlphaType()));
661  }
662  }
663  return true;
664  }
665 
666  //!Print the seqTracks to stdout
668 
// Nothing is printed: the per-job output call is disabled, so the former loop over the queue had an empty body.
    //jobQueue[i]->print_seq();
}
674 
675 
676  //!Get a job and Add to the queue
678  while(good && jobs<numImportJobs){
679  getNext();
680  }
681 
682  if (!good && seqFilenames.size()>importTracks.size()){
683  _close();
684  _open();
685  }
686 
687  return true;
688  }
689 
690 
691  //For each track it will get the required sequence type and put it in the sequences
692 
693  //TODO: Handle tracks that are created using functions
694 
695  //!Get the next sequence job
697  seqJob* temp_job=new(std::nothrow) seqJob(trackCount);
698 
699  if (temp_job==NULL){
700  std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
701  exit(1);
702  }
703 
704  bool valid=true;
705 
706  sequence* sq;
707 
708  //std::cout << importTracks.size() << std::endl;
709 
710  for(size_t i=0;i<importTracks.size();i++){
711  bool success;
712 
713  if (importTracks[i].second == REAL){
714  sq=new(std::nothrow) sequence(true);
715 
716  if (sq==NULL){
717  std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
718  exit(1);
719  }
720 
721  if (fileType == SINGLE_TRACK){
722  success = sq->getReal(*filehandles[i], (*modelTracks)[importTracks[i].first]);
723  }
724  else{
725  success = sq->getReal(*filehandles[0], (*modelTracks)[importTracks[i].first]);
726  }
727 
728  }
729  else if (seqFormat == FASTA){ // AlphaNum and Fasta
730  sq=new(std::nothrow) sequence(false);
731 
732  if (sq==NULL){
733  std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
734  exit(1);
735  }
736 
737  if (fileType == SINGLE_TRACK){
738  success = sq->getFasta(*filehandles[i], (*modelTracks)[importTracks[i].first], info);
739  }
740  else{
741  success = sq->getFasta(*filehandles[0], (*modelTracks)[importTracks[i].first], info);
742  }
743  }
744  else{
745  sq=new(std::nothrow) sequence(false);
746  if (sq==NULL){
747  std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
748  exit(1);
749  }
750  if (fileType == SINGLE_TRACK){
751  success = sq->getFastq(*filehandles[i], (*modelTracks)[importTracks[i].first]);
752  }
753  else{
754  success = sq->getFastq(*filehandles[0], (*modelTracks)[importTracks[i].first]);
755  }
756 
757  }
758 
759 
760  if (fileType == SINGLE_TRACK){
761  if (!filehandles[i]->good()){
762  good=false;
763  }
764  }
765  else{
766  if (!filehandles[0]->good()){
767  good=false;
768  }
769  }
770 
771  if (!success){
772  std::cerr << "Failed to import data track from " << seqFilenames[i] << std::endl;
773  delete sq;
774  sq = NULL;
775  }
776 
777  if (sq==NULL){ // If sequence is bad break
778  valid=false;
779  break;
780  }
781  else{
782  //If exDef is defined in sequence put it in sequences
783  if (sq->exDefDefined()){
784  temp_job->set->setExDef(sq->getExDef());
785  }
786  temp_job->set->addSeq(sq,importTracks[i].first);
787 
788  if (fileType==SINGLE_TRACK){
789  temp_job->setSeqFilename(seqFilenames[i]);
790  }
791  else{
792  temp_job->setSeqFilename(seqFilenames[0]);
793  }
794  }
795  }
796 
797 
798  if (valid){
799 
800  //Get sequences defined by sequence external function that is user-defined
801  for (size_t i =0;i<postprocessTracks.size();i++) {
802  std::vector<double>* rl = NULL;
803  if (postprocessTracks[i].func != NULL ){
804  rl = (*postprocessTracks[i].func)(temp_job->set->getUndigitized(postprocessTracks[i].trackToUse));
805  }
806  else{
807  std::cerr << "Sequence external function not defined for track number: " << postprocessTracks[i].trackNumber << std::endl;
808  std::cerr << "Using Sequences from track: " << postprocessTracks[i].trackToUse << std::endl;
809  rl = new(std::nothrow) std::vector<double>;
810  if (rl==NULL){
811  std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
812  exit(1);
813  }
814  }
815 
816  sequence* sq = new(std::nothrow) sequence(rl , (*modelTracks)[postprocessTracks[i].trackNumber]);
817 
818  if (sq==NULL){
819  std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
820  exit(1);
821  }
822 
823  temp_job->set->addSeq(sq,postprocessTracks[i].trackNumber);
824  }
825 
826  //TODO: Fix the selection of models based upon the attribute model function
827  //Select Model based on models distance from attributes
828  if (hmms){
829 
830  //If we have a attribute calculation function we'll access which model to use
831  if (attribModelFunc){
832  //Calculate attribute
833  double attb = (*attribModelFunc)(temp_job->set->getUndigitized(TrackToUseForAttrib));
834 
835  //assign first by default
836  double min = (*hmms)[0]->getDistanceToAttrib(attb);
837  temp_job->hmm=(*hmms)[0];
838 
839  //Check other models for one that is closer to attb
840  for(size_t i=1;i<hmms->size();i++){
841  double newVal=(*hmms)[i]->getDistanceToAttrib(attb);
842 
843  //If it's closer assign it to the job
844  if (newVal<min){
845  temp_job->hmm=(*hmms)[i];
846  }
847  }
848 
849  }
850  else{
851  temp_job->hmm=(*hmms)[0]; //Default to first HMM
852  }
853  }
854 
855  else{
856  temp_job->hmm=hmm; //assign single model to job
857  }
858 
859 
860  //Check that all sequences are same length.
861  size_t lengthOfAll=SIZE_MAX;
862  for (size_t i=0;i<trackCount;i++){
863 
864  size_t length=temp_job->set->getLength(i);
865  if (lengthOfAll==SIZE_MAX){
866  lengthOfAll=length;
867  }
868  else if (lengthOfAll!=length){
869  std::cerr << "Sequence Lengths not the same" <<std::endl;
870  delete temp_job;
871  return false;
872  }
873  else {
874  continue;
875  }
876 
877  }
878  temp_job->set->setLength(lengthOfAll);
879  jobQueue.push(temp_job);
880  jobs++;
881  }
882  else{
883  delete temp_job;
884  }
885 
886  return true;
887  }
888 
889 
891 
892  if (seqFilenames.size()==0){
893  return false;
894  }
895 
896  if (fileType == SINGLE_TRACK ){
897  if (seqFilenames.size()<importTracks.size()){
898  std::cerr << "Number of sequences provided doesn't match the number of tracks. Given that the sequences files contain a single track per file. " << std::endl;
899  }
900 
901  for(size_t i = 0; i<importTracks.size();i++){
902  std::ifstream *SEQ= new(std::nothrow) std::ifstream;
903 
904  if (SEQ==NULL){
905  std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
906  exit(1);
907  }
908 
909  filehandles.push_back(SEQ);
910 
911  if (seqFilenames.size()<i+1){
912  return false;
913  }
914 
915  filehandles[i]->open(seqFilenames[i].c_str());
916 
917  if (!filehandles[i]->is_open()){
918  std::cerr << "Can't open sequence file: " << seqFilenames[i] << std::endl;
919  return false;
920  }
921 
922  if (filehandles[i]->good()){
923  good = true;
924  }
925  else{
926  std::cerr << "Can't read from file: " << seqFilenames[i] << std::endl;
927 
928  if (fileType == SINGLE_TRACK && importTracks.size()>1){
929  std::cerr << "Failed import of " << seqFilenames[i] << " causes there to be a missing track in sequence data." << std::endl;
930  return false;
931  }
932  else{
933  std::cerr << "Skipped processing of " << seqFilenames[i] << "." << std::endl;
934  }
935  }
936  }
937  }
938  else {
939 
940  std::ifstream *SEQ= new(std::nothrow) std::ifstream;
941 
942  if (SEQ==NULL){
943  std::cerr << "OUT OF MEMORY\nFile" << __FILE__ << "Line:\t"<< __LINE__ << std::endl;
944  exit(1);
945  }
946 
947  filehandles.push_back(SEQ);
948 
949  filehandles[0]->open(seqFilenames[0].c_str());
950 
951  if (!filehandles[0]->is_open()){
952  std::cerr << "Can't open sequence file: " << seqFilenames[0] << std::endl;
953  return false;
954  }
955 
956  if (filehandles[0]->good()){
957  good = true;
958  }
959  else{
960  std::cerr << "Can't read from file: " << seqFilenames[0] << std::endl;
961  return false;
962  }
963  }
964 
965 
966 
967  return true;
968  }
969 
970 
972  if (fileType == SINGLE_TRACK){
973  for(size_t i=0;i<importTracks.size();i++){
974  //std::cout << importTracks.size() << "\t" << filehandles.size() << std::endl;
975 
976  if (filehandles.size()>=importTracks.size()){
977  filehandles[i]->close();
978  delete filehandles[i];
979  filehandles[i]=NULL;
980  filehandles.erase(filehandles.begin());
981  seqFilenames.erase(seqFilenames.begin());
982  }
983 
984  }
985  }
986  else{
987  filehandles[0]->clear();
988  delete filehandles[0];
989  filehandles.erase(filehandles.begin());
990  seqFilenames.erase(seqFilenames.begin());
991  }
992 
993  return true;
994  }
995 
996 }