From eae4169b78d230b6b915d0703875d70dd3205587 Mon Sep 17 00:00:00 2001 From: "Marcus D. R. Klarqvist" Date: Fri, 16 Feb 2018 20:46:49 +0000 Subject: [PATCH 1/3] concat update --- src/concat.h | 7 +- src/index/index_contig.h | 7 ++ src/index/tomahawk_header.h | 16 ++++ src/io/output_writer.cpp | 26 +++++++ src/io/output_writer.h | 2 + src/tomahawk/tomahawk_magic_header.h | 8 ++ src/tomahawk/two/TomahawkOutputReader.cpp | 93 +++++++++++++++++++++++ src/tomahawk/two/TomahawkOutputReader.h | 1 + 8 files changed, 156 insertions(+), 4 deletions(-) diff --git a/src/concat.h b/src/concat.h index d1a02d3..5b2877a 100644 --- a/src/concat.h +++ b/src/concat.h @@ -110,19 +110,18 @@ int concat(int argc, char** argv){ Tomahawk::TomahawkOutputReader reader; if(input.size() == 0){ - /* if(!reader.concat(files, output)){ std::cerr << Tomahawk::Helpers::timestamp("ERROR", "CONCAT") << "Failed to concat files!" << std::endl; return 1; } - */ + } else { - /* + if(!reader.concat(input, output)){ std::cerr << Tomahawk::Helpers::timestamp("ERROR", "CONCAT") << "Failed to concat files!" << std::endl; return 1; } - */ + } return 0; diff --git a/src/index/index_contig.h b/src/index/index_contig.h index 4f9e08b..bac6523 100644 --- a/src/index/index_contig.h +++ b/src/index/index_contig.h @@ -49,6 +49,13 @@ struct HeaderContig{ return(sizeof(U32) + sizeof(U32) + this->n_char); } + inline const bool operator==(const self_type& other) const{ + if(this->n_bases != other.n_bases) return false; + if(this->n_char != other.n_char) return false; + if(this->name != other.name) return false; + return true; + } + friend std::ostream& operator<<(std::ostream& stream, const self_type& entry){ stream << entry.n_bases << '\t' << entry.n_char << '\t' << entry.name; return stream; diff --git a/src/index/tomahawk_header.h b/src/index/tomahawk_header.h index 2ab2f17..7a3129e 100644 --- a/src/index/tomahawk_header.h +++ b/src/index/tomahawk_header.h @@ -52,6 +52,22 @@ class TomahawkHeader{ inline void addLiteral(const std::string& string){ this->literals_ += string; } inline const bool validate(void) const{ return(this->magic_.validate()); } + // + inline const bool operator==(const self_type& other) const{ + if(!(this->magic_ == other.magic_)) return false; + + for(U32 i = 0; i < this->magic_.n_contigs; ++i){ + if(!(this->contigs_[i] == other.contigs_[i])) + return false; + } + + for(U32 i = 0; i < this->magic_.n_samples; ++i){ + if(this->sample_names_[i] != other.sample_names_[i]) + return false; + } + return true; + } + private: bool BuildHashTables(void); const U32 DetermineUncompressedSize(void) const; diff --git a/src/io/output_writer.cpp b/src/io/output_writer.cpp index ebd2643..805207e 100644 --- a/src/io/output_writer.cpp +++ b/src/io/output_writer.cpp @@ -172,6 +172,32 @@ void OutputWriter::operator<<(buffer_type& buffer){ } } +void OutputWriter::writePrecompressedBlock(buffer_type& buffer, const U64& uncompressed_size){ + if(buffer.size() == 0) return; + + assert(uncompressed_size % sizeof(entry_type) == 0); + + if(uncompressed_size > l_largest_uncompressed) + this->l_largest_uncompressed = uncompressed_size; + + // Lock + this->spin_lock->lock(); + + this->index_entry.byte_offset = (U64)this->stream->tellp(); + this->index_entry.uncompressed_size = uncompressed_size; + this->stream->write(buffer.data(), buffer.size()); + this->index_entry.byte_offset_end = (U64)this->stream->tellp(); + this->index_entry.n_variants = uncompressed_size / sizeof(entry_type); + this->index_->getContainer() += this->index_entry; + ++this->n_blocks; + + // Unlock + this->spin_lock->unlock(); + + buffer.reset(); + this->index_entry.reset(); +} + void OutputWriter::CheckOutputNames(const std::string& input){ std::vector paths = Helpers::filePathBaseExtension(input); this->basePath = paths[0]; diff --git a/src/io/output_writer.h b/src/io/output_writer.h index d9c049c..93d9504 100644 --- a/src/io/output_writer.h +++ b/src/io/output_writer.h @@ -123,6 +123,8 @@ class OutputWriter{ */ void operator<<(buffer_type& buffer); + void writePrecompressedBlock(buffer_type& buffer, const U64& uncompressed_size); + private: void CheckOutputNames(const std::string& input); diff --git a/src/tomahawk/tomahawk_magic_header.h b/src/tomahawk/tomahawk_magic_header.h index bcfdfbb..387211b 100644 --- a/src/tomahawk/tomahawk_magic_header.h +++ b/src/tomahawk/tomahawk_magic_header.h @@ -53,6 +53,14 @@ struct TomahawkMagicHeader{ return(this->validateMagic() && this->n_samples > 0 && this->n_contigs > 0 && (this->major_version > 0 || this->minor_version > 0) && this->l_header > 0 && this->l_header_uncompressed > 0); } + inline const bool operator==(const self_type& other) const{ + if(strncmp(&this->magic_string[0], &other.magic_string[0], Tomahawk::Constants::WRITE_HEADER_MAGIC_LENGTH) != 0) return false; + if(this->file_type != other.file_type) return false; + if(this->n_samples != other.n_samples) return false; + if(this->n_contigs != other.n_contigs) return false; + return true; + } + private: friend std::ostream& operator<<(std::ostream& stream, const self_type& header){ stream.write(header.magic_string, Tomahawk::Constants::WRITE_HEADER_MAGIC_LENGTH); diff --git a/src/tomahawk/two/TomahawkOutputReader.cpp b/src/tomahawk/two/TomahawkOutputReader.cpp index 823f15e..0afd565 100644 --- a/src/tomahawk/two/TomahawkOutputReader.cpp +++ b/src/tomahawk/two/TomahawkOutputReader.cpp @@ -9,6 +9,7 @@ #include "../../io/compression/GZFHeader.h" #include "../../support/helpers.h" #include "../two/TomahawkOutputStats.h" +#include "../../io/output_writer.h" namespace Tomahawk { @@ -655,6 +656,98 @@ bool TomahawkOutputReader::__checkRegionNoIndex(const entry_type& entry){ return false; } +bool TomahawkOutputReader::__concat(const std::vector& files, const std::string& output){ + if(files.size() == 0){ + std::cerr << Helpers::timestamp("ERROR","TWO") << "No input files..." << std::endl; + return false; + } + + // open first one + if(!SILENT) + std::cerr << Helpers::timestamp("LOG", "CONCAT") << "Opening input: " << files[0] << "..." << std::endl; + + if(!this->open(files[0])){ + std::cerr << Helpers::timestamp("ERROR","TWO") << "Failed to parse: " << files[0] << "..." << std::endl; + return false; + } + + this->getHeader().getLiterals() += "\n##tomahawk_concatCommand=" + Helpers::program_string(); + this->getHeader().getLiterals() += "\n##tomahawk_concatFiles="; + for(U32 i = 0; i < files.size(); ++i) + this->getHeader().getLiterals() += files[i] + ','; + + IO::OutputWriter writer; + if(!writer.open(output)){ + std::cerr << Helpers::timestamp("ERROR","SORT") << "Failed to open: " << output << "..." << std::endl; + return false; + } + writer.writeHeaders(this->getHeader()); + + while(this->parseBlock()) + writer << this->data_; + + for(U32 i = 1; i < files.size(); ++i){ + if(!SILENT) + std::cerr << Helpers::timestamp("LOG", "CONCAT") << "Opening input: " << files[i] << "..." << std::endl; + + this->stream_.close(); + self_type second_reader; + if(!second_reader.open(files[i])){ + std::cerr << Helpers::timestamp("ERROR","TWO") << "Failed to parse: " << files[i] << "..." << std::endl; + return false; + } + + if(!(second_reader.getHeader() == this->getHeader())){ + std::cerr << "header mismatch" << std::endl; + } + + while(second_reader.parseBlock()) + writer << this->data_; + } + + writer.setSorted(false); + writer.setPartialSorted(false); + writer.flush(); + writer.writeFinal(); + + return true; +} + + +bool TomahawkOutputReader::concat(const std::vector& files, const std::string& output){ + if(files.size() == 0){ + std::cerr << Helpers::timestamp("ERROR","TWO") << "No input files given..." << std::endl; + return false; + } + + return(this->__concat(files, output)); +} + +bool TomahawkOutputReader::concat(const std::string& file_list, const std::string& output){ + if(file_list.size() == 0){ + std::cerr << Helpers::timestamp("ERROR","TWO") << "No input file list given..." << std::endl; + return false; + } + + std::ifstream file_list_read(file_list); + if(!file_list_read.good()){ + std::cerr << Helpers::timestamp("ERROR","TWO") << "Failed to get file_list..." << std::endl; + return false; + } + + std::vector files; + std::string line; + while(getline(file_list_read, line)){ + if(line.size() == 0){ + std::cerr << Helpers::timestamp("WARNING","TWO") << "Empty line" << std::endl; + break; + } + files.push_back(line); + } + + return(this->__concat(files, output)); +} + /* bool TomahawkOutputReader::OpenWriter(void){ if(this->writer_output_type == WRITER_TYPE::natural){ diff --git a/src/tomahawk/two/TomahawkOutputReader.h b/src/tomahawk/two/TomahawkOutputReader.h index 5b791cf..49f75b6 100644 --- a/src/tomahawk/two/TomahawkOutputReader.h +++ b/src/tomahawk/two/TomahawkOutputReader.h @@ -25,6 +25,7 @@ namespace Tomahawk { class TomahawkOutputReader { private: + typedef TomahawkOutputReader self_type; typedef IO::OutputEntry entry_type; typedef OutputFilter filter_type; typedef OutputContainer output_container_type; From 34b1c3faf6a18c8d89220559b147af761096c124 Mon Sep 17 00:00:00 2001 From: "Marcus D. R. Klarqvist" Date: Sat, 17 Feb 2018 18:22:10 +0000 Subject: [PATCH 2/3] sort merge bug fix --- src/algorithm/sort/output_sorter.cpp | 24 +- src/concat.h | 2 +- src/io/output_writer.h | 1 + src/tomahawk/two/TomahawkOutputReader.cpp | 900 +--------------------- src/tomahawk/two/TomahawkOutputReader.h | 16 +- src/view.h | 6 +- 6 files changed, 38 insertions(+), 911 deletions(-) diff --git a/src/algorithm/sort/output_sorter.cpp b/src/algorithm/sort/output_sorter.cpp index c9f28de..87a7fbe 100644 --- a/src/algorithm/sort/output_sorter.cpp +++ b/src/algorithm/sort/output_sorter.cpp @@ -127,17 +127,20 @@ bool OutputSorter::sortMerge(const std::string& inputFile, const std::string& de writer.setFlushLimit(block_size); writer.writeHeaders(this->reader.getHeader()); - const U32 n_toi_entries = this->reader.getIndex().size(); - std::ifstream* streams = new std::ifstream[n_toi_entries]; - tgzf_iterator** iterators = new tgzf_iterator*[n_toi_entries]; + const U32 n_blocks = this->reader.getIndex().size(); + std::ifstream* streams = new std::ifstream[n_blocks]; + tgzf_iterator** iterators = new tgzf_iterator*[n_blocks]; if(!SILENT) - std::cerr << Helpers::timestamp("LOG", "SORT") << "Opening " << n_toi_entries << " file handles..."; + std::cerr << Helpers::timestamp("LOG", "SORT") << "Opening " << n_blocks << " file handles..."; - for(U32 i = 0; i < n_toi_entries; ++i){ + for(U32 i = 0; i < n_blocks; ++i){ streams[i].open(inputFile); streams[i].seekg(this->reader.getIndex().getContainer()[i].byte_offset); - iterators[i] = new tgzf_iterator(streams[i], 65536, this->reader.getIndex().getContainer()[i].byte_offset, this->reader.getIndex().getContainer()[i].byte_offset_end); + iterators[i] = new tgzf_iterator(streams[i], + 65536, + this->reader.getIndex().getContainer()[i].byte_offset, + this->reader.getIndex().getContainer()[i].byte_offset_end); } if(!SILENT) @@ -152,12 +155,12 @@ bool OutputSorter::sortMerge(const std::string& inputFile, const std::string& de // draw one from each const entry_type* e = nullptr; - for(U32 i = 0; i < n_toi_entries; ++i){ + for(U32 i = 0; i < n_blocks; ++i){ if(!iterators[i]->nextEntry(e)){ std::cerr << Helpers::timestamp("ERROR", "SORT") << "Failed to get an entry..." << std::endl; return false; } - outQueue.push( queue_entry(e, i, entry_type::sortAscending) ); + outQueue.push( queue_entry(e, i, entry_type::sortDescending) ); } if(outQueue.empty()){ @@ -174,10 +177,9 @@ bool OutputSorter::sortMerge(const std::string& inputFile, const std::string& de // remove this record from the queue outQueue.pop(); - while(iterators[id]->nextEntry(e)){ if(!(*e < outQueue.top().data)){ - outQueue.push( queue_entry(e, id, entry_type::sortAscending) ); + outQueue.push( queue_entry(e, id, entry_type::sortDescending) ); break; } writer << *e; @@ -193,7 +195,7 @@ bool OutputSorter::sortMerge(const std::string& inputFile, const std::string& de std::cerr << Helpers::timestamp("LOG") << "Output: " << Helpers::ToPrettyString(writer.sizeEntries()) << " entries into " << Helpers::ToPrettyString(writer.sizeBlocks()) << " blocks..." << std::endl; // Cleanup - for(U32 i = 0; i < n_toi_entries; ++i) + for(U32 i = 0; i < n_blocks; ++i) delete iterators[i]; delete [] iterators; diff --git a/src/concat.h b/src/concat.h index 5b2877a..f14a23b 100644 --- a/src/concat.h +++ b/src/concat.h @@ -90,6 +90,7 @@ int concat(int argc, char** argv){ if(input.size() == 0 && files.length() == 0){ std::cerr << Tomahawk::Helpers::timestamp("ERROR") << "No input file specified..." << std::endl; + std::cerr << input.size() << '\t' << files.size() << std::endl; return(1); } @@ -116,7 +117,6 @@ int concat(int argc, char** argv){ } } else { - if(!reader.concat(input, output)){ std::cerr << Tomahawk::Helpers::timestamp("ERROR", "CONCAT") << "Failed to concat files!" << std::endl; return 1; diff --git a/src/io/output_writer.h b/src/io/output_writer.h index 93d9504..1ecea5b 100644 --- a/src/io/output_writer.h +++ b/src/io/output_writer.h @@ -60,6 +60,7 @@ class OutputWriter{ // Getters inline const bool isSorted(void) const{ return(this->writing_sorted_); } inline const bool isPartialSorted(void) const{ return(this->writing_sorted_partial_); } + inline index_type& getIndex(void) const{ return(*this->index_); } bool open(const std::string& output_file); int writeHeaders(twk_header_type& twk_header); diff --git a/src/tomahawk/two/TomahawkOutputReader.cpp b/src/tomahawk/two/TomahawkOutputReader.cpp index 0afd565..f84e826 100644 --- a/src/tomahawk/two/TomahawkOutputReader.cpp +++ b/src/tomahawk/two/TomahawkOutputReader.cpp @@ -519,48 +519,31 @@ bool TomahawkOutputReader::view(void){ } bool TomahawkOutputReader::__viewOnly(void){ - //std::cerr << Helpers::timestamp("LOG") << "Sorted: " << (int)this->getIndex().getController().isSorted << " partial: " << (int)this->getIndex().getController().isPartialSorted << std::endl; - this->getHeader().getLiterals() += "\n##tomahawk_viewCommand=" + Helpers::program_string(); - this->getHeader().getLiterals() += "\n##tomahawk_viewFilters=" + this->filters_.getInterpretedString() + " filter=NO regions=NO"; - - //if(!this->OpenWriter()) - // return false; - - if(this->showHeader_ == true){ + if(this->getShowHeader()){ + this->getHeader().getLiterals() += "\n##tomahawk_viewCommand=" + Helpers::program_string(); + this->getHeader().getLiterals() += "\n##tomahawk_viewFilters=" + this->filters_.getInterpretedString() + " filter=NO regions=NO"; std::cout << this->getHeader().getLiterals() << '\n'; } // Natural output required parsing size_t n_total = 0; - //if(this->writer_output_type == WRITER_TYPE::natural){ - while(this->parseBlock()){ - OutputContainerReference o = this->getContainerReference(); - n_total += o.size(); - for(U32 i = 0; i < o.size(); ++i){ - o[i].write(std::cout, this->getHeader().contigs_); - } - } - //std::cerr << "total: " << n_total << std::endl; - //} - // Binary output without filtering simply writes it back out -/* - else if(this->writer_output_type == WRITER_TYPE::binary){ - while(this->parseBlock()){ - OutputContainerReference o(this->compressed_buffer); - //this->writer->write(this->data_); - std::cout << o[0] << std::endl; + while(this->parseBlock()){ + OutputContainerReference o = this->getContainerReference(); + n_total += o.size(); + for(U32 i = 0; i < o.size(); ++i){ + o[i].write(std::cout, this->getHeader().contigs_); } } -*/ return true; } bool TomahawkOutputReader::__viewRegion(void){ - this->getHeader().getLiterals() += "\n##tomahawk_viewCommand=" + Helpers::program_string(); - if(this->filters_.any_filter_user_set) + if(this->filters_.any_filter_user_set){ + this->getHeader().getLiterals() += "\n##tomahawk_viewCommand=" + Helpers::program_string(); this->getHeader().getLiterals() += "\n##tomahawk_viewFilters=" + this->filters_.getInterpretedString() + " filter=YES regions=YES"; + } - if(this->showHeader_ == true){ + if(this->getShowHeader()){ std::cout << this->getHeader().getLiterals() << '\n'; } @@ -576,11 +559,11 @@ bool TomahawkOutputReader::__viewRegion(void){ } bool TomahawkOutputReader::__viewFilter(void){ - this->getHeader().getLiterals() += "\n##tomahawk_viewCommand=" + Helpers::program_string(); - this->getHeader().getLiterals() += "\n##tomahawk_viewFilters=" + this->filters_.getInterpretedString() + " filter=YES regions=NO"; - - if(this->showHeader_ == true) + if(this->getShowHeader()){ + this->getHeader().getLiterals() += "\n##tomahawk_viewCommand=" + Helpers::program_string(); + this->getHeader().getLiterals() += "\n##tomahawk_viewFilters=" + this->filters_.getInterpretedString() + " filter=YES regions=NO"; std::cout << this->getHeader().getLiterals() << '\n'; + } while(this->parseBlock()){ output_container_reference_type o(this->data_); @@ -683,6 +666,7 @@ bool TomahawkOutputReader::__concat(const std::vector& files, const } writer.writeHeaders(this->getHeader()); + while(this->parseBlock()) writer << this->data_; @@ -690,7 +674,6 @@ bool TomahawkOutputReader::__concat(const std::vector& files, const if(!SILENT) std::cerr << Helpers::timestamp("LOG", "CONCAT") << "Opening input: " << files[i] << "..." << std::endl; - this->stream_.close(); self_type second_reader; if(!second_reader.open(files[i])){ std::cerr << Helpers::timestamp("ERROR","TWO") << "Failed to parse: " << files[i] << "..." << std::endl; @@ -702,7 +685,8 @@ bool TomahawkOutputReader::__concat(const std::vector& files, const } while(second_reader.parseBlock()) - writer << this->data_; + writer << second_reader.data_; + } writer.setSorted(false); @@ -748,850 +732,4 @@ bool TomahawkOutputReader::concat(const std::string& file_list, const std::strin return(this->__concat(files, output)); } -/* -bool TomahawkOutputReader::OpenWriter(void){ - if(this->writer_output_type == WRITER_TYPE::natural){ - this->writer = new OutputWriterNatural(this->contigs, &this->header); - } - else this->writer = new OutputWriter(this->contigs, &this->header); - - if(!this->writer->open()) - return false; - - if(this->output_header) - this->writer->writeHeader(this->literals); - - return true; -} - -bool TomahawkOutputReader::OpenWriter(const std::string output_file){ - if(this->writer_output_type == WRITER_TYPE::natural){ - this->writer = new OutputWriterNatural(this->contigs, &this->header); - } - else this->writer = new OutputWriter(this->contigs, &this->header); - - if(!this->writer->open(output_file)){ - std::cerr << Helpers::timestamp("ERROR","WRITER") << "Failed to open output file: " << output_file << std::endl; - return false; - } - - if(this->output_header) - this->writer->writeHeader(this->literals); - - return true; -} - -bool TomahawkOutputReader::__viewRegion(void){ - if(!this->OpenWriter()) - return false; - - // If indexed and expanded - if(this->toi_reader.ERROR_STATE == toi_reader_type::TOI_OK && (this->toi_reader.getIsSortedExpanded())){ - if(!SILENT) - std::cerr << Helpers::timestamp("LOG", "TWO") << "Indexed query..." << std::endl; - return(this->__viewRegionIndexed()); - } - - if(!SILENT) - std::cerr << Helpers::timestamp("LOG", "TWO") << "Unindexed query..." << std::endl; - - if(this->interval_tree != nullptr){ - const entry_type* entry = nullptr; - - - while(this->parseBlock()){ - output_container_reference_type o(this->data_); - for(U32 i = 0; i < o.size(); ++i) - this->__checkRegionNoIndex(o[i]); - } // end while next variant - } - - return true; -} -*/ - -bool TomahawkOutputReader::__viewRegionIndexed(void){ - /* - if(this->interval_tree == nullptr){ - std::cerr << Helpers::timestamp("ERROR","TWO") << "Interval tree not set!" << std::endl; - return false; - } - - if(!this->__ParseRegionIndexedBlocks()){ - std::cerr << Helpers::timestamp("LOG","TOI") << "No valid entries..." << std::endl; - return false; - } - - // Init - const entry_type* two_entry = nullptr; - - // Todo - // sort entries - // merge - // for i in entries: seek, uncompress, and jump or limit - for(U32 i = 0; i < this->interval_totempole_enties->size(); ++i){ - const totempole_sorted_entry_type& entry = this->interval_totempole_enties->at(i); - const U32 block_length = entry.toBlock - entry.fromBlock; - - // 1 entry - if(block_length == 0){ - if(!this->seekBlock(entry.fromBlock)){ - std::cerr << Helpers::timestamp("ERROR","TWO") << "Could not get block" << std::endl; - return false; - } - this->iterator_position_block = entry.fromBlock_entries_offset; - - - while(this->nextVariantLimited(two_entry)){ - this->__checkRegionIndex(two_entry); - } // end while next variant - } - // 2 entries - else if(block_length == 1){ - // First one - if(!this->seekBlock(entry.fromBlock)){ - std::cerr << Helpers::timestamp("ERROR","TWO") << "Could not get block" << std::endl; - return false; - } - this->iterator_position_block = entry.fromBlock_entries_offset; - - while(this->nextVariantLimited(two_entry)){ - this->__checkRegionIndex(two_entry); - } // end while next variant - - // Second one - if(!this->seekBlock(entry.toBlock)){ - std::cerr << Helpers::timestamp("ERROR","TWO") << "Could not get block" << std::endl; - return false; - } - //this->position = entry.toBlock_entries_offset; - - while(this->nextVariantLimited(two_entry)){ - this->__checkRegionIndex(two_entry); - } // end while next variant - - } - // > 2 entries - else { - // First block - U32 j = entry.fromBlock; - if(!this->seekBlock(j)){ - std::cerr << Helpers::timestamp("ERROR","TWO") << "Could not get block" << std::endl; - return false; - } - this->iterator_position_block = entry.fromBlock_entries_offset; - - while(this->nextVariantLimited(two_entry)){ - this->__checkRegionIndex(two_entry); - } // end while next variant - ++j; - - // Middle blocks - for(; j < entry.toBlock - 1; ++j){ - if(!this->seekBlock(j)){ - std::cerr << Helpers::timestamp("ERROR","TWO") << "Could not get block" << std::endl; - return false; - } - - while(this->nextVariantLimited(two_entry)){ - this->__checkRegionIndex(two_entry); - } // end while next variant - } - - // last block - if(!this->seekBlock(j)){ - std::cerr << Helpers::timestamp("ERROR","TWO") << "Could not get block" << std::endl; - return false; - } - //this->position = entry.toBlock_entries_offset; - - while(this->nextVariantLimited(two_entry)){ - this->__checkRegionIndex(two_entry); - } // end while next variant - } - } - - */ - return true; -} - -/* -bool TomahawkOutputReader::__checkRegionIndex(const entry_type& entry){ - // If iTree for contigA exists - if(this->interval_tree[entry.AcontigID] != nullptr){ - std::vector rets = this->interval_tree[entry.AcontigID]->findOverlapping(entry.Aposition, entry.Aposition); - if(rets.size() > 0){ - for(U32 i = 0; i < rets.size(); ++i){ - if(this->filter.filter(entry)) - *this->writer << entry; - - return true; - } - } - } - return false; -} - -bool TomahawkOutputReader::__checkRegionNoIndex(const entry_type& entry){ - // If iTree for contigA exists - if(this->interval_tree[entry.AcontigID] != nullptr){ - std::vector rets = this->interval_tree[entry.AcontigID]->findOverlapping(entry.Aposition, entry.Aposition); - if(rets.size() > 0){ - for(U32 i = 0; i < rets.size(); ++i){ - if(rets[i].value != nullptr){ // if linked - if((entry.BcontigID == rets[i].value->contigID) && - (entry.Bposition >= rets[i].value->start && entry.Bposition <= rets[i].value->stop)){ - if(this->filter.filter(entry)) - //entry.write(std::cout, this->contigs); - *this->writer << entry; - - return true; - } // end match - } else { // not linked - if(this->filter.filter(entry)) - //entry.write(std::cout, this->contigs); - *this->writer << entry; - - return true; - } - } - } - } - - // If iTree for contigB exists - if(this->interval_tree[entry.BcontigID] != nullptr){ - std::vector rets = this->interval_tree[entry.BcontigID]->findOverlapping(entry.Bposition, entry.Bposition); - if(rets.size() > 0){ - for(U32 i = 0; i < rets.size(); ++i){ - if(rets[i].value != nullptr){ // if linked - if((entry.AcontigID == rets[i].value->contigID) && - (entry.Aposition >= rets[i].value->start && entry.Aposition <= rets[i].value->stop)){ - if(this->filter.filter(entry)){ - //entry.write(std::cout, this->contigs); - *this->writer << entry; - } - return true; - } // end match - } else { // not linked - if(this->filter.filter(entry)) - //entry.write(std::cout, this->contigs); - *this->writer << entry; - - return true; - } - } - } // end if any hit in iTree b - } // end iTree b - - return false; -} - -bool TomahawkOutputReader::__viewOnly(void){ - this->literals += "\n##tomahawk_viewCommand=" + Helpers::program_string(); - this->literals += "\n##tomahawk_viewFilters=" + this->filter.getInterpretedString() + " filter=NO regions=FALSE"; - - if(!this->OpenWriter()) - return false; - - // Natural output required parsing - size_t n_total = 0; - if(this->writer_output_type == WRITER_TYPE::natural){ - while(this->parseBlock()){ - OutputContainerReference o = this->getContainerReference(); - std::cerr << o.size() << '\t' << this->data_.size() << std::endl; - n_total += o.size(); - for(U32 i = 0; i < o.size(); ++i) - std::cout << o[i] << '\n'; - } - std::cerr << "total: " << n_total << std::endl; - } - // Binary output without filtering simply writes it back out - else if(this->writer_output_type == WRITER_TYPE::binary){ - while(this->parseBlock()){ - OutputContainerReference o(this->compressed_buffer); - //this->writer->write(this->data_); - std::cout << o[0] << std::endl; - } - } - - return true; -} - -bool TomahawkOutputReader::__viewFilter(void){ - this->literals += "\n##tomahawk_viewCommand=" + Helpers::program_string(); - this->literals += "\n##tomahawk_viewFilters=" + this->filter.getInterpretedString() + " filter=YES regions=FALSE"; - - if(!this->OpenWriter()) - return false; - - while(this->parseBlock()){ - output_container_reference_type o(this->data_); - for(U32 i = 0; i < o.size(); ++i){ - if(this->filter.filter(o[i])) - *this->writer << o[i]; - } - } // end while next variant - return true; -} - -bool TomahawkOutputReader::addRegionsIndexed(std::vector& positions){ - for(U32 i = 0; i < positions.size(); ++i){ - if(positions[i].find(',') != std::string::npos){ - std::vector ret = Helpers::split(positions[i], ','); - if(ret.size() == 1){ - std::cerr << Helpers::timestamp("ERROR", "INTERVAL") << "Illegal interval: " << positions[i] << "!" << std::endl; - return false; - - } else if(ret.size() == 2){ - // parse left - interval_type intervalLeft; - if(this->__ParseRegionIndexed(ret[0], intervalLeft)) - this->interval_tree_entries[intervalLeft.contigID].push_back(interval_type(intervalLeft)); - - // parse right - interval_type intervalRight; - if(this->__ParseRegionIndexed(ret[1], intervalRight)) - this->interval_tree_entries[intervalRight.contigID].push_back(interval_type(intervalRight)); - - } else { - std::cerr << Helpers::timestamp("ERROR", "INTERVAL") << "Illegal interval: " << positions[i] << "!" << std::endl; - return false; - } - } - // Has no comma in string - else { - interval_type interval; - if(this->__ParseRegionIndexed(positions[i], interval)) - this->interval_tree_entries[interval.contigID].push_back(interval_type(interval)); - } - } - - return true; -} - -bool TomahawkOutputReader::addRegionsUnindexed(std::vector& positions){ - for(U32 i = 0; i < positions.size(); ++i){ - // Pattern cA:pAf-pAt;cB:pBf-pBt - if(positions[i].find(',') != std::string::npos){ - //std::cerr << "linked intervals" << std::endl; - std::vector ret = Helpers::split(positions[i], ','); - if(ret.size() == 1){ - std::cerr << Helpers::timestamp("ERROR", "INTERVAL") << "Illegal interval: " << positions[i] << "!" << std::endl; - return false; - - } else if(ret.size() == 2){ - // parse left - interval_type intervalLeft; - if(!this->__ParseRegion(ret[0], intervalLeft)) - return false; - - // parse right - interval_type intervalRight; - if(!this->__ParseRegion(ret[1], intervalRight)) - return false; - - // Todo: WARNING - // This results in illegal pointers if the vector resizes - // and pointers change - this->interval_tree_entries[intervalLeft.contigID].push_back(interval_type(intervalLeft)); - this->interval_tree_entries[intervalRight.contigID].push_back(interval_type(intervalRight)); - if(intervalLeft.contigID != intervalRight.contigID){ - this->interval_tree_entries[intervalLeft.contigID].back().value = &this->interval_tree_entries[intervalRight.contigID].back(); - this->interval_tree_entries[intervalRight.contigID].back().value = &this->interval_tree_entries[intervalLeft.contigID].back(); - } else { - this->interval_tree_entries[intervalLeft.contigID].back().value = &this->interval_tree_entries[intervalLeft.contigID][this->interval_tree_entries[intervalLeft.contigID].size() - 2]; - this->interval_tree_entries[intervalLeft.contigID][this->interval_tree_entries[intervalLeft.contigID].size() - 2].value = &this->interval_tree_entries[intervalLeft.contigID].back(); - } - - } else { - std::cerr << Helpers::timestamp("ERROR", "INTERVAL") << "Illegal interval: " << positions[i] << "!" << std::endl; - return false; - } - } else { - interval_type interval; - if(!this->__ParseRegion(positions[i], interval)) - return false; - - this->interval_tree_entries[interval.contigID].push_back(interval_type(interval)); - } - } - return true; -} - -bool TomahawkOutputReader::addRegions(std::vector& positions){ - if(positions.size() == 0) - return true; - - if(this->interval_tree_entries == nullptr) - this->interval_tree_entries = new std::vector[this->header.n_contig]; - - if(this->interval_tree == nullptr){ - this->interval_tree = new tree_type*[this->header.n_contig]; - for(U32 i = 0; i < this->header.n_contig; ++i) - this->interval_tree[i] = nullptr; - } - - if(this->toi_reader.ERROR_STATE == toi_reader_type::TOI_OK && (this->toi_reader.getIsSortedExpanded())){ - if(!this->addRegionsIndexed(positions)) - return false; - } else { - if(!this->addRegionsUnindexed(positions)) - return false; - } - - for(U32 i = 0; i < this->header.n_contig; ++i){ - if(this->interval_tree_entries[i].size() != 0){ - this->interval_tree[i] = new tree_type(this->interval_tree_entries[i]); - } else - this->interval_tree[i] = nullptr; - } - - return true; -} - -bool TomahawkOutputReader::__ParseRegion(const std::string& region, interval_type& interval){ - std::vector ret = Helpers::split(region, ':'); - if(ret.size() == 1){ - if(ret[0].find('-') != std::string::npos){ - std::cerr << Helpers::timestamp("ERROR", "INTERVAL") << "Illegal interval: " << region << "!" << std::endl; - return false; - } - - // is contigID only - U32* contigID; - if(!this->contig_htable->GetItem(®ion[0], ®ion, contigID, region.size())){ - std::cerr << Helpers::timestamp("ERROR", "INTERVAL") << "Contig: " << region << " is not defined in the header!" << std::endl; - return false; - } - interval(*contigID, 0, this->contigs[*contigID].n_bases); - - } else if(ret.size() == 2){ - // is contigID:pos-pos - U32* contigID; - if(!this->contig_htable->GetItem(&ret[0][0], &ret[0], contigID, ret[0].size())){ - std::cerr << Helpers::timestamp("ERROR", "INTERVAL") << "Contig: " << ret[0] << " is not defined in the header!" << std::endl; - return false; - } - - std::vector retPos = Helpers::split(ret[1], '-'); - if(retPos.size() == 1){ - // only one pos - const double pos = std::stod(retPos[0]); - interval(*contigID, pos, pos); - - } else if(retPos.size() == 2){ - // is two positions - double posA = std::stod(retPos[0]); - double posB = std::stod(retPos[1]); - - if(posB < posA) - std::swap(posA, posB); - - interval(*contigID, posA, posB); - - } else { - std::cerr << Helpers::timestamp("ERROR", "INTERVAL") << "Illegal interval: " << region << "!" << std::endl; - return false; - } - } else { - std::cerr << Helpers::timestamp("ERROR", "INTERVAL") << "Illegal interval: " << region << "!" << std::endl; - return false; - } - - return true; -} - -bool TomahawkOutputReader::__ParseRegionIndexed(const std::string& region, interval_type& interval){ - std::vector ret = Helpers::split(region, ':'); - - // If vector does not contain a colon - if(ret.size() == 1){ - if(ret[0].find('-') != std::string::npos){ - std::cerr << Helpers::timestamp("ERROR", "INTERVAL") << "Illegal interval: " << region << "!" << std::endl; - return false; - } - - // is contigID only - U32* contigID; - if(!this->contig_htable->GetItem(®ion[0], ®ion, contigID, region.size())){ - std::cerr << Helpers::timestamp("ERROR", "INTERVAL") << "Contig: " << region << " is not defined in the header!" << std::endl; - return false; - } - interval(*contigID, 0, this->contigs[*contigID].n_bases); - interval.state = interval_type::INTERVAL_TYPE::INTERVAL_CONTIG_ONLY; - } - // If vector contain colon - else if(ret.size() == 2){ - // is contigID:pos-pos - U32* contigID; - if(!this->contig_htable->GetItem(&ret[0][0], &ret[0], contigID, ret[0].size())){ - std::cerr << Helpers::timestamp("ERROR", "INTERVAL") << "Contig: " << ret[0] << " is not defined in the header!" << std::endl; - return false; - } - - std::vector retPos = Helpers::split(ret[1], '-'); - if(retPos.size() == 1){ - // only one pos - const double pos = std::stod(retPos[0]); - //std::cerr << "single position: " << pos << std::endl; - interval(*contigID, pos, pos); - interval.state = interval_type::INTERVAL_TYPE::INTERVAL_POSITION; - - } else if(retPos.size() == 2){ - // is two positions - double posA = std::stod(retPos[0]); - double posB = std::stod(retPos[1]); - - // Swap pA and pB iff pB > pA - if(posB < posA) - std::swap(posA, posB); - - interval(*contigID, posA, posB); - interval.state = interval_type::INTERVAL_TYPE::INTERVAL_FULL; - - } else { - std::cerr << Helpers::timestamp("ERROR", "INTERVAL") << "Illegal interval: " << region << "!" << std::endl; - return false; - } - } - // contains > 1 colons - // illegal - else { - std::cerr << Helpers::timestamp("ERROR", "INTERVAL") << "Illegal interval: " << region << "!" << std::endl; - return false; - } - - return true; -} - -bool TomahawkOutputReader::__ParseRegionIndexedBlocks(void){ - if(this->interval_tree_entries == nullptr){ - std::cerr << Helpers::timestamp("ERROR","TWO") << "No data is set" << std::endl; - return false; - } - - if(this->interval_totempole_enties == nullptr) - this->interval_totempole_enties = new std::vector; - - for(U32 k = 0; k < this->header.n_contig; ++k){ - for(U32 i = 0; i < this->interval_tree_entries[k].size(); ++i){ - const interval_type& interval = this->interval_tree_entries[k][i]; - if(interval.state == interval_type::INTERVAL_TYPE::INTERVAL_CONTIG_ONLY){ - // Contig only - //std::cerr << "contig only: " << interval << std::endl; - totempole_sorted_entry_type entry; - if(!this->toi_reader.findOverlap(interval.contigID, entry)){ - //std::cerr << "could not find: " << interval << std::endl; - continue; - } - //std::cerr << "contigID found: " << entry << std::endl; - this->interval_totempole_enties->push_back(entry); - - } else if(interval.state == interval_type::INTERVAL_TYPE::INTERVAL_POSITION){ - //std::cerr << "contig:posiiton only: " << interval << std::endl; - totempole_sorted_entry_type entry; - if(!this->toi_reader.findOverlap(interval.contigID, interval.start, entry)){ - //std::cerr << "could not find: " << interval << std::endl; - continue; - } - //std::cerr << "contigID:pos found: " << entry << std::endl; - this->interval_totempole_enties->push_back(entry); - - } else { - //std::cerr << "full interval: " << interval << std::endl; - std::vector entries; - if(!this->toi_reader.findOverlap(interval.contigID, interval.start, interval.stop, entries)){ - //std::cerr << "could not find: " << interval << std::endl; - continue; - } - - for(U32 i = 0; i < entries.size(); ++i){ - //std::cerr << "contigID:pos-pos found: " << entries[i] << std::endl; - this->interval_totempole_enties->push_back(entries[i]); - } - } - } - } - - return(this->interval_totempole_enties->size() > 0); -} - -bool TomahawkOutputReader::__Open(const std::string input){ - this->stream_.open(input, std::ios::binary | std::ios::in | std::ios::ate); - if(!this->stream_.good()){ - std::cerr << Tomahawk::Helpers::timestamp("ERROR", "TWO") << "Failed to open file: " << input << std::endl; - return false; - } - - this->filesize = this->stream_.tellg(); - this->stream_.seekg(0); - - if(!this->stream_.good()){ - std::cerr << Tomahawk::Helpers::timestamp("ERROR", "TWO") << "Bad stream!" << std::endl; - return false; - } - - this->stream_ >> this->header; - if(!this->header.validate(Tomahawk::Constants::WRITE_HEADER_LD_MAGIC)){ - std::cerr << Tomahawk::Helpers::timestamp("ERROR", "TWO") << "Failed to validate header!" << std::endl; - return false; - } - - return true; -} - -bool TomahawkOutputReader::Open(const std::string input){ - if(!this->__Open(input)) - return false; - - if(!this->ParseHeader()){ - std::cerr << Tomahawk::Helpers::timestamp("ERROR", "TWO") << "Failed to parse header!" << std::endl; - return false; - } - - if(this->toi_reader.Open(input + "." + Tomahawk::Constants::OUTPUT_LD_SORT_INDEX_SUFFIX, this->contigs)){ - this->hasIndex = true; - } - - return true; -} - -bool TomahawkOutputReader::OpenExtend(const std::string input){ - if(!this->__Open(input)) - return false; - - if(!this->ParseHeaderExtend()){ - std::cerr << Tomahawk::Helpers::timestamp("ERROR", "TWO") << "Failed to extend header!" << std::endl; - return false; - } - - return true; -} - -bool TomahawkOutputReader::__concat(const std::vector& files, const std::string& output){ - if(files.size() == 0){ - std::cerr << Helpers::timestamp("ERROR","TWO") << "No input files..." << std::endl; - return false; - } - - // open first one - if(!SILENT) - std::cerr << Helpers::timestamp("LOG", "CONCAT") << "Opening input: " << files[0] << "..." << std::endl; - - if(!this->Open(files[0])){ - std::cerr << Helpers::timestamp("ERROR","TWO") << "Failed to parse: " << files[0] << "..." << std::endl; - return false; - } - - this->setWriterType(0); - this->setWriteHeader(true); - this->literals += "\n##tomahawk_concatCommand=" + Helpers::program_string(); - this->literals += "\n##tomahawk_concatFiles="; - for(U32 i = 0; i < files.size(); ++i) - this->literals += files[i] + ','; - - if(!this->OpenWriter(output)){ - std::cerr << Helpers::timestamp("ERROR","TWO") << "Failed to open writer..." << std::endl; - return false; - } - - while(this->parseBlock()){ - this->writer->write(this->data_); - } - - for(U32 i = 1; i < files.size(); ++i){ - if(!SILENT) - std::cerr << Helpers::timestamp("LOG", "CONCAT") << "Opening input: " << files[i] << "..." << std::endl; - - this->stream_.close(); - if(!this->OpenExtend(files[i])){ - std::cerr << Helpers::timestamp("ERROR","TWO") << "Failed to parse: " << files[i] << "..." << std::endl; - return false; - } - - while(this->parseBlock()){ - this->writer->write(this->data_); - } - } - - this->writer->flush(); - this->writer->close(); - return true; -} - -bool TomahawkOutputReader::concat(const std::vector& files, const std::string& output){ - if(files.size() == 0){ - std::cerr << Helpers::timestamp("ERROR","TWO") << "No input files given..." << std::endl; - return false; - } - - return(this->__concat(files, output)); -} - -bool TomahawkOutputReader::concat(const std::string& file_list, const std::string& output){ - if(file_list.size() == 0){ - std::cerr << Helpers::timestamp("ERROR","TWO") << "No input file list given..." << std::endl; - return false; - } - - std::ifstream file_list_read(file_list); - if(!file_list_read.good()){ - std::cerr << Helpers::timestamp("ERROR","TWO") << "Failed to get file_list..." << std::endl; - return false; - } - - std::vector files; - std::string line; - while(getline(file_list_read, line)){ - if(line.size() == 0){ - std::cerr << Helpers::timestamp("WARNING","TWO") << "Empty line" << std::endl; - break; - } - files.push_back(line); - } - - return(this->__concat(files, output)); -} - -bool TomahawkOutputReader::ParseHeader(void){ - if(this->header.n_contig == 0) - return false; - - if(this->header.n_contig < 1024) - this->contig_htable = new hash_table(1024); - else - this->contig_htable = new hash_table(this->header.n_contig * 2); - - this->contigs = new contig_type[this->header.n_contig]; - U32* ret; - - for(U32 i = 0; i < this->header.n_contig; ++i){ - this->stream_ >> this->contigs[i]; - if(!this->contig_htable->GetItem(&this->contigs[i].name[0], &this->contigs[i].name, ret, this->contigs[i].name.size())){ - // Add to hash table - this->contig_htable->SetItem(&this->contigs[i].name[0], &this->contigs[i].name, i, this->contigs[i].name.size()); - } else { - std::cerr << Helpers::timestamp("ERROR", "INTERVAL") << "Duplicated contig name: " << this->contigs[i].name << "!" << std::endl; - exit(1); // unrecoverable error - } - } - - if(!this->tgzf_controller.InflateBlock(this->stream_, this->compressed_buffer)){ - std::cerr << Helpers::timestamp("ERROR","TGZF") << "Failed to get TWO block" << std::endl; - return false; - } - - this->literals = std::string(this->tgzf_controller.buffer.data()); - - return true; -} - -bool TomahawkOutputReader::ParseHeaderExtend(void){ - if(this->header.n_contig == 0) - return false; - - U32* ret; - for(U32 i = 0; i < this->header.n_contig; ++i){ - this->stream_ >> this->contigs[i]; - // std::cerr << this->contigs[i] << std::endl; - if(!this->contig_htable->GetItem(&this->contigs[i].name[0], &this->contigs[i].name, ret, this->contigs[i].name.size())){ - std::cerr << Helpers::timestamp("ERROR","TWO") << "Contig does not exist in other file" << std::endl; - return false; - } - } - - if(!this->tgzf_controller.InflateBlock(this->stream_, this->compressed_buffer)){ - std::cerr << Helpers::timestamp("ERROR","TGZF") << "Failed to get TWO block" << std::endl; - return false; - } - - return true; -} - -bool TomahawkOutputReader::seekBlock(const U32 blockID){ - if(this->toi_reader.ERROR_STATE != toi_reader_type::TOI_OK){ - std::cerr << Helpers::timestamp("ERROR","TOI") << "Index is bad!" << std::endl; - return false; - } - - if(blockID > this->toi_reader.size()){ - std::cerr << Helpers::timestamp("ERROR","TOI") << "Illegal blockID (" << blockID << ">" << this->toi_reader.size() << ")!" << std::endl; - return false; - } - - if(!this->stream_.good()){ - std::cerr << Helpers::timestamp("ERROR","TWO") << "Stream is bad!" << std::endl; - return false; - } - - this->stream_.seekg(this->toi_reader[blockID].getStartOffset()); - if(!this->stream_.good()){ - std::cerr << Helpers::timestamp("ERROR","TWO") << "Stream is bad following seek!" << std::endl; - return false; - } - - return(this->parseBlock()); -} - -bool TomahawkOutputReader::summary(const std::string& input, const U32 bins){ - TWO::TomahawkOutputStatsContainer container(bins); - - // Natural output required parsing - while(this->parseBlock()){ - output_container_reference_type o(this->data_); - for(U32 i = 0; i < o.size(); ++i) - container += o[i]; - } - - std::cerr << "R2\t" << container.R2.within.getTotal() << '\t' << container.R2.across.getTotal() << '\t' << container.R2.global.getTotal() << std::endl; - std::cerr << container.R2 << std::endl; - std::cerr << "D\t" << container.D.within.getTotal() << '\t' << container.D.across.getTotal() << '\t' << container.D.global.getTotal() << std::endl; - std::cerr << container.D << std::endl; - std::cerr << "Dprime\t" << container.Dprime.within.getTotal() << '\t' << container.Dprime.across.getTotal() << '\t' << container.Dprime.global.getTotal() << std::endl; - std::cerr << container.Dprime << std::endl; - - return true; -} - -bool TomahawkOutputReader::index(const std::string& input){ - std::vector paths = Helpers::filePathBaseExtension(input); - std::string basePath = paths[0]; - std::string baseName; - - if(basePath.size() > 0) - basePath += '/'; - - if(paths[3].size() == Tomahawk::Constants::OUTPUT_LD_SUFFIX.size() && - strncasecmp(&paths[3][0], &Tomahawk::Constants::OUTPUT_LD_SUFFIX[0], Tomahawk::Constants::OUTPUT_LD_SUFFIX.size()) == 0) - baseName = paths[2]; - else baseName = paths[1]; - - // Open writer - // Set controller - toi_header_type toi_header(Tomahawk::Constants::WRITE_HEADER_LD_SORT_MAGIC, this->header.samples, this->header.n_contig); - // We assume data is expanded and sorted - toi_header.controller.sorted = 1; - toi_header.controller.expanded = 1; - toi_header.controller.partial_sort = 0; - - twoi_writer_type writer(this->contigs, &this->header, toi_header); - writer.open(basePath + baseName + '.' + Tomahawk::Constants::OUTPUT_LD_SUFFIX + '.' + Tomahawk::Constants::OUTPUT_LD_SORT_INDEX_SUFFIX); - - - return true; -} - -bool TomahawkOutputReader::setWriterType(const int type){ - if(type == 0) - this->writer_output_type = WRITER_TYPE::binary; - else if(type == 1) - this->writer_output_type = WRITER_TYPE::natural; - else { - std::cerr << Tomahawk::Helpers::timestamp("ERROR","READER") << "Unknown writer type: " << type << std::endl; - return false; - } - return true; -} -*/ - } /* namespace Tomahawk */ diff --git a/src/tomahawk/two/TomahawkOutputReader.h b/src/tomahawk/two/TomahawkOutputReader.h index 49f75b6..1cbe66f 100644 --- a/src/tomahawk/two/TomahawkOutputReader.h +++ b/src/tomahawk/two/TomahawkOutputReader.h @@ -114,6 +114,8 @@ class TomahawkOutputReader { output_container_type getContainerBlock(const U32 blockID); output_container_type getContainerBlock(std::vector blocks); + inline void setShowHeader(const bool yes){ this->showHeader_ = yes; } + inline const bool getShowHeader(void) const{ return(this->showHeader_); } inline const bool isSorted(void) const{ return(this->index_->getController().isSorted == true); } // Basic operations @@ -121,32 +123,20 @@ class TomahawkOutputReader { bool view(const interval_type& interval); bool view(const std::vector& intervals); - // Other - bool index(const std::string& filename); - bool summary(const std::string& input, const U32 bins); - // Concatenate bool concat(const std::string& file_list, const std::string& output); bool concat(const std::vector& files, const std::string& output); - // - bool setWriterType(const int type); - void setWriteHeader(const bool write){ this->showHeader_ = write; } - inline filter_type& getFilter(void){ return this->filters_; } - bool OpenWriter(void); - bool OpenWriter(const std::string output_file); private: - bool __Open(const std::string input); bool ParseHeader(void); bool ParseHeaderExtend(void); bool __viewOnly(void); bool __viewFilter(void); bool __viewRegion(void); - bool __viewRegionIndexed(void); - bool __checkRegionIndex(const entry_type& entry); + bool __checkRegionNoIndex(const entry_type& entry); bool __concat(const std::vector& files, const std::string& output); diff --git a/src/view.h b/src/view.h index 72e83e4..7dbf78e 100644 --- a/src/view.h +++ b/src/view.h @@ -339,13 +339,9 @@ int view(int argc, char** argv){ } else if(end == Tomahawk::Constants::OUTPUT_LD_SUFFIX){ Tomahawk::TomahawkOutputReader reader; - reader.setWriteHeader(outputHeader); Tomahawk::OutputFilter& filter = reader.getFilter(); filter = Tomahawk::OutputFilter(two_filter); // use copy ctor to transfer data - - - //if(!reader.setWriterType(outputType)) - // return 1; + reader.setShowHeader(outputHeader); if(!reader.open(input)) return 1; From f4e855f8b23217641cefd94d62a74b0cea3b86f9 Mon Sep 17 00:00:00 2001 From: "Marcus D. R. Klarqvist" Date: Sat, 17 Feb 2018 18:32:02 +0000 Subject: [PATCH 3/3] release notes --- RELEASE.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/RELEASE.md b/RELEASE.md index 940ca2f..f79b18a 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,3 +1,10 @@ +# Release 0.3.1 + +## Bug Fixes and Other Changes +* Bug fixes + * Sort merge (`tomahawk sort -M`) now produces the correct output + * `view` ABI command now correctly triggers `-h`/`-H` flag + # Release 0.3.0 ## Breaking Changes