Skip to content

Commit

Permalink
Merge pull request #13 from mklarqvist/merging
Browse files Browse the repository at this point in the history
bug fixes
  • Loading branch information
mklarqvist authored Feb 17, 2018
2 parents 36aaf89 + f4e855f commit 7401777
Show file tree
Hide file tree
Showing 11 changed files with 130 additions and 844 deletions.
7 changes: 7 additions & 0 deletions RELEASE.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# Release 0.3.1

## Bug Fixes and Other Changes
* Bug fixes
* Sort merge (`tomahawk sort -M`) now produces the correct output
* The `view` ABI command now correctly triggers the `-h`/`-H` flags

# Release 0.3.0

## Breaking Changes
Expand Down
24 changes: 13 additions & 11 deletions src/algorithm/sort/output_sorter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,17 +127,20 @@ bool OutputSorter::sortMerge(const std::string& inputFile, const std::string& de
writer.setFlushLimit(block_size);
writer.writeHeaders(this->reader.getHeader());

const U32 n_toi_entries = this->reader.getIndex().size();
std::ifstream* streams = new std::ifstream[n_toi_entries];
tgzf_iterator** iterators = new tgzf_iterator*[n_toi_entries];
const U32 n_blocks = this->reader.getIndex().size();
std::ifstream* streams = new std::ifstream[n_blocks];
tgzf_iterator** iterators = new tgzf_iterator*[n_blocks];

if(!SILENT)
std::cerr << Helpers::timestamp("LOG", "SORT") << "Opening " << n_toi_entries << " file handles...";
std::cerr << Helpers::timestamp("LOG", "SORT") << "Opening " << n_blocks << " file handles...";

for(U32 i = 0; i < n_toi_entries; ++i){
for(U32 i = 0; i < n_blocks; ++i){
streams[i].open(inputFile);
streams[i].seekg(this->reader.getIndex().getContainer()[i].byte_offset);
iterators[i] = new tgzf_iterator(streams[i], 65536, this->reader.getIndex().getContainer()[i].byte_offset, this->reader.getIndex().getContainer()[i].byte_offset_end);
iterators[i] = new tgzf_iterator(streams[i],
65536,
this->reader.getIndex().getContainer()[i].byte_offset,
this->reader.getIndex().getContainer()[i].byte_offset_end);
}

if(!SILENT)
Expand All @@ -152,12 +155,12 @@ bool OutputSorter::sortMerge(const std::string& inputFile, const std::string& de

// draw one from each
const entry_type* e = nullptr;
for(U32 i = 0; i < n_toi_entries; ++i){
for(U32 i = 0; i < n_blocks; ++i){
if(!iterators[i]->nextEntry(e)){
std::cerr << Helpers::timestamp("ERROR", "SORT") << "Failed to get an entry..." << std::endl;
return false;
}
outQueue.push( queue_entry(e, i, entry_type::sortAscending) );
outQueue.push( queue_entry(e, i, entry_type::sortDescending) );
}

if(outQueue.empty()){
Expand All @@ -174,10 +177,9 @@ bool OutputSorter::sortMerge(const std::string& inputFile, const std::string& de
// remove this record from the queue
outQueue.pop();


while(iterators[id]->nextEntry(e)){
if(!(*e < outQueue.top().data)){
outQueue.push( queue_entry(e, id, entry_type::sortAscending) );
outQueue.push( queue_entry(e, id, entry_type::sortDescending) );
break;
}
writer << *e;
Expand All @@ -193,7 +195,7 @@ bool OutputSorter::sortMerge(const std::string& inputFile, const std::string& de
std::cerr << Helpers::timestamp("LOG") << "Output: " << Helpers::ToPrettyString(writer.sizeEntries()) << " entries into " << Helpers::ToPrettyString(writer.sizeBlocks()) << " blocks..." << std::endl;

// Cleanup
for(U32 i = 0; i < n_toi_entries; ++i)
for(U32 i = 0; i < n_blocks; ++i)
delete iterators[i];

delete [] iterators;
Expand Down
7 changes: 3 additions & 4 deletions src/concat.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ int concat(int argc, char** argv){

if(input.size() == 0 && files.length() == 0){
std::cerr << Tomahawk::Helpers::timestamp("ERROR") << "No input file specified..." << std::endl;
std::cerr << input.size() << '\t' << files.size() << std::endl;
return(1);
}

Expand All @@ -110,19 +111,17 @@ int concat(int argc, char** argv){

Tomahawk::TomahawkOutputReader reader;
if(input.size() == 0){
/*
if(!reader.concat(files, output)){
std::cerr << Tomahawk::Helpers::timestamp("ERROR", "CONCAT") << "Failed to concat files!" << std::endl;
return 1;
}
*/

} else {
/*
if(!reader.concat(input, output)){
std::cerr << Tomahawk::Helpers::timestamp("ERROR", "CONCAT") << "Failed to concat files!" << std::endl;
return 1;
}
*/

}

return 0;
Expand Down
7 changes: 7 additions & 0 deletions src/index/index_contig.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,13 @@ struct HeaderContig{
return(sizeof(U32) + sizeof(U32) + this->n_char);
}

/// Field-wise equality: two contig records are equal when their base count
/// (n_bases), name length (n_char) and name all agree.
inline const bool operator==(const self_type& other) const{
	const bool differs = this->n_bases != other.n_bases
	                  || this->n_char  != other.n_char
	                  || this->name    != other.name;
	return !differs;
}

friend std::ostream& operator<<(std::ostream& stream, const self_type& entry){
stream << entry.n_bases << '\t' << entry.n_char << '\t' << entry.name;
return stream;
Expand Down
16 changes: 16 additions & 0 deletions src/index/tomahawk_header.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,22 @@ class TomahawkHeader{
/// Appends `string` to the accumulated literals_ member.
inline void addLiteral(const std::string& string){
	this->literals_ += string;
}
/// Delegates validation to the embedded magic header.
inline const bool validate(void) const{
	return this->magic_.validate();
}

//
/// Header equality. The magic headers must compare equal first; after that
/// every contig entry and every sample name is compared position by
/// position. Loop bounds are taken from this object's magic header
/// (n_contigs / n_samples).
inline const bool operator==(const self_type& other) const{
	const bool magic_matches = (this->magic_ == other.magic_);
	if(magic_matches == false)
		return false;

	// Contigs are checked before sample names; bail out on the first mismatch.
	U32 contig = 0;
	while(contig < this->magic_.n_contigs){
		if(!(this->contigs_[contig] == other.contigs_[contig]))
			return false;
		++contig;
	}

	U32 sample = 0;
	while(sample < this->magic_.n_samples){
		if(this->sample_names_[sample] != other.sample_names_[sample])
			return false;
		++sample;
	}

	return true;
}

private:
bool BuildHashTables(void);
const U32 DetermineUncompressedSize(void) const;
Expand Down
26 changes: 26 additions & 0 deletions src/io/output_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,32 @@ void OutputWriter::operator<<(buffer_type& buffer){
}
}

/**
 * Writes an already-compressed block verbatim to the output stream and
 * records it in the index.
 *
 * An empty buffer is ignored. Otherwise the buffer contents are written
 * unchanged, an index entry is appended holding the start/end stream
 * offsets, the uncompressed size and the derived entry count, and the block
 * counter is incremented. The buffer is reset before returning.
 *
 * All writer state shared between callers (largest-uncompressed tracker,
 * the scratch index_entry member, the stream, the index container and the
 * block counter) is read and modified only while the spin-lock is held.
 *
 * @param buffer             Compressed bytes to write; reset on return.
 * @param uncompressed_size  Size of the block once decompressed; asserted to
 *                           be a whole multiple of sizeof(entry_type).
 */
void OutputWriter::writePrecompressedBlock(buffer_type& buffer, const U64& uncompressed_size){
	if(buffer.size() == 0) return;

	assert(uncompressed_size % sizeof(entry_type) == 0);

	// Lock
	this->spin_lock->lock();

	// Compare-and-update of the shared maximum must not interleave with
	// another writer, so it is performed inside the critical section.
	if(uncompressed_size > this->l_largest_uncompressed)
		this->l_largest_uncompressed = uncompressed_size;

	this->index_entry.byte_offset = (U64)this->stream->tellp();
	this->index_entry.uncompressed_size = uncompressed_size;
	this->stream->write(buffer.data(), buffer.size());
	this->index_entry.byte_offset_end = (U64)this->stream->tellp();
	this->index_entry.n_variants = uncompressed_size / sizeof(entry_type);
	this->index_->getContainer() += this->index_entry;
	++this->n_blocks;

	// index_entry is a member shared by all callers: clear it before
	// releasing the lock so a concurrent writer never has its freshly
	// populated entry wiped.
	this->index_entry.reset();

	// Unlock
	this->spin_lock->unlock();

	// The buffer belongs to the caller, so it can be reset outside the lock.
	buffer.reset();
}

void OutputWriter::CheckOutputNames(const std::string& input){
std::vector<std::string> paths = Helpers::filePathBaseExtension(input);
this->basePath = paths[0];
Expand Down
3 changes: 3 additions & 0 deletions src/io/output_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ class OutputWriter{
// Getters
// Returns the writing_sorted_ flag.
inline const bool isSorted(void) const{
	return this->writing_sorted_;
}
// Returns the writing_sorted_partial_ flag.
inline const bool isPartialSorted(void) const{
	return this->writing_sorted_partial_;
}
// Returns a mutable reference to the index object held through index_.
// Note: callable on a const writer, yet the returned reference is non-const.
inline index_type& getIndex(void) const{
	return *this->index_;
}

bool open(const std::string& output_file);
int writeHeaders(twk_header_type& twk_header);
Expand Down Expand Up @@ -123,6 +124,8 @@ class OutputWriter{
*/
void operator<<(buffer_type& buffer);

/**
 * Write an already-compressed block to the output stream as-is and append
 * a matching entry to the index. Does nothing when the buffer is empty.
 * @param buffer             Compressed bytes to write; reset after writing.
 * @param uncompressed_size  Decompressed size of the block; must be a
 *                           multiple of sizeof(entry_type).
 */
void writePrecompressedBlock(buffer_type& buffer, const U64& uncompressed_size);

private:
void CheckOutputNames(const std::string& input);

Expand Down
8 changes: 8 additions & 0 deletions src/tomahawk/tomahawk_magic_header.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,14 @@ struct TomahawkMagicHeader{
return(this->validateMagic() && this->n_samples > 0 && this->n_contigs > 0 && (this->major_version > 0 || this->minor_version > 0) && this->l_header > 0 && this->l_header_uncompressed > 0);
}

/// Equality on the identifying fields only: the magic string (first
/// WRITE_HEADER_MAGIC_LENGTH characters, compared with strncmp), the file
/// type, and the sample and contig counts. Version and header-length
/// fields do not take part in the comparison.
inline const bool operator==(const self_type& other) const{
	const int magic_cmp = strncmp(&this->magic_string[0],
	                              &other.magic_string[0],
	                              Tomahawk::Constants::WRITE_HEADER_MAGIC_LENGTH);

	if(magic_cmp != 0
	   || this->file_type != other.file_type
	   || this->n_samples != other.n_samples
	   || this->n_contigs != other.n_contigs)
		return false;

	return true;
}

private:
friend std::ostream& operator<<(std::ostream& stream, const self_type& header){
stream.write(header.magic_string, Tomahawk::Constants::WRITE_HEADER_MAGIC_LENGTH);
Expand Down
Loading

0 comments on commit 7401777

Please sign in to comment.