Skip to content

Commit

Permalink
Fixed PrintUnitigGfa
Browse files Browse the repository at this point in the history
  • Loading branch information
lipi0056 committed Oct 17, 2023
1 parent 90eafed commit 0c76498
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 240 deletions.
1 change: 0 additions & 1 deletion RavenExe/src/main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,6 @@ int main(int argc, char** argv) {
raven::PrintGfa(graph, gfa_path);

if (!unitig_gfa_path.empty()) {
raven::CreateUnitigGraph(graph);
raven::PrintUnitigGfa(graph, unitig_gfa_path);
}

Expand Down
6 changes: 1 addition & 5 deletions RavenLib/include/raven/graph/graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ struct Node {
Node* pair;
std::uint16_t coverage = 0;

std::vector<Node*> unitig_nodes;
std::unordered_set<std::string> original_node_sequence_names;
};

struct Edge {
Expand Down Expand Up @@ -191,10 +191,6 @@ struct RAVEN_EXPORT Graph {
std::vector<std::unique_ptr<Pile>> piles;
std::vector<std::unique_ptr<Node>> nodes;
std::vector<std::unique_ptr<Edge>> edges;

std::vector<std::shared_ptr<Node>> unitig_nodes;
std::vector<std::shared_ptr<Edge>> unitig_edges;

};

} // namespace raven
Expand Down
2 changes: 0 additions & 2 deletions RavenLib/include/raven/graph/serialization/graph_repr.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@ RAVEN_EXPORT Graph LoadGfa(const std::string& path);
RAVEN_EXPORT std::vector<std::string> getGfa(const Graph& graph);
RAVEN_EXPORT std::vector<std::string> getCsv(const Graph& graph, bool printSequenceName = false, bool printPileBeginEnd = false, bool printEdgeSimilarity = false);

RAVEN_EXPORT void CreateUnitigGraph(Graph& graph);

} // namespace raven

#endif // RAVEN_GRAPH_SERIALIZATION_GRAPH_REPR_H_
37 changes: 37 additions & 0 deletions RavenLib/src/common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ void RemoveEdges(Graph& graph, const std::unordered_set<std::uint32_t>& indices,

std::uint32_t CreateUnitigs(Graph& graph, std::uint32_t epsilon) {
std::unordered_set<std::uint32_t> marked_edges;
std::unordered_set<Edge *> single_unting_marked_edges;
std::unordered_set<Edge *> single_rc_unting_marked_edges;
std::vector<std::unique_ptr<Node>> unitigs;
std::vector<std::unique_ptr<Edge>> unitig_edges;
std::vector<std::uint32_t> node_updates(graph.nodes.size(), 0);
Expand Down Expand Up @@ -118,10 +120,17 @@ std::uint32_t CreateUnitigs(Graph& graph, std::uint32_t epsilon) {
unitig->pair = rc_unitig;
rc_unitig->pair = unitig;

if (begin == end) {
unitig->original_node_sequence_names.insert(begin->sequence.name);
rc_unitig->original_node_sequence_names.insert(begin->pair->sequence.name);
}

if (begin != end) { // connect unitig to graph
if (begin->indegree()) {
marked_edges.emplace(begin->inedges.front()->id);
marked_edges.emplace(begin->inedges.front()->pair->id);
single_unting_marked_edges.insert(begin->inedges.front());
single_rc_unting_marked_edges.insert(begin->inedges.front()->pair);

auto edge =
emplace_edge_through_factory(begin->inedges.front()->tail, unitig,
Expand All @@ -137,6 +146,8 @@ std::uint32_t CreateUnitigs(Graph& graph, std::uint32_t epsilon) {
if (end->outdegree()) {
marked_edges.emplace(end->outedges.front()->id);
marked_edges.emplace(end->outedges.front()->pair->id);
single_unting_marked_edges.insert(end->outedges.front());
single_rc_unting_marked_edges.insert(end->outedges.front()->pair);

auto edge = emplace_edge_through_factory(
unitig, end->outedges.front()->head,
Expand All @@ -156,6 +167,8 @@ std::uint32_t CreateUnitigs(Graph& graph, std::uint32_t epsilon) {
while (true) {
marked_edges.emplace(jt->outedges.front()->id);
marked_edges.emplace(jt->outedges.front()->pair->id);
single_unting_marked_edges.insert(jt->outedges.front());
single_rc_unting_marked_edges.insert(jt->outedges.front()->pair);

// update transitive edges
node_updates[jt->id & ~1UL] = unitig->id;
Expand All @@ -166,6 +179,30 @@ std::uint32_t CreateUnitigs(Graph& graph, std::uint32_t epsilon) {
break;
}
}

if (single_unting_marked_edges.size() > 0) {
for (const auto single_unting_marked_edge : single_unting_marked_edges) {
if (single_unting_marked_edge->head != nullptr) {
unitig->original_node_sequence_names.insert(single_unting_marked_edge->head->sequence.name);
}
if (single_unting_marked_edge->tail != nullptr) {
unitig->original_node_sequence_names.insert(single_unting_marked_edge->tail->sequence.name);
}
}
}
if (single_rc_unting_marked_edges.size() > 0) {
for (const auto single_rc_unting_marked_edge : single_rc_unting_marked_edges) {
if (single_rc_unting_marked_edge->head != nullptr) {
rc_unitig->original_node_sequence_names.insert(single_rc_unting_marked_edge->head->sequence.name);
}
if (single_rc_unting_marked_edge->tail != nullptr) {
rc_unitig->original_node_sequence_names.insert(single_rc_unting_marked_edge->tail->sequence.name);
}
}
}

single_unting_marked_edges.clear();
single_rc_unting_marked_edges.clear();
}

std::move(unitigs.begin(), unitigs.end(), std::back_inserter(graph.nodes));
Expand Down
81 changes: 1 addition & 80 deletions RavenLib/src/graph.cc
Original file line number Diff line number Diff line change
Expand Up @@ -56,85 +56,6 @@ Node::Node(const std::uint32_t id, Node* begin, Node* end)
(is_unitig ? "Utg" : "Ctg") + std::to_string(id & (~1UL)), data);
}

Node::Node(Node* begin, Node* end, bool is_unitig)
: id(num_objects++),
sequence(),
count(),
is_unitig(is_unitig),
is_circular(false),
is_polished(),
transitive(),
front_inedges(),
front_outedges(),
back_inedges(),
back_outedges(),
pair(),
unitig_nodes() {

std::string data{};

auto it = begin;
if(begin == end){
data += it->sequence.InflateData();
count += it->count;
unitig_nodes.emplace_back(it);
}
else{
while (true) {
data += it->outedges.front()->Label();
count += it->count;
unitig_nodes.emplace_back(it);
if ((it = it->outedges.front()->head) == end) {
unitig_nodes.emplace_back(it);
break;
}
}

if (begin != end) {
data += end->sequence.InflateData();
count += end->count;
}
}


std::uint32_t front_inedge_count{0};
std::uint32_t back_inedge_count{0};
std::uint32_t front_outedge_count{0};
std::uint32_t back_outedge_count{0};

for(auto& inedge : begin->inedges){
if(std::find(unitig_nodes.begin(), unitig_nodes.end(), inedge->tail) == unitig_nodes.end()){
front_inedges.emplace_back(inedge);
front_inedge_count++;
}
};

for(auto& outedge : begin->outedges){
if(std::find(unitig_nodes.begin(), unitig_nodes.end(), outedge->head) == unitig_nodes.end()){
front_outedges.emplace_back(outedge);
front_outedge_count++;
}
};

for(auto& inedge : end->inedges){
if(std::find(unitig_nodes.begin(), unitig_nodes.end(), inedge->tail) == unitig_nodes.end()){
back_inedges.emplace_back(inedge);
back_inedge_count++;
};
};


for(auto& outedge : end->outedges){
if(std::find(unitig_nodes.begin(), unitig_nodes.end(), outedge->head) == unitig_nodes.end()){
back_outedges.emplace_back(outedge);
back_outedge_count++;
};
};
sequence = biosoup::NucleicAcid(
"Utg" + std::to_string(id & (~1UL)),
data);
}

Edge::Edge(Node* tail, Node* head, std::uint32_t length)
: Edge(0, tail, head, length) {}

Expand All @@ -144,6 +65,6 @@ Edge::Edge(const std::uint32_t id, Node* tail, Node* head, std::uint32_t length)
head->inedges.emplace_back(this);
}

Graph::Graph() : stage(-5), piles(), nodes(), edges(), unitig_nodes(), unitig_edges() {}
Graph::Graph() : stage(-5), piles(), nodes(), edges() {}

} // namespace raven
161 changes: 9 additions & 152 deletions RavenLib/src/graph_repr.cc
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ void PrintUnitigGfa(const Graph& graph, const std::string& path) {
return;
}
std::ofstream os(path);
for (const auto& it : graph.unitig_nodes) {
for (const auto& it : graph.nodes) {
if ((it == nullptr) || it->is_rc() ||
(it->count == 1 && it->outdegree() == 0 && it->indegree() == 0)) {
continue;
Expand All @@ -82,21 +82,16 @@ void PrintUnitigGfa(const Graph& graph, const std::string& path) {
<< "\t" << it->sequence.InflateData()
<< "\tLN:i:" << it->sequence.inflated_len
<< "\tRC:i:" << it->count
<< "\tCL:z:" << (it->color ? "blue" : "orange")
<< std::endl;
if (it->is_circular) {
os << "A\t" << it->sequence.name << "\t"
<< it->sequence.name << "\t"
<< std::endl;
continue;
<< "\tCL:z:" << (it->color ? "blue" : "orange");
if (it->original_node_sequence_names.size() > 0) {
os << "\tA\t";
for(const auto& original_node_sequence_name : it->original_node_sequence_names){
os << original_node_sequence_name << "\t";
}
}
for(const auto& unitig_node : it->unitig_nodes){
os << "A\t" << it->sequence.name << "\t"
<< unitig_node->sequence.name << "\t"
<< std::endl;
};
os << std::endl;
}
for (const auto& it : graph.unitig_edges) {
for (const auto& it : graph.edges) {
if (it == nullptr || it->is_rc()) {
continue;
}
Expand All @@ -109,144 +104,6 @@ void PrintUnitigGfa(const Graph& graph, const std::string& path) {

}

void CreateUnitigGraph(Graph& graph) {
std::unordered_set<std::uint32_t> marked_edges;
std::vector<std::shared_ptr<Node>> unitigs;
std::vector<std::shared_ptr<Edge>> unitig_edges;
std::vector<std::uint32_t> node_updates(graph.nodes.size(), 0);
std::vector<char> is_visited(graph.nodes.size(), 0);


for (const auto& it : graph.nodes) {

if (it == nullptr || is_visited[it->id]) {
continue;
}

std::uint32_t extension = 1;

bool is_circular = false;
auto begin = it.get();

while (true) { // extend left
is_visited[begin->id] = 1;
is_visited[begin->pair->id] = 1;

if(begin->indegree() > 1 || begin->indegree() == 0){
break;
}

auto next = begin->inedges.front()->tail;
if(next->outdegree() != 1){
break;
}

begin = next;
++extension;
if (begin == it.get()) {
is_circular = true;
break;
}
}


auto end = it.get();
while (true) { // extend right
is_visited[end->id] = 1;
is_visited[end->pair->id] = 1;

if(end->outdegree() > 1 || end->outdegree() == 0){
break;
}

auto next = end->outedges.front()->head;

if(next->indegree() != 1){
break;
}
end = next;
++extension;
if (end == it.get()) {
is_circular = true;
break;
}
}

auto unitig = std::make_shared<Node>(begin, end, true);
unitigs.emplace_back(unitig);
unitigs.emplace_back(std::make_shared<Node>(end->pair, begin->pair, true));
unitig->pair = unitigs.back().get();
unitig->pair->pair = unitig.get();
}

auto GetUnitigFromNode = [&] (Node* query_node) -> Node* {
for(auto& unitig : unitigs){
for(auto unitig_node : unitig->unitig_nodes){
if(unitig_node->id == query_node->id){
return unitig.get();
};
};
};
return nullptr;
};

std::uint32_t counter = 0;
for(auto& unitig : unitigs){
if (!unitig->is_circular) { // connect unitig to graph
counter++;
for(auto& inedge : unitig->front_inedges){
marked_edges.emplace(inedge->id);
marked_edges.emplace(inedge->pair->id);

auto tail_unitig = GetUnitigFromNode(inedge->tail);

auto edge = std::make_shared<Edge>(
tail_unitig,
unitig.get(),
tail_unitig->sequence.inflated_len - inedge->tail->sequence.inflated_len + inedge->length
);
unitig_edges.emplace_back(edge);

auto rc_head_unitig = GetUnitigFromNode(inedge->pair->head);
auto rc_edge = std::make_shared<Edge>(
unitig->pair,
rc_head_unitig,
unitig->pair->sequence.inflated_len - inedge->pair->head->sequence.inflated_len + inedge->length
);
unitig_edges.emplace_back(rc_edge);
edge->pair = unitig_edges.back().get();
edge->pair->pair = edge.get();
};

for(auto& outedge : unitig->back_outedges){
marked_edges.emplace(outedge->id);
marked_edges.emplace(outedge->pair->id);

auto head_unitig = GetUnitigFromNode(outedge->head);
auto edge = std::make_shared<Edge>(
unitig.get(),
head_unitig,
unitig->sequence.inflated_len - outedge->tail->sequence.inflated_len + outedge->length
);
unitig_edges.emplace_back(edge);

auto rc_tail_unitig = GetUnitigFromNode(outedge->pair->head);
auto rc_edge = std::make_shared<Edge>(
rc_tail_unitig,
unitig->pair,
rc_tail_unitig->sequence.inflated_len - outedge->pair->head->sequence.inflated_len + outedge->length
);
unitig_edges.emplace_back(rc_edge);
edge->pair = unitig_edges.back().get();
edge->pair->pair = edge.get();
};
};

}
graph.unitig_nodes.insert(graph.unitig_nodes.end(), unitigs.begin(), unitigs.end());
graph.unitig_edges.insert(graph.unitig_edges.end(), unitig_edges.begin(), unitig_edges.end());
}

std::vector<std::string> getGfa(const Graph& graph) {
std::vector<std::string> resultVector;
std::string line;
Expand Down

0 comments on commit 0c76498

Please sign in to comment.