Skip to content

Commit

Permalink
Merge pull request #305 from PoonLab/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
ArtPoon authored Jul 9, 2021
2 parents 1a2b182 + 520ad13 commit 7e5614c
Show file tree
Hide file tree
Showing 9 changed files with 97 additions and 89 deletions.
4 changes: 2 additions & 2 deletions batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def parse_args():
'quantile of Poisson distribution (molecular clock). Default 0.001 '
'corresponds to 99.9%% cutoff.')

parser.add_argument('--batchsize', type=int, default=500,
parser.add_argument('--batchsize', type=int, default=2000,
help='option, number of records to batch process with minimap2')
parser.add_argument('--max-variants', type=int, default=5000,
help='option, limit number of variants per lineage (default 5000)')
Expand All @@ -49,7 +49,7 @@ def parse_args():
help="option, path to FASTA file with reference genome")
parser.add_argument('--mmbin', type=str, default='minimap2',
help="option, path to minimap2 binary executable")
parser.add_argument('-mmt', "--mmthreads", type=int, default=8,
parser.add_argument('-mmt', "--mmthreads", type=int, default=16,
help="option, number of threads for minimap2.")

parser.add_argument('--misstol', type=int, default=300,
Expand Down
14 changes: 8 additions & 6 deletions covizu/clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,15 @@ def recode_features(records, callback=None, limit=10000):
union = {}
labels = []
indexed = []
for _, fvec in intermed[:limit]:
for count, item in enumerate(intermed):
fvec = item[1]
labels.append(fvecs[fvec])
for feat in fvec:
if feat not in union:
union.update({feat: len(union)})
# recode feature vectors as integers (0-index)
indexed.append(set(union[feat] for feat in fvec))
if count < limit:
for feat in fvec:
if feat not in union:
union.update({feat: len(union)})
# recode feature vectors as integers (0-index)
indexed.append(set(union[feat] for feat in fvec))

return union, labels, indexed

Expand Down
21 changes: 13 additions & 8 deletions covizu/utils/batch_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def beadplot_serial(lineage, features, args, callback=None):
intermed = [label.split('|')[::-1] for label in labels[0]]
intermed.sort()
variant = intermed[0][1]
beaddict.update({'sampled_variants': len(labels)})
beaddict['nodes'].update({variant: []})

for coldate, accn, label1 in intermed:
Expand All @@ -49,6 +50,7 @@ def beadplot_serial(lineage, features, args, callback=None):
# convert to JSON format
beaddict = beadplot.serialize_tree(ctree)
beaddict.update({'lineage': lineage})
beaddict.update({'sampled_variants': len(labels)})
return beaddict


Expand Down Expand Up @@ -82,14 +84,15 @@ def make_beadplots(by_lineage, args, callback=None, t0=None, txtfile='minor_line
:return: list, beadplot data by lineage
"""
# partition lineages into major and minor categories
major = set([lineage for lineage, features in by_lineage.items()
if len(features) > args.mincount])
intermed = [(len(features), lineage) for lineage, features in by_lineage.items()
if len(features) < args.mincount]
intermed.sort(reverse=True) # descending order
minor = dict([(lineage, None) for _, lineage in intermed if lineage is not None])

# export minor lineages to text file
with open(txtfile, 'w') as handle:
for lineage in by_lineage:
if lineage not in major and lineage is not None:
handle.write('{}\n'.format(lineage))
for lineage in minor:
handle.write('{}\n'.format(lineage))

# launch MPI job across minor lineages
if callback:
Expand All @@ -105,8 +108,9 @@ def make_beadplots(by_lineage, args, callback=None, t0=None, txtfile='minor_line
subprocess.check_call(cmd)

# process major lineages
for lineage in major:
features = by_lineage[lineage]
for lineage, features in by_lineage.items():
if lineage in minor:
continue
if callback:
callback('start {}, {} entries'.format(lineage, len(features)))
# call out to MPI
Expand All @@ -115,7 +119,7 @@ def make_beadplots(by_lineage, args, callback=None, t0=None, txtfile='minor_line
args.bylineage, lineage, # positional arguments <JSON file>, <str>
"--mode", "deep",
"--max-variants", str(args.max_variants),
"--nboot", str(args.nboot), "--outdir", "data"
"--nboot", str(args.nboot), "--outdir", "data", "--binpath", args.binpath
]
if t0:
cmd.extend(["--timestamp", str(t0)])
Expand Down Expand Up @@ -155,6 +159,7 @@ def make_beadplots(by_lineage, args, callback=None, t0=None, txtfile='minor_line

ctree = beadplot.annotate_tree(ctree, label_dict)
beaddict = beadplot.serialize_tree(ctree)
beaddict.update({'sampled_variants': len(label_dict)})

beaddict.update({'lineage': lineage})
result.append(beaddict)
Expand Down
7 changes: 4 additions & 3 deletions covizu/utils/gisaid_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,9 +213,10 @@ def sort_by_lineage(records, callback=None):
if callback and i % 1000 == 0:
callback('aligned {} records'.format(i))
lineage = record['covv_lineage']
if lineage not in result:
result.update({lineage: []})
result[lineage].append(record)
if lineage is not None:
if lineage not in result:
result.update({lineage: []})
result[lineage].append(record)
return result


Expand Down
3 changes: 1 addition & 2 deletions data/countries.json
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,6 @@
"Kashgar": "China",
"Albania": "Europe",
"Hebei": "China",
"Brasil": "South Africa",
"Urumqi": "China",
"Shulan": "China",
"Crimea": "Europe",
Expand Down Expand Up @@ -241,4 +240,4 @@
"Sint Eustatius": "North America",
"Malawi": "Africa",
"Eswatini": "Africa"
}
}
9 changes: 9 additions & 0 deletions js/beadplot.js
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,9 @@ function clear_selection() {
* @param {Number} cid: integer index of cluster to draw as beadplot
*/
function beadplot(cid) {
// Update global cindex for SVG and NWK filenames
cindex = cid;

var variants = beaddata[cid].variants,
edgelist = beaddata[cid].edgelist,
points = beaddata[cid].points;
Expand Down Expand Up @@ -872,6 +875,12 @@ function beadplot(cid) {
var tickCount = 0.005*currentWidth;
if (tickCount <= 4) tickCount = 4;

// Ensures that there aren't extra ticks in the time axis of the beadplot when the first and last coldates are within a few days
var dateDiff = d3.timeDay.count(
d3.min(variants, function(d) {return d.x1}),
d3.max(variants, function(d) {return d.x2}));
if (dateDiff !=0 && dateDiff < tickCount) tickCount = dateDiff;

// draw x-axis
visBaxis.append("g")
.attr("class", "treeaxis")
Expand Down
18 changes: 12 additions & 6 deletions js/covizu.js
Original file line number Diff line number Diff line change
Expand Up @@ -572,12 +572,18 @@ function save_beadplot() {

function export_svg() {
var config = {filename: clusters[cindex].lineage};
var svg_beadplot = d3.select('#svg-cluster>svg').node();
d3_save_svg.save(svg_beadplot, config);
svg_beadplot.removeAttribute("style");
svg_beadplot.removeAttribute("version");
svg_beadplot.removeAttribute("xmlns");
svg_beadplot.removeAttribute("xmlns:xlink");

// Creates a duplicate of the beadplot
var svg_beadplot = d3.select('#svg-cluster>svg').clone(true);
var svg_axis = d3.select('#svg-clusteraxis>svg').clone(true);
svg_beadplot.select("g").attr("transform", "translate(0, 30)");
svg_axis.node().appendChild(svg_beadplot.selectAll("g").node())
svg_axis.attr("height", svg_beadplot.attr("height"))

d3_save_svg.save(svg_axis.node(), config);

svg_axis.remove();
svg_beadplot.remove()
}

function export_csv() {
Expand Down
99 changes: 38 additions & 61 deletions js/drawtree.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
var margin = {top: 10, right: 20, bottom: 10, left: 0},
var margin = {top: 10, right: 40, bottom: 10, left: 0},
width = 250 - margin.left - margin.right,
height = 1200 - margin.top - margin.bottom;

Expand All @@ -12,10 +12,8 @@ var xValue = function(d) { return d.x; },
xMap2 = function(d) { return xScale(d.x2); },
xWide = function(d) { return xScale(d.x2 - d.x1)};

// Scale to plot rects and axis using Dates
var xAxisTree = d3.scaleTime().range([0, width]),
minRectWidth = 7,
firstDate, lastDate;
// define minimum width for rect elements (required in instances where first and last coldates are the same)
var minRectWidth = 7;

var yValue = function(d) { return d.y; },
yScale = d3.scaleLinear().range([height, 40]), // inversion
Expand Down Expand Up @@ -46,19 +44,17 @@ let cTooltip = d3.select("#tooltipContainer")
*/
function draw_cluster_box(rect) {
var d = rect.datum();
var rectWidth = xAxisTree(d.last_date) - xAxisTree(d.first_date);
var rectWidth = xScale(date_to_xaxis(d.last_date)) - xScale(d.x2);

// draw a box around the cluster rectangle
vis.append("rect")
.attr('class', "clickedH")
.attr("x", function() {
return xAxisTree(d.first_date) - 2
})
.attr("x", xMap2(d) - 2)
.attr("y", yMap(d) - 2)
.attr("width", function() {
if (rectWidth < minRectWidth)
return minRectWidth + 4
else
return (xAxisTree(d.last_date) - xAxisTree(d.first_date)) + 4
return xScale(date_to_xaxis(d.last_date)) - xScale(d.x2) + 4
})
.attr("height", 14)
.attr("fill", "white")
Expand Down Expand Up @@ -146,29 +142,14 @@ function drawtree(df) {
d3.min(df, yValue)-1, d3.max(df, yValue)+1
]);

firstDate = d3.min(df, function(d) {return d.first_date});
firstDate = d3.timeDay.offset(firstDate, -70)
lastDate = d3.max(df, function(d) {return d.last_date});
lastDate = d3.timeDay.offset(lastDate, 60);
xAxisTree.domain([firstDate, lastDate]);

// draw lines
vis.selectAll("lines")
.data(edgeset)
.enter().append("line")
.attr("class", "lines")
.attr("x1", function(d) {
if (d.first_date!==undefined && d.y1 === d.y2) {
return xAxisTree(d.first_date);
}

// Moves the time scaled tree to the left
return xScale(d.x1) - 7.2
})
.attr("x1", xMap1)
.attr("y1", yMap1)
.attr("x2", function(d) {
return xScale(d.x2) - 7.2
})
.attr("x2", xMap2)
.attr("y2", yMap2)
.attr("stroke-width", 1.5)
.attr("stroke", "#777");
Expand Down Expand Up @@ -271,12 +252,24 @@ function map_clusters_to_tips(df, clusters) {
* @returns {string} new date in ISO format (yyyy-mm-dd)
*/
function xaxis_to_date(x, tip) {
var coldate = new Date(tip.coldate); // collection date of reference tip
coldate.setDate(coldate.getDate() + 365.25*(x - tip.x));
var coldate = new Date(tip.first_date); // collection date of reference tip
coldate = d3.timeDay.offset(coldate, 365.25*(x - tip.x));
return (coldate.toISOString().split('T')[0]);
}


/**
* Converts Date to x-coordinate of the tree scale
*
* @param {Date} coldate
* @returns {float} x-coordinate value
*/
function date_to_xaxis(coldate) {
var numDays = d3.timeDay.count(tips[0].first_date, coldate)
return (numDays/365.25) + tips[0].x;
}


function mutations_to_string(mutations) {
let mutStr = `<b>${i18n_text.tip_mutations}:</b><br/>`;
for (mutation of mutations) {
Expand All @@ -291,29 +284,16 @@ function mutations_to_string(mutations) {
*/
function draw_clusters(tips) {

// numTicks + 1 ticks for the time-scaled tree
var numTicks = 3,
stepValue = Math.floor(d3.timeDay.count(firstDate, lastDate) / numTicks),
currStep = stepValue,
tickValues = [];

// Sets the values of the ticks
tickValues.push(firstDate);

for (i = 0 ; i < numTicks; i++) {
tickValues.push(d3.timeDay.offset(firstDate, currStep));
currStep += stepValue;
}

// Draws the axis for the time scaled tree
axis.append("g")
.attr("class", "treeaxis")
.attr("transform", "translate(0,20)")
.call(
d3.axisTop(xAxisTree)
.tickValues(tickValues)
.tickFormat(d3.timeFormat("%Y-%m-%d"))
);
.call(d3.axisTop(xScale)
.ticks(3)
.tickFormat(function(d) {
return xaxis_to_date(d, tips[0])
})
);

function mouseover(d) {
d3.select("[cidx=cidx-" + d.cluster_idx + "]")
Expand Down Expand Up @@ -349,16 +329,13 @@ function draw_clusters(tips) {
.lower()
.append("rect")
//.attr("selected", false)
.attr("x", function(d) {
return xAxisTree(d.first_date)
})
.attr("x", xMap2)
.attr("y", yMap)
.attr("width", function(d) {
var rectWidth = xAxisTree(d.last_date) - xAxisTree(d.first_date);
.attr("width", function(d) {
var rectWidth = xScale(date_to_xaxis(d.last_date)) - xScale(d.x2);
if (rectWidth < minRectWidth)
return minRectWidth
else
return rectWidth
return minRectWidth;
return rectWidth;
})
.attr("height", 10)
.attr("class", "default")
Expand Down Expand Up @@ -408,10 +385,10 @@ function draw_clusters(tips) {
.attr("cursor", "default")
.attr("id", function(d) { return "cidx-" + d.cluster_idx; })
.attr("x", function(d) {
if (xAxisTree(d.last_date) - xAxisTree(d.first_date) < minRectWidth)
return(xAxisTree(d.first_date) + minRectWidth + 3);

return(xAxisTree(d.last_date) + 3);
var rectWidth = xScale(date_to_xaxis(d.last_date)) - xScale(d.x2);
if (rectWidth < minRectWidth)
return xScale(d.x2) + minRectWidth + 3;
return xScale(d.x2) + rectWidth + 3;
})
.attr("y", function(d) {
return(yScale(d.y-0.15));
Expand Down
11 changes: 10 additions & 1 deletion local.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from covizu.utils import seq_utils, gisaid_utils
from covizu.utils.progress_utils import Callback
from covizu.utils.batch_utils import *
from covizu.utils.seq_utils import SC2Locator


def parse_args():
Expand Down Expand Up @@ -191,6 +192,13 @@ def process_local(args, callback=None):
outfile.write(json.dumps(result)) # serialize results to JSON
outfile.close()

# get mutation info
locator = SC2Locator()
mutations = {}
for lineage, features in get_mutations(by_lineage).items():
annots = [locator.parse_mutation(f) for f in features]
mutations.update({lineage: [a for a in annots if a is not None]})

# write data stats
dbstat_file = os.path.join(args.outdir, 'dbstats.{}.json'.format(timestamp))
with open(dbstat_file, 'w') as handle:
Expand All @@ -207,7 +215,8 @@ def process_local(args, callback=None):
'lastcoldate': max(x['covv_collection_date'] for x in samples),
'residual': residuals[lineage],
'max_ndiffs': max(ndiffs),
'mean_ndiffs': sum(ndiffs)/len(ndiffs)
'mean_ndiffs': sum(ndiffs)/len(ndiffs),
'mutations': mutations[lineage]
}
json.dump(val, handle)

Expand Down

0 comments on commit 7e5614c

Please sign in to comment.