Skip to content

Commit 3e86a9f

Browse files
committed
Remove version columns from Nextclade TSV
We now check the Nextclade versions using the separate version JSON, so we no longer need to track the version per row. This is a breaking change for the cache, so the workflow will need to be run with the renew flag to manually force a full rerun.
1 parent 029f79a commit 3e86a9f

File tree

1 file changed

+3
-37
lines changed

1 file changed

+3
-37
lines changed

workflow/snakemake_rules/nextclade.smk

Lines changed: 3 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -218,7 +218,7 @@ rule run_wuhan_nextclade:
218218
f"--output-translations=data/{database}/nextclade.translation_{{cds}}.upd.fasta"
219219
),
220220
output:
221-
info=f"data/{database}/nextclade_new_raw.tsv",
221+
info=f"data/{database}/nextclade_new.tsv",
222222
alignment=temp(f"data/{database}/nextclade.aligned.upd.fasta"),
223223
translations=[
224224
temp(f"data/{database}/nextclade.translation_{gene}.upd.fasta")
@@ -249,7 +249,7 @@ rule run_21L_nextclade:
249249
dataset=lambda w: f"data/nextclade_data/sars-cov-2-21L.zip",
250250
sequences=f"data/{database}/nextclade_21L.sequences.fasta",
251251
output:
252-
info=f"data/{database}/nextclade_21L_new_raw.tsv",
252+
info=f"data/{database}/nextclade_21L_new.tsv",
253253
threads:
254254
workflow.cores * 0.5
255255
benchmark:
@@ -264,47 +264,13 @@ rule run_21L_nextclade:
264264
"""
265265

266266

267-
rule nextclade_tsv_concat_versions:
268-
input:
269-
nextclade="data/nextclade",
270-
tsv=f"data/{database}/nextclade{{reference}}_new_raw.tsv",
271-
dataset=lambda w: f"data/nextclade_data/sars-cov-2{w.reference.replace('_','-')}.zip",
272-
output:
273-
tsv=f"data/{database}/nextclade{{reference}}_new.tsv",
274-
benchmark:
275-
f"benchmarks/nextclade_tsv_concat_versions_{database}{{reference}}.txt"
276-
shell:
277-
"""
278-
if [ -s {input.tsv} ]; then
279-
# Get version numbers
280-
nextclade_version="$({input.nextclade:q} --version)"
281-
dataset_version="$(unzip -p {input.dataset} pathogen.json | jq -r '.version.tag')"
282-
timestamp="$(date -u +"%Y-%m-%dT%H:%M:%SZ")"
283-
284-
# Combine input file with version numbers and write to output
285-
printf "%s\tnextclade_version\tdataset_version\trun_timestamp\n" \
286-
"$(head -n 1 {input.tsv})" \
287-
> {output.tsv}
288-
289-
tail -n +2 {input.tsv} | \
290-
awk -v v1="$nextclade_version" \
291-
-v v2="$dataset_version" \
292-
-v v3="$timestamp" \
293-
-v OFS='\t' '{{print $0, v1, v2, v3}}' \
294-
>> {output.tsv}
295-
else
296-
cp {input.tsv} {output.tsv}
297-
fi
298-
"""
299-
300-
301267
rule nextclade_info:
302268
"""
303269
Generates nextclade info TSV for all sequences (new + old)
304270
"""
305271
input:
306272
old_info=f"data/{database}/nextclade{{reference}}_old.tsv",
307-
new_info=rules.nextclade_tsv_concat_versions.output.tsv,
273+
new_info=f"data/{database}/nextclade{{reference}}_new.tsv",
308274
output:
309275
nextclade_info=f"data/{database}/nextclade{{reference}}.tsv",
310276
benchmark:

0 commit comments

Comments
 (0)