Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add outputs and adjust split size #7

Open
wants to merge 2 commits into
base: img
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 36 additions & 2 deletions annotation_full.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,13 @@ workflow annotation {
supfam_domtblouts = f_annotate.supfam_domtblout,
cath_funfam_domtblouts = f_annotate.cath_funfam_domtblout,
crt_crisprs_s = s_annotate.crisprs,
crt_gffs = s_annotate.crt_gff,
genemark_gffs = s_annotate.genemark_gff,
prodigal_gffs = s_annotate.prodigal_gff,
trna_gffs = s_annotate.trna_gff,
misc_bind_misc_feature_regulatory_gffs = s_annotate.misc_bind_misc_feature_regulatory_gff,
rrna_gffs = s_annotate.rrna_gff,
ncrna_tmrna_gffs = s_annotate.ncrna_tmrna_gff,
container=container
}
call final_stats {
Expand All @@ -98,6 +105,13 @@ workflow annotation {
File? smart_gff = merge_outputs.smart_gff
File? supfam_gff = merge_outputs.supfam_gff
File? cath_funfam_gff = merge_outputs.cath_funfam_gff
File? crt_gff = merge_outputs.crt_gff
File? genemark_gff = merge_outputs.genemark_gff
File? prodigal_gff = merge_outputs.prodigal_gff
File? trna_gff = merge_outputs.trna_gff
File? misc_bind_misc_feature_regulatory_gff = merge_outputs.misc_bind_misc_feature_regulatory_gff
File? rrna_gff = merge_outputs.rrna_gff
File? ncrna_tmrna_gff = merge_outputs.ncrna_tmrna_gff
File? proteins_cog_domtblout = merge_outputs.proteins_cog_domtblout
File? proteins_pfam_domtblout = merge_outputs.proteins_pfam_domtblout
File? proteins_tigrfam_domtblout = merge_outputs.proteins_tigrfam_domtblout
Expand Down Expand Up @@ -126,7 +140,7 @@ workflow annotation {

task split{
File infile
String blocksize=10
String blocksize=100
String zfile="zscore.txt"
String cmzfile="cmzscore.txt"
String container
Expand Down Expand Up @@ -175,6 +189,13 @@ task merge_outputs {
Array[File?] cath_funfam_domtblouts
Array[File?] product_name_tsvs
Array[File?] crt_crisprs_s
Array[File?] crt_gffs
Array[File?] genemark_gffs
Array[File?] prodigal_gffs
Array[File?] trna_gffs
Array[File?] misc_bind_misc_feature_regulatory_gffs
Array[File?] rrna_gffs
Array[File?] ncrna_tmrna_gffs
String container

command {
Expand All @@ -191,6 +212,13 @@ task merge_outputs {
cat ${sep=" " smart_gffs} > "${project_id}_smart.gff"
cat ${sep=" " supfam_gffs} > "${project_id}_supfam.gff"
cat ${sep=" " cath_funfam_gffs} > "${project_id}_cath_funfam.gff"
cat ${sep=" " crt_gffs} > "${project_id}_crt.gff"
cat ${sep=" " genemark_gffs} > "${project_id}_genemark.gff"
cat ${sep=" " prodigal_gffs} > "${project_id}_prodigal.gff"
cat ${sep=" " trna_gffs} > "${project_id}_trna.gff"
cat ${sep=" " misc_bind_misc_feature_regulatory_gffs} > "${project_id}_rfam_misc_bind_misc_feature_regulatory.gff"
cat ${sep=" " rrna_gffs} > "${project_id}_rfam_rrna.gff"
cat ${sep=" " ncrna_tmrna_gffs} > "${project_id}_rfam_ncrna_tmrna.gff"

cat ${sep=" " cog_domtblouts} > "${project_id}_proteins.cog.domtblout"
cat ${sep=" " pfam_domtblouts} > "${project_id}_proteins.pfam.domtblout"
Expand All @@ -216,7 +244,13 @@ task merge_outputs {
File smart_gff = "${project_id}_smart.gff"
File supfam_gff = "${project_id}_supfam.gff"
File cath_funfam_gff = "${project_id}_cath_funfam.gff"

File crt_gff = "${project_id}_crt.gff"
File genemark_gff = "${project_id}_genemark.gff"
File prodigal_gff = "${project_id}_prodigal.gff"
File trna_gff = "${project_id}_trna.gff"
File misc_bind_misc_feature_regulatory_gff = "${project_id}_rfam_misc_bind_misc_feature_regulatory.gff"
File rrna_gff = "${project_id}_rfam_rrna.gff"
File ncrna_tmrna_gff = "${project_id}_rfam_ncrna_tmrna.gff"
File proteins_cog_domtblout = "${project_id}_proteins.cog.domtblout"
File proteins_pfam_domtblout = "${project_id}_proteins.pfam.domtblout"
File proteins_tigrfam_domtblout = "${project_id}_proteins.tigrfam.domtblout"
Expand Down
14 changes: 9 additions & 5 deletions rfam.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,15 @@ task run {
set -euo pipefail
${bin} --notextw --cut_tc --cpu ${threads} -Z ${cmzscore} --tblout ${project_id}_rfam.tbl ${cm} ${input_fasta}
tool_and_version=$(${bin} -h | grep INFERNAL | perl -pne 's/^.*INFERNAL/INFERNAL/' )
grep -v '^#' ${project_id}_rfam.tbl | \
awk '$17 == "!" {print $1,$3,$4,$6,$7,$8,$9,$10,$11,$15,$16}' | \
sort -k1,1 -k10,10nr -k11,11n | \
${clan_filter_bin} "$tool_and_version" \
${claninfo_tsv} ${feature_lookup_tsv} > ${project_id}_rfam.gff
if [ $(grep -c -v \# ${project_id}_rfam.tbl) -eq 0 ] ; then
touch ${project_id}_rfam.gff
else
grep -v '^#' ${project_id}_rfam.tbl | \
awk '$17 == "!" {print $1,$3,$4,$6,$7,$8,$9,$10,$11,$15,$16}' | \
sort -k1,1 -k10,10nr -k11,11n | \
${clan_filter_bin} "$tool_and_version" \
${claninfo_tsv} ${feature_lookup_tsv} > ${project_id}_rfam.gff
fi
awk -F'\t' '$3 == "misc_bind" || $3 == "misc_feature" || $3 == "regulatory" {print $0}' \
${project_id}_rfam.gff > ${project_id}_rfam_misc_bind_misc_feature_regulatory.gff
awk -F'\t' '$3 == "rRNA" {print $0}' ${project_id}_rfam.gff > ${project_id}_rfam_rrna.gff
Expand Down
8 changes: 7 additions & 1 deletion structural-annotation.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -121,11 +121,17 @@ workflow s_annotate {
}
}
output {
#File gff = "${output_dir}"+"/"+"${imgap_project_id}_structural_annotation.gff"
#File gff = gff_merge.final_gff
#File gff = post_qc.out
File? gff = fasta_merge.final_modified_gff
File? crt_gff = crt.gff
File? crisprs = crt.crisprs
File? genemark_gff = genemark.gff
File? prodigal_gff = prodigal.gff
File? trna_gff = trnascan.gff
File? misc_bind_misc_feature_regulatory_gff = rfam.misc_bind_misc_feature_regulatory_gff
File? rrna_gff = rfam.rrna_gff
File? ncrna_tmrna_gff = rfam.ncrna_tmrna_gff
File? proteins = fasta_merge.final_proteins
}
}
Expand Down