Skip to content

Commit

Permalink
More modules added to align
Browse files Browse the repository at this point in the history
  • Loading branch information
toniher committed Oct 11, 2024
1 parent 45fa2e9 commit cafc67b
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 22 deletions.
26 changes: 4 additions & 22 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ LOCAL_MODULES='./modules/local/exorthist'
include { CHECK_INPUT } from "${LOCAL_MODULES}/check_input.nf"
include { GENERATE_ANNOTATIONS } from "${LOCAL_MODULES}/generate_annotations.nf"
include { PARSE_IPA_PROT_ALN } from "${LOCAL_MODULES}/align_pairs.nf"
include { REALIGN_EX_PAIRS } from "${LOCAL_MODULES}/realign_pairs.nf"
include { SPLIT_CLUSTERS_IN_CHUNKS } from "${LOCAL_MODULES}/split_clusters_chunks.nf"
include { SPLIT_CLUSTERS_BY_SPECIES_PAIRS } from "${LOCAL_MODULES}/split_clusters_species.nf"
include { SPLIT_EX_PAIRS_TO_REALIGN } from "${LOCAL_MODULES}/split_pairs.nf"
Expand Down Expand Up @@ -210,6 +211,8 @@ workflow {
EXs_to_realign_batches = SPLIT_EX_PAIRS_TO_REALIGN.out.EXs_to_realign_batches
// Flatten the results from the previous batch run and combine with sp1 and sp2 information, using sp1-sp2 as key.
EXs_to_realign = EXs_to_realign_batches.flatten().map{[it.getName().toString().split("_")[0],it]}.groupTuple().join(clusters_split_ch).transpose()
// Realign exon pairs (those with multiple hits)
REALIGN_EX_PAIRS(blosumfile, EXs_to_realign)

// Review outputs below
CHECK_INPUT.out.run_info.view()
Expand All @@ -222,31 +225,10 @@ workflow {
PARSE_IPA_PROT_ALN.out.aligned_subclusters_4_splitting.view { "SC: $it" }
PARSE_IPA_PROT_ALN.out.EXs_to_split.view { "EX: $it" }
EXs_to_realign.view { "EXR: $it" }

REALIGN_EX_PAIRS.out.realigned_exons_4_merge.view{ "RER: $it" }
}


// /*
// * Realign exons pairs (with multiple hits)
// */
//
// process realign_EX_pairs {
// label 'incr_time_cpus'
//
// input:
// file(blosumfile)
// set val(comp_id), file(EXs_to_realign), file(sp1), file(sp2) from EXs_to_realign //05/03/21
//
// output:
// set val(comp_id), file("realigned_*") into realigned_exons_4_merge //05/03/21
//
// script:
// """
// B3_realign_EX_pairs.pl ${sp1} ${sp2} ${EXs_to_realign} \
// ${sp1}/${sp1}.exint ${sp2}/${sp2}.exint 1 realigned_${EXs_to_realign} \
// ${sp1}_${sp2} ${blosumfile} ${task.cpus}
// """
// }
//
// //Combine all the aln_info with the realigned_exon_info for each species pair
// aligned_subclusters_4_splitting.groupTuple().join(realigned_exons_4_merge.groupTuple()).set{data_4_merge}
Expand Down
37 changes: 37 additions & 0 deletions modules/local/exorthist/align_pairs.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
 * Runs B1_parse_IPA_prot_aln.pl on one cluster chunk of a species pair.
 *
 * Inputs:
 *   blosumfile     - file passed through to the script (presumably a BLOSUM matrix; confirm).
 *   combid         - combination id; carried in the tuple but not used in the command.
 *   sp1, sp2       - per-species directories; the directory name is used as the species id
 *                    and as the prefix of the annotation files read from inside it.
 *   cls_part_file  - cluster chunk file; the second "_"-separated token of its name is
 *                    passed to the script and used in the output prefix.
 *   dist_range     - one of "long", "medium" or "short"; selects params.long_dist,
 *                    params.medium_dist or params.short_dist respectively.
 *
 * Outputs:
 *   aligned_subclusters_4_splitting - tuple of "<sp1>-<sp2>" and the files matching "<sp1>-<sp2>-*".
 *   EXs_to_split                    - files matching "<sp1>-<sp2>_EXs_to_split_part_*.txt".
 */
process PARSE_IPA_PROT_ALN {
    tag { cls_part_file.name }
    label 'big_cpus'

    input:
    path blosumfile
    tuple val(combid), path(sp1), path(sp2), path(cls_part_file), val(dist_range)

    output:
    tuple val("${sp1.name}-${sp2.name}"), path("${sp1.name}-${sp2.name}-*"), emit: aligned_subclusters_4_splitting
    path "${sp1.name}-${sp2.name}_EXs_to_split_part_*.txt", emit: EXs_to_split

    script:
    def prev_alignments = params.prevaln ? params.prevaln : ""
    def cls_parts = cls_part_file.name.split("_")

    // Look the distance settings up by range name. Only the selected parameter is
    // split, so the other two may remain unset without breaking this task.
    def dist_by_range = [
        'long'  : params.long_dist,
        'medium': params.medium_dist,
        'short' : params.short_dist
    ]
    def dist_spec = dist_by_range[dist_range as String]
    if (dist_spec == null) {
        // Fail with a readable message instead of a null-index error further down.
        error "PARSE_IPA_PROT_ALN: unsupported dist_range '${dist_range}' (expected long, medium or short), or the matching params.<range>_dist is not set"
    }

    def dist_range_par = dist_spec.split(",")
    if (dist_range_par.size() < 4) {
        // The command below needs the fourth comma-separated field.
        error "PARSE_IPA_PROT_ALN: params.${dist_range}_dist must contain at least 4 comma-separated values, got '${dist_spec}'"
    }

    """
    B1_parse_IPA_prot_aln.pl ${sp1.name} ${sp2.name} ${cls_part_file} \
        ${sp1}/${sp1.name}_annot_exons_prot_ids.txt ${sp2}/${sp2.name}_annot_exons_prot_ids.txt \
        ${sp1}/${sp1.name}_protein_ids_exons_pos.txt ${sp2}/${sp2.name}_protein_ids_exons_pos.txt \
        ${sp1}/${sp1.name}_protein_ids_intron_pos_CDS.txt ${sp2}/${sp2.name}_protein_ids_intron_pos_CDS.txt \
        ${sp1}/${sp1.name}.exint ${sp2}/${sp2.name}.exint ${cls_parts[1]} ${blosumfile} ${sp1.name}-${sp2.name}-${cls_parts[1]} ${dist_range_par[3]} ${task.cpus} ${prev_alignments}
    """
}
17 changes: 17 additions & 0 deletions modules/local/exorthist/realign_pairs.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/*
 * Runs B3_realign_EX_pairs.pl on one batch of exon pairs for a species pair.
 *
 * Inputs:
 *   blosumfile      - file passed through to the script (presumably a BLOSUM matrix; confirm).
 *   comp_id         - key of the species comparison; re-emitted unchanged with the results.
 *   EXs_to_realign  - batch file listing the exon pairs to realign.
 *   sp1, sp2        - per-species directories; "<dir>/<name>.exint" is read from each.
 *
 * Outputs:
 *   realigned_exons_4_merge - tuple of comp_id and the files matching "realigned_*".
 */
process REALIGN_EX_PAIRS {
    label 'incr_time_cpus'

    input:
    path blosumfile
    tuple val(comp_id), path(EXs_to_realign), path(sp1), path(sp2)

    output:
    tuple val(comp_id), path("realigned_*"), emit: realigned_exons_4_merge

    script:
    // Name each command-line piece once so the invocation below reads positionally.
    def first_sp = sp1.name
    def second_sp = sp2.name
    def first_exint = "${sp1}/${first_sp}.exint"
    def second_exint = "${sp2}/${second_sp}.exint"
    def realigned_out = "realigned_${EXs_to_realign.name}"
    def pair_label = "${first_sp}_${second_sp}"
    // NOTE(review): the meaning of the literal "1" argument is not visible here; kept as is.
    """
    B3_realign_EX_pairs.pl ${first_sp} ${second_sp} ${EXs_to_realign} \
        ${first_exint} ${second_exint} 1 ${realigned_out} \
        ${pair_label} ${blosumfile} ${task.cpus}
    """
}
15 changes: 15 additions & 0 deletions modules/local/exorthist/split_pairs.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/*
 * Runs B2_split_EX_pairs_to_realign.py once per staged input file.
 *
 * Inputs:
 *   '*' - every file received on the input channel, staged into the task directory.
 *
 * Outputs:
 *   EXs_to_realign_batches - all files matching "*EXs_to_realign_part_*" left in the
 *                            task directory after the loop.
 *
 * params.alignmentnum is forwarded as the script's -n option.
 */
process SPLIT_EX_PAIRS_TO_REALIGN {
    label 'pandas'

    input:
    path '*'

    output:
    path '*EXs_to_realign_part_*', emit: EXs_to_realign_batches

    script:
    // Iterate with a plain glob instead of parsing `ls` output: names are not
    // word-split and a staged directory is not expanded into its contents.
    // The glob is expanded once, before the loop starts, so files written by
    // the script during the loop are not picked up as inputs.
    """
    for file in *; do
        B2_split_EX_pairs_to_realign.py -i "\${file}" -n ${params.alignmentnum}
    done
    """
}

0 comments on commit cafc67b

Please sign in to comment.