diff --git a/config_ali b/config_ali index 934f3f5..3fcbcca 100644 --- a/config_ali +++ b/config_ali @@ -14,8 +14,8 @@ binbase: /nfs/esnitkin/bin_group/variant_calling_bin/ resources: nodes=1:ppn=4,pmem=4000mb,walltime=250:00:00 large_resources: nodes=1:ppn=12,mem=47gb,walltime=250:00:00 email: apirani@med.umich.edu -queue: flux -flux_account: esnitkin_flux +queue: fluxod +flux_account: esnitkin_fluxod notification: ae # Set Parameters for individual tools. Set the binbase of each tool: This should be the folder name of respective tools where the executables for each resp. tool resides. @@ -1009,6 +1009,7 @@ Ref_Path: /nfs/esnitkin/bin_group/variant_calling_bin/reference/AB030/ Ref_Name: Steno_K279a.fasta # path to the reference genome fasta file. Ref_Path: /nfs/esnitkin/bin_group/variant_calling_bin/reference/Steno_K279a/ +<<<<<<< HEAD [lactobacillus_crispatus_ST1] # Name of reference genome fasta file. @@ -1021,3 +1022,5 @@ Ref_Path: /nfs/esnitkin/Project_Crispatus/Sequence_data/Project_mother_daughter/ Ref_Name: USA500-2395.fasta # path to the reference genome fasta file. Ref_Path: /nfs/esnitkin/bin_group/variant_calling_bin/reference/USA500-2395/ +======= +>>>>>>> 7b2c92a5aa76895d2a815d185b3d861945add2be diff --git a/modules/beast/beast.py b/modules/beast/beast.py index 818a739..c6a99ef 100644 --- a/modules/beast/beast.py +++ b/modules/beast/beast.py @@ -6,7 +6,10 @@ import argparse from subprocess import call import re +<<<<<<< HEAD from fasta_functions import * +======= +>>>>>>> 7b2c92a5aa76895d2a815d185b3d861945add2be # Get command line arguments parser = argparse.ArgumentParser(description='Run BEAST pipeline.') @@ -27,17 +30,43 @@ tree = args.tree scripts_dir = args.scripts_dir +<<<<<<< HEAD +======= +# Set paths +python = '/nfs/esnitkin/bin_group/anaconda3/bin/python' +mask_vars_path = '/nfs/esnitkin/bin_group/pipeline/Github/variant_calling_pipeline_dev/modules/variant_diagnostics/mask_gubbins_variants.py' + +>>>>>>> 7b2c92a5aa76895d2a815d185b3d861945add2be # Get invariant site counts if invar_sites is not None: if not invar_sites.endswith(tuple(['fa','fasta'])): # Path to file with invariant site counts invar_counts_file = invar_sites else: +<<<<<<< HEAD # Count invariant sites in whole genome alignment invar_counts_file = count_invar_sites(invar_sites, gubbins_gff) with open(invar_counts_file) as f: invar_counts = f.readline().strip() +======= + # Mask recombinant regions before counting invariant sites + if gubbins_gff is not None: + call([python, mask_vars_path, invar_sites, gubbins_gff]) + aln_file = re.split('/|\.', invar_sites)[-2] + \ + '_gubbins_masked.fa' + else: + aln_file = invar_sites + # Count invariant sites in whole genome alignment + call([python, scripts_dir + '/get_num_invariant_sites_v2.py', aln_file]) + invar_counts_file = re.split('/|\.', aln_file)[-2] + \ + '_invar_site_counts.txt' + # Read file with invariant site counts + with open(invar_counts_file) as f: + invar_counts = f.readline().strip() + + +>>>>>>> 7b2c92a5aa76895d2a815d185b3d861945add2be # Final xmls for beast xmls_for_beast = [] diff --git a/modules/beast/beast_pbs_flux.pl b/modules/beast/beast_pbs_flux.pl index 5062933..53f5081 100755 --- a/modules/beast/beast_pbs_flux.pl +++ b/modules/beast/beast_pbs_flux.pl @@ -3,7 +3,10 @@ #Takes as input: # 1) config_file - A 1-column file where each line is the file name (minus the .xml extension) for a beast run # UPDATED 2017-09-21 from beast2/2.4.5 to beast2/2.4.7 +<<<<<<< HEAD # UPDATED 2018-11-19 from beast2/2.4.7 to /nfs/esnitkin/bin_group/beast_v2.5.1/beast/bin/beast +======= +>>>>>>> 7b2c92a5aa76895d2a815d185b3d861945add2be #PRINT USAGE if(@ARGV == 0) @@ -75,6 +78,7 @@ print PBS "cd $out_dir"; print PBS "\n"; print PBS "#Load modules\n"; +<<<<<<< HEAD #print PBS "module load beast2/2.4.7\n"; print PBS "module load beagle\n"; #print PBS "addonmanager -add BEAST_CLASSIC\n"; @@ -82,6 +86,15 @@ print PBS "\n"; print PBS "# Put your job commands here:\n"; print PBS "/nfs/esnitkin/bin_group/beast_v2.5.1/beast/bin/beast -beagle_SSE -instances 12 -threads 12 beast$count\_$fnameorig\n"; +======= + print PBS "module load beast2/2.4.7\n"; + print PBS "module load beagle\n"; + #print PBS "addonmanager -add BEAST_CLASSIC\n"; + #print PBS "addonmanager -add SCOTTI\n"; + print PBS "\n"; + print PBS "# Put your job commands here:\n"; + print PBS "beast -beagle_SSE -instances 12 -threads 12 beast$count\_$fnameorig\n"; +>>>>>>> 7b2c92a5aa76895d2a815d185b3d861945add2be print PBS "\n"; close PBS; diff --git a/modules/beast/fasta_functions.py b/modules/beast/fasta_functions.py index 70c2085..7e505b9 100755 --- a/modules/beast/fasta_functions.py +++ b/modules/beast/fasta_functions.py @@ -17,16 +17,25 @@ import re import os +<<<<<<< HEAD def subset_fasta(ids, fastain, fastaout): """Creates a new fasta file with a subset of original sequences. Args: idnames: List of sequence names to be extracted. +======= +def subset_fasta(idnames, fastain, fastaout): + """Creates a new fasta file with a subset of original sequences. + + Args: + idnames: Text file with list of names of sequences to be extracted. +>>>>>>> 7b2c92a5aa76895d2a815d185b3d861945add2be fastain: Path to fasta file containing all sequences of interest. fastaout: Name of output fasta file containing only the sequences of interest. Output: Fasta file containing only the sequences of interest. +<<<<<<< HEAD """ with open(fastaout, 'w') as f: @@ -62,6 +71,28 @@ def get_fasta_subsets(csv, fasta): fas.append(fa) return(fas) +======= + + Returns: + Path to subsetted fasta file. + """ + + ids = [] + + for line in open(idnames, 'r'): + if line.strip() == 'x': + pass + else: + ids.append(line.strip()) + + with open(fastaout, 'w') as f: + for rec in SeqIO.parse(fastain, 'fasta'): + if str(rec.id) in ids: + f.write('>' + names[ids.index(rec.id)] + '\n') + f.write(str(rec.seq) + '\n') + + return(fastaout) +>>>>>>> 7b2c92a5aa76895d2a815d185b3d861945add2be def rm_invar_sites(fasta, outfile=None, outfmt=['fasta','vcf'], outdir='.', @@ -82,7 +113,11 @@ def rm_invar_sites(fasta, outfile=None, outfmt=['fasta','vcf'], outdir='.', Path to output file. """ +<<<<<<< HEAD if len(outfmt) > 1: +======= + if len(outfmt) > 1: +>>>>>>> 7b2c92a5aa76895d2a815d185b3d861945add2be outfmt = outfmt[0] # Get output fasta file name @@ -237,13 +272,18 @@ def count_invar_sites(fasta,gff=None,outdir='.',path={'snpsites':'/nfs/esnitkin/ if not li.startswith("#"): positions.append(line.split('\t')[1]) +<<<<<<< HEAD # Get allele for invariant sites invar_sites = [] +======= + # Get ref allele for invariant sites +>>>>>>> 7b2c92a5aa76895d2a815d185b3d861945add2be for record in aln: seq_str = list(str(record.seq)) for index in positions: index = int(index) seq_str[index] = '' +<<<<<<< HEAD #tmp = ''.join(seq_str) if len(invar_sites) == 0: invar_sites = seq_str @@ -260,6 +300,14 @@ def count_invar_sites(fasta,gff=None,outdir='.',path={'snpsites':'/nfs/esnitkin/ # Get invariant site count for each base #print('Counting bases.') #invar_counts = Counter(invar_sites) +======= + invar_sites = ''.join(seq_str) + break + + # Get invariant site count for each base + print('Counting bases.') + invar_counts = Counter(invar_sites) +>>>>>>> 7b2c92a5aa76895d2a815d185b3d861945add2be # Write base counts to files invar_counts_file = outdir + '/' + re.split('/|\.', aln_file)[-2] + \ @@ -267,9 +315,14 @@ def count_invar_sites(fasta,gff=None,outdir='.',path={'snpsites':'/nfs/esnitkin/ print('Writing ', invar_counts_file, ' (order: A C G T).', sep='') with open(invar_counts_file, 'w') as f: for base, count in sorted(invar_counts.items()): +<<<<<<< HEAD #if base in ['A','a','C','c','G','g','T','t']: print(base + ' ' + str(count)) f.write('%s ' % (count)) #else: # print(base + ' ' + str(count)) +======= + f.write('%s ' % (count)) + +>>>>>>> 7b2c92a5aa76895d2a815d185b3d861945add2be return(invar_counts_file) diff --git a/modules/beast/scotti.py b/modules/beast/scotti.py index 4900931..df6d574 100644 --- a/modules/beast/scotti.py +++ b/modules/beast/scotti.py @@ -1,4 +1,8 @@ +<<<<<<< HEAD # PIPELINE TO RUN BEAST ON FLUX +======= +# PIPELINE TO RUN SCOTTI ON FLUX +>>>>>>> 7b2c92a5aa76895d2a815d185b3d861945add2be # Import modules import sys @@ -6,6 +10,7 @@ import argparse from subprocess import call import re +<<<<<<< HEAD from fasta_functions import * from scotti_functions import * @@ -106,3 +111,28 @@ print('Generating PBS script(s) and submitting jobs to flux.') call(['perl', scripts_dir + '/beast_pbs_flux.pl', 'input_beast.txt']) +======= + +# Get command line arguments +parser = argparse.ArgumentParser(description='Generate SCOTTI XMLs and run SCOTTI.') +parser.add_argument('xmls', nargs='*', help='input SCOTTI BEAST xml file(s)') +parser.add_argument('--nReps', '-n', metavar='N', type=int, help='number of beast runs for each xml', default=3) +parser.add_argument('--invarSites', '-i', metavar='FILE', help=''' + option 1: fasta file to get number of invariant As, Cs, Gs, and Ts not in alignment + option 2: text file with number of invariant As, Cs, Gs, and Ts in alignment''') +parser.add_argument('--gubbinsGFF', '-g', metavar='GFF', help='''gubbins output GFF file to mask recombinant sites in alignment before counting invariant sites''') +parser.add_argument('--tree', '-t', help='starting tree file in newick format') +parser.add_argument('-scriptsDir', '-d', metavar='DIR', help='directory where all of the scripts are housed', default='/nfs/esnitkin/bin_group/pipeline/Github/variant_calling_pipeline_dev/modules/beast') +args = parser.parse_args() +# Rename variables +xmls = args.xmls +nReps = args.nReps +invarSites = args.invarSites +gubbinsGFF = args.gubbinsGFF +tree = args.tree +scriptsDir = args.scriptsDir + +# Set paths +python = '/nfs/esnitkin/bin_group/anaconda3/bin/python' +maskVarsPath = '/nfs/esnitkin/bin_group/pipeline/Github/variant_calling_pipeline_dev/modules/variant_diagnostics/mask_gubbins_variants.py' +>>>>>>> 7b2c92a5aa76895d2a815d185b3d861945add2be diff --git a/modules/stages.py b/modules/stages.py index 5bf4990..106f916 100644 --- a/modules/stages.py +++ b/modules/stages.py @@ -139,11 +139,40 @@ def qualimap(out_sorted_bam, out_path, analysis, logger, Config): """ Variant Filteration """ def filter_variants(final_raw_vcf, out_path, analysis, ref_index, logger, Config, Avg_dp): reference = ConfigSectionMap(ref_index, Config)['ref_path'] + "/" + ConfigSectionMap(ref_index, Config)['ref_name'] +<<<<<<< HEAD gatk_filter_final_vcf_file = gatk_filter(final_raw_vcf, out_path, analysis, reference, logger, Config, Avg_dp) #gatk_filter_final_vcf_contamination_file = gatk_filter_contamination(final_raw_vcf, out_path, analysis, reference, logger, Config, Avg_dp) gatk_filter_final_vcf_file_no_proximate_snp = remove_proximate_snps(gatk_filter_final_vcf_file, out_path, analysis, reference, logger, Config) keep_logging('The vcf file with no proximate snp: {}'.format(gatk_filter_final_vcf_file_no_proximate_snp), 'The vcf file with no proximate snp: {}'.format(gatk_filter_final_vcf_file_no_proximate_snp), logger, 'debug') +======= + gatk_filter2_final_vcf_file = gatk_filter2(final_raw_vcf, out_path, analysis, reference, logger, Config, Avg_dp) + gatk_filter2_final_vcf_contamination_file = gatk_filter2_contamination(final_raw_vcf, out_path, analysis, reference, logger, Config, Avg_dp) + gatk_filter2_final_vcf_file_no_proximate_snp = remove_proximate_snps(gatk_filter2_final_vcf_file, out_path, analysis, reference, logger, Config) + keep_logging('The vcf file with no proximate snp: {}'.format(gatk_filter2_final_vcf_file_no_proximate_snp), 'The vcf file with no proximate snp: {}'.format(gatk_filter2_final_vcf_file_no_proximate_snp), logger, 'debug') + #gatk_vcf2fasta_filter2_file = gatk_vcf2fasta_filter2(gatk_filter2_final_vcf_file, out_path, analysis, reference, logger, Config) + #gatk_vcf2fasta_filter2_file_no_proximate = gatk_vcf2fasta_filter2(gatk_filter2_final_vcf_file_no_proximate_snp, out_path, analysis, reference, logger, Config) + #vcftools_vcf2fasta_filter2_file = vcftools_vcf2fasta_filter2(gatk_filter2_final_vcf_file, out_path, analysis, reference, logger, Config) + #vcftools_vcf2fasta_filter2_file_no_proximate = vcftools_vcf2fasta_filter2(gatk_filter2_final_vcf_file_no_proximate_snp, out_path, analysis, reference, logger, Config) + #keep_logging('The final Consensus Fasta file: {}'.format(gatk_vcf2fasta_filter2_file), 'The final Consensus Fasta file: {}'.format(gatk_vcf2fasta_filter2_file), logger, 'debug') + #keep_logging('The final Consensus Fasta file with no proximate: {}'.format(gatk_vcf2fasta_filter2_file_no_proximate), 'The final Consensus Fasta file with no proximate: {}'.format(gatk_vcf2fasta_filter2_file_no_proximate), logger, 'debug') + #keep_logging('The final Consensus Fasta file from VCF-consensus: {}'.format(vcftools_vcf2fasta_filter2_file), 'The final Consensus Fasta file from VCF-consensus: {}'.format(vcftools_vcf2fasta_filter2_file), logger, 'debug') + #keep_logging('The final Consensus Fasta file from VCF-consensus with no proximate: {}'.format(vcftools_vcf2fasta_filter2_file_no_proximate), 'The final Consensus Fasta file from VCF-consensus with no proximate: {}'.format(vcftools_vcf2fasta_filter2_file_no_proximate), logger, 'debug') + +def filter2_indels(final_raw_indel_vcf, out_path, analysis, ref_index, logger, Config, Avg_dp): + reference = ConfigSectionMap(ref_index, Config)['ref_path'] + "/" + ConfigSectionMap(ref_index, Config)['ref_name'] + gatk_filter2_final_vcf_file = gatk_filter2_indel(final_raw_indel_vcf, out_path, analysis, reference, logger, Config, Avg_dp) + # gatk_filter2_final_vcf_file_no_proximate_snp = remove_proximate_snps(gatk_filter2_final_vcf_file, out_path, analysis, reference, logger, Config) + # keep_logging('The vcf file with no proximate snp: {}'.format(gatk_filter2_final_vcf_file_no_proximate_snp), 'The vcf file with no proximate snp: {}'.format(gatk_filter2_final_vcf_file_no_proximate_snp), logger, 'debug') + # gatk_vcf2fasta_filter2_file = gatk_vcf2fasta_filter2(gatk_filter2_final_vcf_file, out_path, analysis, reference, logger, Config) + # gatk_vcf2fasta_filter2_file_no_proximate = gatk_vcf2fasta_filter2(gatk_filter2_final_vcf_file_no_proximate_snp, out_path, analysis, reference, logger, Config) + # vcftools_vcf2fasta_filter2_file = vcftools_vcf2fasta_filter2(gatk_filter2_final_vcf_file, out_path, analysis, reference, logger, Config) + # vcftools_vcf2fasta_filter2_file_no_proximate = vcftools_vcf2fasta_filter2(gatk_filter2_final_vcf_file_no_proximate_snp, out_path, analysis, reference, logger, Config) + # keep_logging('The final Consensus Fasta file: {}'.format(gatk_vcf2fasta_filter2_file), 'The final Consensus Fasta file: {}'.format(gatk_vcf2fasta_filter2_file), logger, 'debug') + # keep_logging('The final Consensus Fasta file with no proximate: {}'.format(gatk_vcf2fasta_filter2_file_no_proximate), 'The final Consensus Fasta file with no proximate: {}'.format(gatk_vcf2fasta_filter2_file_no_proximate), logger, 'debug') + # keep_logging('The final Consensus Fasta file from VCF-consensus: {}'.format(vcftools_vcf2fasta_filter2_file), 'The final Consensus Fasta file from VCF-consensus: {}'.format(vcftools_vcf2fasta_filter2_file), logger, 'debug') + # keep_logging('The final Consensus Fasta file from VCF-consensus with no proximate: {}'.format(vcftools_vcf2fasta_filter2_file_no_proximate), 'The final Consensus Fasta file from VCF-consensus with no proximate: {}'.format(vcftools_vcf2fasta_filter2_file_no_proximate), logger, 'debug') +>>>>>>> 7b2c92a5aa76895d2a815d185b3d861945add2be def filter_indels(final_raw_indel_vcf, out_path, analysis, ref_index, logger, Config, Avg_dp): reference = ConfigSectionMap(ref_index, Config)['ref_path'] + "/" + ConfigSectionMap(ref_index, Config)['ref_name'] diff --git a/modules/variant_diagnostics/core_pipeline.py b/modules/variant_diagnostics/core_pipeline.py index 0ded491..371da92 100644 --- a/modules/variant_diagnostics/core_pipeline.py +++ b/modules/variant_diagnostics/core_pipeline.py @@ -3386,7 +3386,11 @@ def someOtherFunc(data, key): files_for_tabix = glob.glob("%s/*.vcf" % args.filter2_only_snp_vcf_dir) tabix(files_for_tabix, "vcf", logger, Config) +<<<<<<< HEAD # # Get the cluster option; create and run jobs based on given parameter. The jobs will parse all the intermediate vcf file to extract information such as if any unique variant position was unmapped in a sample, if it was filtered out dur to DP,MQ, FQ, proximity to indel, proximity to other SNPs and other variant filter parameters set in config file. +======= + # Get the cluster option; create and run jobs based on given parameter. The jobs will parse all the intermediate vcf file to extract information such as if any unique variant position was unmapped in a sample, if it was filtered out dur to DP,MQ, FQ, proximity to indel, proximity to other SNPs and other variant filter parameters set in config file. +>>>>>>> 7b2c92a5aa76895d2a815d185b3d861945add2be tmp_dir = "/tmp/temp_%s/" % log_unique_time create_job(args.jobrun, vcf_filenames, unique_position_file, tmp_dir) create_indel_job(args.jobrun, vcf_filenames, unique_indel_position_file, tmp_dir) @@ -3681,6 +3685,10 @@ def someOtherFunc(data, key): call("%s" % prepare_ref_var_consensus_input_cmd, logger) call("%s" % prepare_var_consensus_input_cmd, logger) call("%s" % prepare_allele_var_consensus_input_cmd, logger) +<<<<<<< HEAD +======= + #call("%s" % prepare_ref_allele_var_consensus_input_cmd, logger) +>>>>>>> 7b2c92a5aa76895d2a815d185b3d861945add2be call("%s" % prepare_ref_allele_unmapped_consensus_input_cmd, logger) @@ -3713,9 +3721,15 @@ def someOtherFunc(data, key): # # Read new allele matrix and generate fasta; generate a seperate function keep_logging('Generating Fasta from Variant Alleles...\n', 'Generating Fasta from Variant Alleles...\n', logger, 'info') +<<<<<<< HEAD # create_job_allele_variant_fasta(args.jobrun, vcf_filenames, args.filter2_only_snp_vcf_dir, config_file) # # extract_only_ref_variant_fasta_from_reference_allele_variant() +======= + #create_job_allele_variant_fasta(args.jobrun, vcf_filenames, args.filter2_only_snp_vcf_dir, config_file) + + #extract_only_ref_variant_fasta_from_reference_allele_variant() +>>>>>>> 7b2c92a5aa76895d2a815d185b3d861945add2be call("cp %s %s/Logs/core/" % ( log_file_handle, os.path.dirname(os.path.dirname(args.filter2_only_snp_vcf_dir))), logger) diff --git a/pipeline.py b/pipeline.py index c0ef03b..534235a 100755 --- a/pipeline.py +++ b/pipeline.py @@ -481,7 +481,11 @@ def cleanup(args, logger): files_to_delete = [] Config = ConfigParser.ConfigParser() Config.read(config_file) +<<<<<<< HEAD #pipeline(args, logger) +======= + pipeline(args, logger) +>>>>>>> 7b2c92a5aa76895d2a815d185b3d861945add2be cleanup(args, logger) keep_logging('End: Pipeline', 'End: Pipeline', logger, 'info') time_taken = datetime.now() - start_time_2 diff --git a/variant_call.py b/variant_call.py index f2769c5..b979587 100644 --- a/variant_call.py +++ b/variant_call.py @@ -615,7 +615,11 @@ def run_tree_analysis(core_temp_dir, reference, analysis_name, log_unique_time, call("cp %s %s/%s_%s_config_copy.txt" % (config_file, core_prep_logs_folder, log_unique_time, args.analysis_name), logger) make_sure_path_exists(core_temp_dir) keep_logging('\nCopying vcf files to %s\n' % core_temp_dir, '\nCopying vcf files to %s\n' % core_temp_dir, logger, 'info') +<<<<<<< HEAD cp_command = "cp %s/*/*_vcf_results/*_filter2_indel_final.vcf %s/*/*_vcf_results/*_aln_mpileup_raw.vcf %s/*/*_vcf_results/*_raw.vcf_5bp_indel_removed.vcf* %s/*/*_vcf_results/*filter2_final.vcf* %s/*/*_vcf_results/*vcf_no_proximate_snp.vcf* %s/*/*_vcf_results/*array %s/*/*unmapped.bed_positions %s/*/*_vcf_results/*_indel_gatk.vcf %s" % (args.output_folder, args.output_folder, args.output_folder, args.output_folder, args.output_folder, args.output_folder, args.output_folder, args.output_folder, core_temp_dir) +======= + cp_command = "cp %s/*/*_vcf_results/*_filter2_indel_final.vcf %s/*/*_vcf_results/*_aln_mpileup_raw.vcf %s/*/*_vcf_results/*_raw.vcf_5bp_indel_removed.vcf* %s/*/*_vcf_results/*filter2_final.vcf* %s/*/*_vcf_results/*vcf_no_proximate_snp.vcf* %s/*/*_vcf_results/*array %s/*/*unmapped.bed_positions %s" % (args.output_folder, args.output_folder, args.output_folder, args.output_folder, args.output_folder, args.output_folder, args.output_folder, core_temp_dir) +>>>>>>> 7b2c92a5aa76895d2a815d185b3d861945add2be # cp_command = "cp %s/*/*_filter2_indel_final.vcf %s/*/*_aln_mpileup_raw.vcf %s/*/*_raw.vcf_5bp_indel_removed.vcf* %s/*/*filter2_final.vcf* %s/*/*vcf_no_proximate_snp.vcf* %s/*/*array %s/*/*unmapped.bed_positions %s" % ( # args.output_folder, args.output_folder, args.output_folder, args.output_folder, args.output_folder, # args.output_folder, args.output_folder, core_temp_dir) @@ -691,6 +695,53 @@ def run_tree_analysis(core_temp_dir, reference, analysis_name, log_unique_time, core_results_dir = args.output_folder + "/%s_core_results/" % log_unique_time make_sure_path_exists(core_results_dir) +<<<<<<< HEAD +======= + # Commented Out: 2018-10-23 + # if args.filenames: + # list_of_files = get_filenames(args.dir, args.type, args.filenames, args.analysis_name, args.suffix) + # list_of_vcf_files = generate_custom_vcf_file_list(sorted(list_of_files), logger) + # list_of_label_files = [] + # + # for i in list_of_vcf_files: + # list_of_label_files.append(i + '_positions_label') + # if len(list_of_label_files) == len(list_of_vcf_files): + # for i in list_of_label_files: + # if os.stat("%s/%s" % (core_temp_dir, i)).st_size == 0: + # keep_logging('The file {} is empty. Please rerun core_prep step again.\n'.format(i), + # 'The file {} is empty. Please rerun core_prep step again.\n'.format(i), logger, + # 'exception') + # exit() + # else: + # keep_logging('Problem in core_prep results. Rerun the core_prep step\n', + # 'Problem in core_prep results. Rerun the core_prep step\n', logger, 'exception') + # exit() + # + # with open("%s/vcf_filenames" % core_temp_dir, 'w') as out_fp: + # for file in list_of_vcf_files: + # out_fp.write(os.path.basename(file) + '\n') + # out_fp.close() + # else: + # list_of_label_files = glob.glob("%s/*_no_proximate_snp.vcf_positions_label" % core_temp_dir) + # list_of_vcf_files = glob.glob("%s/*_filter2_final.vcf_no_proximate_snp.vcf" % core_temp_dir) + # # print sorted(list_of_vcf_files) + # if len(list_of_label_files) == len(list_of_vcf_files): + # for i in list_of_label_files: + # if os.stat(i).st_size == 0: + # keep_logging('The file {} is empty. Please rerun core_prep step again.\n'.format(i), + # 'The file {} is empty. Please rerun core_prep step again.\n'.format(i), logger, + # 'exception') + # exit() + # else: + # keep_logging('Problem in core_prep results. Rerun the core_prep step\n', + # 'Problem in core_prep results. Rerun the core_prep step\n', logger, 'exception') + # exit() + # with open("%s/vcf_filenames" % core_temp_dir, 'w') as out_fp: + # for file in sorted(list_of_vcf_files): + # out_fp.write(os.path.basename(file) + '\n') + # out_fp.close() + +>>>>>>> 7b2c92a5aa76895d2a815d185b3d861945add2be reference = ConfigSectionMap(args.index, Config)['ref_path'] + "/" + ConfigSectionMap(args.index, Config)[ 'ref_name'] @@ -749,9 +800,12 @@ def run_tree_analysis(core_temp_dir, reference, analysis_name, log_unique_time, fp.close() f1.close() +<<<<<<< HEAD copy_phage_results = "cp %s/summary.txt %s/phage_region_positions.txt %s/detail.txt" % (os.path.dirname(reference), os.path.dirname(reference), os.path.dirname(reference)) call(copy_phage_results, logger) +======= +>>>>>>> 7b2c92a5aa76895d2a815d185b3d861945add2be core_pipeline_cmd = run_core_analysis(core_temp_dir, reference, args.analysis_name, log_unique_time, args.cluster, logger, core_results_dir, config_file) core_All_cmds.append(core_pipeline_cmd) @@ -856,12 +910,15 @@ def run_tree_analysis(core_temp_dir, reference, analysis_name, log_unique_time, out.write(cmds + '\n') out.close() +<<<<<<< HEAD keep_logging('Running: %s\n' % combine_job_name, 'Running: %s\n' % combine_job_name, logger, 'info') call("qsub %s" % combine_job_name, logger) +======= +>>>>>>> 7b2c92a5aa76895d2a815d185b3d861945add2be elif "core_prep" in args.steps: """ Set Up Core Prep logs folder/logger object, cluster mode and copy config files to it"""