Skip to content

Commit 39cc056

Browse files
committed
v0.8.8
1 parent 1f199cb commit 39cc056

File tree

10 files changed

+177
-127
lines changed

10 files changed

+177
-127
lines changed

files/lm_manh.png

-14.9 KB
Loading

files/lm_qq.png

-16.4 KB
Loading

files/lmm_manh.png

-14.4 KB
Loading

files/lmm_qq.png

-17.2 KB
Loading

setup.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717

1818
setup(
1919
name='vcf2gwas',
20-
version='0.8.7',
20+
version='0.8.8',
2121
description="Python API for comprehensive GWAS analysis using GEMMA",
2222
license="GNUv3",
2323
author="Frank Vogt",

vcf2gwas/__main__.py

Lines changed: 18 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -23,13 +23,15 @@
2323
import sys
2424
import os
2525
import subprocess
26+
import time
2627

27-
from vcf2gwas.parsing import *
28+
#from vcf2gwas.parsing import *
29+
from parsing import *
2830
from vcf2gwas.install import main as installer
2931

3032
argvals = None
3133

32-
def main(argvals=argvals):
34+
def main(timestamp, argvals=argvals):
3335

3436
version = set_version_number()
3537
print(f"\nvcf2gwas v{version} \n")
@@ -39,13 +41,20 @@ def main(argvals=argvals):
3941
args.insert(0, 'python3.9')
4042
args.insert(1, os.path.join(os.path.dirname(__file__), 'starter.py'))
4143

44+
try:
45+
args = delete_string(args, ['--timestamp'])
46+
except:
47+
pass
48+
args.insert(2, timestamp)
49+
args.insert(2, "--timestamp")
50+
4251
geno = P.set_geno()
4352
if geno == "test":
4453
source = os.path.join(os.path.dirname(__file__), 'starter.py')
4554
installer()
4655
vcf = os.path.join("input", "example.vcf.gz")
4756
pheno = os.path.join("input", "example.csv")
48-
args = f'python3.9 {source} -v {vcf} -pf {pheno} -p 1 -lm'.split()
57+
args = f'python3.9 {source} --timestamp {timestamp} -v {vcf} -pf {pheno} -p 1 -lm'.split()
4958

5059
lm = P.set_lm()
5160
lmm = P.set_lmm()
@@ -59,19 +68,21 @@ def main(argvals=argvals):
5968
return process.returncode
6069

6170
def run_main():
71+
timestamp = time.strftime("%Y%m%d_%H%M%S")
6272
try:
63-
sys.exit(main())
73+
sys.exit(main(timestamp))
6474
except KeyboardInterrupt as e:
6575
print("\nvcf2gwas interrupted")
6676
print("Cleaning up temporary files\n")
6777
finally:
68-
shutil.rmtree("_vcf2gwas_temp", ignore_errors=True)
78+
shutil.rmtree(f'_vcf2gwas_temp_{timestamp}', ignore_errors=True)
6979

7080
if __name__ == '__main__':
81+
timestamp = time.strftime("%Y%m%d_%H%M%S")
7182
try:
72-
sys.exit(main())
83+
sys.exit(main(timestamp))
7384
except KeyboardInterrupt as e:
7485
print("\nvcf2gwas interrupted")
7586
print("Cleaning up temporary files\n")
7687
finally:
77-
shutil.rmtree("_vcf2gwas_temp", ignore_errors=True)
88+
shutil.rmtree(f'_vcf2gwas_temp_{timestamp}', ignore_errors=True)

vcf2gwas/analysis.py

Lines changed: 17 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -19,8 +19,10 @@
1919
along with vcf2gwas. If not, see <https://www.gnu.org/licenses/>.
2020
"""
2121

22-
from vcf2gwas.parsing import *
23-
from vcf2gwas.utils import *
22+
#from vcf2gwas.parsing import *
23+
#from vcf2gwas.utils import *
24+
from parsing import *
25+
from utils import *
2426

2527
import sys
2628
import time
@@ -37,14 +39,13 @@
3739

3840
############################## Initialising Program ##############################
3941

40-
timestamp = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime())
41-
timestamp2 = read_timestamp()
4242
start = time.perf_counter()
4343
# set argument parser
4444
P = Parser(argvals)
4545
out_dir = P.set_out_dir()
4646
out_dir2 = os.path.join(out_dir, "Output")
47-
dir_temp = "_vcf2gwas_temp"
47+
timestamp2 = P.set_timestamp()
48+
dir_temp = f'_vcf2gwas_temp_{timestamp2}'
4849
memory = P.set_memory()
4950
threads = P.set_threads()
5051
os.environ['NUMEXPR_MAX_THREADS'] = str(threads)
@@ -203,7 +204,7 @@
203204
msg = "No individuals left! Check if IDs in VCF and phenotype file are of the same format"
204205
raise_error(ValueError, msg, Log)
205206

206-
file = open(os.path.join("_vcf2gwas_temp", "vcf2gwas_ind_count.txt"), 'a')
207+
file = open(os.path.join(dir_temp, "vcf2gwas_ind_count.txt"), 'a')
207208
file.write(f'{len(list1_org)-diff_num}\n')
208209
file.close()
209210

@@ -335,7 +336,7 @@
335336
N_list = []
336337
path_list = []
337338

338-
file = open(os.path.join("_vcf2gwas_temp", f"vcf2gwas_process_report_{pc_prefix}.txt"), 'a')
339+
file = open(os.path.join(dir_temp, f"vcf2gwas_process_report_{pc_prefix}.txt"), 'a')
339340
file.close()
340341

341342
if model == None:
@@ -355,13 +356,15 @@
355356
i = "results"
356357

357358
if X == [] and Y == []:
358-
N = "1"
359+
N = [1]
360+
#N = "1"
359361
else:
360-
N = str(x)
362+
N = [x]
363+
#N = str(x)
361364
x = x + 1
362365
if multi == True:
363366
N = concat_lists(X, Y)
364-
N = listtostring(N)
367+
#N = listtostring(N)
365368

366369
path_temp = None
367370
if model not in ("-gk", "-eigen"):
@@ -388,12 +391,12 @@
388391
executor.map(
389392
Gemma.run_gemma, prefix_list, prefix2_list, itertools.repeat(model), itertools.repeat(n), N_list, path_list, itertools.repeat(Log), itertools.repeat(filename),
390393
itertools.repeat(filename2), itertools.repeat(pca), itertools.repeat(covar_file_name), i_list, itertools.repeat(i_list2),
391-
itertools.repeat(burn), itertools.repeat(sampling), itertools.repeat(snpmax), itertools.repeat(pc_prefix)
394+
itertools.repeat(burn), itertools.repeat(sampling), itertools.repeat(snpmax), itertools.repeat(pc_prefix), itertools.repeat(dir_temp)
392395
)
393396
timer_end = time.perf_counter()
394397
timer_total = round(timer_end - timer, 2)
395-
Post_analysis.check_return_codes(pc_prefix)
396-
Post_analysis.get_gemma_success(i_list, prefix2_list, path_list, columns, i_list2)
398+
Post_analysis.check_return_codes(pc_prefix, dir_temp)
399+
Post_analysis.get_gemma_success(i_list, prefix2_list, path_list, columns, i_list2, dir_temp)
397400
Log.print_log(f'\nGEMMA completed successfully (Duration: {runtime_format(timer_total)})\n')
398401

399402
############################## Processing and plotting ##############################
@@ -403,7 +406,7 @@
403406
itertools.repeat(top_ten), itertools.repeat(top_sig), itertools.repeat(top_all), itertools.repeat(Log), itertools.repeat(model), itertools.repeat(n), prefix2_list, path_list,
404407
itertools.repeat(n_top), i_list, itertools.repeat(sigval), itertools.repeat(nolabel), itertools.repeat(noplot)
405408
):
406-
Post_analysis.run_postprocessing(top_ten, top_sig, top_all, Log, model, n, prefix2, path_temp, n_top, i, sigval, nolabel, noplot)
409+
Post_analysis.run_postprocessing(top_ten, top_sig, top_all, Log, model, n, prefix2, path_temp, n_top, i, sigval, nolabel, noplot, dir_temp)
407410
Log.print_log("Analysis of GEMMA results completed successfully\n")
408411

409412
############################## Summary and Clean up ##############################

vcf2gwas/parsing.py

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,16 @@
3131
from psutil import virtual_memory
3232

3333
def set_version_number():
34-
return "0.8.7"
34+
return "0.8.8"
35+
36+
def delete_string(args, strings):
37+
38+
for string in strings:
39+
temp_list = [i for i, e in enumerate(args) if e == string]
40+
temp_list2 = [x+1 for x in temp_list]
41+
temp_list = temp_list+temp_list2
42+
args = [j for i, j in enumerate(args) if i not in temp_list]
43+
return args
3544

3645
def getArgs(argv=None):
3746
"""Description:
@@ -44,6 +53,9 @@ def getArgs(argv=None):
4453
parser.add_argument(
4554
'--version', action='version', version='%(prog)s Version {}'.format(version)
4655
)
56+
parser.add_argument(
57+
'--timestamp', type=str
58+
)
4759
parser.add_argument(
4860
"-v", "--vcf", metavar="<filename>", required=True, type=str, help="(required) Genotype .vcf or .vcf.gz filename"
4961
)
@@ -212,6 +224,9 @@ class Parser:
212224
def __init__(self, args):
213225
self.args = getArgs(args)
214226

227+
def set_timestamp(self):
228+
return self.args.timestamp
229+
215230
def set_geno(self):
216231
return self.args.vcf
217232

vcf2gwas/starter.py

Lines changed: 26 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -19,13 +19,16 @@
1919
along with vcf2gwas. If not, see <https://www.gnu.org/licenses/>.
2020
"""
2121

22-
from vcf2gwas.parsing import *
23-
from vcf2gwas.utils import *
22+
#from vcf2gwas.parsing import *
23+
#from vcf2gwas.utils import *
24+
from parsing import *
25+
from utils import *
2426

2527
import time
2628
import os
2729
import sys
2830
import math
31+
import itertools
2932
import concurrent.futures
3033
import multiprocessing as mp
3134
from psutil import virtual_memory
@@ -43,17 +46,20 @@
4346
out_dir2 = os.path.join(out_dir, "Output")
4447
os.makedirs(out_dir2, exist_ok=True)
4548

46-
timestamp = time.strftime("%a, %d %b %Y %H:%M:%S", time.localtime())
47-
timestamp2 = time.strftime("%Y%m%d_%H%M%S")
49+
timestamp = time.strftime("%a, %d %b %Y %H:%M", time.localtime())
50+
timestamp2 = P.set_timestamp()
51+
if timestamp2 == None:
52+
# for testing
53+
timestamp2 = time.strftime("%Y%m%d_%H%M%S")
54+
4855
start = time.perf_counter()
4956

5057
version = set_version_number()
5158

52-
shutil.rmtree("_vcf2gwas_temp", ignore_errors=True)
53-
dir_temp = "_vcf2gwas_temp"
59+
shutil.rmtree(f'_vcf2gwas_temp_{timestamp2}', ignore_errors=True)
60+
dir_temp = f'_vcf2gwas_temp_{timestamp2}'
5461
qc_dir = os.path.join(dir_temp, f'QC_{timestamp2}')
5562
make_dir(qc_dir)
56-
write_timestamp(timestamp2)
5763

5864
file = open(os.path.join(dir_temp, "vcf2gwas_process_report.txt"), 'a')
5965
file.close()
@@ -178,7 +184,7 @@
178184
path = os.path.join(out_dir2, model2)
179185
make_dir(path)
180186

181-
pc_prefix = set_pc_prefix(pheno, covar, ".")
187+
pc_prefix = set_pc_prefix(pheno, covar, timestamp2)
182188
pc_prefix2 = set_pc_prefix(pheno, covar, "_")
183189
if model in ["-gk", "-eigen"]:
184190
pc_prefix2 = set_pc_prefix(pheno, covar, "")
@@ -642,12 +648,15 @@
642648
# for testing
643649
if args == []:
644650
args = argvals
651+
args = Starter.delete_string(args, ['--timestamp'])
645652
input_str = f'vcf2gwas {listtostring(args)}'
646653
args = Starter.delete_string(args, ['-v', '--vcf', '-T', '--threads', '-M', '--memory'])
647654
if covar == None:
648655
args = Starter.delete_string(args, ['-cf', '--cfile', '-c', '--covar'])
649656
args.insert(0, snp_file2)
650657
args.insert(0, "--vcf")
658+
args.insert(0, timestamp2)
659+
args.insert(0, "--timestamp")
651660
args.insert(0, 'python3.9')
652661
args.insert(1, os.path.join(os.path.dirname(__file__), 'analysis.py'))
653662
args.insert(2, '--memory')
@@ -702,8 +711,8 @@
702711

703712
Log.print_log("\nStarting analysis..")
704713
with concurrent.futures.ProcessPoolExecutor(mp_context=mp.get_context('fork'), max_workers=threads_org) as executor:
705-
executor.map(Starter.run_vcf2gwas, args_list)
706-
Starter.check_return_codes(Log)
714+
executor.map(Starter.run_vcf2gwas, args_list, itertools.repeat(dir_temp))
715+
Starter.check_return_codes(Log, dir_temp)
707716

708717
#################### summary and clean-up ####################
709718

@@ -721,7 +730,7 @@
721730
pc_prefix3 = set_pc_prefix(pheno, covar, "_")
722731
prefix_list.append(pc_prefix3)
723732
filenames, str_list = Summary.summarizer(path3, path2, pc_prefix3, snp_prefix, n_top, Log, prefix_list)
724-
temp, file_dict = Summary.ind_summary(path2, filenames, str_list)
733+
temp, file_dict = Summary.ind_summary(path2, filenames, str_list, dir_temp)
725734
filenames = temp[0]
726735
str_list = temp[1]
727736
filenames2 = []
@@ -863,13 +872,13 @@
863872
Y = pca_n
864873

865874
# print summary and move log files
866-
snp_total, snp_sig = Starter.get_snpcounts()
875+
snp_total, snp_sig = Starter.get_snpcounts(dir_temp)
867876
if noplot == True:
868877
snp_sig = "-"
869-
sig_level = Starter.get_count("vcf2gwas_sig_level.txt")
870-
ind_count = Starter.get_count("vcf2gwas_ind_count.txt")
871-
gemma_count = Starter.get_count("vcf2gwas_ind_gemma.txt")
872-
failed_count, failed_list = Starter.get_gemma_fail()
878+
sig_level = Starter.get_count("vcf2gwas_sig_level.txt", dir_temp)
879+
ind_count = Starter.get_count("vcf2gwas_ind_count.txt", dir_temp)
880+
gemma_count = Starter.get_count("vcf2gwas_ind_gemma.txt", dir_temp)
881+
failed_count, failed_list = Starter.get_gemma_fail(dir_temp)
873882
Log.summary(
874883
snp_file, pheno_files, covar, X, Y, model2_temp, n, filename, min_af, A, B,
875884
pca, keep, memory, threads, n_top, gene_file, species, gene_thresh, multi, umap_n, umapmetric, pca_n,
@@ -880,4 +889,4 @@
880889
if model != None:
881890
shutil.move(os.path.join(path, f'vcf2gwas{pc_prefix}.log.txt'), os.path.join(path, f'vcf2gwas_{snp_prefix}{pc_prefix2}_{timestamp2}.log.txt'))
882891

883-
shutil.rmtree("_vcf2gwas_temp", ignore_errors=True)
892+
shutil.rmtree(dir_temp, ignore_errors=True)

0 commit comments

Comments
 (0)