-
Notifications
You must be signed in to change notification settings - Fork 1
/
getAlphaFoldPDBs.py
executable file
·82 lines (60 loc) · 2.52 KB
/
getAlphaFoldPDBs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 12 00:33:50 2023
Script to retrieve a separate AlphaFold (AF) PDB file for each UniProt ID in a list (or for a single ID).
Note: If the AF file for a UniProt ID is not available, that ID is written to missingAF.log.
Note: Only works with AF v4 models.
@author: Bui, K.H., McGill University
"""
import argparse
import os
import urllib.error
import urllib.request
import time
# URL template for AlphaFold v4 model files; "{}" is filled with the UniProt ID.
BASE_URL = "https://alphafold.ebi.ac.uk/files/AF-{}-F1-model_v4.pdb"
def retrieve_af_file(uniprot_id, outdir, ignore_existing):
    """Download the AlphaFold model for one UniProt ID into *outdir*.

    The model is fetched from ``BASE_URL`` (formatted with *uniprot_id*) and
    saved as ``<uniprot_id>.pdb`` inside *outdir*.

    Args:
        uniprot_id: UniProt ID substituted into ``BASE_URL`` and used as the
            output file name.
        outdir: Directory in which the file is written; it is not created
            here.
        ignore_existing: When truthy, an output file that already exists is
            kept and no download is attempted.

    Returns:
        1 if the file was written, or skipped because it already exists;
        0 if the download failed (HTTP error or network-level failure).
    """
    print(f'Retrieving {uniprot_id}.pdb')
    outfile = os.path.join(outdir, f"{uniprot_id}.pdb")
    if ignore_existing and os.path.exists(outfile):
        print(f'Skipping {uniprot_id} due to existing file')
        return 1

    url = BASE_URL.format(uniprot_id)
    try:
        # The context manager closes the HTTP connection even if read() fails.
        with urllib.request.urlopen(url) as response:
            payload = response.read()
    except urllib.error.URLError as err:
        # URLError is the base class of HTTPError, so this handles both an
        # HTTP error status and a network-level failure without aborting the
        # caller's whole batch.
        print('Error:', uniprot_id, f'({err})')
        return 0

    # Save the bytes exactly as received: no decode/encode round trip and no
    # platform newline translation.
    with open(outfile, 'wb') as outhandle:
        outhandle.write(payload)
    return 1
def main(argv=None):
    """Command-line entry point: download AlphaFold PDBs for UniProt IDs.

    Exactly one of ``--id`` (a single UniProt ID) or ``--ilist`` (a text file
    with one ID per line) must be given, together with ``--odir``. Each
    non-empty ID is passed to ``retrieve_af_file``; IDs for which it returns
    0 are written, one per line, to ``missingAF.log`` in the current working
    directory.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). When ``None``, argparse reads ``sys.argv[1:]``.

    Raises:
        SystemExit: Raised by argparse on invalid or inconsistent arguments.
    """
    parser = argparse.ArgumentParser(
        description='Retrieve AlphaFold PDB files for UniProt IDs')
    parser.add_argument('--id', help='Input UniProt ID', required=False)
    parser.add_argument('--ilist', help='Input ID list', required=False)
    parser.add_argument('--odir', help='Output directory location', required=True)
    # type=int lets argparse reject a non-integer value with a usage error
    # instead of a ValueError traceback later on.
    parser.add_argument('--ignore_existing', help='Ignore existing file (1/0)',
                        required=False, type=int, default=0)
    args = parser.parse_args(argv)

    if not args.id and not args.ilist:
        parser.error("Require either --id or --ilist")
    if args.id and args.ilist:
        parser.error("Use either --id or --ilist")

    if args.ilist:
        # Read the list up front so the file is closed before the downloads
        # start, and is closed even if reading fails.
        with open(args.ilist, 'r') as handle:
            uniprot_ids = [line.strip() for line in handle]
    else:
        uniprot_ids = [args.id.strip()]

    outdir = args.odir
    os.makedirs(outdir, exist_ok=True)

    missing = []
    # Open the log once and record each missing ID as soon as it is found,
    # so an interrupted run still leaves the IDs seen so far on disk.
    with open('missingAF.log', 'w') as log:
        for uniprot_id in uniprot_ids:
            if not uniprot_id:
                continue  # skip blank lines in the ID list
            if retrieve_af_file(uniprot_id, outdir, args.ignore_existing) == 0:
                missing.append(uniprot_id)
                log.write(f"{uniprot_id}\n")

    print("\nThere are {} missing AlphaFold structures written in missingAF.log".format(len(missing)))
# Run the command-line entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()