-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBcell_Selection.py
119 lines (92 loc) · 4.53 KB
/
Bcell_Selection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import pandas as pd
import numpy as np
import random
import os
from tqdm import tqdm
import Arg_Parser
############ Set PAM matrix location here ############
# Read the PAM 250 scores from the Resources directory under the project root.
matrix = np.array(np.loadtxt(os.path.join(Arg_Parser.root_dir, "Resources/PAM_250.txt")))
# One-letter amino-acid codes used to label both axes of the matrix.
# NOTE(review): presumably this order matches the row/column order of PAM_250.txt - confirm.
col_names = tuple("ARNDCQEGHILKMFPSTWYV")
row_names = col_names
# Square, labelled table of substitution likelihoods (columns and rows share the same labels).
pam = pd.DataFrame(data=matrix, index=row_names, columns=col_names)
# Rescale by the maximum reported by np.max.
# NOTE(review): np.max on a DataFrame delegates to DataFrame.max with axis=None; whether that is a
# single global maximum or per-column maxima appears to depend on the pandas version - confirm intent.
pam = pam / np.max(pam)
class Selection:
    """
    Runs clonal selection on B-cell populations against an antigen epitope.

    Attributes:
        result_pop: dict initialised empty; not written by the methods in this class.
        selection_dict: the population dict being selected on (taken from the lymphocyte).
        likelihood: nested dict, likelihood[aa][other] = PAM-derived likelihood read from the
            module-level ``pam`` table for column ``aa`` and row ``other``.
    """

    def __init__(self):
        self.result_pop = dict()
        self.selection_dict = dict()
        self.likelihood = dict()

    @staticmethod
    def _fitness(epitope, paratope):
        """Return the fraction of epitope positions matched index-for-index by the paratope."""
        matches = sum(1 for e, p in zip(epitope, paratope) if e == p)
        return matches / len(epitope)

    @staticmethod
    def _best_substitutions(likelihood):
        """Map each amino acid to its most likely (substitute, likelihood) pair, excluding itself."""
        best = dict()
        for aa, scores in likelihood.items():
            # Excluding by key (not by value) keeps names and scores aligned even when two
            # amino acids share the same score.
            candidates = [(other, score) for other, score in scores.items() if other != aa]
            # max() returns the first maximal element, so ties resolve in table order.
            best[aa] = max(candidates, key=lambda pair: pair[1])
        return best

    def clonal_selection(self, exchange_iter, antigen, lymphocyte, max_affinity=True):
        """
        Performs clonal selection on each B-cell population generated in Ant_Lymph.py. Each random paratope generated
        represents a population with a property n as the number of individuals in the population. Substitution
        likelihoods are calculated for each character of the paratope, taken from the PAM 250 matrix. This will run
        for at most exchange_iter iterations.

        Each population is a list whose first element is the paratope string. This method appends the population
        size (lymphocyte.n) and the fitness to that list, so each entry becomes [paratope, n, fitness]. The lists in
        lymphocyte.pops are modified in place.

        On every iteration each residue is replaced by its most likely substitute (self-substitution excluded) with
        probability equal to that substitute's likelihood; residues whose best likelihood is not positive are kept.

        :arg exchange_iter --> The maximum number of selection iterations.
        :arg antigen --> Object exposing the target sequence as ``epitope``.
        :arg lymphocyte --> Object exposing the populations dict as ``pops`` and the population size as ``n``.
        :arg max_affinity --> Stops the selection as soon as a paratope reaches a fitness of 1.
        :return: The populations dict, each value being [paratope, n, fitness].
        :raises KeyError: if a paratope contains a character that is not one of the 20 amino-acid codes.
        :raises ZeroDivisionError: if the epitope is empty while at least one population exists.
        """
        ant = antigen.epitope
        aa_list = ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M',
                   'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y']
        # The populations dict is shared with the lymphocyte (not copied).
        self.selection_dict = lymphocyte.pops

        # Build the per-amino-acid likelihood table from the PAM data frame.
        for aa in aa_list:
            column = pam[aa]
            self.likelihood[aa] = {row: column[row] for row in row_names}

        # The likelihoods never change during selection, so the best substitute for every
        # amino acid is worked out once instead of once per residue per iteration.
        best_substitution = self._best_substitutions(self.likelihood)

        # Record the population size and the starting fitness of each paratope.
        for population in self.selection_dict.values():
            # The paratope is the first element of the list; comparing the epitope against the
            # list itself would never produce a meaningful match count.
            fitness = self._fitness(ant, population[0])
            population.append(lymphocyte.n)
            population.append(fitness)

        # Each population will undergo clonal selection as opposed to each individual because the likelihood of
        # substitution would be the same for each individual in the population.
        ############# Selection Process #############
        print("Starting populations: ", self.selection_dict)
        print("Antigen Epitope: ", ant)
        for _ in tqdm(range(exchange_iter)):
            reached_max = False
            for population in self.selection_dict.values():
                mutated = []
                for residue in population[0]:
                    substitute, likelihood = best_substitution[residue]
                    # random.random() draws from [0, 1); randrange(0, 1) can only ever return 0,
                    # which would make every substitution certain.
                    if likelihood > 0 and likelihood >= random.random():
                        mutated.append(substitute)
                    else:
                        mutated.append(residue)
                population[0] = "".join(mutated)
                # Re-score the paratope after this round of substitutions.
                fitness = self._fitness(ant, population[0])
                population[2] = fitness
                if max_affinity and fitness == 1:
                    reached_max = True
                    break
            # Stop iterating altogether; further rounds would mutate the perfect match away.
            if reached_max:
                break
        return self.selection_dict