Skip to content

Commit 203b03e

Browse files
committed
test v0.0.9.3
1 parent 9bc664d commit 203b03e

File tree

8 files changed

+125
-34
lines changed

8 files changed

+125
-34
lines changed

examples/test.sh

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
#!/usr/bin/env bash
#
# Example invocations of `ace generate`, one per solver mode.
# The active runs use 120 peptides, 12 peptides per pool and 3x coverage.
# Each run is independent: a failure of the first does not stop the second.

# Variant (disabled): golfy run without `--golfy-allow-extra-pools`,
# i.e. relying on the tool's default for that option.
#ace generate \
#  --num-peptides 120 \
#  --num-peptides-per-pool 12 \
#  --num-coverage 3 \
#  --num-processes 6 \
#  --mode golfy \
#  --output-excel-file test_golfy.xlsx \
#  --assign-well-ids 1 \
#  --plate-type 96-well_plate

# Run 1: golfy mode with extra pools explicitly disallowed.
ace generate \
  --num-peptides 120 \
  --num-peptides-per-pool 12 \
  --num-coverage 3 \
  --num-processes 6 \
  --mode golfy \
  --golfy-allow-extra-pools False \
  --output-excel-file test_golfy.xlsx \
  --assign-well-ids 1 \
  --plate-type 96-well_plate

# Blank line to visually separate the output of the two runs.
echo ""

# Run 2: SAT solver mode with 1000 pool-ID shuffle iterations.
ace generate \
  --num-peptides 120 \
  --num-peptides-per-pool 12 \
  --num-coverage 3 \
  --num-processes 6 \
  --mode sat_solver \
  --output-excel-file test_sat.xlsx \
  --shuffle-iters 1000 \
  --max-peptides-per-block 100 \
  --max-peptides-per-pool 10 \
  --assign-well-ids 1 \
  --plate-type 96-well_plate

# Variant (disabled): smaller SAT solver run (100 peptides, 5 per pool)
# with shuffling turned off (`--shuffle-iters 0`).
#ace generate \
#  --num-peptides 100 \
#  --num-peptides-per-pool 5 \
#  --num-coverage 3 \
#  --num-processes 6 \
#  --mode sat_solver \
#  --output-excel-file test.xlsx \
#  --shuffle-iters 0 \
#  --max-peptides-per-block 100 \
#  --max-peptides-per-pool 10 \
#  --assign-well-ids 1 \
#  --plate-type 96-well_plate

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ requires = [
2525

2626
[project]
2727
name = "ace"
28-
version = "0.0.9.0"
28+
version = "0.0.9.3"
2929
requires-python = ">=3.7"
3030
keywords = [
3131
"elispot",

src/acelib/block_assignment.py

Lines changed: 44 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -87,27 +87,31 @@ def add_peptide(
8787
self.assignments[coverage][pool] = []
8888
self.assignments[coverage][pool].append((peptide_id, peptide_sequence))
8989

def num_violations(self) -> int:
    """
    Number of violations
    (i.e. number of times two peptides appear together more than once).

    Every unordered pair of peptides that shares k > 1 distinct pools
    contributes k - 1 to the total; sharing a single pool is allowed and
    contributes nothing.

    Returns
    -------
    num_violations  :   Number of violations.
    """
    # Step 1. Collect the distinct pools of every peptide.
    # A single groupby pass replaces one full DataFrame scan per peptide,
    # and the sets are built once here instead of inside the pair loop.
    df_assignments = self.to_dataframe()
    peptide_pools = [
        set(pool_ids.unique())
        for _, pool_ids in df_assignments.groupby('peptide_id', sort=False)['pool_id']
    ]

    # Step 2. Enumerate the number of violations over all unordered pairs
    num_violations = 0
    for i, p1_pools in enumerate(peptide_pools):
        for p2_pools in peptide_pools[i + 1:]:
            num_shared_pools = len(p1_pools & p2_pools)
            if num_shared_pools > 1:
                # The first shared pool is permitted; each extra one is a violation.
                num_violations += num_shared_pools - 1
    return num_violations
112116

113117
def to_dataframe(self) -> pd.DataFrame:
@@ -144,7 +148,8 @@ def is_optimal(
144148
Verifies whether a given ELISpot assignment satisfies the following constraints:
145149
1. Each peptide is in 'num_coverage' number of different pools.
146150
2. Each peptide is in exactly one unique combination of pool IDs.
147-
3. There is an optimal (minimal) number of pools.
151+
3. Two peptides are not pooled together more than once.
152+
4. There is an optimal (minimal) number of pools.
148153
149154
Parameters
150155
---------
@@ -182,28 +187,41 @@ def is_optimal(
182187
for key, value in pool_ids_peptides_dict.items():
183188
if len(value) > 1:
184189
if verbose:
185-
logger.info("Assignment does not meet constraint #2. Pools %s have the following peptides:" % key)
186-
for peptide_id in value:
187-
logger.info(peptide_id)
190+
if constraint_2_bool:
191+
logger.info("Assignment does not meet constraint #2: there are peptides that do not belong to exactly one unique combination of pool IDs.")
192+
logger.info("\tPools %s have the following peptides: %s." % (key, ','.join(value)))
188193
constraint_2_bool = False
189194
if constraint_2_bool:
190195
if verbose:
191196
logger.info('Assignment meets constraint #2: each peptide belongs to exactly one unique combination of pool IDs.')
192197

193-
# Step 3. Check that there is an optimal number of pools
198+
# Step 3. Two peptides are not pooled together more than once.
194199
constraint_3_bool = True
200+
num_violations = self.num_violations()
201+
if num_violations > 0:
202+
constraint_3_bool = False
203+
if verbose:
204+
logger.info("Assignment does not meet constraint #3: violation score is %i "
205+
"(proxy of number of times peptide pairs are pooled together more than once)." %
206+
num_violations)
207+
else:
208+
if verbose:
209+
logger.info('Assignment meets constraint #3: every pair of peptides is pooled together at most once.')
210+
211+
# Step 4. Check that there is an optimal number of pools
212+
constraint_4_bool = True
195213
num_pools = math.ceil(len(df_assignments['peptide_id'].unique()) / num_peptides_per_pool) * num_coverage
196214
if len(df_assignments['pool_id'].unique()) != num_pools:
197215
num_extra_pools = len(df_assignments['pool_id'].unique()) - num_pools
198216
if verbose:
199-
logger.info('Assignment does not meet constraint #3: %i extra pool(s) than the minimum possible number of pools (%i).' %
217+
logger.info('Assignment does not meet constraint #4: %i extra pool(s) than the minimum possible number of pools (%i).' %
200218
(num_extra_pools, num_pools))
201-
constraint_3_bool = False
202-
if constraint_3_bool:
219+
constraint_4_bool = False
220+
if constraint_4_bool:
203221
if verbose:
204-
logger.info('Assignment meets constraint #3: there is an optimal (minimal) number of pools (%i).' % num_pools)
222+
logger.info('Assignment meets constraint #4: there is an optimal (minimal) number of pools (%i).' % num_pools)
205223

206-
return constraint_1_bool & constraint_2_bool & constraint_3_bool
224+
return constraint_1_bool & constraint_2_bool & constraint_3_bool & constraint_4_bool
207225

208226
def shuffle_pool_ids(self):
209227
"""
@@ -552,7 +570,7 @@ def minimize_violations(
552570
verbose: bool = True
553571
) -> List['BlockAssignment']:
554572
"""
555-
Minimizes violations (i.e. non-unique pool assignment) in a list of
573+
Minimizes violations (i.e. number of times peptide pairs are pooled together more than once) in a list of
556574
block assignments by shuffling pool IDs.
557575
558576
Parameters
@@ -564,16 +582,16 @@ def minimize_violations(
564582
-------
565583
block_assignments : List of BlockAssignment objects.
566584
"""
567-
min_violations = BlockAssignment.merge(block_assignments=block_assignments).count_violations()
585+
min_violations = BlockAssignment.merge(block_assignments=block_assignments).num_violations()
568586
curr_block_assignments = copy.deepcopy(block_assignments)
569587
best_block_assignments = copy.deepcopy(block_assignments)
570588
for _ in range(0, shuffle_iters):
571589
random_idx = random.choice(list(range(0, len(curr_block_assignments))))
572590
curr_block_assignments[random_idx].shuffle_pool_ids()
573-
curr_num_violations = BlockAssignment.merge(block_assignments=curr_block_assignments).count_violations()
591+
curr_num_violations = BlockAssignment.merge(block_assignments=curr_block_assignments).num_violations()
574592
if curr_num_violations < min_violations:
575593
if verbose:
576-
logger.info('Found a better assignment: current number of violations: %i, new number of violations: %i' %
594+
logger.info('\tFound a better assignment; current number of violations: %i, new number of violations: %i' %
577595
(min_violations, curr_num_violations))
578596
best_block_assignments = copy.deepcopy(curr_block_assignments)
579597
min_violations = curr_num_violations

src/acelib/block_design.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -377,7 +377,7 @@ def divide_block_design(
377377
peptides = block_design.peptides[start_peptide_idx:end_peptide_idx + 1]
378378
start_peptide_idx = end_peptide_idx + 1
379379
if verbose:
380-
logger.info('\t\tAppending block design for %i peptides, %i peptides per pool' %
380+
logger.info('\tAppending block design for %i peptides, %i peptides per pool' %
381381
(len(peptides), num_peptides_per_pool))
382382
block_design_ = BlockDesign(
383383
peptides=peptides,

src/acelib/cli/cli_generate.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
"""
1818

1919

20+
import argparse
2021
import math
2122
import pandas as pd
2223
import os
@@ -171,6 +172,15 @@ def add_ace_generate_arg_parser(sub_parsers):
171172
required=False,
172173
help="Initialization mode for golfy (default: %s)." % GENERATE_GOLFY_INIT_MODE
173174
)
175+
parser_optional_golfy.add_argument(
176+
"--golfy-allow-extra-pools",
177+
dest="golfy_allow_extra_pools",
178+
type=eval,
179+
default=GENERATE_GOLFY_ALLOW_EXTRA_POOLS,
180+
choices=[True, False],
181+
required=False,
182+
help="Allow extra pools for golfy (default: %r)." % GENERATE_GOLFY_ALLOW_EXTRA_POOLS
183+
)
174184

175185
parser_optional_sat_solver = parser.add_argument_group("optional arguments (applies when '--mode sat_solver')")
176186
parser_optional_sat_solver.add_argument(
@@ -254,6 +264,7 @@ def run_ace_generate_from_parsed_args(args):
254264
random_seed
255265
golfy_max_iters
256266
golfy_init_mode
267+
golfy_allow_extra_pools
257268
num_processes
258269
shuffle_iters
259270
max_peptides_per_block
@@ -320,13 +331,15 @@ def run_ace_generate_from_parsed_args(args):
320331
random_seed=args.random_seed,
321332
max_iters=args.golfy_max_iters,
322333
init_mode=args.golfy_init_mode,
334+
allow_extra_pools=args.golfy_allow_extra_pools,
323335
verbose=args.verbose
324336
)
325337
elif args.mode == GenerateModes.SAT_SOLVER:
326338
block_assignment = run_ace_sat_solver(
327339
block_design=block_design,
328340
max_peptides_per_pool=args.max_peptides_per_pool,
329341
num_processes=args.num_processes,
342+
shuffle_iters=args.shuffle_iters,
330343
verbose=args.verbose
331344
)
332345
else:

src/acelib/default_parameters.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,12 @@
1919
"""generate"""
2020
# Number of processes.
2121
GENERATE_NUM_PROCESSES = 4
22-
GENERATE_GOLFY_MAX_ITERS = 2000
2322
GENERATE_RANDOM_SEED = 42
23+
GENERATE_GOLFY_MAX_ITERS = 2000
2424
GENERATE_GOLFY_INIT_MODE = 'greedy'
25+
GENERATE_GOLFY_ALLOW_EXTRA_POOLS = True
2526
GENERATE_SEQUENCE_SIMILARITY_THRESHOLD = 0.7
2627
GENERATE_SEQUENCE_SIMILARITY_FUNCTION = 'euclidean'
27-
GENERATE_SHUFFLE_ITERS = 100
28+
GENERATE_SHUFFLE_ITERS = 1000
2829
GENERATE_MAX_PEPTIDES_PER_BLOCK = 100
2930
GENERATE_MAX_PEPTIDES_PER_POOL = 10

src/acelib/main.py

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ def run_ace_golfy(
4242
random_seed: int = GENERATE_RANDOM_SEED,
4343
max_iters: int = GENERATE_GOLFY_MAX_ITERS,
4444
init_mode: str = GENERATE_GOLFY_INIT_MODE,
45+
allow_extra_pools: bool = GENERATE_GOLFY_ALLOW_EXTRA_POOLS,
4546
verbose: bool = True
4647
) -> BlockAssignment:
4748
"""
@@ -53,6 +54,7 @@ def run_ace_golfy(
5354
random_seed : Random seed.
5455
max_iters : Number of maximum iterations for golfy.
5556
init_mode : Init mode.
57+
allow_extra_pools : Allow extra pools.
5658
verbose : If True, prints messages.
5759
5860
Returns
@@ -81,9 +83,15 @@ def run_ace_golfy(
8183
num_replicates=block_design.num_coverage,
8284
strategy=init_mode,
8385
preferred_neighbors=preferred_neighbors,
86+
allow_extra_pools=allow_extra_pools,
87+
verbose=verbose
88+
)
89+
optimize(
90+
golfy_solution,
91+
max_iters=max_iters,
92+
allow_extra_pools=allow_extra_pools,
8493
verbose=verbose
8594
)
86-
optimize(golfy_solution, max_iters=max_iters, verbose=verbose)
8795

8896
if verbose:
8997
logger.info('Finished running golfy.')
@@ -190,16 +198,19 @@ def run_ace_sat_solver(
190198
block_assignments.append(block_assignment)
191199

192200
# Step 4. Merge assignments
201+
logger.info('Started minimizing violations.')
193202
block_assignments = BlockAssignment.minimize_violations(
194203
block_assignments=block_assignments,
195204
shuffle_iters=shuffle_iters,
196205
verbose=verbose
197206
)
207+
logger.info('Finished minimizing violations.')
198208
block_assignment = BlockAssignment.merge(block_assignments=block_assignments)
199209

200210
if verbose:
201211
logger.info('Finished running SAT solver.')
202-
logger.info('The returning block assignment has %i pools in total.' % block_assignment.num_pools)
203-
logger.info('The returning block assignment has %i peptides in total.' % len(block_assignment.peptide_ids))
212+
logger.info('The returning block assignment has the following:')
213+
logger.info('\t%i pools in total.' % block_assignment.num_pools)
214+
logger.info('\t%i peptides in total.' % len(block_assignment.peptide_ids))
204215
return block_assignment
205216

src/acelib/sequence_features.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,6 @@ def forward(self, inputs, representation='last_hidden_state'):
123123

124124
def load_weights(self, weights_path):
125125
"""Load weights from a file"""
126-
logger.info(self.device)
127126
self.load_state_dict(torch.load(weights_path, map_location=self.device))
128127

129128
def save_weights(self, weights_path):

0 commit comments

Comments
 (0)