@@ -87,27 +87,31 @@ def add_peptide(
87
87
self .assignments [coverage ][pool ] = []
88
88
self .assignments [coverage ][pool ].append ((peptide_id , peptide_sequence ))
89
89
90
def num_violations(self) -> int:
    """
    Number of violations
    (i.e. number of times two peptides appear together more than once).

    For every unordered pair of peptides, the pools that contain both
    peptides are counted. A pair sharing k pools contributes k - 1
    violations when k > 1 and nothing otherwise, so the first shared
    pool of a pair is never penalized.

    Returns
    -------
    num_violations  :   Number of violations (non-negative integer).
    """
    # Imported locally so this method does not depend on a file-level import.
    from itertools import combinations

    # Step 1. Create a dictionary of peptides and the set of pools they are in.
    # A single pass over the rows replaces one full dataframe scan per peptide,
    # and storing sets removes duplicate (peptide, pool) rows exactly like
    # `.unique()` did.
    df_assignments = self.to_dataframe()
    peptide_pool_dict = defaultdict(set)
    for peptide_id, pool_id in zip(df_assignments['peptide_id'], df_assignments['pool_id']):
        peptide_pool_dict[peptide_id].add(pool_id)

    # Step 2. Enumerate the number of violations over all unordered peptide pairs
    num_violations = 0
    for p1_pools, p2_pools in combinations(peptide_pool_dict.values(), 2):
        num_shared_pools = len(p1_pools & p2_pools)
        if num_shared_pools > 1:
            # Only the pools shared beyond the first one count as violations
            num_violations += num_shared_pools - 1
    return num_violations
112
116
113
117
def to_dataframe (self ) -> pd .DataFrame :
@@ -144,7 +148,8 @@ def is_optimal(
144
148
Verifies whether a given ELISpot assignment satisfies the following constraints:
145
149
1. Each peptide is in 'num_coverage' number of different pools.
146
150
2. Each peptide is in exactly one unique combination of pool IDs.
147
- 3. There is an optimal (minimal) number of pools.
151
+ 3. Two peptides are not pooled together more than once.
152
+ 4. There is an optimal (minimal) number of pools.
148
153
149
154
Parameters
150
155
---------
@@ -182,28 +187,41 @@ def is_optimal(
182
187
for key , value in pool_ids_peptides_dict .items ():
183
188
if len (value ) > 1 :
184
189
if verbose :
185
- logger . info ( "Assignment does not meet constraint #2. Pools %s have the following peptides:" % key )
186
- for peptide_id in value :
187
- logger . info ( peptide_id )
190
+ if constraint_2_bool :
191
+ logger . info ( "Assignment does not meet constraint #2: there are peptides that do not belong to exactly one unique combination of pool IDs." )
192
+ logger . info ( " \t Pools %s have the following peptides: %s." % ( key , ',' . join ( value )) )
188
193
constraint_2_bool = False
189
194
if constraint_2_bool :
190
195
if verbose :
191
196
logger .info ('Assignment meets constraint #2: each peptide belongs to exactly one unique combination of pool IDs.' )
192
197
193
- # Step 3. Check that there is an optimal number of pools
198
+ # Step 3. Two peptides are not pooled together more than once.
194
199
constraint_3_bool = True
200
+ num_violations = self .num_violations ()
201
+ if num_violations > 0 :
202
+ constraint_3_bool = False
203
+ if verbose :
204
+ logger .info ("Assignment does not meet constraint #3: violation score is %i "
205
+ "(proxy of number of times peptide pairs are pooled together more than once)." %
206
+ num_violations )
207
+ else :
208
+ if verbose :
209
+ logger .info ('Assignment meets constraint #3: every pair of peptides is pooled together at most once.' )
210
+
211
+ # Step 4. Check that there is an optimal number of pools
212
+ constraint_4_bool = True
195
213
num_pools = math .ceil (len (df_assignments ['peptide_id' ].unique ()) / num_peptides_per_pool ) * num_coverage
196
214
if len (df_assignments ['pool_id' ].unique ()) != num_pools :
197
215
num_extra_pools = len (df_assignments ['pool_id' ].unique ()) - num_pools
198
216
if verbose :
199
- logger .info ('Assignment does not meet constraint #3 : %i extra pool(s) than the minimum possible number of pools (%i).' %
217
+ logger .info ('Assignment does not meet constraint #4 : %i extra pool(s) than the minimum possible number of pools (%i).' %
200
218
(num_extra_pools , num_pools ))
201
- constraint_3_bool = False
202
- if constraint_3_bool :
219
+ constraint_4_bool = False
220
+ if constraint_4_bool :
203
221
if verbose :
204
- logger .info ('Assignment meets constraint #3 : there is an optimal (minimal) number of pools (%i).' % num_pools )
222
+ logger .info ('Assignment meets constraint #4 : there is an optimal (minimal) number of pools (%i).' % num_pools )
205
223
206
- return constraint_1_bool & constraint_2_bool & constraint_3_bool
224
+ return constraint_1_bool & constraint_2_bool & constraint_3_bool & constraint_4_bool
207
225
208
226
def shuffle_pool_ids (self ):
209
227
"""
@@ -552,7 +570,7 @@ def minimize_violations(
552
570
verbose : bool = True
553
571
) -> List ['BlockAssignment' ]:
554
572
"""
555
- Minimizes violations (i.e. non-unique pool assignment ) in a list of
573
+ Minimizes violations (i.e. number of times peptide pairs are pooled together more than once ) in a list of
556
574
block assignments by shuffling pool IDs.
557
575
558
576
Parameters
@@ -564,16 +582,16 @@ def minimize_violations(
564
582
-------
565
583
block_assignments : List of BlockAssignment objects.
566
584
"""
567
- min_violations = BlockAssignment .merge (block_assignments = block_assignments ).count_violations ()
585
+ min_violations = BlockAssignment .merge (block_assignments = block_assignments ).num_violations ()
568
586
curr_block_assignments = copy .deepcopy (block_assignments )
569
587
best_block_assignments = copy .deepcopy (block_assignments )
570
588
for _ in range (0 , shuffle_iters ):
571
589
random_idx = random .choice (list (range (0 , len (curr_block_assignments ))))
572
590
curr_block_assignments [random_idx ].shuffle_pool_ids ()
573
- curr_num_violations = BlockAssignment .merge (block_assignments = curr_block_assignments ).count_violations ()
591
+ curr_num_violations = BlockAssignment .merge (block_assignments = curr_block_assignments ).num_violations ()
574
592
if curr_num_violations < min_violations :
575
593
if verbose :
576
- logger .info ('Found a better assignment: current number of violations: %i, new number of violations: %i' %
594
+ logger .info ('\t Found a better assignment; current number of violations: %i, new number of violations: %i' %
577
595
(min_violations , curr_num_violations ))
578
596
best_block_assignments = copy .deepcopy (curr_block_assignments )
579
597
min_violations = curr_num_violations
0 commit comments