Skip to content

Commit a5c4dcd

Browse files
authored
Probability Enhanced Attributes (#53)
1 parent 7b09d2f commit a5c4dcd

File tree

11 files changed

+580
-24
lines changed

11 files changed

+580
-24
lines changed

examples/acs2/maze/acs2_in_maze.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ def maze_metrics(population, environment):
2626
parser.add_argument("-e", "--environment", default="Maze4-v0")
2727
parser.add_argument("--epsilon", default=1.0, type=float)
2828
parser.add_argument("--ga", action="store_true")
29+
parser.add_argument("--pee", action="store_true")
2930
parser.add_argument("--explore-trials", default=50, type=int)
3031
parser.add_argument("--exploit-trials", default=10, type=int)
3132
args = parser.parse_args()
@@ -37,6 +38,7 @@ def maze_metrics(population, environment):
3738
cfg = Configuration(8, 8,
3839
epsilon=args.epsilon,
3940
do_ga=args.ga,
41+
do_pee=args.pee,
4042
metrics_trial_frequency=1,
4143
user_metrics_collector_fcn=maze_metrics)
4244

lcs/agents/ImmutableSequence.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,15 +4,15 @@
44
class ImmutableSequence:
55

66
WILDCARD = '#'
7-
OK_TYPES = (str, )
7+
OK_TYPES = (str, dict) # PEEs are stored in dict
88

99
def __init__(self, observation):
10-
assert type(self.WILDCARD) in self.OK_TYPES
10+
obs = tuple(observation)
1111

12-
for attr in observation:
13-
assert type(attr) in self.OK_TYPES
12+
assert type(self.WILDCARD) in self.OK_TYPES
13+
assert all(isinstance(o, self.OK_TYPES) for o in obs)
1414

15-
self._items = tuple(observation)
15+
self._items = obs
1616

1717
@classmethod
1818
def empty(cls, length: int):
@@ -45,7 +45,7 @@ def __getitem__(self, index):
4545
return self._items[index]
4646

4747
def __setitem__(self, index, value):
48-
assert type(value) in self.OK_TYPES
48+
assert isinstance(value, self.OK_TYPES)
4949
lst = list(self._items)
5050
lst[index] = value
5151

lcs/agents/acs2/Classifier.py

Lines changed: 75 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
from lcs import Perception
88
from . import Configuration, Condition, Effect, PMark, matching
9+
from . import ProbabilityEnhancedAttribute
910

1011

1112
logger = logging.getLogger(__name__)
@@ -139,21 +140,26 @@ def fitness(self):
139140
@property
140141
def specified_unchanging_attributes(self) -> List[int]:
141142
"""
142-
Determines the number of specified unchanging attributes in
143-
the classifier. An unchanging attribute is one that is anticipated
144-
not to change in the effect part.
143+
Determines the specified unchanging attributes in the classifier.
144+
An unchanging attribute is one that is anticipated not to change
145+
in the effect part.
145146
146147
Returns
147148
-------
148149
List[int]
149-
list specified unchanging attributes indices
150+
list of specified unchanging attributes indices
150151
"""
151152
indices = []
152153

153154
for idx, (cpi, epi) in enumerate(zip(self.condition, self.effect)):
154-
if cpi != self.cfg.classifier_wildcard and \
155-
epi == self.cfg.classifier_wildcard:
156-
indices.append(idx)
155+
if isinstance(epi, ProbabilityEnhancedAttribute):
156+
if cpi != self.cfg.classifier_wildcard and \
157+
epi.does_contain(cpi):
158+
indices.append(idx)
159+
else:
160+
if cpi != self.cfg.classifier_wildcard and \
161+
epi == self.cfg.classifier_wildcard:
162+
indices.append(idx)
157163

158164
return indices
159165

@@ -178,6 +184,9 @@ def is_reliable(self) -> bool:
178184
def is_inadequate(self) -> bool:
179185
return self.q < self.cfg.theta_i
180186

187+
def is_enhanceable(self):
    """
    Tells whether this classifier is currently flagged as enhanceable.

    Returns
    -------
    the value of the classifier's ``ee`` attribute, returned unchanged
    """
    enhanceable = self.ee
    return enhanceable
189+
181190
def increase_experience(self) -> int:
182191
self.exp += 1
183192
return self.exp
@@ -206,16 +215,62 @@ def specialize(self,
206215
Requires the effect attribute to be a wildcard to specialize it.
207216
By default false
208217
"""
209-
for idx, item in enumerate(situation):
218+
for idx in range(len(situation)):
210219
if leave_specialized:
211220
if self.effect[idx] != self.cfg.classifier_wildcard:
212221
# If we have a specialized attribute don't change it.
213222
continue
214223

215224
if previous_situation[idx] != situation[idx]:
216-
self.effect[idx] = situation[idx]
225+
if self.effect[idx] == self.cfg.classifier_wildcard:
226+
self.effect[idx] = situation[idx]
227+
else:
228+
if not isinstance(self.effect[idx],
229+
ProbabilityEnhancedAttribute):
230+
self.effect[idx] = ProbabilityEnhancedAttribute(
231+
self.effect[idx])
232+
self.effect[idx].insert_symbol(situation[idx])
233+
217234
self.condition[idx] = previous_situation[idx]
218235

236+
def merge_with(self, other_classifier, perception, time):
    """
    Builds a brand new classifier out of this one and `other_classifier`.

    The new classifier gets:
    - a copy of this classifier's condition, specialized with the
      condition of `other_classifier`,
    - this classifier's action,
    - an effect produced by `Effect.enhanced_effect` from both effects,
      both qualities and `perception`,
    - an empty mark,
    - reward `r` and quality `q` equal to the mean of both parents'
      values (quality is never lower than 0.5),
    - `num` and `exp` set to 1, `tav` set to 0, `tga` and `talp`
      set to `time`,
    - the `ee` flag cleared.

    Neither parent is modified here.

    Parameters
    ----------
    other_classifier
        the classifier to merge with
    perception
        perception forwarded to `Effect.enhanced_effect`
    time
        value stored as both `tga` and `talp` of the new classifier

    Returns
    -------
    Classifier
        the newly created, merged classifier

    Raises
    ------
    AssertionError
        when `cfg.do_pee` is not enabled
    """
    assert self.cfg.do_pee

    # This 0.5 is Q_INI constant in the original C++ code
    q_ini = 0.5

    merged = Classifier(cfg=self.cfg)

    merged.condition = Condition(self.condition)
    merged.condition.specialize_with_condition(other_classifier.condition)

    # action is an int, so a plain assignment is enough
    merged.action = self.action

    merged.effect = Effect.enhanced_effect(
        self.effect, other_classifier.effect,
        self.q, other_classifier.q,
        perception)

    merged.mark = PMark(cfg=self.cfg)

    merged.r = (self.r + other_classifier.r) / 2.0
    # Averaged quality, floored at Q_INI
    merged.q = max((self.q + other_classifier.q) / 2.0, q_ini)

    merged.num = 1
    merged.tga = merged.talp = time
    merged.tav = 0
    merged.exp = 1

    merged.ee = False

    return merged
270+
271+
def reverse_increase_quality(self):
    """
    Overwrites the quality `q` with ``(q - beta) / (1 - beta)``,
    where `beta` is read from the classifier's configuration.

    The expression is the algebraic inverse of ``q + beta * (1 - q)``.
    NOTE(review): presumably that is the update performed when quality
    is increased - confirm against `increase_quality`.
    """
    beta = self.cfg.beta
    self.q = (self.q - beta) / (1.0 - beta)
273+
219274
def predicts_successfully(self,
220275
p0: Perception,
221276
action: int,
@@ -270,14 +325,18 @@ def does_anticipate_correctly(self,
270325
False otherwise
271326
"""
272327
def effect_item_is_correct(effect_item, p0_item, p1_item):
273-
if effect_item == self.cfg.classifier_wildcard:
274-
if p0_item != p1_item:
275-
return False
328+
if not isinstance(effect_item, ProbabilityEnhancedAttribute):
329+
if effect_item == self.cfg.classifier_wildcard:
330+
if p0_item != p1_item:
331+
return False
332+
else:
333+
if p0_item == p1_item:
334+
return False
335+
336+
if effect_item != p1_item:
337+
return False
276338
else:
277-
if p0_item == p1_item:
278-
return False
279-
280-
if effect_item != p1_item:
339+
if not effect_item.does_contain(p1_item):
281340
return False
282341

283342
# All checks passed

lcs/agents/acs2/ClassifiersList.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
import random
4+
import logging
45
from itertools import chain
56
from typing import Optional, List
67
import lcs.agents.acs2.components.alp as alp_acs2
@@ -66,6 +67,41 @@ def get_maximum_fitness(self) -> float:
6667

6768
return 0.0
6869

70+
@staticmethod
71+
def apply_enhanced_effect_part_check(action_set: ClassifiersList,
72+
new_list: ClassifiersList,
73+
previous_situation: Perception,
74+
time: int,
75+
cfg: Configuration):
76+
# Create a list of candidates.
77+
# Every enhanceable classifier is a candidate.
78+
candidates = [classifier for classifier in action_set
79+
if classifier.is_enhanceable()]
80+
81+
logging.debug(
82+
"Applying enhanced effect part; number of candidates={}; " +
83+
"previous situation: {}".format(
84+
len(candidates), previous_situation))
85+
86+
# If there are less than 2 candidates, don't do it
87+
if len(candidates) < 2:
88+
return
89+
90+
for candidate in candidates:
91+
candidates2 = [classifier for classifier in candidates
92+
if candidate != classifier]
93+
if len(candidates2) > 0:
94+
merger = random.choice(candidates2)
95+
new_classifier = candidate.merge_with(merger,
96+
previous_situation,
97+
time)
98+
if new_classifier is not None:
99+
candidate.reverse_increase_quality()
100+
alp.add_classifier(new_classifier, action_set, new_list,
101+
cfg.theta_exp)
102+
103+
return new_list
104+
69105
@staticmethod
70106
def apply_alp(population: ClassifiersList,
71107
match_set: ClassifiersList,
@@ -125,6 +161,13 @@ def apply_alp(population: ClassifiersList,
125161
new_cl.tga = time
126162
alp.add_classifier(new_cl, action_set, new_list, theta_exp)
127163

164+
if cfg.do_pee:
165+
ClassifiersList.apply_enhanced_effect_part_check(action_set,
166+
new_list,
167+
p0,
168+
time,
169+
cfg)
170+
128171
# No classifier anticipated correctly - generate new one
129172
if not was_expected_case:
130173
new_cl = alp_acs2.cover(p0, action, p1, time, cfg)

lcs/agents/acs2/Configuration.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ def __init__(self,
1212
user_metrics_collector_fcn: Callable = None,
1313
fitness_fcn=None,
1414
metrics_trial_frequency: int = 5,
15+
do_pee: bool = False,
1516
do_ga: bool = False,
1617
do_subsumption: bool = True,
1718
do_action_planning: bool = False,
@@ -38,6 +39,12 @@ def __init__(self,
3839
:param environment_adapter: EnvironmentAdapter class ACS2 needs to use
3940
to interact with the environment
4041
:param fitness_fcn: Custom fitness function
42+
:param do_pee: switch *Probability-Enhanced Effects*.
43+
This is the mechanism described and implemented in C++
44+
in Martin V. Butz, David E. Goldberg, Wolfgang Stolzmann,
45+
"Probability-Enhanced Predictions in the Anticipatory Classifier
46+
System", University of Illinois at Urbana-Champaign:
47+
Illinois Genetic Algorithms Laboratory, Urbana, 2000.
4148
:param do_ga: switch *Genetic Generalization* module
4249
:param do_subsumption:
4350
:param do_action_planning: switch Action Planning phase
@@ -70,6 +77,7 @@ def __init__(self,
7077
self.environment_adapter = environment_adapter
7178
self.metrics_trial_frequency = metrics_trial_frequency
7279
self.user_metrics_collector_fcn = user_metrics_collector_fcn
80+
self.do_pee = do_pee
7381
self.fitness_fcn = fitness_fcn
7482
self.do_ga = do_ga
7583
self.do_subsumption = do_subsumption

0 commit comments

Comments
 (0)