|
7 | 7 |
|
8 | 8 |
|
class AcquisitionFunction(object):
    """Abstract base class for acquisition functions."""

    def set_boundaries(self, boundaries):
        """Inform the acquisition function about the search-space bounds.

        The base implementation is a no-op; subclasses that need to
        precompute anything from the bounds override this hook.

        Parameters
        ----------
        boundaries: ndarray-like, shape=(n_params_dims, 2)
            Box constraint on search space. boundaries[:, 0] defines the
            lower bounds on the dimensions, boundaries[:, 1] defines the
            upper bounds.
        """
        pass
|
13 | 23 |
|
14 | 24 |
|
@@ -299,13 +309,175 @@ def set_boundaries(self, boundaries, X_candidate=None):
|
299 | 309 | self.base_entropy = entropy(p_max)
|
300 | 310 |
|
301 | 311 |
|
class ContextualEntropySearch(AcquisitionFunction):
    """Entropy search averaged over an ensemble of fixed-context searches.

    Maintains one EntropySearch instance per sampled context (with the
    context dimensions of the search space collapsed onto that context) and
    evaluates a query point as the mean value over the EntropySearch
    instances of the contexts nearest to the query's context.

    Parameters
    ----------
    model : object
        Surrogate model handed to the per-context EntropySearch instances.
    n_context_dims : int
        Number of leading dimensions of a sample that encode the context.
    n_context_samples : int, default: 20
        The number of contexts sampled from context space on which sample
        policies are evaluated.
    n_candidates : int, default: 20
        Number of candidate points per EntropySearch instance.
    n_gp_samples : int, default: 500
        Number of GP posterior samples per EntropySearch instance.
    n_samples_y : int, default: 10
        Number of outcome samples per candidate in EntropySearch.
    n_trial_points : int, default: 100
        Number of trial points per EntropySearch instance.
    """
    def __init__(self, model, n_context_dims, n_context_samples,
                 n_candidates=20, n_gp_samples=500,
                 n_samples_y=10, n_trial_points=100):
        self.model = model
        self.n_context_dims = n_context_dims
        self.n_context_samples = n_context_samples

        self.n_candidates = n_candidates
        self.n_gp_samples = n_gp_samples
        self.n_samples_y = n_samples_y
        self.n_trial_points = n_trial_points

    def __call__(self, x, incumbent=0, *args, **kwargs):
        """Mean entropy reduction of x over the nearest sampled contexts.

        Parameters
        ----------
        x : array-like
            Query point; the first n_context_dims entries are the context.
        incumbent : unused
            Accepted for interface compatibility with other acquisition
            functions.
        """
        # kneighbors expects a 2d array of query points (one row per
        # query); passing the raw 1d slice is rejected by modern sklearn.
        context = np.atleast_2d(x[:self.n_context_dims])
        ind = list(self.nbrs.kneighbors(context, return_distance=False)[0])
        mean_entropy_reduction = \
            np.mean([self.entropy_search_ensemble[i](x) for i in ind])
        return mean_entropy_reduction

    def set_boundaries(self, boundaries):
        """Sample contexts and build one EntropySearch per context.

        Parameters
        ----------
        boundaries : ndarray, shape=(n_dims, 2)
            Box constraint on the search space; the first n_context_dims
            rows bound the context dimensions.
        """
        self._sample_contexts(boundaries[:self.n_context_dims])

        self.entropy_search_ensemble = []
        for i in range(self.n_context_samples):
            # Collapse the context dimensions onto the sampled context
            # (lower bound == upper bound == context value).
            boundaries_i = np.copy(boundaries)
            boundaries_i[:self.n_context_dims] = \
                self.context_samples[i][:, np.newaxis]
            entropy_search_fixed_context = \
                EntropySearch(self.model, self.n_candidates, self.n_gp_samples,
                              self.n_samples_y, self.n_trial_points)
            entropy_search_fixed_context.set_boundaries(boundaries_i)

            self.entropy_search_ensemble.append(entropy_search_fixed_context)

    def _sample_contexts(self, context_boundaries):
        """Sample representative contexts and index them for NN lookup.

        Oversamples the context space uniformly (25x), condenses the
        samples to n_context_samples k-means cluster centers, and fits a
        nearest-neighbors index over the centers for use in __call__.
        """
        self.context_samples = \
            np.random.uniform(context_boundaries[:, 0],
                              context_boundaries[:, 1],
                              (self.n_context_samples*25, self.n_context_dims))
        # Local imports keep sklearn an optional dependency of the module.
        from sklearn.cluster import KMeans
        from sklearn.neighbors import NearestNeighbors
        kmeans = KMeans(n_clusters=self.n_context_samples).fit(
            self.context_samples)
        self.context_samples = kmeans.cluster_centers_
        self.nbrs = NearestNeighbors(n_neighbors=10, algorithm='ball_tree')
        self.nbrs.fit(self.context_samples)
| 363 | + |
| 364 | + |
class ACEPS(ContextualEntropySearch):
    """Contextual entropy search with policy-generated candidate points.

    Variant of ContextualEntropySearch in which the candidate points of
    each per-context EntropySearch are not chosen freely but selected by
    upper-level policies trained on outcomes sampled from the model's
    posterior.

    Parameters
    ----------
    model : object
        Surrogate model; model.gp must expose X_train_, y_train_ and
        sample_y.
    policy : object
        Upper-level policy mapping a context to parameters.
    n_context_dims : int
        Number of leading dimensions of a sample that encode the context.
    n_context_samples : int
        Number of contexts sampled from the context space.
    n_policy_samples : int, default: 20
        Number of sampled policies; also the number of candidate points
        per context (passed to the base class as n_candidates).
    n_gp_samples : int, default: 1000
        Number of GP samples used by each EntropySearch.
    n_samples_y : int, default: 10
        Number of outcome samples per candidate in EntropySearch.
    policy_training : str, default: "model-free"
        Either "model-free" or "model-based"; selects how the baseline
        policy is retrained in _sample_policy_parameters.
    """
    def __init__(self, model, policy, n_context_dims,
                 n_context_samples, n_policy_samples=20, n_gp_samples=1000,
                 n_samples_y=10, policy_training="model-free"):
        super(ACEPS, self).__init__(model, n_context_dims, n_context_samples,
                                    n_policy_samples, n_gp_samples,
                                    n_samples_y)
        self.policy = policy
        self.policy_training = policy_training

    def set_boundaries(self, boundaries):
        """Build per-context EntropySearch with policy-selected candidates.

        Parameters
        ----------
        boundaries : ndarray, shape=(n_dims, 2)
            Box constraint on the search space; the first n_context_dims
            rows bound the context dimensions, the remaining rows the
            parameter dimensions.
        """
        self._sample_contexts(boundaries[:self.n_context_dims])

        selected_params = \
            self._sample_policy_parameters(boundaries[:self.n_context_dims],
                                           boundaries[self.n_context_dims:])

        self.entropy_search_ensemble = []
        for i in range(self.n_context_samples):
            boundaries_i = np.copy(boundaries)
            boundaries_i[:self.n_context_dims] = \
                self.context_samples[i][:, np.newaxis]
            entropy_search_fixed_context = \
                EntropySearch(self.model, self.n_candidates, self.n_gp_samples,
                              self.n_samples_y, self.n_trial_points)

            # Candidates share the sampled context; their parameter part
            # comes from the sampled policies.  The column count is the
            # full search-space dimensionality (was hard-coded to 4).
            X_candidate = np.empty((self.n_candidates, boundaries.shape[0]))
            X_candidate[:, :self.n_context_dims] = self.context_samples[i]
            X_candidate[:, self.n_context_dims:] = selected_params[:, i]
            entropy_search_fixed_context.set_boundaries(boundaries_i,
                                                        X_candidate)

            self.entropy_search_ensemble.append(entropy_search_fixed_context)

    def _sample_policy_parameters(self, context_boundaries,
                                  parameter_boundaries):
        """Sample close-to-optimal policies and let them select parameters.

        We determine a set of policies which is close-to-optimal according
        to samples drawn from the model's posterior and let these policies
        determine parameters for the sampled contexts.

        Parameters
        ----------
        context_boundaries : ndarray, shape=(n_context_dims, 2)
            Bounds of the context dimensions.
        parameter_boundaries : ndarray, shape=(n_param_dims, 2)
            Bounds of the parameter dimensions.

        Returns
        -------
        selected_params : ndarray, \
                shape=(n_candidates, n_context_samples, n_param_dims)
            Parameters chosen by each sampled policy for each context,
            clipped to parameter_boundaries.
        """
        from bolero_bayes_opt.representation.ul_policies \
            import model_free_policy_training, \
            model_based_policy_training_pretrained
        # Compute policy which is close to optimal according to current
        # model.
        contexts = self.model.gp.X_train_[:, :self.n_context_dims]
        parameters = self.model.gp.X_train_[:, self.n_context_dims:]
        returns = self.model.gp.y_train_
        if self.policy_training == "model-free":
            self.policy = model_free_policy_training(
                self.policy, contexts, parameters, returns,
                epsilon=1.0, min_eta=1e-6)
        elif self.policy_training == "model-based":
            # Initialize the policy at the center of the parameter space
            # before refining it on the model.  (Was self.parameter_boundaries,
            # an attribute that is never assigned -> AttributeError.)
            self.policy.fit(contexts,
                            [parameter_boundaries.mean(1)]*contexts.shape[0],
                            weights=np.ones(contexts.shape[0]))
            self.policy = model_based_policy_training_pretrained(
                policy=self.policy, model=self.model.gp,
                contexts=contexts, boundaries=parameter_boundaries)
        else:
            raise NotImplementedError()

        # Draw context samples, let policy select parameters for these
        # contexts (with exploration), and sample multiple possible
        # outcomes for these (context, parameter) samples from the GP
        # model.  Retry on numerical failure of the posterior sampling.
        n_samples = self.n_context_samples * 5
        while True:  # XXX
            X_sample = np.empty((n_samples,
                                 self.n_context_dims
                                 + parameter_boundaries.shape[0]))
            X_sample[:, :self.n_context_dims] = \
                np.random.uniform(context_boundaries[:, 0],
                                  context_boundaries[:, 1],
                                  (n_samples, self.n_context_dims))
            X_sample[:, self.n_context_dims:] = \
                [self.policy(X_sample[i, :self.n_context_dims])
                 for i in range(n_samples)]
            try:
                y_sample = self.model.gp.sample_y(X_sample, self.n_candidates)
                break
            except np.linalg.LinAlgError:
                continue

        # Train for each possible outcome one policy and evaluate this
        # policy on the context samples.
        selected_params = []  # XXX: Vectorize
        for i in range(y_sample.shape[1]):
            policy_sample = model_free_policy_training(
                self.policy, X_sample[:, :self.n_context_dims],
                X_sample[:, self.n_context_dims:],
                y_sample[:, i])

            params = [policy_sample(np.atleast_1d(self.context_samples[j]),
                                    explore=False).ravel()
                      for j in range(self.context_samples.shape[0])]
            selected_params.append(params)

        # Enforce lower and upper bound on possible parameters; the bounds
        # broadcast over the trailing (parameter-dimension) axis.
        selected_params = np.clip(np.array(selected_params),
                                  parameter_boundaries[:, 0],
                                  parameter_boundaries[:, 1])

        return selected_params
| 470 | + |
| 471 | + |
# Registry of the available acquisition functions, keyed by the name
# accepted by create_acquisition_function().
ACQUISITION_FUNCTIONS = {
    "PI": ProbabilityOfImprovement,
    "EI": ExpectedImprovement,
    "UCB": UpperConfidenceBound,
    "GREEDY": Greedy,
    "RANDOM": Random,
    "EntropySearch": EntropySearch,
    "ContextualEntropySearch": ContextualEntropySearch,
    "ACEPS": ACEPS}
309 | 481 |
|
310 | 482 |
|
311 | 483 | def create_acquisition_function(name, model, **kwargs):
|
|
0 commit comments