[API] Requesting a formal ask() and tell() interface #31

Open
eddiebergman opened this issue Nov 13, 2023 · 0 comments

Currently I have a version that basically hacks into the internals of metahyper to get an ask() and tell() interface to everything NePS has to offer in terms of optimizers. This implementation relieves NePS of actually having to evaluate anything; I just want the suggestions from the optimizers.

Updating to 0.10.0 gives a new warning:

WARNING:amltk.optimization.optimizers.neps:There are 1 configs that were sampled, but have no worker assigned. Sometimes this is due to a delay in the filesystem communication, but most likely some configs crashed during their execution or a jobtime-limit was reached.

I can't really complain, as NePS doesn't expose this. I'd like to keep NePS as an optional dependency for AMLTK, but I would need a stable API to build on.
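For illustration, something along these lines would cover my use case. This is only a sketch of the shape I'm after; AskAndTell and the exact signatures are placeholders for whatever NePS decides to expose, not a proposal for a concrete API:


from typing import Any, Protocol


class AskAndTell(Protocol):
    """Placeholder protocol for the formal interface I'm requesting."""

    def ask(self) -> dict[str, Any]:
        """Sample a new config from the optimizer, without evaluating it."""
        ...

    def tell(self, config: dict[str, Any], result: dict[str, Any] | Exception) -> None:
        """Report the result (or failure) of an evaluated config back to the optimizer."""
        ...


And here is the version I currently have, which reaches into metahyper's internals to get that behaviour: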


class NEPSOptimizer(Optimizer[NEPSTrialInfo]):
    """An optimizer that uses SMAC to optimize a config space."""

    def __init__(
        self,
        *,
        space: SearchSpace,
        optimizer: BaseOptimizer,
        working_dir: Path,
        bucket: Bucket | None = None,
        ignore_errors: bool = True,
        loss_value_on_error: float | None = None,
        cost_value_on_error: float | None = None,
    ) -> None:
        """Initialize the optimizer.

        Args:
            space: The space to use.
            optimizer: The optimizer to use.
            working_dir: The directory to use for the optimization.
            bucket: The bucket to give to trials generated from this optimizer.
            ignore_errors: Whether the optimizers should ignore errors from trials.
            loss_value_on_error: The value to use for the loss if the trial fails.
            cost_value_on_error: The value to use for the cost if the trial fails.
        """
        super().__init__(bucket=bucket)
        self.space = space
        self.optimizer = optimizer
        self.working_dir = working_dir
        self.ignore_errors = ignore_errors
        self.loss_value_on_error = loss_value_on_error
        self.cost_value_on_error = cost_value_on_error

        self.optimizer_state_file = self.working_dir / "optimizer_state.yaml"
        self.base_result_directory = self.working_dir / "results"
        self.serializer = metahyper.utils.YamlSerializer(self.optimizer.load_config)

        self.working_dir.mkdir(parents=True, exist_ok=True)
        self.base_result_directory.mkdir(parents=True, exist_ok=True)

    @classmethod
    def create(  # noqa: PLR0913
        cls,
        *,
        space: (
            SearchSpace
            | ConfigurationSpace
            | Mapping[str, ConfigurationSpace | Parameter]
        ),
        bucket: Bucket | None = None,
        searcher: str | BaseOptimizer = "default",
        working_dir: str | Path = "neps",
        overwrite: bool = True,
        loss_value_on_error: float | None = None,
        cost_value_on_error: float | None = None,
        max_cost_total: float | None = None,
        ignore_errors: bool = True,
        searcher_kwargs: Mapping[str, Any] | None = None,
    ) -> Self:
        """Create a new NEPS optimizer.

        Args:
            space: The space to use.
            bucket: The bucket to give to trials generated by this optimizer.
            searcher: The searcher to use.
            working_dir: The directory to use for the optimization.
            overwrite: Whether to overwrite the working directory if it exists.
            loss_value_on_error: The value to use for the loss if the trial fails.
            cost_value_on_error: The value to use for the cost if the trial fails.
            max_cost_total: The maximum cost to use for the optimization.

                !!! warning

                    This only affects the optimization if the searcher utilizes the
                    budget for its actual suggestion of the next config. If the
                    searcher does not use the budget, this parameter has no effect.

                    The user is still expected to stop `ask()`'ing for configs when
                    they have reached some budget.

            ignore_errors: Whether the optimizers should ignore errors from trials
                or whether they should be taken into account. Please set
                `loss_value_on_error` and/or `cost_value_on_error` if you set this
                to `False`.
            searcher_kwargs: Additional kwargs to pass to the searcher.
        """
        space = _to_neps_space(space)
        searcher = _to_neps_searcher(
            space=space,
            searcher=searcher,
            loss_value_on_error=loss_value_on_error,
            cost_value_on_error=cost_value_on_error,
            max_cost_total=max_cost_total,
            ignore_errors=ignore_errors,
            searcher_kwargs=searcher_kwargs,
        )
        working_dir = Path(working_dir)
        if working_dir.exists() and overwrite:
            logger.info(f"Removing existing working directory {working_dir}")
            shutil.rmtree(working_dir)

        return cls(
            space=space,
            bucket=bucket,
            optimizer=searcher,
            working_dir=working_dir,
            loss_value_on_error=loss_value_on_error,
            cost_value_on_error=cost_value_on_error,
        )

    @override
    def ask(self) -> Trial[NEPSTrialInfo]:
        """Ask the optimizer for a new config.

        Returns:
            The trial info for the new config.
        """
        with self.optimizer.using_state(self.optimizer_state_file, self.serializer):
            (
                config_id,
                config,
                pipeline_directory,
                previous_pipeline_directory,
            ) = metahyper.api._sample_config(  # type: ignore
                optimization_dir=self.working_dir,
                sampler=self.optimizer,
                serializer=self.serializer,
                logger=logger,
            )

        if isinstance(config, SearchSpace):
            _config = config.hp_values()
        else:
            _config = {
                k: v.value if isinstance(v, Parameter) else v for k, v in config.items()
            }

        info = NEPSTrialInfo(
            name=str(config_id),
            config=deepcopy(_config),
            pipeline_directory=pipeline_directory,
            previous_pipeline_directory=previous_pipeline_directory,
        )
        trial = Trial(
            name=info.name,
            config=info.config,
            info=info,
            seed=None,
            bucket=self.bucket,
        )
        logger.debug(f"Asked for trial {trial.name}")
        return trial

    @override
    def tell(self, report: Trial.Report[NEPSTrialInfo]) -> None:
        """Tell the optimizer the result of the sampled config.

        Args:
            report: The report of the trial.
        """
        logger.debug(f"Telling report for trial {report.trial.name}")
        info = report.info
        assert info is not None

        # This is how NEPS handles errors
        result: Literal["error"] | dict[str, Any]
        if report.status in (Trial.Status.CRASHED, Trial.Status.FAIL):
            result = "error"
        else:
            result = report.results

        metadata: dict[str, Any] = {"time_end": report.time.end}
        if result == "error":
            if not self.ignore_errors:
                if self.loss_value_on_error is not None:
                    report.results["loss"] = self.loss_value_on_error
                if self.cost_value_on_error is not None:
                    report.results["cost"] = self.cost_value_on_error
        else:
            if (loss := result.get("loss")) is not None:
                report.results["loss"] = float(loss)
            else:
                raise ValueError(
                    "The 'loss' should be provided if the trial is successful"
                    f"\n{result=}",
                )

            if (cost := result.get("cost")) is not None:
                cost = float(cost)
                result["cost"] = cost
                account_for_cost = result.get("account_for_cost", True)

                if account_for_cost:
                    with self.optimizer.using_state(
                        self.optimizer_state_file,
                        self.serializer,
                    ):
                        self.optimizer.used_budget += cost

                metadata["budget"] = {
                    "max": self.optimizer.budget,
                    "used": self.optimizer.used_budget,
                    "eval_cost": cost,
                    "account_for_cost": account_for_cost,
                }
            elif self.optimizer.budget is not None:
                raise ValueError(
                    "'cost' should be provided when the optimizer has a budget"
                    f"\n{result=}",
                )

        # Dump results
        self.serializer.dump(result, info.pipeline_directory / "result")

        # Load and dump metadata
        config_metadata = self.serializer.load(info.pipeline_directory / "metadata")
        config_metadata.update(metadata)
        self.serializer.dump(config_metadata, info.pipeline_directory / "metadata")

    @override
    @classmethod
    def preferred_parser(cls) -> NEPSPreferredParser:
        """The preferred parser for this optimizer."""
        # TODO: We might want a custom one for neps.SearchSpace; for now we
        # use ConfigSpace but without conditionals, as NePS doesn't support them.
        return partial(configspace_parser, conditionals=False)
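

With a formal interface in place, the driving loop on AMLTK's side reduces to roughly the following. Again, just a sketch: space is assumed to be defined already, and evaluate() stands in for whatever actually runs the trial and produces a Trial.Report:


optimizer = NEPSOptimizer.create(space=space, working_dir="neps")
for _ in range(50):
    trial = optimizer.ask()
    report = evaluate(trial)  # hypothetical: runs the trial, returns a Trial.Report
    optimizer.tell(report)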