[API] Requesting a formal ask() and tell() interface #31

Open
eddiebergman opened this issue Nov 13, 2023 · 0 comments

Currently I have a version that basically hacks into the internals of metahyper to get an ask() and tell() interface to everything NePS has to offer in terms of optimizers. This implementation relieves NePS of actually having to evaluate anything; I just want the suggestions from the optimizers.

Updating to 0.10.0 gives a new warning:

WARNING:amltk.optimization.optimizers.neps:There are 1 configs that were sampled, but have no worker assigned. Sometimes this is due to a delay in the filesystem communication, but most likely some configs crashed during their execution or a jobtime-limit was reached.

I can't really complain, as NePS doesn't expose this. I'd like to keep NePS as an optional dependency for AMLTK, but I would need a stable API to build on.
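For illustration, something along these lines would cover my use case. This is only a sketch of the shape I'm after; AskAndTell and the exact signatures are placeholders for whatever NePS decides to expose, not a proposal for a concrete API:


from typing import Any, Protocol


class AskAndTell(Protocol):
    """Placeholder protocol for the formal interface I'm requesting."""

    def ask(self) -> dict[str, Any]:
        """Sample a new config from the optimizer, without evaluating it."""
        ...

    def tell(self, config: dict[str, Any], result: dict[str, Any] | Exception) -> None:
        """Report the result (or failure) of an evaluated config back to the optimizer."""
        ...


And here is the version I currently have, which reaches into metahyper's internals to get that behaviour: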


class NEPSOptimizer(Optimizer[NEPSTrialInfo]):
    """An optimizer that uses SMAC to optimize a config space."""

    def __init__(
        self,
        *,
        space: SearchSpace,
        optimizer: BaseOptimizer,
        working_dir: Path,
        bucket: Bucket | None = None,
        ignore_errors: bool = True,
        loss_value_on_error: float | None = None,
        cost_value_on_error: float | None = None,
    ) -> None:
        """Initialize the optimizer.

        Args:
            space: The space to use.
            optimizer: The optimizer to use.
            working_dir: The directory to use for the optimization.
            bucket: The bucket to give to trials generated from this optimizer.
            ignore_errors: Whether the optimizers should ignore errors from trials.
            loss_value_on_error: The value to use for the loss if the trial fails.
            cost_value_on_error: The value to use for the cost if the trial fails.
        """
        super().__init__(bucket=bucket)
        self.space = space
        self.optimizer = optimizer
        self.working_dir = working_dir
        self.ignore_errors = ignore_errors
        self.loss_value_on_error = loss_value_on_error
        self.cost_value_on_error = cost_value_on_error

        self.optimizer_state_file = self.working_dir / "optimizer_state.yaml"
        self.base_result_directory = self.working_dir / "results"
        self.serializer = metahyper.utils.YamlSerializer(self.optimizer.load_config)

        self.working_dir.mkdir(parents=True, exist_ok=True)
        self.base_result_directory.mkdir(parents=True, exist_ok=True)

    @classmethod
    def create(  # noqa: PLR0913
        cls,
        *,
        space: (
            SearchSpace
            | ConfigurationSpace
            | Mapping[str, ConfigurationSpace | Parameter]
        ),
        bucket: Bucket | None = None,
        searcher: str | BaseOptimizer = "default",
        working_dir: str | Path = "neps",
        overwrite: bool = True,
        loss_value_on_error: float | None = None,
        cost_value_on_error: float | None = None,
        max_cost_total: float | None = None,
        ignore_errors: bool = True,
        searcher_kwargs: Mapping[str, Any] | None = None,
    ) -> Self:
        """Create a new NEPS optimizer.

        Args:
            space: The space to use.
            bucket: The bucket to give to trials generated by this optimizer.
            searcher: The searcher to use.
            working_dir: The directory to use for the optimization.
            overwrite: Whether to overwrite the working directory if it exists.
            loss_value_on_error: The value to use for the loss if the trial fails.
            cost_value_on_error: The value to use for the cost if the trial fails.
            max_cost_total: The maximum cost to use for the optimization.

                !!! warning

                    This only affects the optimization if the searcher utilizes the
                    budget for its actual suggestion of the next config. If the
                    searcher does not use the budget, this parameter has no effect.

                    The user is still expected to stop `ask()`'ing for configs when
                    they have reached some budget.

            ignore_errors: Whether the optimizers should ignore errors from trials
                or whether they should be taken into account. Please set
                `loss_value_on_error` and/or `cost_value_on_error` if you set this
                to `False`.
            searcher_kwargs: Additional kwargs to pass to the searcher.
        """
        space = _to_neps_space(space)
        searcher = _to_neps_searcher(
            space=space,
            searcher=searcher,
            loss_value_on_error=loss_value_on_error,
            cost_value_on_error=cost_value_on_error,
            max_cost_total=max_cost_total,
            ignore_errors=ignore_errors,
            searcher_kwargs=searcher_kwargs,
        )
        working_dir = Path(working_dir)
        if working_dir.exists() and overwrite:
            logger.info(f"Removing existing working directory {working_dir}")
            shutil.rmtree(working_dir)

        return cls(
            space=space,
            bucket=bucket,
            optimizer=searcher,
            working_dir=working_dir,
            loss_value_on_error=loss_value_on_error,
            cost_value_on_error=cost_value_on_error,
        )

    @override
    def ask(self) -> Trial[NEPSTrialInfo]:
        """Ask the optimizer for a new config.

        Returns:
            The trial info for the new config.
        """
        with self.optimizer.using_state(self.optimizer_state_file, self.serializer):
            (
                config_id,
                config,
                pipeline_directory,
                previous_pipeline_directory,
            ) = metahyper.api._sample_config(  # type: ignore
                optimization_dir=self.working_dir,
                sampler=self.optimizer,
                serializer=self.serializer,
                logger=logger,
            )

        if isinstance(config, SearchSpace):
            _config = config.hp_values()
        else:
            _config = {
                k: v.value if isinstance(v, Parameter) else v for k, v in config.items()
            }

        info = NEPSTrialInfo(
            name=str(config_id),
            config=deepcopy(_config),
            pipeline_directory=pipeline_directory,
            previous_pipeline_directory=previous_pipeline_directory,
        )
        trial = Trial(
            name=info.name,
            config=info.config,
            info=info,
            seed=None,
            bucket=self.bucket,
        )
        logger.debug(f"Asked for trial {trial.name}")
        return trial

    @override
    def tell(self, report: Trial.Report[NEPSTrialInfo]) -> None:
        """Tell the optimizer the result of the sampled config.

        Args:
            report: The report of the trial.
        """
        logger.debug(f"Telling report for trial {report.trial.name}")
        info = report.info
        assert info is not None

        # This is how NEPS handles errors
        result: Literal["error"] | dict[str, Any]
        if report.status in (Trial.Status.CRASHED, Trial.Status.FAIL):
            result = "error"
        else:
            result = report.results

        metadata: dict[str, Any] = {"time_end": report.time.end}
        if result == "error":
            if not self.ignore_errors:
                if self.loss_value_on_error is not None:
                    report.results["loss"] = self.loss_value_on_error
                if self.cost_value_on_error is not None:
                    report.results["cost"] = self.cost_value_on_error
        else:
            if (loss := result.get("loss")) is not None:
                report.results["loss"] = float(loss)
            else:
                raise ValueError(
                    "The 'loss' should be provided if the trial is successful"
                    f"\n{result=}",
                )

            if (cost := result.get("cost")) is not None:
                cost = float(cost)
                result["cost"] = cost
                account_for_cost = result.get("account_for_cost", True)

                if account_for_cost:
                    with self.optimizer.using_state(
                        self.optimizer_state_file,
                        self.serializer,
                    ):
                        self.optimizer.used_budget += cost

                metadata["budget"] = {
                    "max": self.optimizer.budget,
                    "used": self.optimizer.used_budget,
                    "eval_cost": cost,
                    "account_for_cost": account_for_cost,
                }
            elif self.optimizer.budget is not None:
                raise ValueError(
                    "'cost' should be provided when the optimizer has a budget"
                    f"\n{result=}",
                )

        # Dump results
        self.serializer.dump(result, info.pipeline_directory / "result")

        # Load and dump metadata
        config_metadata = self.serializer.load(info.pipeline_directory / "metadata")
        config_metadata.update(metadata)
        self.serializer.dump(config_metadata, info.pipeline_directory / "metadata")

    @override
    @classmethod
    def preferred_parser(cls) -> NEPSPreferredParser:
        """The preferred parser for this optimizer."""
        # TODO: We might want a custom one for neps.SearchSpace; for now we
        # use ConfigSpace but without conditionals, as NePS doesn't support them.
        return partial(configspace_parser, conditionals=False)
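

With a formal interface in place, the driving loop on AMLTK's side reduces to roughly the following. Again, just a sketch: space is assumed to be defined already, and evaluate() stands in for whatever actually runs the trial and produces a Trial.Report:


optimizer = NEPSOptimizer.create(space=space, working_dir="neps")
for _ in range(50):
    trial = optimizer.ask()
    report = evaluate(trial)  # hypothetical: runs the trial, returns a Trial.Report
    optimizer.tell(report)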