diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index f4d86e7..f0f7778 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -45,7 +45,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.10"] + python-version: ["3.8", "3.9", "3.10", "3.11"] os: ["ubuntu-latest", "macos-latest", "windows-latest"] steps: @@ -73,6 +73,15 @@ jobs: run: | pytest ${{ env.pytest-args }} ${{ env.test-dir }} + - name: Coveralls GitHub Action + uses: coverallsapp/github-action@v2 + if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.10' + + - name: Remove Coverage file + uses: JesseTG/rm@v1.0.3 + with: + path: coverage.lcov + - name: Check for files left behind by test run: | before="${{ steps.status-before.outputs.BEFORE }}" @@ -82,79 +91,4 @@ jobs: echo "git status from after: $after" echo "Not all generated files have been deleted!" exit 1 - fi - -# Testing with conda - conda-tests: - name: conda-${{ matrix.python-version }}-${{ matrix.os }} - runs-on: ${{ matrix.os }} - defaults: - run: - shell: bash -l {0} # Default to using bash on all and load (-l) .bashrc which miniconda uses - - strategy: - fail-fast: false - matrix: - python-version: ["3.8", "3.9", "3.10"] - os: ["ubuntu-latest", "macos-latest", "windows-latest"] - - steps: - - name: Checkout - uses: actions/checkout@v2 - - - name: Conda install - uses: conda-incubator/setup-miniconda@v2 - with: - auto-update-conda: true - python-version: ${{ matrix.python-version }} - - - name: Install ${{ env.package-name }} - run: | - python -V - python -m pip install --upgrade pip - python -m pip install wheel - python -m pip install -e ".${{ env.extra-requires }}" - - - name: Tests - run: | - pytest ${{ env.pytest-args }} ${{ env.test-dir }} - - # Testing a dist install - dist-test: - name: dist-${{ matrix.python-version }}-${{ matrix.os }} - - runs-on: ${{ matrix.os }} - defaults: - run: - shell: bash - - strategy: - fail-fast: false - matrix: - python-version: ["3.8", "3.9", "3.10"] - os: ["ubuntu-latest", "macos-latest", "windows-latest"] - - steps: - - name: Checkout - uses: actions/checkout@v2 - - - name: Setup Python - uses: actions/setup-python@v2 - with: - python-version: ${{ matrix.python-version }} - - - name: Create sdist - id: sdist - run: | - python -m pip install --upgrade pip - python setup.py sdist - echo "${{env.package-name}}" - echo "sdist_name=$(ls -t dist/${{ env.package-name }}-*.tar.gz | head -n 1)" >> $GITHUB_ENV - - - name: Install ${{ env.package-name }} - run: | - python -m pip install ${{ env.sdist_name }}${{ env.extra-requires }} - - - name: Tests - run: | - pytest ${{ env.pytest-args }} ${{ env.test-dir }} + fi \ No newline at end of file diff --git a/README.md b/README.md index 6fd44a5..25b0391 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,9 @@ # DEHB: Evolutionary Hyperband for Scalable, Robust and Efficient Hyperparameter Optimization - +[![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) +[![Tests](https://github.com/automl/DEHB/actions/workflows/pytest.yml/badge.svg)](https://github.com/automl/DEHB/actions/workflows/pytest.yml) +[![Coverage Status](https://coveralls.io/repos/github/automl/DEHB/badge.svg)](https://coveralls.io/github/automl/DEHB) +[![PyPI](https://img.shields.io/pypi/v/dehb)](https://pypi.org/project/dehb/) +[![Static Badge](https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11%20-blue)](https://pypi.org/project/dehb/) ### Installation ```bash # from 
pypi
@@ -16,6 +20,7 @@ pip install -e DEHB # -e stands for editable, lets you modify the code and reru
 * [01 - Using DEHB to optimize 4 hyperparameters of a Scikit-learn's Random Forest on a classification dataset](examples/01_Optimizing_RandomForest_using_DEHB.ipynb)
 * [02 - Optimizing Scikit-learn's Random Forest without using ConfigSpace to represent the hyperparameter space](examples/02_using%20DEHB_without_ConfigSpace.ipynb)
 * [03 - Hyperparameter Optimization for MNIST in PyTorch](examples/03_pytorch_mnist_hpo.py)
+* [04 - A generic template for multi-objective Hyperparameter Optimization using MODEHB](examples/04_mo_pytorch_mnist_hpo.py)

 To run PyTorch example: (*note additional requirements*)
 ```bash
@@ -81,7 +86,8 @@ python examples/03_pytorch_mnist_hpo.py --min_budget 1 --max_budget 3 \
   --verbose --runtime 60 --scheduler_file dask_dump/scheduler.json
 ```

-
+### Running DEHB to optimize multiple objectives
+To run multi-objective optimization, one extra parameter is required: `mo_strategy`. We provide MO optimization using non-dominated sorting (NDS) with crowding distance (`NSGA-II`) and NDS with eps-net (`EPSNET`). See [examples/04_mo_pytorch_mnist_hpo.py](examples/04_mo_pytorch_mnist_hpo.py) to get started.

 ### DEHB Hyperparameters

@@ -122,3 +128,10 @@ represents the *mutation* strategy while `bin` represents the *binomial crossover*
   editor = {Z. Zhou},
   year = {2021}
 }
+
+@online{Awad-arXiv-2023,
+  title = {MO-DEHB: Evolutionary-based Hyperband for Multi-Objective Optimization},
+  author = {Noor Awad and Ayushi Sharma and Frank Hutter},
+  year = {2023}
+}
diff --git a/examples/04_mo_pytorch_mnist_hpo.py b/examples/04_mo_pytorch_mnist_hpo.py
new file mode 100644
index 0000000..0b06795
--- /dev/null
+++ b/examples/04_mo_pytorch_mnist_hpo.py
@@ -0,0 +1,322 @@
+"""
+This script runs multi-objective hyperparameter optimisation using MODEHB to tune the
+architecture and training hyperparameters of a neural network trained on MNIST in PyTorch.
+It minimizes two objectives: the loss and the model size.
+This example extends the single-objective example '03_pytorch_mnist_hpo.py' to the multi-objective setting.
+
+Additional requirements:
+* torch>=1.7.1
+* torchvision>=0.8.2
+* torchsummary>=1.5.1
+
+PyTorch code referenced from: https://github.com/pytorch/examples/blob/master/mnist/main.py
+"""
+
+import argparse
+import os
+import pickle
+import time
+
+import ConfigSpace as CS
+import ConfigSpace.hyperparameters as CSH
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.optim as optim
+import torchvision
+from distributed import Client
+from torchsummary import summary
+from torchvision import transforms
+
+from dehb import MODEHB
+
+
+class Model(nn.Module):
+    def __init__(self, config, img_dim=28, output_dim=10):
+        super().__init__()
+        self.output_dim = output_dim
+        self.pool_kernel = 2
+        self.pool_stride = 1
+        self.maxpool = nn.MaxPool2d(self.pool_kernel, self.pool_stride)
+        self.conv1 = nn.Conv2d(
+            in_channels=1,
+            out_channels=config["channels_1"],
+            kernel_size=config["kernel_1"],
+            stride=config["stride_1"],
+            padding=0,
+            dilation=1
+        )
+        # updating image size after conv1
+        img_dim = self._update_size(img_dim, config["kernel_1"], config["stride_1"], 0, 1)
+        self.conv2 = nn.Conv2d(
+            in_channels=config["channels_1"],
+            out_channels=config["channels_2"],
+            kernel_size=config["kernel_2"],
+            stride=config["stride_2"],
+            padding=0,
+            dilation=1
+        )
+        # updating image size after conv2
+        img_dim = self._update_size(img_dim, config["kernel_2"], config["stride_2"], 0, 1)
+        # updating image size after maxpool
+        img_dim = self._update_size(img_dim, self.pool_kernel, self.pool_stride, 0, 1)
+        self.dropout = nn.Dropout(config["dropout"])
+        hidden_dim = config["hidden"]
+        self.fc1 = nn.Linear(img_dim * img_dim * config["channels_2"], hidden_dim)
+        self.fc2 = nn.Linear(hidden_dim, self.output_dim)
+
+    def forward(self, x):
+        # Layer 1
+        x = self.conv1(x)
+        x = F.relu(x)
+        x = self.dropout(x)
+        # Layer 2
+        x = self.conv2(x)
+        x = F.relu(x)
+        x = self.maxpool(x)
+        x = self.dropout(x)
+        # FC Layer 1
+        x = torch.flatten(x, 1)
+        x = self.fc1(x)
+        # Output layer
+        x = self.fc2(x)
+        output = F.log_softmax(x, dim=1)
+        return output
+
+    def _update_size(self, dim, kernel_size, stride, padding, dilation):
+        return int(np.floor((dim + 2 * padding - (dilation * (kernel_size - 1) + 1)) / stride + 1))
+
+
+def get_configspace(seed=None):
+    # passing the seed by keyword, since the first positional argument of
+    # ConfigurationSpace is the space's name, not its seed
+    cs = CS.ConfigurationSpace(seed=seed)
+
+    # Hyperparameters defining first Conv layer
+    kernel1 = CSH.OrdinalHyperparameter("kernel_1", sequence=[3, 5, 7], default_value=5)
+    channels1 = CSH.UniformIntegerHyperparameter("channels_1", lower=3, upper=64,
+                                                 default_value=32)
+    stride1 = CSH.UniformIntegerHyperparameter("stride_1", lower=1, upper=2, default_value=1)
+    cs.add_hyperparameters([kernel1, channels1, stride1])
+
+    # Hyperparameters defining second Conv layer
+    kernel2 = CSH.OrdinalHyperparameter("kernel_2", sequence=[3, 5, 7], default_value=5)
+    channels2 = CSH.UniformIntegerHyperparameter("channels_2", lower=3, upper=64,
+                                                 default_value=32)
+    stride2 = CSH.UniformIntegerHyperparameter("stride_2", lower=1, upper=2, default_value=1)
+    cs.add_hyperparameters([kernel2, channels2, stride2])
+
+    # Hyperparameter for FC layer
+    hidden = CSH.UniformIntegerHyperparameter(
+        "hidden", lower=32, upper=256, log=True, default_value=128
+    )
+    cs.add_hyperparameter(hidden)
+
+    # Regularization Hyperparameter
+    dropout = CSH.UniformFloatHyperparameter("dropout", lower=0, upper=0.5,
+                                             default_value=0.1)
+    cs.add_hyperparameter(dropout)
+
+    # Training Hyperparameters
+    batch_size = CSH.OrdinalHyperparameter(
+        "batch_size", sequence=[2, 4, 8, 16, 32, 64], default_value=4
+    )
+    lr = CSH.UniformFloatHyperparameter("lr", lower=1e-6, upper=0.1, log=True,
+                                        default_value=1e-3)
+    cs.add_hyperparameters([batch_size, lr])
+    return cs
+
+
+def train(model, device, train_loader, optimizer):
+    model.train()
+    for batch_idx, (data, target) in enumerate(train_loader):
+        data, target = data.to(device), target.to(device)
+        optimizer.zero_grad()
+        output = model(data)
+        loss = F.nll_loss(output, target)
+        loss.backward()
+        optimizer.step()
+
+
+def evaluate(model, device, data_loader, acc=False):
+    model.eval()
+    loss = 0
+    correct = 0
+    with torch.no_grad():
+        for data, target in data_loader:
+            data, target = data.to(device), target.to(device)
+            output = model(data)
+            loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
+            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
+            correct += pred.eq(target.view_as(pred)).sum().item()
+
+    loss /= len(data_loader.dataset)
+    correct /= len(data_loader.dataset)
+
+    if acc:
+        return correct
+    return loss
+
+
+def train_and_evaluate(config, max_budget, verbose=False, **kwargs):
+    device = kwargs["device"]
+    batch_size = config["batch_size"]
+    train_set = kwargs["train_set"]
+    test_set = kwargs["test_set"]
+    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
+    test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False)
+    model = Model(config).to(device)
+    optimizer = optim.Adadelta(model.parameters(), lr=config["lr"])
+    for epoch in range(1, int(max_budget) + 1):
+        train(model, device, train_loader, optimizer)
+    accuracy = evaluate(model, device, test_loader, acc=True)
+    # model size objective: log of the total number of parameters
+    # (plain builtin sum, since np.sum over a generator is deprecated)
+    num_params = np.log(sum(p.numel() for p in model.parameters()))
+    if verbose:
+        summary(model, (1, 28, 28))  # image dimensions for MNIST
+    return [accuracy, num_params]
+
+
+def objective_function(config, budget, **kwargs):
+    """The target function to minimize for HPO."""
+    device = kwargs["device"]
+
+    # Data Loaders
+    batch_size = config["batch_size"]
+    train_set = kwargs["train_set"]
+    valid_set = kwargs["valid_set"]
+    test_set = kwargs["test_set"]
+    train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_size, shuffle=True)
+    valid_loader = torch.utils.data.DataLoader(valid_set, batch_size=batch_size, shuffle=False)
+    test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_size, shuffle=False)
+
+    # Build model
+    model = Model(config).to(device)
+
+    # Optimizer
+    optimizer = optim.Adadelta(model.parameters(), lr=config["lr"])
+
+    start = time.time()  # measuring wallclock time
+    for epoch in range(1, int(budget) + 1):
+        train(model, device, train_loader, optimizer)
+    loss = evaluate(model, device, valid_loader)
+    cost = time.time() - start
+
+    # not including test score computation in the `cost`
+    test_loss = evaluate(model, device, test_loader)
+
+    # second objective: log of the number of model parameters
+    num_params = np.log(sum(p.numel() for p in model.parameters()))
+
+    # dict representation that DEHB requires
+    res = {
+        "fitness": [loss, num_params],
+        "cost": cost,
+        "info": {"test_loss": test_loss, "budget": budget}
+    }
+    return res
+
+
+def input_arguments():
+    parser = argparse.ArgumentParser(description='Optimizing MNIST in PyTorch using DEHB.')
+    parser.add_argument('--no_cuda', action='store_true', default=False,
+                        help='disables CUDA training')
+    parser.add_argument('--seed', type=int, default=123, metavar='S',
+                        help='random seed (default: 123)')
+    parser.add_argument('--refit_training', action='store_true', default=False,
+                        help='Refit with incumbent configuration on full training data and budget')
+    parser.add_argument('--min_budget', type=float, default=1,
+                        help='Minimum budget (epoch length)')
+    parser.add_argument('--max_budget', type=float, default=25,
+                        help='Maximum budget (epoch length)')
+    parser.add_argument('--eta', type=int, default=3,
+                        help='Parameter for Hyperband controlling early stopping aggressiveness')
+    parser.add_argument('--output_path', type=str, default="./pytorch_mnist_dehb",
+                        help='Directory for DEHB to write logs and outputs')
+    parser.add_argument('--scheduler_file', type=str, default=None,
+                        help='The file to connect a Dask client with a Dask scheduler')
+    parser.add_argument('--n_workers', type=int, default=1,
+                        help='Number of CPU workers for DEHB to distribute function evaluations to')
+    parser.add_argument('--single_node_with_gpus', default=False, action="store_true",
+                        help='If True, signals the DEHB run to assume all required GPUs are on '
+                             'the same node/machine. To be specified as True if no client is '
+                             'passed and n_workers > 1. Should be set to False if a client is '
+                             'created from a scheduler file. The onus of GPU usage is then '
+                             'on the Dask workers created and mapped to the scheduler file.')
+    mo_strategy_choices = ['EPSNET', 'NSGA-II']
+    parser.add_argument('--mo_strategy', default="EPSNET", choices=mo_strategy_choices,
+                        type=str, nargs='?',
+                        help="specify the multi-objective strategy from among {}".format(mo_strategy_choices))
+    parser.add_argument('--verbose', action="store_true", default=False,
+                        help='Decides verbosity of DEHB optimization')
+    parser.add_argument('--runtime', type=float, default=300,
+                        help='Total time in seconds as budget to run DEHB')
+    args = parser.parse_args()
+    return args
+
+
+def main():
+    args = input_arguments()
+
+    use_cuda = not args.no_cuda and torch.cuda.is_available()
+    device = torch.device("cuda" if use_cuda else "cpu")
+
+    torch.manual_seed(args.seed)
+
+    # Data Preparation
+    transform = transforms.Compose([
+        transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))
+    ])
+    train_set = torchvision.datasets.MNIST(
+        root='./data', train=True, download=True, transform=transform
+    )
+    train_set, valid_set = torch.utils.data.random_split(train_set, [50000, 10000])
+    test_set = torchvision.datasets.MNIST(
+        root='./data', train=False, download=True, transform=transform
+    )
+
+    # Get configuration space
+    cs = get_configspace(args.seed)
+    dimensions = len(cs.get_hyperparameters())
+
+    # Some insights into Dask interfaces to DEHB and handling GPU devices for parallelism:
+    # * if args.scheduler_file is specified, args.n_workers need not be specified --- since
+    #   args.scheduler_file indicates a Dask client/server is active
+    # * if args.scheduler_file is not specified and args.n_workers > 1 --- the DEHB object
+    #   creates a Dask client at instantiation, which dies with the associated DEHB object
+    # * if args.single_node_with_gpus is True --- assumes that all GPU devices indicated
+    #   through the environment variable "CUDA_VISIBLE_DEVICES" reside on the same machine
+
+    # Dask checks and setups
+    single_node_with_gpus = args.single_node_with_gpus
+    if args.scheduler_file is not None and os.path.isfile(args.scheduler_file):
+        client = Client(scheduler_file=args.scheduler_file)
+        # explicitly delegating GPU handling to the Dask workers defined via the scheduler file
+        single_node_with_gpus = False
+    else:
+        client = None
+
+    ###########################
+    # DEHB optimisation block #
+    ###########################
+    np.random.seed(args.seed)
+    modehb = MODEHB(objective_function=objective_function, cs=cs, dimensions=dimensions, min_budget=args.min_budget,
+                    max_budget=args.max_budget, eta=args.eta, output_path=args.output_path,
+                    num_objectives=2, mo_strategy=args.mo_strategy,
+                    # if client is not None and of type Client, n_workers is ignored
+                    # if client is None, a Dask client with n_workers is set up
+                    client=client, n_workers=args.n_workers)
+    runtime, history = modehb.run(total_cost=args.runtime, verbose=args.verbose,
+                                  # arguments below are part of **kwargs shared across workers
+                                  train_set=train_set, valid_set=valid_set, test_set=test_set,
+                                  single_node_with_gpus=single_node_with_gpus, device=device)
+    # end of DEHB optimisation
+
+    # Saving optimisation trace history
+    name = time.strftime("%x %X %Z", time.localtime(modehb.start))
+    name = name.replace("/", '-').replace(":", '-').replace(" ", '_')
+    modehb.logger.info("Saving optimisation trace history...")
+    with open(os.path.join(args.output_path, "history_{}.pkl".format(name)), "wb") as f:
+        pickle.dump(history, f)
+    modehb.logger.info("pareto population: {}", modehb.pareto_pop)
+    modehb.logger.info("pareto fitness: {}", modehb.pareto_fit)
+    modehb.logger.debug("runtime: {}", runtime)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pyproject.toml b/pyproject.toml
index 11d7e9f..9f119d1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,10 @@
 [tool.pytest.ini_options]
 testpaths = ["tests"] # path to the test directory
 minversion = "3.8"
-addopts = "--cov=dehb" # Should be package name
+addopts = "--cov=dehb --cov-report=lcov" # Should be package name
+pythonpath = [
+  "."
+]

 [tool.coverage.run]
 branch = true
diff --git a/tests/test_dehb.py b/tests/test_dehb.py
new file mode 100644
index 0000000..1170830
--- /dev/null
+++ b/tests/test_dehb.py
@@ -0,0 +1,100 @@
+import pytest
+import typing
+import ConfigSpace
+import numpy as np
+import time
+from dehb.optimizers.dehb import DEHB
+
+def create_toy_searchspace():
+    """Creates a toy searchspace with a single hyperparameter.
+
+    Can be used to instantiate a DEHB instance for simple unit tests that do not
+    require a proper configuration space for optimization.
+
+    Returns:
+        ConfigurationSpace: Toy searchspace
+    """
+    cs = ConfigSpace.ConfigurationSpace()
+    cs.add_hyperparameter(
+        ConfigSpace.UniformFloatHyperparameter("x0", lower=3, upper=10, log=False))
+    return cs
+
+def create_toy_optimizer(configspace: ConfigSpace.ConfigurationSpace, min_budget: float,
+                         max_budget: float, eta: int,
+                         objective_function: typing.Callable):
+    """Creates a DEHB instance.
+
+    Args:
+        configspace (ConfigurationSpace): Searchspace to use
+        min_budget (float): Minimum budget for DEHB
+        max_budget (float): Maximum budget for DEHB
+        eta (int): Eta parameter of DEHB
+        objective_function (Callable): Function to optimize
+
+    Returns:
+        DEHB: DEHB optimizer instance
+    """
+    dim = len(configspace.get_hyperparameters())
+    return DEHB(f=objective_function, cs=configspace, dimensions=dim,
+                min_budget=min_budget,
+                max_budget=max_budget, eta=eta, n_workers=1)
+
+
+def objective_function(x: ConfigSpace.Configuration, budget: float, **kwargs):
+    """Toy objective function.
+
+    Args:
+        x (ConfigSpace.Configuration): Configuration to evaluate
+        budget (float): Budget to evaluate x on
+
+    Returns:
+        dict: Result dictionary
+    """
+    y = np.random.uniform()
+    cost = 5
+    result = {
+        "fitness": y,
+        "cost": cost
+    }
+    return result
+
+class TestBudgetExhaustion:
+    """Class that bundles all budget exhaustion tests.
+
+    These tests include budget exhaustion tests for runtime, number of function
+    evaluations and number of brackets to run.
+    """
+    def test_runtime_exhaustion(self):
+        """Test for runtime budget exhaustion."""
+        cs = create_toy_searchspace()
+        dehb = create_toy_optimizer(configspace=cs, min_budget=3, max_budget=27, eta=3,
+                                    objective_function=objective_function)
+
+        dehb.start = time.time() - 10
+
+        assert dehb._is_run_budget_exhausted(total_cost=1), "Run budget should be exhausted"
+
+    def test_fevals_exhaustion(self):
+        """Test for function evaluations budget exhaustion."""
+        cs = create_toy_searchspace()
+        dehb = create_toy_optimizer(configspace=cs, min_budget=3, max_budget=27, eta=3,
+                                    objective_function=objective_function)
+
+        dehb.traj.append("Just needed for the test")
+
+        assert dehb._is_run_budget_exhausted(fevals=1), "Run budget should be exhausted"
+
+    def test_brackets_exhaustion(self):
+        """Test for bracket budget exhaustion."""
+        cs = create_toy_searchspace()
+        dehb = create_toy_optimizer(configspace=cs, min_budget=3, max_budget=27, eta=3,
+                                    objective_function=objective_function)
+
+        dehb.iteration_counter = 5
+
+        assert dehb._is_run_budget_exhausted(brackets=1), "Run budget should be exhausted"
diff --git a/tests/test_myfile.py b/tests/test_myfile.py
deleted file mode 100644
index 83f44ef..0000000
--- a/tests/test_myfile.py
+++ /dev/null
@@ -1,20 +0,0 @@
-import pytest
-
-from dehb.myfile import MyClass
-
-
-def test_oreos():
-    """
-    Should add `a` and the param `x`
-    """
-    myclass = MyClass(a=3, b={})
-    assert myclass.oreos(2) == 5
-
-
-@pytest.mark.parametrize("value", [0, -1, -10])
-def test_construction_with_negative_a_raises_error(value):
-    """
-    Should raise a ValueError with a negative `a`
-    """
-    with pytest.raises(ValueError):
-        MyClass(a=value, b={})
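
Reviewer note: for anyone who wants to exercise the new multi-objective interface without running the full MNIST pipeline, below is a minimal sketch distilled from `examples/04_mo_pytorch_mnist_hpo.py` above. The toy objective and search space are invented for illustration, and the constructor/`run()` arguments simply mirror the example (availability of defaults for the remaining parameters is assumed) — treat it as a sketch, not as part of this diff.

```python
import ConfigSpace as CS
import numpy as np

from dehb import MODEHB


def toy_mo_objective(config, budget, **kwargs):
    # Two toy objectives to minimize; per the example above, MODEHB expects
    # "fitness" to be a list with one value per objective.
    f1 = (config["x0"] - 5) ** 2 + np.random.uniform(0, 0.1)
    f2 = 1.0 / (1.0 + config["x0"])
    return {"fitness": [f1, f2], "cost": float(budget)}


cs = CS.ConfigurationSpace(seed=123)
cs.add_hyperparameter(CS.UniformFloatHyperparameter("x0", lower=0, upper=10))

modehb = MODEHB(
    objective_function=toy_mo_objective,
    cs=cs,
    dimensions=len(cs.get_hyperparameters()),
    min_budget=1,
    max_budget=9,
    eta=3,
    num_objectives=2,
    mo_strategy="NSGA-II",  # or "EPSNET"
    n_workers=1,
)
# Run for 10 seconds of wallclock time; mirrors the example's unpacking of run().
runtime, history = modehb.run(total_cost=10, verbose=True)
print(modehb.pareto_fit)  # objective vectors of the non-dominated configurations
```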