
Commit

Merge pull request #153 from rodrigo-arenas/0.11.0dev0
0.11.0 release
rodrigo-arenas authored Sep 12, 2024
2 parents 1e9740b + a19c6d6 commit 1314a7c
Showing 12 changed files with 317 additions and 125 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/ci-tests.yml
@@ -9,7 +9,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
python-version: [ '3.8', '3.9', '3.10', '3.11']
python-version: [ '3.9', '3.10', '3.11']
os: [ubuntu-latest, windows-latest, macOS-latest]
include:
- os: ubuntu-latest
@@ -19,12 +19,12 @@
- os: windows-latest
path: ~\AppData\Local\pip\Cache
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- uses: actions/cache@v3
- uses: actions/cache@v4
with:
path: ${{ matrix.path }}
key: ${{ runner.os }}-pip-${{ hashFiles('dev-requirements.txt') }}
@@ -38,7 +38,7 @@
run: |
pytest sklearn_genetic/ --verbose --color=yes --assert=plain --cov-fail-under=95 --cov-config=.coveragerc --cov=./ -p no:warnings
- name: "Upload coverage to Codecov"
uses: codecov/codecov-action@v3
uses: codecov/codecov-action@v4
with:
token: ${{ secrets.CODECOV_TOKEN }}
fail_ci_if_error: true
115 changes: 55 additions & 60 deletions README.rst
@@ -8,8 +8,8 @@
.. |Codecov| image:: https://codecov.io/gh/rodrigo-arenas/Sklearn-genetic-opt/branch/master/graphs/badge.svg?branch=master&service=github
.. _Codecov: https://codecov.io/github/rodrigo-arenas/Sklearn-genetic-opt?branch=master

.. |PythonVersion| image:: https://img.shields.io/badge/python-3.8%20%7C%203.9%20%7C%203.10%20%7C%203.11-blue
.. _PythonVersion: https://www.python.org/downloads/
.. |PythonVersion| image:: https://img.shields.io/badge/python-3.9%20%7C%203.10%20%7C%203.11-blue
.. _PythonVersion : https://www.python.org/downloads/

.. |PyPi| image:: https://badge.fury.io/py/sklearn-genetic-opt.svg
.. _PyPi: https://badge.fury.io/py/sklearn-genetic-opt
@@ -111,65 +111,60 @@ Example: Hyperparameters Tuning

.. code-block:: python
# Import necessary libraries
from sklearn_genetic import GASearchCV
from sklearn_genetic.space import Continuous, Categorical, Integer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.datasets import load_digits
from sklearn.metrics import accuracy_score
# Load the dataset
data = load_digits()
n_samples = len(data.images)
X = data.images.reshape((n_samples, -1))
y = data['target']
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
# Define the RandomForestClassifier
clf = RandomForestClassifier()
# Define the parameter grid for GASearchCV
param_grid = {
'min_weight_fraction_leaf': Continuous(0.01, 0.5, distribution='log-uniform'),
'bootstrap': Categorical([True, False]),
'max_depth': Integer(2, 30),
'max_leaf_nodes': Integer(2, 35),
'n_estimators': Integer(100, 300)
}
# Configure cross-validation
cv = StratifiedKFold(n_splits=3, shuffle=True)
# Initialize and configure GASearchCV
evolved_estimator = GASearchCV(
estimator=clf,
cv=cv,
scoring='accuracy',
population_size=20,
generations=35,
param_grid=param_grid,
n_jobs=-1,
verbose=True,
keep_top_k=4
)
# Train and optimize the estimator
evolved_estimator.fit(X_train, y_train)
# Display best parameters found
print("Best parameters:", evolved_estimator.best_params_)
# Use the model fitted with the best parameters to make predictions
y_predict_ga = evolved_estimator.predict(X_test)
print("Test accuracy:", accuracy_score(y_test, y_predict_ga))
# Display additional information about the optimization process
print("Stats achieved in each generation:", evolved_estimator.history)
print("Best k solutions:", evolved_estimator.hof)
from sklearn_genetic import GASearchCV
from sklearn_genetic.space import Continuous, Categorical, Integer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.datasets import load_digits
from sklearn.metrics import accuracy_score
data = load_digits()
n_samples = len(data.images)
X = data.images.reshape((n_samples, -1))
y = data['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
clf = RandomForestClassifier()
# Defines the possible values to search
param_grid = {'min_weight_fraction_leaf': Continuous(0.01, 0.5, distribution='log-uniform'),
'bootstrap': Categorical([True, False]),
'max_depth': Integer(2, 30),
'max_leaf_nodes': Integer(2, 35),
'n_estimators': Integer(100, 300)}
# Seed solutions
warm_start_configs = [
{"min_weight_fraction_leaf": 0.02, "bootstrap": True, "max_depth": None, "n_estimators": 100},
{"min_weight_fraction_leaf": 0.4, "bootstrap": True, "max_depth": 5, "n_estimators": 200},
]
cv = StratifiedKFold(n_splits=3, shuffle=True)
evolved_estimator = GASearchCV(estimator=clf,
cv=cv,
scoring='accuracy',
population_size=20,
generations=35,
param_grid=param_grid,
n_jobs=-1,
verbose=True,
use_cache=True,
warm_start_configs=warm_start_configs,
keep_top_k=4)
# Train and optimize the estimator
evolved_estimator.fit(X_train, y_train)
# Best parameters found
print(evolved_estimator.best_params_)
# Use the model fitted with the best parameters
y_predict_ga = evolved_estimator.predict(X_test)
print(accuracy_score(y_test, y_predict_ga))
# Saved metadata for further analysis
print("Stats achieved in each generation: ", evolved_estimator.history)
print("Best k solutions: ", evolved_estimator.hof)
Example: Feature Selection
4 changes: 3 additions & 1 deletion dev-requirements.txt
@@ -1,4 +1,4 @@
scikit-learn>=1.1.0
scikit-learn>=1.3.0
deap>=1.3.3
numpy>=1.19.0
pytest==7.4.0
@@ -15,6 +15,8 @@ sphinx_rtd_theme
sphinx-copybutton
numpydoc
nbsphinx
ipython
Pygments
tensorflow>=2.4.0
tqdm>=4.61.1
tk
4 changes: 2 additions & 2 deletions docs/index.rst
@@ -26,8 +26,8 @@ inside the env use::

pip install sklearn-genetic-opt

.. |PythonMinVersion| replace:: 3.8
.. |ScikitLearnMinVersion| replace:: 1.1.0
.. |PythonMinVersion| replace:: 3.9
.. |ScikitLearnMinVersion| replace:: 1.3.0
.. |NumPyMinVersion| replace:: 1.19.0
.. |SeabornMinVersion| replace:: 0.11.2
.. |DEAPMinVersion| replace:: 1.3.3
47 changes: 47 additions & 0 deletions docs/release_notes.rst
@@ -3,6 +3,53 @@ Release Notes

Some notes on new features in various releases

What's new in 0.11.0
--------------------

^^^^^^^^^
Features:
^^^^^^^^^

* Added a parameter ``use_cache``, which defaults to ``True``. When enabled, the algorithm skips re-evaluating solutions it has already seen and retrieves their performance metrics from the cache instead.
  If ``use_cache`` is set to ``False``, the algorithm always re-evaluates every solution, even if it has been seen before, to obtain fresh performance metrics.
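
  Example (a minimal sketch; the estimator and search space are only illustrative):

  .. code-block:: python

      from sklearn.tree import DecisionTreeClassifier

      from sklearn_genetic import GASearchCV
      from sklearn_genetic.space import Integer

      # use_cache=True (the default) reuses cached cross-validation scores for
      # individuals that reappear in later generations; use_cache=False forces a
      # fresh evaluation every time.
      evolved_estimator = GASearchCV(
          estimator=DecisionTreeClassifier(),
          param_grid={"max_depth": Integer(2, 20)},
          scoring="accuracy",
          use_cache=True,
      )
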
* Added a parameter named ``warm_start_configs`` to `GAFeatureSelectionCV`, defaulting to ``None``: a list of predefined hyperparameter configurations used to seed the initial population.
  Each element in the list is a dictionary whose keys are hyperparameter names and whose values are the corresponding hyperparameter values to use for that individual.

Example:

.. code-block:: python
    :linenos:

    warm_start_configs = [
        {"min_weight_fraction_leaf": 0.02, "bootstrap": True, "max_depth": None, "n_estimators": 100},
        {"min_weight_fraction_leaf": 0.4, "bootstrap": True, "max_depth": 5, "n_estimators": 200},
    ]

The genetic algorithm will initialize part of the population with these configurations to
warm-start the optimization process. The remaining individuals in the population will
be initialized randomly according to the defined hyperparameter space.

This parameter is useful when prior knowledge of good hyperparameter configurations exists,
allowing the algorithm to focus on refining known good solutions while still exploring new
areas of the hyperparameter space. If set to ``None``, the entire population will be initialized
randomly.
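
For instance (a minimal sketch; it reuses the ``warm_start_configs`` list defined above, and the estimator and search space are only illustrative), the list is passed directly to the search object:

.. code-block:: python

    from sklearn.ensemble import RandomForestClassifier

    from sklearn_genetic import GASearchCV
    from sklearn_genetic.space import Categorical, Continuous, Integer

    param_grid = {
        "min_weight_fraction_leaf": Continuous(0.01, 0.5, distribution="log-uniform"),
        "bootstrap": Categorical([True, False]),
        "max_depth": Integer(2, 30),
        "n_estimators": Integer(100, 300),
    }

    # Part of the initial population is seeded from warm_start_configs;
    # the rest is sampled randomly from param_grid.
    evolved_estimator = GASearchCV(
        estimator=RandomForestClassifier(),
        param_grid=param_grid,
        scoring="accuracy",
        warm_start_configs=warm_start_configs,
    )
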
* Introduced a **novelty search strategy** to the `GASearchCV` class. This strategy rewards solutions that are more distinct from others
in the population by incorporating a **novelty score** into the fitness evaluation. The novelty score encourages exploration and promotes diversity,
reducing the risk of premature convergence to local optima.

- **Novelty Score**: Calculated based on the distance between an individual and its nearest neighbors in the population.
Individuals with higher novelty scores are more distinct from the rest of the population.
- **Fitness Evaluation**: The overall fitness is now a combination of the traditional performance score and the novelty score,
allowing the algorithm to balance between exploiting known good solutions and exploring new, diverse ones.
- **Improved Exploration**: This strategy helps explore new areas of the hyperparameter space, increasing the likelihood of discovering better solutions and avoiding local optima.
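
As a rough illustration of the idea only (this is not the library's internal code; the neighbour count ``k``, the Euclidean distance, and the ``novelty_weight`` factor are assumptions made for the sketch):

.. code-block:: python

    import numpy as np

    def novelty_score(individual, population, k=5):
        """Mean distance from an individual to its k nearest neighbours.

        ``individual`` and each member of ``population`` are assumed to be
        numeric vectors encoding a hyperparameter configuration, with
        ``individual`` not contained in ``population``.
        """
        distances = np.sort(
            [np.linalg.norm(np.asarray(individual) - np.asarray(other)) for other in population]
        )
        return distances[:k].mean()

    def combined_fitness(cv_score, individual, population, novelty_weight=0.1):
        # Traditional performance score plus a bonus for being distinct
        # from the rest of the population.
        return cv_score + novelty_weight * novelty_score(individual, population)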

^^^^^^^^^^^^
API Changes:
^^^^^^^^^^^^

* Dropped support for Python 3.8.

What's new in 0.10.1
--------------------

2 changes: 2 additions & 0 deletions pytest.ini
@@ -0,0 +1,2 @@
[pytest]
addopts = --verbose --color=yes --assert=plain --cov-fail-under=95 --cov-config=.coveragerc --cov=./ -p no:warnings --tb=short --cov-report=term-missing:skip-covered
4 changes: 2 additions & 2 deletions setup.py
@@ -40,7 +40,7 @@
include=["sklearn_genetic", "sklearn_genetic.*"], exclude=["*tests*"]
),
install_requires=[
"scikit-learn>=1.1.0",
"scikit-learn>=1.3.0",
"numpy>=1.19.0",
"deap>=1.3.3",
"tqdm>=4.61.1",
@@ -51,6 +51,6 @@
"tensorflow": ["tensorflow>=2.0.0"],
"all": ["mlflow>=1.30.0", "seaborn>=0.11.2", "tensorflow>=2.0.0"],
},
python_requires=">=3.8",
python_requires=">=3.9",
include_package_data=True,
)
2 changes: 1 addition & 1 deletion sklearn_genetic/_version.py
@@ -1 +1 @@
__version__ = "0.10.2dev0"
__version__ = "0.11.0"