Skip to content

Wrapper FS based on Cuckoo Search algorithm #335

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 39 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
6066f3a
Add files via upload
Mohammed-Ryiad-Eiadeh Apr 21, 2023
e729e18
Update TransferFunction.java
Mohammed-Ryiad-Eiadeh Apr 22, 2023
e33157e
Update CuckooSearchOptimizer.java
Mohammed-Ryiad-Eiadeh Apr 22, 2023
580fb6a
Update Binarizing.java
Mohammed-Ryiad-Eiadeh Apr 22, 2023
02ddebb
Update CuckooSearchOptimizer.java
Mohammed-Ryiad-Eiadeh Apr 23, 2023
1079ee7
Update CuckooSearchOptimizer.java
Mohammed-Ryiad-Eiadeh Apr 24, 2023
1a323c9
Update CuckooSearchOptimizer.java
Mohammed-Ryiad-Eiadeh Apr 25, 2023
8228ffe
Update CuckooSearchOptimizer.java
Mohammed-Ryiad-Eiadeh Apr 27, 2023
4f7a25f
Update TransferFunction.java
Mohammed-Ryiad-Eiadeh Apr 27, 2023
44a33bd
Update Binarizing.java
Mohammed-Ryiad-Eiadeh Apr 27, 2023
5d58e5d
Add files via upload
Mohammed-Ryiad-Eiadeh Apr 28, 2023
39c340b
Update CuckooSearchOptimizer.java
Mohammed-Ryiad-Eiadeh Apr 28, 2023
2f85c7b
Update CuckooSearchOptimizer.java
Mohammed-Ryiad-Eiadeh Apr 29, 2023
6392dd1
Update FitnessFunction.java
Mohammed-Ryiad-Eiadeh Apr 29, 2023
e214c10
Update CuckooSearchOptimizer.java
Mohammed-Ryiad-Eiadeh Apr 30, 2023
f0a1300
Update FitnessFunction.java
Mohammed-Ryiad-Eiadeh Apr 30, 2023
2c9c47d
Update FitnessFunction.java
Mohammed-Ryiad-Eiadeh Apr 30, 2023
40da8a8
Update FitnessFunction.java
Mohammed-Ryiad-Eiadeh May 1, 2023
b381c2a
Update FitnessFunction.java
Mohammed-Ryiad-Eiadeh May 7, 2023
e05f30d
Update FitnessFunction.java
Mohammed-Ryiad-Eiadeh May 13, 2023
5aa563d
Update CuckooSearchOptimizer.java
Mohammed-Ryiad-Eiadeh May 13, 2023
c5d5f62
Add files via upload
Mohammed-Ryiad-Eiadeh May 29, 2023
99f83e6
Delete Classification/FeatureSelection/src/main/java/org/tribuo/class…
Mohammed-Ryiad-Eiadeh May 29, 2023
63e4aa0
Delete Binarizing.java
Mohammed-Ryiad-Eiadeh Jul 10, 2023
8c9d3e2
Update TransferFunction.java
Mohammed-Ryiad-Eiadeh Jul 10, 2023
6d2f940
Delete FitnessFunction.java
Mohammed-Ryiad-Eiadeh Jul 10, 2023
2df7c7e
Update CuckooSearchOptimizer.java
Mohammed-Ryiad-Eiadeh Jul 10, 2023
50be1fb
Update CuckooSearchOptimizer.java
Mohammed-Ryiad-Eiadeh Jul 10, 2023
96ecc6b
Update CuckooSearchOptimizer.java
Mohammed-Ryiad-Eiadeh Jul 10, 2023
21a9c3f
Update pom.xml
Mohammed-Ryiad-Eiadeh Jul 11, 2023
fe512ae
Update CuckooSearchOptimizer.java
Mohammed-Ryiad-Eiadeh Jul 11, 2023
d4a11db
Update CuckooSearchOptimizer.java
Mohammed-Ryiad-Eiadeh Jul 11, 2023
f45bde5
Update CuckooSearchOptimizer.java
Mohammed-Ryiad-Eiadeh Jul 12, 2023
7670a28
Update CuckooSearchOptimizer.java
Mohammed-Ryiad-Eiadeh Jul 16, 2023
0993bc0
Update CuckooSearchOptimizer.java
Mohammed-Ryiad-Eiadeh Jul 23, 2023
9038ef4
Update CuckooSearchOptimizer.java
Mohammed-Ryiad-Eiadeh Aug 8, 2023
76dc715
Update CuckooSearchOptimizer.java
Mohammed-Ryiad-Eiadeh Aug 9, 2023
28509c0
Update CuckooSearchOptimizer.java
Mohammed-Ryiad-Eiadeh Aug 9, 2023
2910d99
Update CuckooSearchOptimizer.java
Mohammed-Ryiad-Eiadeh Aug 10, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Classification/FeatureSelection/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,11 @@
</properties>

<dependencies>
<dependency>
<groupId>org.ojalgo</groupId>
<artifactId>ojalgo</artifactId>
<version>53.0.0</version>
</dependency>
<dependency>
<groupId>${project.groupId}</groupId>
<artifactId>tribuo-core</artifactId>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package FS_Wrapper_Approaches.Discreeting;

import org.ojalgo.function.special.ErrorFunction;

import java.util.function.DoubleUnaryOperator;

/**
 * Transfer functions that turn a continuous search-space value into a binary decision.
 * <p>
 * {@code V1}-{@code V4} are the V-shaped family and {@code S1}-{@code S4} the S-shaped (sigmoid) family.
 * Each function maps the input into {@code [0, 1]} and the result is then thresholded at 0.5.
 */
public enum TransferFunction implements DoubleUnaryOperator {
    /** |erf(sqrt(pi) / 2 * x)| */
    V1,
    /** |tanh(x)| */
    V2,
    /** |x / sqrt(1 + x^2)| */
    V3,
    /** |2 / pi * atan(pi / 2 * x)| */
    V4,
    /** 1 / (1 + e^(-2x)) */
    S1,
    /** 1 / (1 + e^(-x)) */
    S2,
    /** 1 / (1 + e^(-x / 2)) */
    S3,
    /** 1 / (1 + e^(-x / 3)) */
    S4;

    /**
     * Applies this transfer function to the given value and binarises the result.
     *
     * @param value the continuous value to be converted to either 1 or 0
     * @return 1 if the transfer value is at least 0.5, otherwise 0 (a NaN input yields 0)
     */
    @Override
    public double applyAsDouble(double value) {
        double transferred = switch (this) {
            case V1 -> Math.abs(ErrorFunction.erf(Math.sqrt(Math.PI) / 2 * value));
            // hyperbolic tangent: plain tan() is periodic and unbounded, so it is not a valid transfer function
            case V2 -> Math.abs(Math.tanh(value));
            // the denominator is sqrt(1 + x^2); without the root the value never exceeds 0.5
            case V3 -> Math.abs(value / Math.sqrt(1 + value * value));
            case V4 -> Math.abs(2 / Math.PI * Math.atan(Math.PI / 2 * value));
            case S1 -> 1 / (1 + Math.exp(-2 * value));
            case S2 -> 1 / (1 + Math.exp(-value));
            case S3 -> 1 / (1 + Math.exp(-value / 2));
            case S4 -> 1 / (1 + Math.exp(-value / 3));
        };
        return transferred >= 0.5 ? 1 : 0;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,350 @@
package FS_Wrapper_Approaches.Optimizers;

import FS.Discreeting.TransferFunction;
import com.oracle.labs.mlrg.olcut.util.Pair;
import org.tribuo.Dataset;
import org.tribuo.FeatureSelector;
import org.tribuo.ImmutableFeatureMap;
import org.tribuo.Model;
import org.tribuo.SelectedFeatureSet;
import org.tribuo.Trainer;
import org.tribuo.classification.Label;
import org.tribuo.classification.ensemble.VotingCombiner;
import org.tribuo.classification.evaluation.LabelEvaluation;
import org.tribuo.classification.evaluation.LabelEvaluator;
import org.tribuo.common.nearest.KNNModel;
import org.tribuo.common.nearest.KNNTrainer;
import org.tribuo.dataset.SelectedFeatureDataset;
import org.tribuo.evaluation.CrossValidation;
import org.tribuo.math.distance.L1Distance;
import org.tribuo.math.neighbour.NeighboursQueryFactoryType;
import org.tribuo.provenance.FeatureSelectorProvenance;
import org.tribuo.provenance.FeatureSetProvenance;
import org.tribuo.provenance.impl.FeatureSelectorProvenanceImpl;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
import java.util.Random;
import java.util.SplittableRandom;
import java.util.concurrent.ThreadLocalRandom;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

/**
* Select features based on Cuckoo Search algorithm with binary transfer functions, KNN classifier and 10-fold cross validation
* <p>
* see:
* <pre>
* Xin-She Yang and Suash Deb.
* "Cuckoo Search via Lévy Flights", 2010.
*
* L. A. M. Pereira et al.
* "A Binary Cuckoo Search and its Application for Feature Selection", 2014.
* </pre>
*/
public final class CuckooSearchOptimizer implements FeatureSelector<Label> {
private final Trainer<Label> trainer;
private final TransferFunction transferFunction;
private final int populationSize;
private final double stepSizeScaling;
private final double lambda;
private final double worstNestProbability;
private final double delta;
private final double mutationRate;
private int [][] setOfSolutions;
private final int maxIteration;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All the final variables here should not be final and need to be tagged @Config (with appropriate description fields) so they are automatically captured by the provenance system.

private final SplittableRandom rng;
private final int seed;

/**
 * Builds a Cuckoo Search feature selector with the default configuration:
 * a 1-nearest-neighbour trainer using L1 distance, the V2 transfer function, 50 nests,
 * 3 iterations and a fixed seed of 12345.
 */
public CuckooSearchOptimizer() {
    this(new KNNTrainer<Label>(1, new L1Distance(), Runtime.getRuntime().availableProcessors(), new VotingCombiner(), KNNModel.Backend.THREADPOOL, NeighboursQueryFactoryType.BRUTE_FORCE),
            TransferFunction.V2,
            50,      // populationSize
            2d,      // stepSizeScaling
            2d,      // lambda
            0.1d,    // worstNestProbability
            1.5d,    // delta
            0.2d,    // mutationRate
            3,       // maxIteration
            12345);  // seed
}

/**
 * Constructs the wrapper feature selection based on cuckoo search algorithm, using the default
 * values for the step size scaling (2), lambda (2), worst nest probability (0.1), delta (1.5)
 * and mutation rate (0.2).
 * @param trainer The used trainer in the evaluation process
 * @param transferFunction The transfer function to convert continuous values to binary ones
 * @param populationSize The number of solutions in the initial population
 * @param maxIteration The number of generations used to improve the population
 * @param seed The seed for the SplittableRandom
 */
public CuckooSearchOptimizer(Trainer<Label> trainer, TransferFunction transferFunction, int populationSize, int maxIteration, int seed) {
    // worstNestProbability is a probability: 0.1 as in the default constructor, not the 1.5 used for delta
    this(trainer, transferFunction, populationSize, 2d, 2d, 0.1d, 1.5d, 0.2d, maxIteration, seed);
}

/**
 * Constructs the wrapper feature selection based on cuckoo search algorithm
 * @param trainer The used trainer in the evaluation process, must not be null
 * @param transferFunction The transfer function to convert continuous values to binary ones, must not be null
 * @param populationSize The number of solutions in the initial population, must be at least 1
 * @param stepSizeScaling The cuckoo step size
 * @param lambda The lambda of the levy flight function
 * @param worstNestProbability The probability with which a nest is abandoned
 * @param delta The delta that is used in the abandon nest function
 * @param mutationRate The per-gene probability of applying the mutation operator
 * @param maxIteration The number of generations used to improve the population, must not be negative
 * @param seed The seed for the SplittableRandom
 * @throws NullPointerException if the trainer or the transfer function is null
 * @throws IllegalArgumentException if the population size is below 1 or the iteration count is negative
 */
public CuckooSearchOptimizer(Trainer<Label> trainer, TransferFunction transferFunction, int populationSize, double stepSizeScaling, double lambda, double worstNestProbability, double delta, double mutationRate, int maxIteration, int seed) {
    // fail fast here rather than with an index error deep inside select()
    if (populationSize < 1) {
        throw new IllegalArgumentException("populationSize must be at least 1, found " + populationSize);
    }
    if (maxIteration < 0) {
        throw new IllegalArgumentException("maxIteration must not be negative, found " + maxIteration);
    }
    this.trainer = Objects.requireNonNull(trainer, "trainer");
    this.transferFunction = Objects.requireNonNull(transferFunction, "transferFunction");
    this.populationSize = populationSize;
    this.stepSizeScaling = stepSizeScaling;
    this.lambda = lambda;
    this.worstNestProbability = worstNestProbability;
    this.delta = delta;
    this.mutationRate = mutationRate;
    this.maxIteration = maxIteration;
    this.seed = seed;
    this.rng = new SplittableRandom(seed);
}

/**
* This method is used to generate the initial population (set of solutions)
* @param totalNumberOfFeatures The number of features in the given dataset
* @return The population of subsets of selected features
*/
private int[][] GeneratePopulation(int totalNumberOfFeatures) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As mentioned elsewhere this method should accept a SplittableRandom rather than make a fresh RNG each time.

setOfSolutions = new int[this.populationSize][totalNumberOfFeatures];
for (int[] subSet : setOfSolutions) {
int[] values = new int[subSet.length];
for (int i = 0; i < values.length; i++) {
values[i] = rng.nextInt(2);
}
System.arraycopy(values, 0, subSet, 0, setOfSolutions[0].length);
}
return setOfSolutions;
}

/**
* Does this feature selection algorithm return an ordered feature set?
* <p>
* This implementation always reports an ordered set.
* NOTE(review): getSFS assigns the same score (1.0) to every selected feature, so confirm
* that reporting the set as ordered is intended.
* @return True if the set is ordered.
*/
@Override
public boolean isOrdered() {
return true;
}

/**
 * Selects features according to this selection algorithm from the specified dataset.
 * <p>
 * Each generation, every nest is updated in turn by a Lévy flight step, an optional abandon-nest step,
 * three mutation operators and the Jaya operator; a candidate only replaces a nest when its fitness
 * is strictly better. The Jaya operator uses the best and worst nests of the most recent ranking,
 * so the population is ranked once before the first generation and again after every generation.
 * With {@code maxIteration == 0} the best nest of the initial population is returned.
 * @param dataset The dataset to use.
 * @return A selected feature set.
 * @throws IllegalStateException if the generated population is empty
 */
@Override
public SelectedFeatureSet select(Dataset<Label> dataset) {
    ImmutableFeatureMap featureMap = new ImmutableFeatureMap(dataset.getFeatureMap());
    setOfSolutions = generatePopulation(featureMap.size());
    if (setOfSolutions.length == 0) {
        throw new IllegalStateException("The population must contain at least one solution");
    }
    int numberOfNests = setOfSolutions.length;
    int numberOfFeatures = setOfSolutions[0].length;
    // rank up front so the Jaya operator has a best and a worst nest in the first generation
    List<CuckooSearchFeatureSet> ranked = rankPopulation(dataset, featureMap);
    for (int iter = 0; iter < maxIteration; iter++) {
        int[] currentBest = ranked.get(0).subSet();
        int[] currentWorst = ranked.get(ranked.size() - 1).subSet();
        for (int solution = 0; solution < numberOfNests; solution++) {
            int[] nest = setOfSolutions[solution];
            // Update a randomly chosen nest based on the levy flight function
            int[] flight = new int[numberOfFeatures];
            for (int i = 0; i < numberOfFeatures; i++) {
                flight[i] = (int) transferFunction.applyAsDouble(nest[i] + stepSizeScaling * Math.pow(solution + 1, -lambda));
            }
            keepBetter(dataset, featureMap, flight, setOfSolutions[rng.nextInt(numberOfNests)]);
            // Update the solution based on the abandon nest function (seeded rng keeps runs reproducible)
            if (rng.nextDouble() < worstNestProbability) {
                int[] first = setOfSolutions[rng.nextInt(numberOfNests)];
                int[] second = setOfSolutions[rng.nextInt(numberOfNests)];
                int[] relocated = new int[numberOfFeatures];
                for (int j = 0; j < numberOfFeatures; j++) {
                    relocated[j] = (int) transferFunction.applyAsDouble(nest[j] + delta * (first[j] - second[j]));
                }
                keepBetter(dataset, featureMap, relocated, nest);
            }
            // Update the solution based on mutation operator
            keepBetter(dataset, featureMap, mutation(nest), nest);
            // Update the solution based on inversion mutation
            keepBetter(dataset, featureMap, inversionMutation(nest), nest);
            // Update the solution based on swapped mutation
            keepBetter(dataset, featureMap, swappedMutation(nest), nest);
            // Update the solution based on Jaya operator
            keepBetter(dataset, featureMap, jayaOperator(nest, currentBest, currentWorst), nest);
        }
        ranked = rankPopulation(dataset, featureMap);
    }
    return getSFS(this, dataset, featureMap, ranked.get(0).subSet());
}

/**
 * Evaluates the candidate against the nest and overwrites the nest in place with whichever is better.
 */
private void keepBetter(Dataset<Label> dataset, ImmutableFeatureMap featureMap, int[] candidate, int[] nest) {
    int[] winner = retrieveBestAfterEvaluation(dataset, featureMap, candidate, nest);
    System.arraycopy(winner, 0, nest, 0, nest.length);
}

/**
 * Scores every nest of the current population and returns them ordered from best to worst fitness.
 */
private List<CuckooSearchFeatureSet> rankPopulation(Dataset<Label> dataset, ImmutableFeatureMap featureMap) {
    List<CuckooSearchFeatureSet> ranked = new ArrayList<>(setOfSolutions.length);
    for (int[] nest : setOfSolutions) {
        // snapshot the nest: the population rows are overwritten in place by later updates
        ranked.add(new CuckooSearchFeatureSet(nest.clone(), evaluateSolution(this, trainer, dataset, featureMap, nest)));
    }
    ranked.sort(Comparator.comparingDouble(CuckooSearchFeatureSet::score).reversed());
    return ranked;
}

/**
* Returns the provenance of this feature selector.
* @return A new FeatureSelectorProvenanceImpl constructed from this instance.
*/
@Override
public FeatureSelectorProvenance getProvenance() {
return new FeatureSelectorProvenanceImpl(this);
}

/**
 * Computes the fitness score of a single solution.
 * <p>
 * The fitness is the mean accuracy over a 10-fold cross validation on the dataset restricted to
 * the selected features, plus a small bonus (weighted by 0.001) for selecting fewer features.
 * @param optimizer The optimizer that is used for FS
 * @param trainer The used trainer in the evaluation process
 * @param dataset The dataset to use
 * @param Fmap The dataset feature map
 * @param solution The current subset of features
 * @return The fitness score of the given subset
 */
private <T extends FeatureSelector<Label>> double evaluateSolution(T optimizer, Trainer<Label> trainer, Dataset<Label> dataset, ImmutableFeatureMap Fmap, int... solution) {
    SelectedFeatureSet candidateFeatures = getSFS(optimizer, dataset, Fmap, solution);
    SelectedFeatureDataset<Label> reducedDataset = new SelectedFeatureDataset<>(dataset, candidateFeatures);
    CrossValidation<Label, LabelEvaluation> folds = new CrossValidation<>(trainer, reducedDataset, new LabelEvaluator(), 10);

    double accuracySum = 0d;
    for (Pair<LabelEvaluation, Model<Label>> foldResult : folds.evaluate()) {
        LabelEvaluation evaluation = foldResult.getA();
        accuracySum += evaluation.accuracy();
    }
    double meanAccuracy = accuracySum / folds.getK();

    // fraction of the original features that the solution keeps
    double keptFraction = (double) reducedDataset.getSelectedFeatures().size() / Fmap.size();
    return meanAccuracy + 0.001 * (1 - keptFraction);
}

/**
 * Converts a binary solution into the selected subset of features.
 * <p>
 * Every feature whose gene equals 1 is selected and is given a score of 1.0.
 * @param optimizer The optimizer that is used for FS
 * @param dataset The dataset to use
 * @param featureMap The dataset feature map
 * @param solution The current subset of features
 * @return The selected feature set
 */
private <T extends FeatureSelector<Label>> SelectedFeatureSet getSFS(T optimizer, Dataset<Label> dataset, ImmutableFeatureMap featureMap, int... solution) {
    List<String> selectedNames = IntStream.range(0, solution.length)
            .filter(index -> solution[index] == 1)
            .mapToObj(index -> featureMap.get(index).getName())
            .collect(Collectors.toCollection(ArrayList::new));
    // one identical score per selected feature
    List<Double> selectedScores = selectedNames.stream()
            .map(name -> 1d)
            .collect(Collectors.toCollection(ArrayList::new));

    FeatureSetProvenance featureSetProvenance = new FeatureSetProvenance(SelectedFeatureSet.class.getName(), dataset.getProvenance(), optimizer.getProvenance());
    return new SelectedFeatureSet(selectedNames, selectedScores, optimizer.isOrdered(), featureSetProvenance);
}

/**
 * Evaluates the altered and the old solution and returns the one with the better fitness.
 * <p>
 * The old solution is kept on ties, so a candidate must be strictly better to win.
 * The returned array is one of the two arguments, not a copy.
 * @param dataset The dataset to use
 * @param FMap The dataset feature map
 * @param alteredSolution The modified solution
 * @param oldSolution The old solution
 * @return The altered solution if its fitness is strictly higher, otherwise the old solution
 */
public int[] retrieveBestAfterEvaluation(Dataset<Label> dataset, ImmutableFeatureMap FMap, int[] alteredSolution, int... oldSolution) {
    double alteredFitness = evaluateSolution(this, trainer, dataset, FMap, alteredSolution);
    double oldFitness = evaluateSolution(this, trainer, dataset, FMap, oldSolution);
    return alteredFitness > oldFitness ? alteredSolution : oldSolution;
}

/**
 * The simple mutation method of Genetic algorithm: every gene is flipped independently
 * with probability {@code mutationRate}.
 * <p>
 * Random numbers come from the seeded {@code rng} field so that runs are reproducible.
 * <p>
 * see:
 * <pre>
 * Steven Bayer and Lui Wang.
 * "A Genetic Algorithm Programming Environment: Splicer", 1991.
 * </pre>
 * @param currentSolution The solution to be altered by the mutation operator; it is not modified
 * @return The altered solution after mutation
 */
private int[] mutation(int... currentSolution) {
    int[] mutated = currentSolution.clone();
    for (int gene = 0; gene < mutated.length; gene++) {
        if (rng.nextDouble() < mutationRate) {
            mutated[gene] = 1 - mutated[gene];
        }
    }
    return mutated;
}

/**
 * The inversion mutation: flips every gene inside a randomly chosen, non-empty, contiguous segment.
 * <p>
 * The segment is delimited by two distinct cut points drawn from the seeded {@code rng} field,
 * so any gene (including the last one) can be part of the segment and the method terminates
 * for solutions of any length. An empty solution is returned unchanged.
 * <p>
 * see:
 * <pre>
 * Nitashs Soni and Tapsa Kumar.
 * "Study of Various Mutation Operators in Genetic Algorithms", 2014.
 * </pre>
 * @param currentSolution The solution to be altered by the mutation operator; it is not modified
 * @return The altered solution after inversion mutation
 */
private int[] inversionMutation(int... currentSolution) {
    int[] solution = currentSolution.clone();
    int length = solution.length;
    if (length == 0) {
        return solution;
    }
    // draw two distinct cut points from [0, length] without a rejection loop
    int firstCut = rng.nextInt(length + 1);
    int secondCut = rng.nextInt(length);
    if (secondCut >= firstCut) {
        secondCut++;
    }
    int from = Math.min(firstCut, secondCut);
    int to = Math.max(firstCut, secondCut);
    for (int gene = from; gene < to; gene++) {
        solution[gene] = 1 - solution[gene];
    }
    return solution;
}

/**
 * Swapped mutation: exchanges the genes at two randomly chosen positions.
 * <p>
 * The positions are drawn from the seeded {@code rng} field and may coincide, in which case the
 * solution is returned unchanged. An empty solution is returned unchanged as well.
 * <p>
 * see:
 * <pre>
 * Ming-Wen Tsai et al.
 * "A Two-Dimensional Genetic Algorithm and Its Application to Aircraft Scheduling Problem", 2015.
 * </pre>
 * @param currentSolution The solution to be altered by the mutation operator; it is not modified
 * @return The altered solution after swapped mutation
 */
private int[] swappedMutation(int... currentSolution) {
    int[] solution = currentSolution.clone();
    if (solution.length == 0) {
        // nextInt(0) would throw; there is nothing to swap
        return solution;
    }
    int firstGeneIndex = rng.nextInt(solution.length);
    int secondGeneIndex = rng.nextInt(solution.length);
    int secondGene = solution[secondGeneIndex];
    solution[secondGeneIndex] = solution[firstGeneIndex];
    solution[firstGeneIndex] = secondGene;
    return solution;
}

/**
 * The main equation of Jaya optimization algorithm: moves the solution towards the best solution
 * and away from the worst one, then binarises the result with the configured transfer function.
 * <p>
 * The two random coefficients per gene are drawn from the seeded {@code rng} field so that runs
 * are reproducible.
 * <p>
 * see:
 * <pre>
 * Venkata Rao.
 * "Jaya: A simple and new optimization algorithm for solving constrained and unconstrained optimization problems", 2016.
 * </pre>
 * @param currentSolution The solution to be altered by the jaya operator; it is not modified
 * @param currentBest The best solution in the current generation
 * @param currentWorst The worst solution in the current generation
 * @return The altered solution after applying jaya operator
 */
private int[] jayaOperator(int[] currentSolution, int[] currentBest, int[] currentWorst) {
    int[] newSolution = new int[currentSolution.length];
    for (int i = 0; i < newSolution.length; i++) {
        double towardsBest = rng.nextDouble() * (currentBest[i] - currentSolution[i]);
        double awayFromWorst = rng.nextDouble() * (currentWorst[i] - currentSolution[i]);
        newSolution[i] = (int) transferFunction.applyAsDouble(currentSolution[i] + towardsBest - awayFromWorst);
    }
    return newSolution;
}

/**
 * This record is used to hold subset of features with its corresponding fitness score.
 * <p>
 * The subset is copied on construction, so later changes to the caller's array (for example the
 * in-place updates of the population) do not alter an already scored record.
 * @param subSet The binary solution, one gene per feature
 * @param score The fitness score of the solution
 */
record CuckooSearchFeatureSet(int[] subSet, double score) {
    CuckooSearchFeatureSet {
        subSet = subSet.clone();
    }
}
}