-
Notifications
You must be signed in to change notification settings - Fork 191
Wrapper FS based on Cuckoo Search algorithm #335
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 21 commits
6066f3a
e729e18
e33157e
580fb6a
02ddebb
1079ee7
1a323c9
8228ffe
4f7a25f
44a33bd
5d58e5d
39c340b
2f85c7b
6392dd1
e214c10
f0a1300
2c9c47d
40da8a8
b381c2a
e05f30d
5aa563d
c5d5f62
99f83e6
63e4aa0
8c9d3e2
6d2f940
2df7c7e
50be1fb
96ecc6b
21a9c3f
fe512ae
d4a11db
f45bde5
7670a28
0993bc0
9038ef4
76dc715
28509c0
2910d99
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
package FS_Wrapper_Approaches.Discreeting; | ||
|
||
import static org.apache.commons.math3.special.Erf.erf; | ||
|
||
|
||
/** | ||
* This interface includes a static method that is utilized to convert continuous value to binary one | ||
*/ | ||
public interface Binarizing { | ||
/** | ||
* This method used to convert continuous values to binary ones | ||
* @param TF is the type (id) of the transfer function | ||
* @param Value is the continuous value to be converted | ||
* @return return the converted value based on the selected function | ||
*/ | ||
static int discreteValue(TransferFunction TF, double Value) { | ||
return switch (TF) { | ||
case V1 -> Math.abs(erf(Math.sqrt(Math.PI) / 2 * Value)) >= 0.5 ? 1 : 0; | ||
case V2 -> Math.abs(Math.tan(Value)) >= 0.5 ? 1 : 0; | ||
case V3 -> Math.abs(Value / Math.abs(1 + Math.pow(Value, 2))) >= 0.5 ? 1 : 0; | ||
case V4 -> Math.abs(2 / Math.PI * Math.atan(Math.PI / 2 * Value)) >= 0.5 ? 1 : 0; | ||
case S1 -> 1 / (1 + Math.pow(Math.E, - 2 * Value)) >= 0.5 ? 1 : 0; | ||
case S2 -> 1 / (1 + Math.pow(Math.E, - Value)) >= 0.5 ? 1 : 0; | ||
case S3 -> 1 / (1 + Math.pow(Math.E, - Value / 2)) >= 0.5 ? 1 : 0; | ||
case S4 -> 1 / (1 + Math.pow(Math.E, - Value / 3)) >= 0.5 ? 1 : 0; | ||
}; | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
package FS_Wrapper_Approaches.Discreeting; | ||
|
||
/**
 * Identifies the transfer function used to turn a continuous value into a binary one.
 * <p>
 * {@code V1}-{@code V4} select the absolute-value based formulas and {@code S1}-{@code S4}
 * select the logistic formulas in {@code Binarizing.discreteValue}.
 */
public enum TransferFunction {
    V1,
    V2,
    V3,
    V4,
    S1,
    S2,
    S3,
    S4
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
package FS_Wrapper_Approaches.Evaluation; | ||
|
||
import FS_Wrapper_Approaches.Optimizers.CuckooSearchOptimizer; | ||
import com.oracle.labs.mlrg.olcut.util.Pair; | ||
import org.tribuo.Dataset; | ||
import org.tribuo.ImmutableFeatureMap; | ||
import org.tribuo.Model; | ||
import org.tribuo.SelectedFeatureSet; | ||
import org.tribuo.classification.Label; | ||
import org.tribuo.classification.evaluation.LabelEvaluation; | ||
import org.tribuo.classification.evaluation.LabelEvaluator; | ||
import org.tribuo.common.nearest.KNNClassifierOptions; | ||
|
||
import org.tribuo.dataset.SelectedFeatureDataset; | ||
import org.tribuo.evaluation.CrossValidation; | ||
import org.tribuo.provenance.FeatureSetProvenance; | ||
|
||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
|
||
/** | ||
* This interface includes the evaluation function of each solution | ||
*/ | ||
public interface FitnessFunction { | ||
|
||
/** | ||
* This method is used to compute the fitness score of each solution of the population | ||
* @param optimizer The optimizer that is used for FS | ||
* @param dataset The dataset to use | ||
* @param Fmap The dataset feature map | ||
* @param solution The current subset of features | ||
* @return The fitness score of the given subset | ||
*/ | ||
static <T extends FeatureSelector<Label>> double EvaluateSolution(T optimizer, Dataset<Label> dataset, ImmutableFeatureMap Fmap, int[] solution) { | ||
|
||
SelectedFeatureDataset<Label> selectedFeatureDataset = new SelectedFeatureDataset<>(dataset,getSFS(optimizer, dataset, Fmap, solution)); | ||
KNNClassifierOptions classifier = new KNNClassifierOptions(); | ||
CrossValidation<Label, LabelEvaluation> crossValidation = new CrossValidation<>(classifier.getTrainer(), selectedFeatureDataset, new LabelEvaluator(), 10); | ||
|
||
double avgAccuracy = 0d; | ||
for (Pair<LabelEvaluation, Model<Label>> ACC : crossValidation.evaluate()) | ||
|
||
avgAccuracy += ACC.getA().accuracy(); | ||
avgAccuracy /= crossValidation.getK(); | ||
|
||
return avgAccuracy + 0.0001 * (1 - ((double) selectedFeatureDataset.getSelectedFeatures().size() / Fmap.size())); | ||
} | ||
|
||
/** | ||
* This methid is used to return the selected subset of features | ||
* @param optimizer The optimizer that is used for FS | ||
* @param dataset The dataset to use | ||
* @param Fmap The dataset feature map | ||
* @param solution The current subset of featurs | ||
* @return The selected feature set | ||
*/ | ||
static <T extends FeatureSelector<Label>> SelectedFeatureSet getSFS(T optimizer, Dataset<Label> dataset, ImmutableFeatureMap Fmap, int[] solution) { | ||
List<String> names = new ArrayList<>(); | ||
List<Double> scores = new ArrayList<>(); | ||
for (int i = 0; i < solution.length; i++) | ||
if (solution[i] == 1) { | ||
names.add(Fmap.get(i).getName()); | ||
scores.add(1D); | ||
} | ||
FeatureSetProvenance provenance = new FeatureSetProvenance(SelectedFeatureSet.class.getName(), dataset.getProvenance(), optimizer.getProvenance()); | ||
|
||
return new SelectedFeatureSet(names, scores, optimizer.isOrdered(), provenance); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,157 @@ | ||
package FS_Wrapper_Approaches.Optimizers; | ||
|
||
import FS_Wrapper_Approaches.Discreeting.Binarizing;
import FS_Wrapper_Approaches.Discreeting.TransferFunction;
import FS_Wrapper_Approaches.Evaluation.FitnessFunction;
import com.oracle.labs.mlrg.olcut.util.Pair;
import org.tribuo.*;

import org.tribuo.classification.Label;
import org.tribuo.classification.evaluation.LabelEvaluation;
import org.tribuo.classification.evaluation.LabelEvaluator;
import org.tribuo.common.nearest.KNNClassifierOptions;
import org.tribuo.dataset.SelectedFeatureDataset;
import org.tribuo.evaluation.CrossValidation;
import org.tribuo.provenance.FeatureSelectorProvenance;
import org.tribuo.provenance.FeatureSetProvenance;
import org.tribuo.provenance.impl.FeatureSelectorProvenanceImpl;

import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.IntStream;
|
||
/** | ||
* Select features based on Cuckoo Search algorithm with binary transfer functions, KNN classifier and 10-fold cross validation | ||
* <p> | ||
* see: | ||
* <pre> | ||
* Xin-She Yang and Suash Deb. | ||
* "Cuckoo Search via L´evy Flights", 2010. | ||
* | ||
* L. A. M. Pereira et al. | ||
* "A Binary Cuckoo Search and its Application for Feature Selection", 2014. | ||
* </pre> | ||
*/ | ||
public class CuckooSearchOptimizer implements FeatureSelector<Label> { | ||
|
||
private final TransferFunction transferFunction; | ||
private final double stepSizeScaling; | ||
private final double lambda; | ||
private final double worstNestProbability; | ||
private final double delta; | ||
private final int populationSize; | ||
private int [][] setOfSolutions; | ||
private final int maxIteration; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. All the final variables here should not be |
||
|
||
/** | ||
* The default constructor for feature selection based on Cuckoo Search Algorithm | ||
*/ | ||
public CuckooSearchOptimizer() { | ||
this.transferFunction = TransferFunction.TFunction_V2; | ||
this.populationSize = 50; | ||
this.stepSizeScaling = 2d; | ||
this.lambda = 2d; | ||
this.worstNestProbability = 0.1d; | ||
this.delta = 1.5d; | ||
this.maxIteration = 30; | ||
} | ||
|
||
/** | ||
* Constructs the wrapper feature selection based on cuckoo search algorithm | ||
* @param transferFunction The transfer function to convert continuous values to binary ones | ||
* @param populationSize The size of the solution in the initial population | ||
* @param maxIteration The number of times that is used to enhance generation | ||
*/ | ||
public CuckooSearchOptimizer(TransferFunction transferFunction, int populationSize, int maxIteration) { | ||
this.transferFunction = transferFunction; | ||
this.populationSize = populationSize; | ||
this.stepSizeScaling = 2d; | ||
this.lambda = 2d; | ||
this.worstNestProbability = 1.5d; | ||
this.delta = 1.5d; | ||
this.maxIteration = maxIteration; | ||
} | ||
|
||
/** | ||
* @param transferFunction The transfer function to convert continuous values to binary ones | ||
* @param populationSize The size of the solution in the initial population | ||
* @param stepSizeScaling The cuckoo step size | ||
* @param lambda The lambda of the levy flight function | ||
* @param worstNestProbability The fraction of the nests to be abandoned | ||
* @param delta The delta that is used in the abandon nest function | ||
* @param maxIteration The number of times that is used to enhance generation | ||
*/ | ||
public CuckooSearchOptimizer(TransferFunction transferFunction, int populationSize, double stepSizeScaling, double lambda, double worstNestProbability, double delta, int maxIteration) { | ||
this.transferFunction = transferFunction; | ||
this.populationSize = populationSize; | ||
this.stepSizeScaling = stepSizeScaling; | ||
this.lambda = lambda; | ||
this.worstNestProbability = worstNestProbability; | ||
this.delta = delta; | ||
this.maxIteration = maxIteration; | ||
} | ||
|
||
/** | ||
* @param totalNumberOfFeatures The number of features in the given dataset | ||
* @return The population of subsets of selected features | ||
*/ | ||
private int[][] GeneratePopulation(int totalNumberOfFeatures) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As mentioned elsewhere this method should accept a |
||
setOfSolutions = new int[this.populationSize][totalNumberOfFeatures]; | ||
for (int[] subSet : setOfSolutions) | ||
System.arraycopy(new Random().ints(totalNumberOfFeatures, 0, 2).toArray(), 0, subSet, 0, setOfSolutions[0].length); | ||
return setOfSolutions; | ||
} | ||
|
||
/** | ||
* Does this feature selection algorithm return an ordered feature set? | ||
* | ||
* @return True if the set is ordered. | ||
*/ | ||
@Override | ||
public boolean isOrdered() { | ||
return true; | ||
} | ||
|
||
/** | ||
* Selects features according to this selection algorithm from the specified dataset. | ||
* @param dataset The dataset to use. | ||
* @return A selected feature set. | ||
*/ | ||
@Override | ||
public SelectedFeatureSet select(Dataset<Label> dataset) { | ||
ImmutableFeatureMap FMap = new ImmutableFeatureMap(dataset.getFeatureMap()); | ||
setOfSolutions = GeneratePopulation(dataset.getFeatureMap().size()); | ||
|
||
List<FeatureSet_FScore_Container> subSet_fScores = new ArrayList<>(); | ||
SelectedFeatureSet selectedFeatureSet = null; | ||
// Update the solution based on the levy flight function | ||
for (int i = 0; i < maxIteration; i++) { | ||
IntStream.range(0, setOfSolutions.length).parallel().forEach(subSet -> { | ||
|
||
AtomicInteger currentIter = new AtomicInteger(subSet); | ||
int[] evolvedSolution = Arrays.stream(setOfSolutions[subSet]).map(x -> Binarizing.discreteValue(transferFunction, x + stepSizeScaling * Math.pow(currentIter.get() + 1, -lambda))).toArray(); | ||
int[] randomCuckoo = setOfSolutions[new Random().nextInt(setOfSolutions.length)]; | ||
|
||
if (FitnessFunction.EvaluateSolution(this, dataset, FMap, evolvedSolution) > FitnessFunction.EvaluateSolution(this, dataset, FMap, randomCuckoo)) | ||
System.arraycopy(evolvedSolution, 0, setOfSolutions[subSet], 0, evolvedSolution.length); | ||
// Update the solution based on the abandone nest function | ||
if (new Random().nextDouble() < worstNestProbability) { | ||
int r1 = new Random().nextInt(setOfSolutions.length); | ||
int r2 = new Random().nextInt(setOfSolutions.length); | ||
for (var j = 0; j < setOfSolutions[subSet].length; j++) | ||
evolvedSolution[j] = Binarizing.discreteValue(transferFunction, setOfSolutions[subSet][j] + delta * (setOfSolutions[r1][j] - setOfSolutions[r2][j])); | ||
if (FitnessFunction.EvaluateSolution(this, dataset, FMap, evolvedSolution) > FitnessFunction.EvaluateSolution(this, dataset, FMap, setOfSolutions[subSet])) | ||
System.arraycopy(evolvedSolution, 0, setOfSolutions[subSet], 0, evolvedSolution.length); | ||
} | ||
subSet_fScores.add(new FeatureSet_FScore_Container(setOfSolutions[subSet], FitnessFunction.EvaluateSolution(this, dataset, FMap, setOfSolutions[subSet]))); | ||
}); | ||
subSet_fScores.sort(Comparator.comparing(FeatureSet_FScore_Container::score).reversed()); | ||
selectedFeatureSet = FitnessFunction.getSFS(this, dataset, FMap, subSet_fScores.get(0).subSet); | ||
|
||
} | ||
return selectedFeatureSet; | ||
} | ||
|
||
@Override | ||
public FeatureSelectorProvenance getProvenance() { | ||
return new FeatureSelectorProvenanceImpl(this); | ||
} | ||
|
||
/** | ||
* This record is used to hold subset of features with its corresponding fitness score | ||
*/ | ||
record FeatureSet_FScore_Container(int[] subSet, double score) { } | ||
|
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The base package name needs updating to `org.tribuo.classification.fs.wrapper`, and then the files should be moved into the matching directory. At the moment this won't compile because the directory and package names don't line up.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also all the files need the copyright and license header.