diff --git a/src/main/java/examples/es/example1/example.md b/src/main/java/examples/es/example1/example.md
deleted file mode 100644
index 8aebe50..0000000
--- a/src/main/java/examples/es/example1/example.md
+++ /dev/null
@@ -1,203 +0,0 @@
-# Example 1: Load data from an Elasticsearch index
-## Contents
-* [Overview](#overview)
-* [Import files](#include_files)
-* [The main function](#m_func)
-* [Results](#results)
-* [Source Code](#source_code)
-## Overview
-This example shows you how to load data from Elasticsearch.
-Thus, this example requires you to have Elasticsearch installed on your machine.
-You also need to add the following dependencies to the project's ```pom.xml``` file:
- org.elasticsearch
- elasticsearch
- 6.6.1
- org.elasticsearch.client
- elasticsearch-rest-high-level-client
- 6.6.1
-By default, Elasticsearch listens on port 9200 for incoming HTTP requests. Start the Elasticsearch server and point your
-browser to the URL http://localhost:9200/. You should see something like the following:
- "name" : "david-A15",
- "cluster_name" : "elasticsearch",
- "cluster_uuid" : "Lvn93INcQZKL9ZqJKLI0qg",
- "version" : {
- "number" : "7.6.0",
- "build_flavor" : "default",
- "build_type" : "tar",
- "build_hash" : "7f634e9f44834fbc12724506cc1da681b0c3b1e3",
- "build_date" : "2020-02-06T00:09:00.449973Z",
- "build_snapshot" : false,
- "lucene_version" : "8.4.0",
- "minimum_wire_compatibility_version" : "6.8.0",
- "minimum_index_compatibility_version" : "6.0.0-beta1"
- },
- "tagline" : "You Know, for Search"
-We will assume that there are no indices in Elasticsearch. Change accordingly if this is not true.
-Since there are no indices, we need to create a new index and insert some documents into it.
-After doing this, it's easy to retrieve the documents by issuing a ```SearchRequest```.
-We will store the data in a ```MapDataSet```; however, this is not compulsory.
-## The main function
-package examples.es.example1;
-import datasets.MapDataSet;
-import datasets.VectorDouble;
-import org.apache.http.HttpHost;
-import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
-import org.elasticsearch.action.index.IndexRequest;
-import org.elasticsearch.action.index.IndexResponse;
-import org.elasticsearch.action.search.SearchRequest;
-import org.elasticsearch.action.search.SearchResponse;
-import org.elasticsearch.client.RequestOptions;
-import org.elasticsearch.client.RestClient;
-import org.elasticsearch.client.RestHighLevelClient;
-import org.elasticsearch.common.xcontent.XContentType;
-import org.elasticsearch.search.SearchHit;
-import org.elasticsearch.search.SearchHits;
-import java.io.IOException;
-import java.util.Map;
-public class Example1 {
- public static final String INDEX = "jstat_es_example_1";
- /**
-  * Deletes the example index named by {@code Example1.INDEX}.
-  *
-  * @param client REST client used to send the delete-index request
-  * @throws IOException propagated from the delete call
-  */
- public static void deleteIndex(RestHighLevelClient client) throws IOException{
-     final DeleteIndexRequest dropExampleIndex = new DeleteIndexRequest(INDEX);
-     client.indices().delete(dropExampleIndex, RequestOptions.DEFAULT);
- }
- /**
-  * Indexes a single JSON document into {@code Example1.INDEX} under the type "es_exe1".
-  *
-  * @param json   the document source, as a JSON string
-  * @param client REST client used to send the index request
-  * @throws IOException propagated from the index call
-  */
- protected static void index(String json, RestHighLevelClient client) throws IOException{
-     IndexRequest indexRequest = new IndexRequest(Example1.INDEX)
-             .type("es_exe1")
-             .source(json, XContentType.JSON);
-     // Pass RequestOptions explicitly, as the search and delete calls in this class do.
-     // The response is not needed here, so it is not kept in a local.
-     client.index(indexRequest, RequestOptions.DEFAULT);
- }
- /**
-  * Inserts three sample documents, each holding a "Production" and an
-  * "Electricity Usage" value, by passing them one at a time to {@code Example1.index}.
-  *
-  * @param client REST client handed through to {@code Example1.index}
-  * @throws IOException propagated from {@code Example1.index}
-  */
- public static void loadElasticsearchDB(RestHighLevelClient client)throws IOException{
-     final String[] sampleDocuments = {
-         "{\"Production\":4.51,\"Electricity Usage\":2.48}",
-         "{\"Production\":3.58,\"Electricity Usage\":2.26}",
-         "{\"Production\":4.31,\"Electricity Usage\":2.47}"
-     };
-     for (String document : sampleDocuments) {
-         Example1.index(document, client);
-     }
- }
- /**
-  * Searches {@code Example1.INDEX} and copies every returned document into a MapDataSet:
-  * one VectorDouble per hit, stored under the keys "Row0", "Row1", ...
-  *
-  * @param client REST client used to send the search request
-  * @return the data set holding one row per search hit
-  * @throws IOException propagated from the search call
-  * @throws ClassCastException if a document field is not numeric
-  */
- public static MapDataSet loadDataSet(RestHighLevelClient client) throws IOException{
-     MapDataSet data = new MapDataSet<>();
-     SearchRequest request = new SearchRequest(Example1.INDEX);
-     SearchResponse response = client.search(request, RequestOptions.DEFAULT);
-     // let's retrieve the documents
-     SearchHit[] searchHits = response.getHits().getHits();
-     int rowCounter = 0;
-     for (SearchHit hit : searchHits) {
-         Map hitData = hit.getSourceAsMap();
-         VectorDouble row = new VectorDouble(hitData.size());
-         int counter=0;
-         for (Object value : hitData.values()) {
-             // A JSON number may be parsed as Integer or Long as well as Double, so
-             // convert through Number rather than casting straight to Double.
-             row.set(counter++, ((Number) value).doubleValue());
-         }
-         data.add("Row"+rowCounter++, row);
-     }
-     return data;
- }
- /**
-  * Runs the example: indexes the sample documents, loads them into a MapDataSet,
-  * then deletes the index again.
-  *
-  * @param args command-line arguments; not read
-  */
- public static void main(String[] args){
-     // we need a client to interact with Elasticsearch;
-     // try-with-resources closes it even when one of the requests fails
-     try (RestHighLevelClient client = new RestHighLevelClient(
-             RestClient.builder(new HttpHost("localhost", 9200, "http"),
-                     new HttpHost("localhost", 9201, "http")))) {
-         // create the index
-         Example1.loadElasticsearchDB(client);
-         // let's now populate the MapDataSet
-         Example1.loadDataSet(client);
-         // delete the index we created
-         Example1.deleteIndex(client);
-     }
-     catch(IOException e){
-         // report the cause as well, instead of hiding it
-         System.out.println("An IOException occurred: " + e.getMessage());
-     }
- }
-## Results
-Upon executing the code above, and depending on your configuration, you should
-see something like the following:
-Feb 25, 2020 5:34:47 PM org.elasticsearch.client.RestClient logResponse
-WARNING: request [POST http://localhost:9200/jstat_es_example_1/es_exe1?timeout=1m] returned 1 warnings: [299 Elasticsearch-7.6.0-7f634e9f44834fbc12724506cc1da681b0c3b1e3 "[types removal] Specifying types in document index requests is deprecated, use the typeless endpoints instead (/{index}/_doc/{id}, /{index}/_doc, or /{index}/_create/{id})."]
-Feb 25, 2020 5:34:47 PM org.elasticsearch.client.RestClient logResponse
-WARNING: request [POST http://localhost:9200/jstat_es_example_1/es_exe1?timeout=1m] returned 1 warnings: [299 Elasticsearch-7.6.0-7f634e9f44834fbc12724506cc1da681b0c3b1e3 "[types removal] Specifying types in document index requests is deprecated, use the typeless endpoints instead (/{index}/_doc/{id}, /{index}/_doc, or /{index}/_create/{id})."]
-Feb 25, 2020 5:34:47 PM org.elasticsearch.client.RestClient logResponse
-WARNING: request [POST http://localhost:9200/jstat_es_example_1/es_exe1?timeout=1m] returned 1 warnings: [299 Elasticsearch-7.6.0-7f634e9f44834fbc12724506cc1da681b0c3b1e3 "[types removal] Specifying types in document index requests is deprecated, use the typeless endpoints instead (/{index}/_doc/{id}, /{index}/_doc, or /{index}/_create/{id})."]
-## Source Code
diff --git a/src/main/java/examples/mc/example1/Example1.java b/src/main/java/examples/mc/example1/Example1.java
deleted file mode 100644
index e3a5b3f..0000000
--- a/src/main/java/examples/mc/example1/Example1.java
+++ /dev/null
@@ -1,42 +0,0 @@
-package examples.mc.example1;
-import java.util.Random;
-public class Example1 {
- public static void main(String[] args){
- final int N_ITRS = 1000;
- final String[] doors = new String[3];
- doors[0] = "A";
- doors[1] = "B";
- doors[2] = "C";
- int first_choice_wins = 0;
- int change_wins = 0;
- Random rand = new Random();
- for(int itr=0; itr Overview
-## Import files
- ```
-package examples.mc.example1;
-import java.util.Random;
- ```
-## The main function
-public class Example1 {
- public static void main(String[] args){
- final int N_ITRS = 1000;
- final String[] doors = new String[3];
- doors[0] = "A";
- doors[1] = "B";
- doors[2] = "C";
- int first_choice_wins = 0;
- int change_wins = 0;
- Random rand = new Random();
- for(int itr=0; itr Results
-You chose: B Winner door: B
-You chose: A Winner door: C
-You chose: A Winner door: C
-You chose: B Winner door: C
-You chose: C Winner door: B
-You chose: A Winner door: B
-You chose: A Winner door: A
-You chose: A Winner door: A
-You chose: C Winner door: B
-You chose: C Winner door: B
-You chose: C Winner door: C
-You chose: C Winner door: B
-You chose: A Winner door: A
-You chose: A Winner door: B
-You chose: B Winner door: B
-You chose: A Winner door: B
-You chose: C Winner door: A
-You chose: C Winner door: C
-You chose: C Winner door: B
-Wins with original choice: 313
-Wins with change choice: 687
-Probability of winning with initial guess: 0.313
-Probability of winning with change guess: 0.687
- ## Source Code
- Example1.java
\ No newline at end of file
diff --git a/src/main/java/examples/mc/example2/Example2.java b/src/main/java/examples/mc/example2/Example2.java
deleted file mode 100644
index f5fa63b..0000000
--- a/src/main/java/examples/mc/example2/Example2.java
+++ /dev/null
@@ -1,72 +0,0 @@
-package examples.mc.example2;
-import java.util.concurrent.ThreadLocalRandom;
-public class Example2 {
- /**
-  * Circle given by its radius {@code r} and its centre ({@code x}, {@code y}).
-  */
- public class Circle{
-     /**
-      * @param r radius of the circle
-      * @param x x-coordinate of the centre
-      * @param y y-coordinate of the centre
-      */
-     public Circle(double r, double x, double y){
-         this.r = r;
-         this.x = x;
-         this.y = y;
-     }
-     /** @return the radius of this circle */
-     public double radius(){return this.r;}
-     /**
-      * @return true when the squared distance from (x, y) to the centre exceeds
-      *         r*r by less than 1.0e-9, i.e. the point is inside or on the circle
-      */
-     public boolean isInside(double x, double y){
-         return Math.pow((this.x - x), 2) + Math.pow(this.y - y, 2) - r*r < 1.0e-9;
-     }
-     /** @return the area of the circle, computed with Math.PI rather than a 3.14 approximation */
-     public double area(){
-         return Math.PI*r*r;
-     }
-     double r;
-     double x;
-     double y;
- }
- public static void main(String[] args){
- Example2 exe = new Example2();
- final int N_ITERATIONS = 10000;
- Circle circle = exe.new Circle(2.0, 0.0, 0.0);
- final double x0 = -circle.radius();
- final double x1 = circle.radius();
- final double y0 = -circle.radius();
- final double y1 = circle.radius();
- final double RECT_AREA = (x1 - x0)*(y1 - y0);
- double totalArea = 0.0;
- double areaUnderCurve = 0.0;
- for(int i=0; i Overview
-## Import files
- ```
-package examples.mc.example2;
-import java.util.concurrent.ThreadLocalRandom;
- ```
-## The main function
-public class Example2 {
- /**
-  * A circle described by a radius and the coordinates of its centre.
-  */
- public class Circle{
-     double r;
-     double x;
-     double y;
-     /** Stores the radius and the centre coordinates as given. */
-     public Circle(double r, double x, double y){
-         this.x = x;
-         this.y = y;
-         this.r = r;
-     }
-     /** @return the radius */
-     public double radius(){
-         return r;
-     }
-     /** @return whether (x, y) lies inside the circle, with 1.0e-9 slack on the squared distance */
-     public boolean isInside(double x, double y){
-         final double squaredDistance = Math.pow((this.x - x), 2) + Math.pow(this.y - y, 2);
-         return squaredDistance - r*r < 1.0e-9;
-     }
-     /** @return the area, using 3.14 as the value of pi */
-     public double area(){
-         final double approximatePi = 3.14;
-         return approximatePi*r*r;
-     }
- }
- public static void main(String[] args){
- Example2 exe = new Example2();
- final int N_ITERATIONS = 10000;
- Circle circle = exe.new Circle(2.0, 0.0, 0.0);
- final double x0 = -circle.radius();
- final double x1 = circle.radius();
- final double y0 = -circle.radius();
- final double y1 = circle.radius();
- final double RECT_AREA = (x1 - x0)*(y1 - y0);
- double totalArea = 0.0;
- double areaUnderCurve = 0.0;
- for(int i=0; i Results
-Rectangle area: 16.0
-Total area points: 10000.0
-Area under curve points: 7789.0
-Calculated area: 12.4624
-Circle area: 12.56
- ## Source Code
- Example2.java
\ No newline at end of file
diff --git a/src/main/java/examples/mc/example3/Example3.java b/src/main/java/examples/mc/example3/Example3.java
deleted file mode 100644
index 1eee42b..0000000
--- a/src/main/java/examples/mc/example3/Example3.java
+++ /dev/null
@@ -1,35 +0,0 @@
-package examples.mc.example3;
-import java.util.concurrent.ThreadLocalRandom;
-public class Example3 {
- /**
-  * Estimates pi with a random walk: starting from (1, 1), each iteration proposes a
-  * step of at most DELTA per axis, accepts it only if both coordinates stay strictly
-  * inside (-1, 1), and counts the iterations whose current point lies inside the
-  * unit circle. Prints 4 * hits / N_ITERATIONS.
-  *
-  * @param args command-line arguments; not read
-  */
- static public void main(String[] args){
-     final int N_ITERATIONS = 10000;
-     final double DELTA = 0.1;
-     final ThreadLocalRandom rng = ThreadLocalRandom.current();
-     double px = 1.0;
-     double py = 1.0;
-     int hits = 0;
-     for (int step = 0; step < N_ITERATIONS; step++) {
-         // draw the x step first, then the y step
-         final double candidateX = px + rng.nextDouble(-DELTA, DELTA);
-         final double candidateY = py + rng.nextDouble(-DELTA, DELTA);
-         final boolean staysInSquare = Math.abs(candidateX) < 1.0 && Math.abs(candidateY) < 1.0;
-         if (staysInSquare) {
-             px = candidateX;
-             py = candidateY;
-         }
-         // a rejected step still counts the current position again
-         if (px*px + py*py < 1.0) {
-             hits++;
-         }
-     }
-     System.out.println("Pi is: " + 4.0 * hits / (double) N_ITERATIONS);
- }
diff --git a/src/main/java/examples/mc/example3/example.md b/src/main/java/examples/mc/example3/example.md
deleted file mode 100644
index f984955..0000000
--- a/src/main/java/examples/mc/example3/example.md
+++ /dev/null
@@ -1,64 +0,0 @@
-# Markov Chain Monte Carlo For Pi Calculation
-## Contents
- * [Overview](#overview)
- * [Import files](#include_files)
- * [The main function](#m_func)
- * [Results](#results)
- * [Source Code](#source_code)
-## Overview
-## Import files
- ```
-package examples.mc.example3;
-import java.util.concurrent.ThreadLocalRandom;
- ```
-## The main function
-public class Example3 {
- /**
-  * Estimates pi with a random walk: starting from (1, 1), each iteration proposes a
-  * step of at most DELTA per axis, accepts it only if both coordinates stay strictly
-  * inside (-1, 1), and counts the iterations whose current point lies inside the
-  * unit circle. Prints 4 * hits / N_ITERATIONS.
-  *
-  * @param args command-line arguments; not read
-  */
- static public void main(String[] args){
-     final int N_ITERATIONS = 10000;
-     final double DELTA = 0.1;
-     final ThreadLocalRandom rng = ThreadLocalRandom.current();
-     double px = 1.0;
-     double py = 1.0;
-     int hits = 0;
-     for (int step = 0; step < N_ITERATIONS; step++) {
-         // draw the x step first, then the y step
-         final double candidateX = px + rng.nextDouble(-DELTA, DELTA);
-         final double candidateY = py + rng.nextDouble(-DELTA, DELTA);
-         final boolean staysInSquare = Math.abs(candidateX) < 1.0 && Math.abs(candidateY) < 1.0;
-         if (staysInSquare) {
-             px = candidateX;
-             py = candidateY;
-         }
-         // a rejected step still counts the current position again
-         if (px*px + py*py < 1.0) {
-             hits++;
-         }
-     }
-     System.out.println("Pi is: " + 4.0 * hits / (double) N_ITERATIONS);
- }
-## Results
-Pi is: 3.0244
- ## Source Code
- Example3.java
\ No newline at end of file
diff --git a/src/main/java/examples/mc/example4/Example4.java b/src/main/java/examples/mc/example4/Example4.java
deleted file mode 100644
index fa21efc..0000000
--- a/src/main/java/examples/mc/example4/Example4.java
+++ /dev/null
@@ -1,55 +0,0 @@
-package examples.mc.example4;
-import base.CommonConstants;
-import io.CSVFileWriter;
-import org.apache.commons.math3.distribution.AbstractRealDistribution;
-import org.apache.commons.math3.distribution.NormalDistribution;
-import java.util.ArrayList;
-import java.util.List;
-public class Example4 {
- /**
-  * Target density used by the example: zero for negative arguments, exp(-x) otherwise.
-  *
-  * @param x point at which to evaluate the function
-  * @return 0.0 when {@code x < 0}, otherwise {@code Math.exp(-x)}
-  */
- public static double target(double x){
-     return (x < 0.) ? 0.0 : Math.exp(-x);
- }
- public static void main(String[] args){
- final int N_ITERATIONS = 10000;
- List pos = new ArrayList<>(N_ITERATIONS);
- for(int i=0; i Acknowledgements
- This example was taken from fiveMinuteStats.
- ## Overview
- ### Metropolis algorithm
- ## Import files
- ```
- ```
- ## The main function
- ```
- ```
- ## Results
- ```
- ```
- ## Source Code
- Example4.java
\ No newline at end of file
diff --git a/src/main/java/examples/mc/example4/positions.csv b/src/main/java/examples/mc/example4/positions.csv
deleted file mode 100644
index a4fe364..0000000
--- a/src/main/java/examples/mc/example4/positions.csv
+++ /dev/null
@@ -1,9791 +0,0 @@
\ No newline at end of file
diff --git a/src/main/java/examples/ml/example1/Example1.java b/src/main/java/examples/ml/example1/Example1.java
deleted file mode 100644
index 578b984..0000000
--- a/src/main/java/examples/ml/example1/Example1.java
+++ /dev/null
@@ -1,39 +0,0 @@
-package examples.ml.example1;
-import dataloader.CsvDataLoader;
-import tech.tablesaw.api.Table;
-import visualizations.ScatterChart;
-import java.io.File;
-import java.io.IOException;
-public class Example1 {
- /**
-  * Entry point of the linear-regression example.
-  * The entire body is wrapped in a block comment, so the method currently does nothing.
-  * NOTE(review): the disabled code uses Simple1DLinearRegression, which is not among the
-  * imports visible in this file - confirm where it lives before re-enabling the body.
-  *
-  * @param args command-line arguments; not read
-  * @throws IOException declared in the signature; the active (empty) body never throws it
-  */
- public static void main(String[] args ) throws IOException {
- /*File file = new File("data/car_plant.csv");
- Table table = CsvDataLoader.TableLoader.parseFile(file);
- // let's plot the data
- ScatterChart plotter = new ScatterChart();
- ScatterChart.ScatterChartOptions options = plotter.new ScatterChartOptions();
- options.chartTitle = "Production vs Electricity Usage";
- options.xAxisName = "Production";
- options.yAxisName = "Electricity Usage";
- ScatterChart.plotScatter(options, table);
- Simple1DLinearRegression regression = new Simple1DLinearRegression();
- regression.fit(table, "Production", "Electricity Usage");
- double[] coeffs = regression.getCoeffs();
- double intercept = regression.getIntercept();
- System.out.println("Regression coefficients. Intercept: "+intercept+" Slope: "+coeffs[0]);
- //TODO embed the regression line into the Scatter plot somehow*/
- }
diff --git a/src/main/java/examples/ml/example1/example.md b/src/main/java/examples/ml/example1/example.md
deleted file mode 100644
index f895033..0000000
--- a/src/main/java/examples/ml/example1/example.md
+++ /dev/null
@@ -1,69 +0,0 @@
-# Example 1: Linear Regression
-## Contents
-* [Overview](#overview)
- * [Linear Regression](#linear_regression)
- * [How Good Is The Fit?](#how_good_is_the_fit)
- * [```R^2``` Coefficient](#r2_coefficient)
-* [Include files](#include_files)
-* [Program structure](#prg_struct)
-* [The main function](#m_func)
-* [Results](#results)
-* [Source Code](#source_code)
-## Overview
-This example discusses the linear regression model. In statistics, linear regression is a mathematical approach to modelling
-the relationship between a scalar response (or dependent variable) and one or more explanatory variables (or independent variables).
-The case of one explanatory variable is called simple linear regression. For more than one explanatory variable, the process is called multiple linear regression
-(see the Wikipedia article on linear regression).
-In contrast to classification, which is concerned with
-class indices, the outcome of a linear regression model, or more generally of any regression model, is a real number.
-### Linear Regression
-
-### How Good Is The Fit?
-So we have established the linear regression model, but how can we measure how good it is?
-One metric for doing so is the so-called ```R^2``` coefficient, also known as the coefficient of determination.
-#### ```R^2``` Coefficient
-The coefficient is defined as
-
-where SSR and SST are defined respectively as
-
-
-## Import files
-## The main function
-int main(){
-## Results
-## Source Code
\ No newline at end of file
diff --git a/src/main/java/examples/ml/example1/linear_regression.png b/src/main/java/examples/ml/example1/linear_regression.png
deleted file mode 100644
index dda06b8..0000000
Binary files a/src/main/java/examples/ml/example1/linear_regression.png and /dev/null differ
diff --git a/src/main/java/examples/ml/example1/r2.gif b/src/main/java/examples/ml/example1/r2.gif
deleted file mode 100644
index 94402d3..0000000
Binary files a/src/main/java/examples/ml/example1/r2.gif and /dev/null differ
diff --git a/src/main/java/examples/ml/example1/ssr.gif b/src/main/java/examples/ml/example1/ssr.gif
deleted file mode 100644
index 8f049dd..0000000
Binary files a/src/main/java/examples/ml/example1/ssr.gif and /dev/null differ
diff --git a/src/main/java/examples/ml/example1/sst.gif b/src/main/java/examples/ml/example1/sst.gif
deleted file mode 100644
index 25e7785..0000000
Binary files a/src/main/java/examples/ml/example1/sst.gif and /dev/null differ
diff --git a/src/main/java/examples/ml/example2/Example2.java b/src/main/java/examples/ml/example2/Example2.java
deleted file mode 100644
index b92dd5d..0000000
--- a/src/main/java/examples/ml/example2/Example2.java
+++ /dev/null
@@ -1,67 +0,0 @@
-package examples.ml.example2;
-import datasets.VectorDouble;
-import optimization.GradientDescent;
-import optimization.GDInput;
-import utils.DefaultIterativeAlgorithmController;
-import utils.IterativeAlgorithmResult;
-import datasets.DenseMatrixSet;
-import datastructs.RowBuilder;
-import datastructs.RowType;
-import maths.errorfunctions.MSEVectorFunction;
-import maths.functions.NonLinearVectorPolynomial;
-import maths.functions.ScalarMonomial;
-import ml.regression.NonLinearRegressor;
-import tech.tablesaw.api.Table;
-import utils.TableDataSetLoader;
-import java.io.File;
-import java.io.IOException;
-/** Category: Machine Learning
- * ID: Example7
- * Description: Non-linear Regression
- * Taken From:
- * Details:
- * TODO
- */
-public class Example2 {
- /**
-  * Fits the hypothesis w0 + w1*X + w2*X^2 to the car-plant data set with gradient
-  * descent and prints the solver result followed by the three fitted coefficients.
-  *
-  * @param args command-line arguments; not read
-  * @throws IOException declared for the data-set loading call
-  */
- public static void main(String[] args)throws IOException {
-     // load the data
-     Table dataSet = TableDataSetLoader.loadDataSet(new File("src/main/resources/datasets/car_plant.csv"));
-     VectorDouble labels = new VectorDouble(dataSet, "Electricity Usage");
-     Table reducedDataSet = dataSet.removeColumns("Electricity Usage").first(dataSet.rowCount());
-     DenseMatrixSet denseMatrixSet = new DenseMatrixSet(RowType.Type.DOUBLE_VECTOR, new RowBuilder(), reducedDataSet.rowCount(), 2, 1.0);
-     denseMatrixSet.setColumn(1, reducedDataSet.doubleColumn(0));
-     denseMatrixSet.duplicateColumn(1);
-     // assume a hypothesis of the form w0 + w1*X + w2*X^2
-     // initially all weights are set to zero
-     NonLinearVectorPolynomial hypothesis = new NonLinearVectorPolynomial(new ScalarMonomial(0, 0.0),
-             new ScalarMonomial(1, 0.0),
-             new ScalarMonomial(2, 0.0));
-     // the regressor
-     NonLinearRegressor regressor = new NonLinearRegressor(hypothesis);
-     GDInput gdInput = new GDInput();
-     gdInput.showIterations = true;
-     gdInput.eta = 0.001;
-     gdInput.errF = new MSEVectorFunction(hypothesis);
-     gdInput.iterationContorller = new DefaultIterativeAlgorithmController(10000, 1.0e-8);
-     GradientDescent gdSolver = new GradientDescent(gdInput);
-     IterativeAlgorithmResult result = (IterativeAlgorithmResult) regressor.train(denseMatrixSet, labels, gdSolver);
-     System.out.println(result);
-     // " slope 2: " now has the same separator as the other labels, so the value
-     // no longer runs straight into the label text
-     System.out.println("Intercept: " + hypothesis.getCoeff(0) + " slope 1: " + hypothesis.getCoeff(1) + " slope 2: " + hypothesis.getCoeff(2));
- }
diff --git a/src/main/java/examples/ml/example2/example.md b/src/main/java/examples/ml/example2/example.md
deleted file mode 100644
index 6021b7d..0000000
--- a/src/main/java/examples/ml/example2/example.md
+++ /dev/null
@@ -1,130 +0,0 @@
-# Example 2: Non Linear Regression
-## Contents
-* [Overview](#overview)
- * [Non Linear Regression](#nolinear_regression)
-* [Import files](#include_files)
-* [The main function](#m_func)
-* [Results](#results)
-* [Source Code](#source_code)
-## Overview
-### Non Linear Regression
-## Import files
-package examples.ml.example2;
-import optimization.GradientDescent;
-import optimization.GDInput;
-import utils.DefaultIterativeAlgorithmController;
-import utils.IterativeAlgorithmResult;
-import datasets.DenseMatrixSet;
-import datastructs.RowBuilder;
-import datasets.VectorDouble;
-import datastructs.RowType;
-import maths.errorfunctions.MSEVectorFunction;
-import maths.functions.NonLinearVectorPolynomial;
-import maths.functions.ScalarMonomial;
-import ml.regression.NonLinearRegressor;
-import tech.tablesaw.api.Table;
-import utils.TableDataSetLoader;
-import java.io.File;
-import java.io.IOException;
-## The main function
-public class Example2 {
- // Fits the hypothesis w0 + w1*X + w2*X^2 to the car-plant data with gradient descent
- // and prints the solver result and the fitted coefficients.
- // NOTE(review): this listing uses Vector and BatchGradientDescent, while the import
- // list shown in this document names VectorDouble and GradientDescent - confirm which is current.
- public static void main(String[] args)throws IOException {
- // load the data
- Table dataSet = TableDataSetLoader.loadDataSet(new File("src/main/resources/datasets/car_plant.csv"));
- Vector labels = new Vector(dataSet, "Electricity Usage");
- Table reducedDataSet = dataSet.removeColumns("Electricity Usage").first(dataSet.rowCount());
- DenseMatrixSet denseMatrixSet = new DenseMatrixSet(RowType.Type.DOUBLE_VECTOR, new RowBuilder(), reducedDataSet.rowCount(), 2, 1.0);
- denseMatrixSet.setColumn(1, reducedDataSet.doubleColumn(0));
- denseMatrixSet.duplicateColumn(1);
- // assume a hypothesis of the form w0 + w1*X + w2*X^2
- // initially all weights are set to zero
- NonLinearVectorPolynomial hypothesis = new NonLinearVectorPolynomial(new ScalarMonomial(0, 0.0),
- new ScalarMonomial(1, 0.0),
- new ScalarMonomial(2, 0.0));
- // the regressor
- NonLinearRegressor regressor = new NonLinearRegressor(hypothesis);
- GDInput gdInput = new GDInput();
- gdInput.showIterations = true;
- gdInput.eta = 0.001;
- gdInput.errF = new MSEVectorFunction(hypothesis);
- gdInput.iterationContorller = new DefaultIterativeAlgorithmController(10000, 1.0e-8);
- BatchGradientDescent gdSolver = new BatchGradientDescent(gdInput);
- IterativeAlgorithmResult result = (IterativeAlgorithmResult) regressor.train(denseMatrixSet, labels, gdSolver);
- System.out.println(result);
- // NOTE(review): " slope 2" has no separator, so the value is printed directly after the label
- System.out.println("Intercept: " + hypothesis.getCoeff(0) + " slope 1: " + hypothesis.getCoeff(1) + " slope 2" + hypothesis.getCoeff(2));
- }
-## Results
-BatchGD: iteration: 1
- Jold: 8.224725 Jcur: 0.8990546942654579
- error |Jcur-Jold|: 7.325670305734541
- exit tolerance: 1.0E-8
-BatchGD: iteration: 2
- Jold: 0.8990546942654579 Jcur: 0.22953957530367752
- error |Jcur-Jold|: 0.6695151189617804
- exit tolerance: 1.0E-8
-BatchGD: iteration: 3
- Jold: 0.22953957530367752 Jcur: 0.16817958029206523
- error |Jcur-Jold|: 0.061359995011612295
- exit tolerance: 1.0E-8
-BatchGD: iteration: 4
- Jold: 0.16817958029206523 Jcur: 0.16238535312357452
- error |Jcur-Jold|: 0.0057942271684907065
- exit tolerance: 1.0E-8
-BatchGD: iteration: 5
- Jold: 0.16238535312357452 Jcur: 0.1616683032442727
- error |Jcur-Jold|: 7.170498793018232E-4
- exit tolerance: 1.0E-8
-BatchGD: iteration: 6
- Jold: 0.1616683032442727 Jcur: 0.16141542469742223
- error |Jcur-Jold|: 2.5287854685046574E-4
- exit tolerance: 1.0E-8
-Converged: true
-Tolerance: 9.990863976405695E-9
-# Threads: 1
-Iterations: 6638
-Intercept: 0.24271250840749334 slope 1: 0.559359716172044 slope 2-0.005371430750912633
-## Source Code
\ No newline at end of file
diff --git a/src/main/java/examples/ml/example3/Example3.java b/src/main/java/examples/ml/example3/Example3.java
deleted file mode 100644
index 07aa74c..0000000
--- a/src/main/java/examples/ml/example3/Example3.java
+++ /dev/null
@@ -1,73 +0,0 @@
-package examples.ml.example3;
-import datasets.DenseMatrixSet;
-import datasets.VectorDouble;
-import datastructs.RowBuilder;
-import datastructs.RowType;
-import maths.functions.distances.EuclideanVectorCalculator;
-import ml.classifiers.KNNClassifier;
-import ml.classifiers.utils.ClassificationVoter;
-import java.util.ArrayList;
-import java.util.List;
-/** Category: Machine Learning
- * ID: Example1
- * Description: Classification with vanilla KNN algorithm
- * Taken From:
- * Details:
- * TODO
- */
-public class Example3 {
- /**
-  * Builds a 12 x 2 data set by hand, labels rows 0-5 with class 0 and rows 6-11 with
-  * class 1, trains a KNN classifier on it and prints the predicted class index of
-  * two query points.
-  *
-  * @param args command-line arguments; not read
-  */
- public static void main(String[] args) {
-     DenseMatrixSet dataSet = new DenseMatrixSet(RowType.Type.DOUBLE_VECTOR, new RowBuilder());
-     dataSet.create(12, 2);
-     dataSet.set(0, 1.0, 3.0);
-     dataSet.set(1, 1.5, 2.0);
-     dataSet.set(2, 2.0, 1.0);
-     dataSet.set(3, 2.5, 4.0);
-     dataSet.set(4, 3.0, 1.5);
-     dataSet.set(5, 3.5, 2.5);
-     dataSet.set(6, 5.0, 5.0);
-     dataSet.set(7, 5.5, 4.0);
-     dataSet.set(8, 6.0, 6.0);
-     dataSet.set(9, 6.5, 4.5);
-     dataSet.set(10, 7.0, 1.5);
-     dataSet.set(11, 8.0, 2.5);
-     List labels = new ArrayList<>(dataSet.m());
-     for (int i = 0; i < 6; ++i) {
-         labels.add(0);
-     }
-     for (int i = 6; i < dataSet.m(); ++i) {
-         labels.add(1);
-     }
-     // NOTE(review): the generic type arguments of the declaration below appear to have
-     // been lost in this copy of the file - restore them from the original source.
-     KNNClassifier,
-     EuclideanVectorCalculator,
-     ClassificationVoter> classifier = new KNNClassifier,
-     EuclideanVectorCalculator, ClassificationVoter>(2, false);
-     classifier.setDistanceCalculator(new EuclideanVectorCalculator());
-     classifier.setMajorityVoter(new ClassificationVoter());
-     classifier.train(dataSet, labels);
-     VectorDouble r = new VectorDouble(3.1, 2.2);
-     Integer classIdx = classifier.predict(r);
-     System.out.println("Point " + r + " has class index " + classIdx);
-     r = new VectorDouble(9.1, 6.2);
-     classIdx = classifier.predict(r);
-     // the second prediction used to be computed and then dropped; report it like the first
-     System.out.println("Point " + r + " has class index " + classIdx);
- }
diff --git a/src/main/java/examples/ml/example3/example.md b/src/main/java/examples/ml/example3/example.md
deleted file mode 100644
index a2413f1..0000000
--- a/src/main/java/examples/ml/example3/example.md
+++ /dev/null
@@ -1,102 +0,0 @@
-# Example 3: KNN Classification
-## Contents
-* [Overview](#overview)
- * [KNN Classification](#knn_classification)
-* [Import files](#include_files)
-* [The main function](#m_func)
-* [Results](#results)
-* [Source Code](#source_code)
-## Overview
-### KNN Classification
-## Import files
-package examples.ml.example3;
-import datasets.DenseMatrixSet;
-import datastructs.RowBuilder;
-import datasets.VectorDouble;
-import datastructs.RowType;
-import maths.functions.distances.EuclideanVectorCalculator;
-import ml.classifiers.KNNClassifier;
-import ml.classifiers.utils.ClassificationVoter;
-import java.util.ArrayList;
-import java.util.List;
-## The main function
-public class Example3 {
- // Builds a 12 x 2 data set by hand, labels rows 0-5 with class 0 and rows 6-11 with
- // class 1, trains a KNN classifier on it and predicts the class of two query points.
- // NOTE(review): this listing uses Vector, while the import list shown in this
- // document names VectorDouble - confirm which is current.
- public static void main(String[] args) {
- DenseMatrixSet dataSet = new DenseMatrixSet(RowType.Type.DOUBLE_VECTOR, new RowBuilder());
- dataSet.create(12, 2);
- dataSet.set(0, 1.0, 3.0);
- dataSet.set(1, 1.5, 2.0);
- dataSet.set(2, 2.0, 1.0);
- dataSet.set(3, 2.5, 4.0);
- dataSet.set(4, 3.0, 1.5);
- dataSet.set(5, 3.5, 2.5);
- dataSet.set(6, 5.0, 5.0);
- dataSet.set(7, 5.5, 4.0);
- dataSet.set(8, 6.0, 6.0);
- dataSet.set(9, 6.5, 4.5);
- dataSet.set(10, 7.0, 1.5);
- dataSet.set(11, 8.0, 2.5);
- List labels = new ArrayList<>(dataSet.m());
- for (int i = 0; i < 6; ++i) {
- labels.add(0);
- }
- for (int i = 6; i < dataSet.m(); ++i) {
- labels.add(1);
- }
- // NOTE(review): the generic type arguments of the declaration below appear to have
- // been lost in this copy of the listing.
- KNNClassifier,
- EuclideanVectorCalculator,
- ClassificationVoter> classifier = new KNNClassifier,
- EuclideanVectorCalculator, ClassificationVoter>(2, false);
- classifier.setDistanceCalculator(new EuclideanVectorCalculator());
- classifier.setMajorityVoter(new ClassificationVoter());
- classifier.train(dataSet, labels);
- Vector r = new Vector(3.1, 2.2);
- Integer classIdx = classifier.predict(r);
- System.out.println("Point " + r + " has class index " + classIdx);
- r = new Vector(9.1, 6.2);
- // NOTE(review): this second prediction is assigned but never printed or used
- classIdx = classifier.predict(r);
- }
-## Results
-## Source Code
\ No newline at end of file
diff --git a/src/main/java/examples/ml/example4/Example4.java b/src/main/java/examples/ml/example4/Example4.java
deleted file mode 100644
index 1b1957b..0000000
--- a/src/main/java/examples/ml/example4/Example4.java
+++ /dev/null
@@ -1,102 +0,0 @@
-package examples.ml.example4;
-import datasets.DenseMatrixSet;
-import datasets.VectorDouble;
-import datastructs.RowBuilder;
-import datastructs.RowType;
-import maths.functions.distances.EuclideanVectorCalculator;
-import ml.classifiers.ThreadedKNNClassifier;
-import parallel.partitioners.MatrixRowPartitionPolicy;
-import parallel.partitioners.RangePartitioner;
-import tech.tablesaw.api.Table;
-import tech.tablesaw.columns.Column;
-import ml.classifiers.utils.ClassificationVoter;
-import utils.Pair;
-import utils.PairBuilder;
-import utils.TableDataSetLoader;
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.concurrent.ExecutorService;
-import static java.util.concurrent.Executors.newFixedThreadPool;
-/** Category: Machine Learning
- * ID: Example8
- * Description: Classification with vanilla ParallelKNN algorithm
- * Taken From:
- * Details:
- * TODO
- */
-public class Example4 {
- /**
-  * Loads the iris CSV file, turns the "species" column into integer class labels
-  * (Iris-setosa = 0, Iris-versicolor = 1, Iris-virginica = 2), copies the remaining
-  * columns into a DenseMatrixSet, attaches a row-partition policy to it and returns
-  * the data set together with the labels.
-  * NOTE(review): the generic type arguments in the signature and on the partitions
-  * list appear to have been lost in this copy of the file.
-  *
-  * @return a pair whose first element is the data set and whose second is the label list
-  * @throws IOException declared for the data-set loading call
-  * @throws IllegalArgumentException if a species value is not one of the three known names
-  */
- public static Pair, List> createDataSet() throws IOException, IllegalArgumentException {
- // load the data
- Table dataSetTable = TableDataSetLoader.loadDataSet(new File("src/main/resources/datasets/iris_data.csv"));
- List labels = new ArrayList<>();
- Column species = dataSetTable.column("species");
- // map each species name to its class index; any other value is rejected
- for (int i = 0; i < species.size(); i++) {
- String label = (String) species.get(i);
- if(label.equals("Iris-setosa")){
- labels.add(0);
- }
- else if(label.equals("Iris-versicolor")){
- labels.add(1);
- }
- else if(label.equals("Iris-virginica")){
- labels.add(2);
- }
- else{
- throw new IllegalArgumentException("Unknown class");
- }
- }
- // drop the label column so that only the feature columns are copied into the matrix
- Table reducedDataSet = dataSetTable.removeColumns("species").first(dataSetTable.rowCount());
- DenseMatrixSet dataSet = new DenseMatrixSet(RowType.Type.DOUBLE_VECTOR, new RowBuilder());
- dataSet.initializeFrom(reducedDataSet);
- // partition the data set
- // presumably splits rows [0, m) into 4 ranges, matching the 4-thread pool created in main - TODO confirm
- List> partitions = RangePartitioner.partition(0, dataSet.m(), 4);
- MatrixRowPartitionPolicy partitionPolicy = new MatrixRowPartitionPolicy(partitions);
- dataSet.setPartitionPolicy(partitionPolicy);
- return PairBuilder.makePair(dataSet, labels);
- }
- public static void main(String[] args) throws IOException, IllegalArgumentException{
- Pair, List> data = Example4.createDataSet();
- ExecutorService executorService = newFixedThreadPool(4);
- System.out.println("Number of rows: "+data.first.m());
- System.out.println("Number of labels: "+data.second.size());
- ThreadedKNNClassifier, EuclideanVectorCalculator,
- ClassificationVoter> classifier = new ThreadedKNNClassifier<>(3, false, executorService);
- classifier.setDistanceCalculator(new EuclideanVectorCalculator());
- classifier.setMajorityVoter(new ClassificationVoter());
- classifier.train(data.first, data.second);
- VectorDouble point = new VectorDouble(5.9,3.0,5.1,1.8);
- Integer classIdx = classifier.predict(point);
- System.out.println("Point "+ point +" has class index "+ classIdx);
- executorService.shutdown();
- }
diff --git a/src/main/java/examples/ml/example4/example.md b/src/main/java/examples/ml/example4/example.md
deleted file mode 100644
index 6738b46..0000000
--- a/src/main/java/examples/ml/example4/example.md
+++ /dev/null
@@ -1,132 +0,0 @@
-# Example 4: KNN Classification With Multiple Threads
-## Contents
-* [Overview](#overview)
- * [KNN classification with many threads](#knn_classification)
-* [Import files](#include_files)
-* [The main function](#m_func)
-* [Results](#results)
-* [Source Code](#source_code)
-## Overview
-### KNN classification with many threads
-## Import files
-package examples.ml.example4;
-import datasets.DenseMatrixSet;
-import datastructs.RowBuilder;
-import datasets.VectorDouble;
-import datastructs.RowType;
-import maths.functions.distances.EuclideanVectorCalculator;
-import ml.classifiers.ThreadedKNNClassifier;
-import parallel.partitioners.MatrixRowPartitionPolicy;
-import parallel.partitioners.RangePartitioner;
-import tech.tablesaw.api.Table;
-import tech.tablesaw.columns.Column;
-import ml.classifiers.utils.ClassificationVoter;
-import utils.Pair;
-import utils.PairBuilder;
-import utils.TableDataSetLoader;
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.concurrent.ExecutorService;
-import static java.util.concurrent.Executors.newFixedThreadPool;
-## The main function
-public class Example4 {
- public static Pair, List> createDataSet() throws IOException, IllegalArgumentException {
- // load the data
- Table dataSetTable = TableDataSetLoader.loadDataSet(new File("src/main/resources/datasets/iris_data.csv"));
- List labels = new ArrayList<>();
- Column species = dataSetTable.column("species");
- for (int i = 0; i < species.size(); i++) {
- String label = (String) species.get(i);
- if(label.equals("Iris-setosa")){
- labels.add(0);
- }
- else if(label.equals("Iris-versicolor")){
- labels.add(1);
- }
- else if(label.equals("Iris-virginica")){
- labels.add(2);
- }
- else{
- throw new IllegalArgumentException("Unknown class");
- }
- }
- Table reducedDataSet = dataSetTable.removeColumns("species").first(dataSetTable.rowCount());
- DenseMatrixSet dataSet = new DenseMatrixSet(RowType.Type.DOUBLE_VECTOR, new RowBuilder());
- dataSet.initializeFrom(reducedDataSet);
- // partition the data set
- List> partitions = RangePartitioner.partition(0, dataSet.m(), 4);
- MatrixRowPartitionPolicy partitionPolicy = new MatrixRowPartitionPolicy(partitions);
- dataSet.setPartitionPolicy(partitionPolicy);
- return PairBuilder.makePair(dataSet, labels);
- }
- public static void main(String[] args) throws IOException, IllegalArgumentException{
- Pair, List> data = Example4.createDataSet();
- ExecutorService executorService = newFixedThreadPool(4);
- System.out.println("Number of rows: "+data.first.m());
- System.out.println("Number of labels: "+data.second.size());
- ThreadedKNNClassifier, EuclideanVectorCalculator,
- ClassificationVoter> classifier = new ThreadedKNNClassifier<>(3, false, executorService);
- classifier.setDistanceCalculator(new EuclideanVectorCalculator());
- classifier.setMajorityVoter(new ClassificationVoter());
- classifier.train(data.first, data.second);
- Vector point = new Vector(5.9,3.0,5.1,1.8);
- Integer classIdx = classifier.predict(point);
- System.out.println("Point "+ point +" has class index "+ classIdx);
- executorService.shutdown();
- }
-## Results
-## Source Code
\ No newline at end of file
diff --git a/src/main/java/examples/ml/example5/Example5.java b/src/main/java/examples/ml/example5/Example5.java
deleted file mode 100644
index 5abe244..0000000
--- a/src/main/java/examples/ml/example5/Example5.java
+++ /dev/null
@@ -1,51 +0,0 @@
-package examples.ml.example5;
-import utils.DefaultIterativeAlgorithmController;
-import base.CommonConstants;
-import datastructs.IVector;
-import datasets.DenseMatrixSet;
-import datastructs.RowBuilder;
-import datastructs.RowType;
-import maths.functions.distances.DistanceCalculator;
-import maths.functions.distances.EuclideanVectorCalculator;
-import maths.functions.generators.IRandomGenerator;
-import maths.functions.generators.UniformRandomGenerator;
-import ml.clustering.KMeans;
-import ml.clustering.KMeansInput;
-/** Category: Machine Learning
- * ID: Example5
- * Description: Clustering with ```KMeans```
- * Taken From:
- * Taken From:
- * Details:
- * TODO
- */
-public class Example5 {
- public static void main(String[] args){
- //some synthetic data
- DenseMatrixSet matrix = new DenseMatrixSet(RowType.Type.DOUBLE_VECTOR, new RowBuilder(), 6, 2, 0.0);
- matrix.set(0, 1.0, 2.0);
- matrix.set(1, 1.0, 4.0);
- matrix.set(2, 1.0, 0.0);
- matrix.set(3, 10.0, 2.0);
- matrix.set(4, 10.0, 2.0);
- matrix.set(5, 10.0, 0.0);
- KMeansInput input = new KMeansInput();
- input.k = 2;
- input.iterationContorller = new DefaultIterativeAlgorithmController(10, CommonConstants.getTol());
- KMeans kmeans = new KMeans<>(input);
- DistanceCalculator, Double> similarity = new EuclideanVectorCalculator();
- IRandomGenerator randomGenerator = new UniformRandomGenerator();
- kmeans.cluster(matrix, similarity, randomGenerator);
- }
diff --git a/src/main/java/examples/ml/example5/example.md b/src/main/java/examples/ml/example5/example.md
deleted file mode 100644
index 77b7c7d..0000000
--- a/src/main/java/examples/ml/example5/example.md
+++ /dev/null
@@ -1,76 +0,0 @@
-# Example 5: Clustering with KMeans
-## Contents
-* [Overview](#overview)
- * [KMeans algorithm](#kmeans_algorithm)
-* [Import files](#include_files)
-* [The main function](#m_func)
-* [Results](#results)
-* [Source Code](#source_code)
-## Overview
-### KMeans algorithm
-## Import files
-package examples.ml.example5;
-import utils.DefaultIterativeAlgorithmController;
-import base.CommonConstants;
-import datastructs.IVector;
-import datasets.DenseMatrixSet;
-import datastructs.RowBuilder;
-import datastructs.RowType;
-import maths.functions.distances.DistanceCalculator;
-import maths.functions.distances.EuclideanVectorCalculator;
-import maths.functions.generators.IRandomGenerator;
-import maths.functions.generators.UniformRandomGenerator;
-import ml.clustering.KMeans;
-import ml.clustering.KMeansInput;
-## The main function
-public class Example5 {
- public static void main(String[] args){
- //some synthetic data
- DenseMatrixSet matrix = new DenseMatrixSet(RowType.Type.DOUBLE_VECTOR, new RowBuilder(), 6, 2, 0.0);
- matrix.set(0, 1.0, 2.0);
- matrix.set(1, 1.0, 4.0);
- matrix.set(2, 1.0, 0.0);
- matrix.set(3, 10.0, 2.0);
- matrix.set(4, 10.0, 2.0);
- matrix.set(5, 10.0, 0.0);
- KMeansInput input = new KMeansInput();
- input.k = 2;
- input.iterationContorller = new DefaultIterativeAlgorithmController(10, CommonConstants.getTol());
- KMeans kmeans = new KMeans<>(input);
- DistanceCalculator, Double> similarity = new EuclideanVectorCalculator();
- IRandomGenerator randomGenerator = new UniformRandomGenerator();
- kmeans.cluster(matrix, similarity, randomGenerator);
- }
-## Results
-## Source Code
\ No newline at end of file
diff --git a/src/main/java/examples/ml/example6/Example6.java b/src/main/java/examples/ml/example6/Example6.java
deleted file mode 100644
index eaaa911..0000000
--- a/src/main/java/examples/ml/example6/Example6.java
+++ /dev/null
@@ -1,106 +0,0 @@
-package examples.ml.example6;
-import datasets.VectorDouble;
-import optimization.GradientDescent;
-import optimization.GDInput;
-import utils.DefaultIterativeAlgorithmController;
-import utils.IterativeAlgorithmResult;
-import datasets.DenseMatrixSet;
-import datastructs.RowBuilder;
-import datastructs.RowType;
-import maths.errorfunctions.LogisticMSEVectorFunction;
-import maths.functions.LinearVectorPolynomial;
-import maths.functions.SigmoidFunction;
-import ml.classifiers.LogisticRegressionClassifier;
-import tech.tablesaw.api.Table;
-import tech.tablesaw.columns.Column;
-import utils.Pair;
-import utils.PairBuilder;
-import utils.TableDataSetLoader;
-import java.io.File;
-import java.io.IOException;
-/** Category: Machine Learning
- * ID: Example6
- * Description: Classification with vanilla LogisticRegression
- * Taken From:
- * Details:
- * TODO
- */
-public class Example6 {
- public static Pair createDataSet() throws IOException, IllegalArgumentException {
- // load the data
- Table dataSetTable = TableDataSetLoader.loadDataSet(new File("src/main/resources/datasets/iris_dataset_reduced.csv"));
- Column species = dataSetTable.column("species");
- VectorDouble labels = new VectorDouble(species.size());
- for (int i = 0; i < species.size(); i++) {
- String label = (String) species.get(i);
- if(label.equals("Iris-setosa")){
- labels.set(i, 0.0);
- }
- else if(label.equals("Iris-versicolor")){
- labels.set(i, 1);
- }
- else{
- throw new IllegalArgumentException("Unknown class");
- }
- }
- Table reducedDataSet = dataSetTable.removeColumns("species").first(dataSetTable.rowCount());
- DenseMatrixSet dataSet = new DenseMatrixSet(RowType.Type.DOUBLE_VECTOR, new RowBuilder(), reducedDataSet.rowCount(), reducedDataSet.columnCount() + 1, 1.0);
- dataSet.setColumn(1, reducedDataSet.doubleColumn(0));
- dataSet.setColumn(2, reducedDataSet.doubleColumn(1));
- dataSet.setColumn(3, reducedDataSet.doubleColumn(2));
- dataSet.setColumn(4, reducedDataSet.doubleColumn(3));
- return PairBuilder.makePair(dataSet, labels);
- }
- public static void main(String[] args) throws IOException, IllegalArgumentException{
- Pair data = Example6.createDataSet();
- System.out.println("Number of rows: "+data.first.m());
- System.out.println("Number of labels: "+data.second.size());
- SigmoidFunction hypothesis = new SigmoidFunction(new LinearVectorPolynomial(4));
- GDInput gdInput = new GDInput();
- gdInput.showIterations = true;
- gdInput.eta = 0.01;
- gdInput.errF = new LogisticMSEVectorFunction(hypothesis);
- gdInput.iterationContorller = new DefaultIterativeAlgorithmController(100000,1.0e-8);
- // the optimizer
- GradientDescent gdSolver = new GradientDescent(gdInput);
- // the classifier
- LogisticRegressionClassifier, LinearVectorPolynomial> classifier = new LogisticRegressionClassifier(hypothesis, gdSolver );
- // train the model
- IterativeAlgorithmResult result = (IterativeAlgorithmResult) classifier.train(data.first, data.second);
- System.out.println(" ");
- System.out.println(result);
- System.out.println("Intercept: "+hypothesis.getCoeff(0)+
- " slope1: "+hypothesis.getCoeff(1) +
- " slope2: "+hypothesis.getCoeff(2) +
- " slope3: "+hypothesis.getCoeff(3));
- // use 1.0 to account for the intercept term
- VectorDouble point = new VectorDouble(1.0, 5.7,2.8,4.1,1.3);
- Integer classIdx = classifier.predict(point);
- System.out.println("Point "+ point +" has class index "+ classIdx);
- }
diff --git a/src/main/java/examples/ml/example6/example.md b/src/main/java/examples/ml/example6/example.md
deleted file mode 100644
index 57b336d..0000000
--- a/src/main/java/examples/ml/example6/example.md
+++ /dev/null
@@ -1,174 +0,0 @@
-# Example 6: Logistic Classification
- ## Contents
- * [Overview](#overview)
- * [Logistic classification](#logistic_classification)
- * [Import files](#include_files)
- * [The main function](#m_func)
- * [Results](#results)
- * [Source Code](#source_code)
- ## Overview
- ### Logistic classification
- ## Import files
- ```
-package examples.ml.example6;
-import optimization.GradientDescent;
-import optimization.GDInput;
-import utils.DefaultIterativeAlgorithmController;
-import utils.IterativeAlgorithmResult;
-import datasets.DenseMatrixSet;
-import datastructs.RowBuilder;
-import datasets.VectorDouble;
-import datastructs.RowType;
-import maths.errorfunctions.LogisticMSEVectorFunction;
-import maths.functions.LinearVectorPolynomial;
-import maths.functions.SigmoidFunction;
-import ml.classifiers.LogisticRegressionClassifier;
-import tech.tablesaw.api.Table;
-import tech.tablesaw.columns.Column;
-import utils.Pair;
-import utils.PairBuilder;
-import utils.TableDataSetLoader;
-import java.io.File;
-import java.io.IOException;
- ```
- ## The main function
- ```
- public class Example6 {
- public static Pair createDataSet() throws IOException, IllegalArgumentException {
- // load the data
- Table dataSetTable = TableDataSetLoader.loadDataSet(new File("src/main/resources/datasets/iris_dataset_reduced.csv"));
- Column species = dataSetTable.column("species");
- Vector labels = new Vector(species.size());
- for (int i = 0; i < species.size(); i++) {
- String label = (String) species.get(i);
- if(label.equals("Iris-setosa")){
- labels.set(i, 0.0);
- }
- else if(label.equals("Iris-versicolor")){
- labels.set(i, 1);
- }
- else{
- throw new IllegalArgumentException("Unknown class");
- }
- }
- Table reducedDataSet = dataSetTable.removeColumns("species").first(dataSetTable.rowCount());
- DenseMatrixSet dataSet = new DenseMatrixSet(RowType.Type.DOUBLE_VECTOR, new RowBuilder(), reducedDataSet.rowCount(), reducedDataSet.columnCount() + 1, 1.0);
- dataSet.setColumn(1, reducedDataSet.doubleColumn(0));
- dataSet.setColumn(2, reducedDataSet.doubleColumn(1));
- dataSet.setColumn(3, reducedDataSet.doubleColumn(2));
- dataSet.setColumn(4, reducedDataSet.doubleColumn(3));
- return PairBuilder.makePair(dataSet, labels);
- }
- public static void main(String[] args) throws IOException, IllegalArgumentException{
- Pair data = Example6.createDataSet();
- System.out.println("Number of rows: "+data.first.m());
- System.out.println("Number of labels: "+data.second.size());
- SigmoidFunction hypothesis = new SigmoidFunction(new LinearVectorPolynomial(4));
- GDInput gdInput = new GDInput();
- gdInput.showIterations = true;
- gdInput.eta = 0.01;
- gdInput.errF = new LogisticMSEVectorFunction(hypothesis);
- gdInput.iterationContorller = new DefaultIterativeAlgorithmController(100000,1.0e-8);
- // the optimizer
- BatchGradientDescent gdSolver = new BatchGradientDescent(gdInput);
- // the classifier
- LogisticRegressionClassifier, LinearVectorPolynomial> classifier = new LogisticRegressionClassifier(hypothesis, gdSolver );
- // train the model
- IterativeAlgorithmResult result = (IterativeAlgorithmResult) classifier.train(data.first, data.second);
- System.out.println(" ");
- System.out.println(result);
- System.out.println("Intercept: "+hypothesis.getCoeff(0)+
- " slope1: "+hypothesis.getCoeff(1) +
- " slope2: "+hypothesis.getCoeff(2) +
- " slope3: "+hypothesis.getCoeff(3));
- // use 1.0 to account for the intercept term
- Vector point = new Vector(1.0, 5.7,2.8,4.1,1.3);
- Integer classIdx = classifier.predict(point);
- System.out.println("Point "+ point +" has class index "+ classIdx);
- }
- }
- ```
- ## Results
- ```
-BatchGD: iteration: 62769
- Jold: 0.0010943436826512905 Jcur: 0.0010943336813655545
- error |Jcur-Jold|: 1.0001285735998164E-8
- exit tolerance: 1.0E-8
-BatchGD: iteration: 62770
- Jold: 0.0010943336813655545 Jcur: 0.0010943236803309794
- error |Jcur-Jold|: 1.0001034575143897E-8
- exit tolerance: 1.0E-8
-BatchGD: iteration: 62771
- Jold: 0.0010943236803309794 Jcur: 0.0010943136795475577
- error |Jcur-Jold|: 1.0000783421662204E-8
- exit tolerance: 1.0E-8
-BatchGD: iteration: 62772
- Jold: 0.0010943136795475577 Jcur: 0.0010943036790152583
- error |Jcur-Jold|: 1.0000532299405535E-8
- exit tolerance: 1.0E-8
-BatchGD: iteration: 62773
- Jold: 0.0010943036790152583 Jcur: 0.0010942936787340855
- error |Jcur-Jold|: 1.0000281172812056E-8
- exit tolerance: 1.0E-8
-BatchGD: iteration: 62774
- Jold: 0.0010942936787340855 Jcur: 0.0010942836787040298
- error |Jcur-Jold|: 1.0000030055759557E-8
- exit tolerance: 1.0E-8
-BatchGD: iteration: 62775
- Jold: 0.0010942836787040298 Jcur: 0.0010942736789250564
- error |Jcur-Jold|: 9.999778973401527E-9
- exit tolerance: 1.0E-8
-Converged: true
-Tolerance: 9.999778973401527E-9
-# Threads: 1
-Iterations: 62776
-Intercept: -0.5245243583997479 slope1: -0.8737637329493022 slope2: -2.8232639955834893 slope3: 4.586584930820463
-Point has class index 1
- ```
- ## Source Code
- Example6.java
\ No newline at end of file
diff --git a/src/main/java/examples/ml/example7/Example7.java b/src/main/java/examples/ml/example7/Example7.java
deleted file mode 100644
index a4ceb16..0000000
--- a/src/main/java/examples/ml/example7/Example7.java
+++ /dev/null
@@ -1,154 +0,0 @@
-package examples.ml.example7;
-import datasets.VectorDouble;
-import optimization.GradientDescent;
-import optimization.GDInput;
-import utils.DefaultIterativeAlgorithmController;
-import utils.IterativeAlgorithmResult;
-import datasets.DenseMatrixSet;
-import datastructs.RowBuilder;
-import datastructs.RowType;
-import maths.errorfunctions.MSEVectorFunction;
-import maths.functions.LinearVectorPolynomial;
-import maths.functions.regularizers.LassoRegularizer;
-import maths.functions.regularizers.RidgeRegularizer;
-import ml.regression.LinearRegressor;
-import tech.tablesaw.api.DoubleColumn;
-import tech.tablesaw.api.Table;
-import utils.Pair;
-import utils.PairBuilder;
-import utils.TableDataSetLoader;
-import java.io.File;
-import java.io.IOException;
-/** Category: Machine Learning
- * ID: Example7
- * Description: Linear regression with Lasso
- * Taken From:
- * Details:
- * TODO
- */
-public class Example7 {
- public static Pair createDataSet() throws IOException, IllegalArgumentException {
- // load the data
- Table dataSetTable = TableDataSetLoader.loadDataSet(new File("src/main/resources/datasets/X_Y_Sinusoid_Data.csv"));
- DoubleColumn y = dataSetTable.doubleColumn("y");
- VectorDouble labels = new VectorDouble(y);
- Table reducedDataSet = dataSetTable.removeColumns("y").first(dataSetTable.rowCount());
- DenseMatrixSet dataSet = new DenseMatrixSet(RowType.Type.DOUBLE_VECTOR, new RowBuilder(), reducedDataSet.rowCount(), reducedDataSet.columnCount() + 1, 1.0);
- dataSet.setColumn(1, reducedDataSet.doubleColumn(0));
- return PairBuilder.makePair(dataSet, labels);
- }
- public static void linearRegression(DenseMatrixSet data, VectorDouble labels){
- System.out.println("Doing LinearRegression");
- LinearVectorPolynomial hypothesis = new LinearVectorPolynomial(1);
- GDInput gdInput = new GDInput();
- gdInput.showIterations = false;
- gdInput.eta = 0.01;
- gdInput.errF = new MSEVectorFunction(hypothesis);
- gdInput.iterationContorller = new DefaultIterativeAlgorithmController(100000,1.0e-8);
- // the optimizer
- GradientDescent gdSolver = new GradientDescent(gdInput);
- // the classifier
- LinearRegressor> regressor = new LinearRegressor(hypothesis);
- // train the model
- IterativeAlgorithmResult result = (IterativeAlgorithmResult) regressor.train(data, labels, gdSolver);
- System.out.println(" ");
- System.out.println(result);
- System.out.println("Intercept: "+hypothesis.getCoeff(0)+
- " slope1: "+hypothesis.getCoeff(1));
- }
- public static void ridgeRegression(DenseMatrixSet data, VectorDouble labels){
- System.out.println("Doing Ridge LinearRegression");
- LinearVectorPolynomial hypothesis = new LinearVectorPolynomial(1);
- RidgeRegularizer ridgeRegularizer = new RidgeRegularizer(0.001, 1, hypothesis);
- GDInput gdInput = new GDInput();
- gdInput.showIterations = false;
- gdInput.eta = 0.01;
- gdInput.errF = new MSEVectorFunction(hypothesis, ridgeRegularizer);
- gdInput.iterationContorller = new DefaultIterativeAlgorithmController(100000,1.0e-8);
- // the optimizer
- GradientDescent gdSolver = new GradientDescent(gdInput);
- // the classifier
- LinearRegressor> regressor = new LinearRegressor(hypothesis);
- // train the model
- IterativeAlgorithmResult result = (IterativeAlgorithmResult) regressor.train(data, labels, gdSolver);
- System.out.println(" ");
- System.out.println(result);
- System.out.println("Intercept: "+hypothesis.getCoeff(0)+
- " slope1: "+hypothesis.getCoeff(1));
- }
- public static void lassoRegression(DenseMatrixSet data, VectorDouble labels){
- System.out.println("Doing Lasso LinearRegression");
- LinearVectorPolynomial hypothesis = new LinearVectorPolynomial(1);
- LassoRegularizer lassoRegularizer = new LassoRegularizer(0.0001, 1, hypothesis);
- GDInput gdInput = new GDInput();
- gdInput.showIterations = false;
- gdInput.eta = 0.01;
- gdInput.errF = new MSEVectorFunction(hypothesis, lassoRegularizer);
- gdInput.iterationContorller = new DefaultIterativeAlgorithmController(100000,1.0e-8);
- // the optimizer
- GradientDescent gdSolver = new GradientDescent(gdInput);
- // the classifier
- LinearRegressor> regressor = new LinearRegressor(hypothesis);
- // train the model
- IterativeAlgorithmResult result = (IterativeAlgorithmResult) regressor.train(data, labels, gdSolver);
- System.out.println(" ");
- System.out.println(result);
- System.out.println("Intercept: "+hypothesis.getCoeff(0)+
- " slope1: "+hypothesis.getCoeff(1));
- }
- public static void main(String[] args) throws IOException, IllegalArgumentException{
- Pair data = Example7.createDataSet();
- System.out.println("Number of rows: "+data.first.m());
- System.out.println("Number of labels: "+data.second.size());
- Example7.linearRegression(data.first, data.second);
- Example7.ridgeRegression(data.first, data.second);
- Example7.lassoRegression(data.first, data.second);
- }
diff --git a/src/main/java/examples/ml/example7/example.md b/src/main/java/examples/ml/example7/example.md
deleted file mode 100644
index afb572e..0000000
--- a/src/main/java/examples/ml/example7/example.md
+++ /dev/null
@@ -1,207 +0,0 @@
-# Example 7: Linear regression with regularization ```LassoRegularizer``` and ```RidgeRegularizer```
- ## Contents
- * [Overview](#overview)
- * [Linear regression with regularization](#logistic_classification)
- * [Import files](#include_files)
- * [The main function](#m_func)
- * [Results](#results)
- * [Source Code](#source_code)
- ## Overview
- ### Linear regression with regularization
- ## Import files
- ```
-package examples.ml.example7;
-import optimization.GradientDescent;
-import optimization.GDInput;
-import utils.DefaultIterativeAlgorithmController;
-import utils.IterativeAlgorithmResult;
-import datasets.DenseMatrixSet;
-import datastructs.RowBuilder;
-import datasets.VectorDouble;
-import datastructs.RowType;
-import maths.errorfunctions.MSEVectorFunction;
-import maths.functions.LinearVectorPolynomial;
-import maths.functions.regularizers.LassoRegularizer;
-import maths.functions.regularizers.RidgeRegularizer;
-import ml.regression.LinearRegressor;
-import tech.tablesaw.api.DoubleColumn;
-import tech.tablesaw.api.Table;
-import utils.Pair;
-import utils.PairBuilder;
-import utils.TableDataSetLoader;
-import java.io.File;
-import java.io.IOException;
- ```
- ## The main function
- ```
- public class Example7 {
- public static Pair createDataSet() throws IOException, IllegalArgumentException {
- // load the data
- Table dataSetTable = TableDataSetLoader.loadDataSet(new File("src/main/resources/datasets/X_Y_Sinusoid_Data.csv"));
- DoubleColumn y = dataSetTable.doubleColumn("y");
- Vector labels = new Vector(y);
- Table reducedDataSet = dataSetTable.removeColumns("y").first(dataSetTable.rowCount());
- DenseMatrixSet dataSet = new DenseMatrixSet(RowType.Type.DOUBLE_VECTOR, new RowBuilder(), reducedDataSet.rowCount(), reducedDataSet.columnCount() + 1, 1.0);
- dataSet.setColumn(1, reducedDataSet.doubleColumn(0));
- return PairBuilder.makePair(dataSet, labels);
- }
- public static void linearRegression(DenseMatrixSet data, Vector labels){
- System.out.println("Doing LinearRegression");
- LinearVectorPolynomial hypothesis = new LinearVectorPolynomial(1);
- GDInput gdInput = new GDInput();
- gdInput.showIterations = false;
- gdInput.eta = 0.01;
- gdInput.errF = new MSEVectorFunction(hypothesis);
- gdInput.iterationContorller = new DefaultIterativeAlgorithmController(100000,1.0e-8);
- // the optimizer
- BatchGradientDescent gdSolver = new BatchGradientDescent(gdInput);
- // the classifier
- LinearRegressor> regressor = new LinearRegressor(hypothesis);
- // train the model
- IterativeAlgorithmResult result = (IterativeAlgorithmResult) regressor.train(data, labels, gdSolver);
- System.out.println(" ");
- System.out.println(result);
- System.out.println("Intercept: "+hypothesis.getCoeff(0)+
- " slope1: "+hypothesis.getCoeff(1));
- }
- public static void ridgeRegression(DenseMatrixSet data, Vector labels){
- System.out.println("Doing Ridge LinearRegression");
- LinearVectorPolynomial hypothesis = new LinearVectorPolynomial(1);
- RidgeRegularizer ridgeRegularizer = new RidgeRegularizer(0.001, 1, hypothesis);
- GDInput gdInput = new GDInput();
- gdInput.showIterations = false;
- gdInput.eta = 0.01;
- gdInput.errF = new MSEVectorFunction(hypothesis, ridgeRegularizer);
- gdInput.iterationContorller = new DefaultIterativeAlgorithmController(100000,1.0e-8);
- // the optimizer
- BatchGradientDescent gdSolver = new BatchGradientDescent(gdInput);
- // the classifier
- LinearRegressor> regressor = new LinearRegressor(hypothesis);
- // train the model
- IterativeAlgorithmResult result = (IterativeAlgorithmResult) regressor.train(data, labels, gdSolver);
- System.out.println(" ");
- System.out.println(result);
- System.out.println("Intercept: "+hypothesis.getCoeff(0)+
- " slope1: "+hypothesis.getCoeff(1));
- }
- public static void lassoRegression(DenseMatrixSet data, Vector labels){
- System.out.println("Doing Lasso LinearRegression");
- LinearVectorPolynomial hypothesis = new LinearVectorPolynomial(1);
- LassoRegularizer lassoRegularizer = new LassoRegularizer(0.0001, 1, hypothesis);
- GDInput gdInput = new GDInput();
- gdInput.showIterations = false;
- gdInput.eta = 0.01;
- gdInput.errF = new MSEVectorFunction(hypothesis, lassoRegularizer);
- gdInput.iterationContorller = new DefaultIterativeAlgorithmController(100000,1.0e-8);
- // the optimizer
- BatchGradientDescent gdSolver = new BatchGradientDescent(gdInput);
- // the classifier
- LinearRegressor> regressor = new LinearRegressor(hypothesis);
- // train the model
- IterativeAlgorithmResult result = (IterativeAlgorithmResult) regressor.train(data, labels, gdSolver);
- System.out.println(" ");
- System.out.println(result);
- System.out.println("Intercept: "+hypothesis.getCoeff(0)+
- " slope1: "+hypothesis.getCoeff(1));
- }
- public static void main(String[] args) throws IOException, IllegalArgumentException{
- Pair data = Example7.createDataSet();
- System.out.println("Number of rows: "+data.first.m());
- System.out.println("Number of labels: "+data.second.size());
- Example7.linearRegression(data.first, data.second);
- Example7.ridgeRegression(data.first, data.second);
- Example7.lassoRegression(data.first, data.second);
- }
- }
- ```
- ## Results
- ```
-Number of rows: 20
-Number of labels: 20
-Doing LinearRegression
-Converged: true
-Tolerance: 9.993163019306195E-9
-# Threads: 1
-Iterations: 5133
-Intercept: 1.1707821773747917 slope1: -2.162486001652405
-Doing Ridge LinearRegression
-Converged: true
-Tolerance: 9.873171780139245E-9
-# Threads: 1
-Iterations: 3850
-Intercept: 1.1580261130223417 slope1: -2.1376028466785955
-Doing Lasso LinearRegression
-Converged: true
-Tolerance: 9.997267735872839E-9
-# Threads: 1
-Iterations: 5092
-Intercept: 1.170591014094624 slope1: -2.162113100936771
- ```
- ## Source Code
- Example7.java
\ No newline at end of file
diff --git a/src/main/java/examples/ml/example8/Example8.java b/src/main/java/examples/ml/example8/Example8.java
deleted file mode 100644
index bc5bcd4..0000000
--- a/src/main/java/examples/ml/example8/Example8.java
+++ /dev/null
@@ -1,65 +0,0 @@
-package examples.ml.example8;
-import maths.ConfusionMatrix;
-import java.util.ArrayList;
-import java.util.List;
-public class Example8 {
- public static void main(String[] args){
- final int SIZE = 165;
- final int N_CLASSES = 2;
- List actual = new ArrayList<>();
- for(int i=0; i< SIZE; ++i){
- if(i < 60){
- actual.add(0);
- }
- else{
- actual.add(1);
- }
- }
- List predicted = new ArrayList<>();
- for(int i=0; i< SIZE; ++i){
- if(i < 50){
- predicted.add(0);
- }
- else if(i>=50 && i<65){
- predicted.add(1);
- }
- else if(i>=65 && i<70){
- predicted.add(0);
- }
- else{
- predicted.add(1);
- }
- }
- ConfusionMatrix confusionMatrix = new ConfusionMatrix(actual, predicted, N_CLASSES);
- // let's compute some metrics
- System.out.println("TP: "+confusionMatrix.getClassCounts(1));
- System.out.println("TN: "+confusionMatrix.getClassCounts(0));
- System.out.println("FP: "+confusionMatrix.getClassCountsAsOtherClass(0,1));
- System.out.println("FN: "+confusionMatrix.getClassCountsAsOtherClass(1,0));
- System.out.println("Accuracy is: " + confusionMatrix.accuracy());
- System.out.println("Misclassification Rate: " + confusionMatrix.misclassificationRate());
- System.out.println("TP Rate or Recall: " + confusionMatrix.recallClass(1));
- System.out.println("TN Rate or Specificity: " + confusionMatrix.recallClass(0));
- System.out.println("False Positive Rate: " + (double)confusionMatrix.getClassCountsAsOtherClass(0,1)/60.0);
- System.out.println("Precision: " + (double)confusionMatrix.getClassCounts(1)/
- (double) (confusionMatrix.getClassCountsAsOtherClass(0,1) + confusionMatrix.getClassCounts(1)));
- System.out.println("Prevalence: " + (double)(confusionMatrix.getClassCountsAsOtherClass(1,0) +
- confusionMatrix.getClassCounts(1))/(double) confusionMatrix.totalCount());
- }
diff --git a/src/main/java/examples/ml/example8/confusion_matrix2.png b/src/main/java/examples/ml/example8/confusion_matrix2.png
deleted file mode 100644
index 48bd1a9..0000000
Binary files a/src/main/java/examples/ml/example8/confusion_matrix2.png and /dev/null differ
diff --git a/src/main/java/examples/ml/example8/confusion_matrix_simple2.png b/src/main/java/examples/ml/example8/confusion_matrix_simple2.png
deleted file mode 100644
index c1c8673..0000000
Binary files a/src/main/java/examples/ml/example8/confusion_matrix_simple2.png and /dev/null differ
diff --git a/src/main/java/examples/ml/example8/example.md b/src/main/java/examples/ml/example8/example.md
deleted file mode 100644
index 53c2b92..0000000
--- a/src/main/java/examples/ml/example8/example.md
+++ /dev/null
@@ -1,166 +0,0 @@
-# Example 8: Classification performance assessment using the ```ConfusionMatrix``` class
-## Contents
- * [Acknowledgements](#acknowledgement)
- * [Overview](#overview)
- * [Confusion matrix](#confusion_matrix)
- * [Import files](#include_files)
- * [The main function](#m_func)
- * [Results](#results)
- * [Source Code](#source_code)
- ## Acknowledgements
- This example was largely taken from Simple guide to confusion matrix terminology
- ## Overview
- Example 3 introduced the K-Nearest Neighbors and Example 6 discussed logistic regression.
- More often than not we want to be able to quantify the performance of a classifier.
- This example introduces various metrics with which one can do so.
- ### Confusion matrix
- The confusion matrix is a popular technique to assess the quality of a classifier.
- In very simple words the confusion matrix is a square matrix that is used to describe the
- performance of a classification model on a set of test data for which the correct classification
- is known.
- Let's start exploring what a confusion matrix can tell us by considering a binary classifier.
- The following table shows an assumed classifier.
-Here is what we can immediately infer from that matrix.
-- Overall we have 165 items in the data set.
-- There are two possible classes (binary classification) namely Yes and No.
-- The classifier predicted Yes 110 times and No 55 times.
-- In reality 60 items are under the class No and 105 under the class Yes.
-Let's now define some basic terminology that is used when we consider a confusion matrix.
-- True Positives or TP: The classifier predicts Yes and they are indeed classed as Yes
-- True Negatives or TN: The classifier predicts No and they are indeed classed as No
-- False Positives or FP: The classifier predicts Yes but they are classed as No. This is also known as a Type I error.
-- False Negatives or FN: The classifier predicts No but they are actually classed as Yes. This is also known as a Type II error.
- Note that all the terms defined above are whole numbers and not rates.
- Given this terminology here is how we could rewrite the confusion matrix as shown below
- The confusion matrix can be used to compute various rates. The most common ones are:
-- Accuracy: overall it tells us how often is the classifier correct
- - Accuracy = (TP + TN)/total = (100 + 50)/165 = 0.91
-- Misclassification rate: overall how often is it wrong. This is also known as the error rate.
- - Misclassification rate = (FP + FN)/total = (10 + 5)/165 = 0.09
-- True Positive Rate: When it's actually yes, how often does it predict yes? This is also known as sensitivity or recall
- - True Positive Rate = TP/actual Yes = 100/105 = 0.95
-- False Positive Rate: When it's actually no, how often does it predict yes?
- - False Positive Rate = FP/actual No = 10/60 = 0.17
-- True Negative Rate: When it's actually no, how often does it predict no? This is also known as specificity
- - True Negative Rate = TN/actual No = 50/60 = 0.83
-- Precision: When it predicts Yes how often is it correct?
- - Precision = TP/Predicted Yes = 100/110 = 0.91
-- Prevalence: How often does the yes condition actually occur in our sample?
- - Prevalence = actual Yes/total = 105/165 = 0.64
-There are other terms worth mentioning but we won't do that here. Instead have a look at
-the following article Simple guide to confusion matrix terminology.
- ## Import files
- ```
-package examples.ml.example8;
-import maths.ConfusionMatrix;
-import java.util.ArrayList;
-import java.util.List;
-// Builds hand-made actual/predicted label lists and prints the metrics derived from a ConfusionMatrix.
-public class Example8 {
- public static void main(String[] args){
- final int SIZE = 165;
- final int N_CLASSES = 2;
- // ground truth: indices 0-59 are class 0, indices 60-164 are class 1
- List actual = new ArrayList<>();
- for(int i=0; i< SIZE; ++i){
- if(i < 60){
- actual.add(0);
- }
- else{
- actual.add(1);
- }
- }
- // predictions: 0-49 -> 0, 50-64 -> 1, 65-69 -> 0, 70-164 -> 1;
- // compared with the labels above this gives TN=50, FP=10, FN=5, TP=100
- List predicted = new ArrayList<>();
- for(int i=0; i< SIZE; ++i){
- if(i < 50){
- predicted.add(0);
- }
- else if(i>=50 && i<65){
- predicted.add(1);
- }
- else if(i>=65 && i<70){
- predicted.add(0);
- }
- else{
- predicted.add(1);
- }
- }
- ConfusionMatrix confusionMatrix = new ConfusionMatrix(actual, predicted, N_CLASSES);
- // let's compute some metrics
- // class 1 plays the role of "Yes" (positive) and class 0 the role of "No" (negative)
- System.out.println("TP: "+confusionMatrix.getClassCounts(1));
- System.out.println("TN: "+confusionMatrix.getClassCounts(0));
- // presumably getClassCountsAsOtherClass(a, b) counts class-a items predicted as class b -- TODO confirm
- System.out.println("FP: "+confusionMatrix.getClassCountsAsOtherClass(0,1));
- System.out.println("FN: "+confusionMatrix.getClassCountsAsOtherClass(1,0));
- System.out.println("Accuracy is: " + confusionMatrix.accuracy());
- System.out.println("Misclassification Rate: " + confusionMatrix.misclassificationRate());
- System.out.println("TP Rate or Recall: " + confusionMatrix.recallClass(1));
- System.out.println("TN Rate or Specificity: " + confusionMatrix.recallClass(0));
- // 60.0 is the hard-coded number of actual class-0 items built above
- System.out.println("False Positive Rate: " + (double)confusionMatrix.getClassCountsAsOtherClass(0,1)/60.0);
- // precision = TP / (FP + TP)
- System.out.println("Precision: " + (double)confusionMatrix.getClassCounts(1)/
- (double) (confusionMatrix.getClassCountsAsOtherClass(0,1) + confusionMatrix.getClassCounts(1)));
- // prevalence = (FN + TP) / total
- System.out.println("Prevalence: " + (double)(confusionMatrix.getClassCountsAsOtherClass(1,0) +
- confusionMatrix.getClassCounts(1))/(double) confusionMatrix.totalCount());
- }
- ```
- ## Results
- ```
-TP: 100
-TN: 50
-FP: 10
-FN: 5
-Accuracy is: 0.9090909090909091
-Misclassification Rate: 0.09090909090909094
-TP Rate or Recall: 0.9523809523809523
-TN Rate or Specificity: 0.8333333333333334
-False Positive Rate: 0.16666666666666666
-Precision: 0.9090909090909091
-Prevalence: 0.6363636363636364
- ```
- ## Source Code
- Example8.java
diff --git a/src/main/java/examples/optimization/example1/Example1.java b/src/main/java/examples/optimization/example1/Example1.java
deleted file mode 100644
index af4f08d..0000000
--- a/src/main/java/examples/optimization/example1/Example1.java
+++ /dev/null
@@ -1,56 +0,0 @@
-package examples.optimization.example1;
-import datasets.VectorDouble;
-import utils.DefaultIterativeAlgorithmController;
-import utils.IterativeAlgorithmResult;
-import optimization.GradientDescent;
-import optimization.GDInput;
-import datasets.DenseMatrixSet;
-import datastructs.RowBuilder;
-import datastructs.RowType;
-import maths.functions.LinearVectorPolynomial;
-import maths.errorfunctions.MSEVectorFunction;
-import tech.tablesaw.api.Table;
-import utils.TableDataSetLoader;
-import java.io.File;
-import java.io.IOException;
-/** Category: Machine Learning
- * ID: Example5
- * Description: Using Batch Gradient Descent with only one feature
- * Taken From:
- * Details:
- * TODO
- */
-public class Example1 {
- public static void main(String[] args)throws IOException {
- // load the data
- Table dataSet = TableDataSetLoader.loadDataSet(new File("src/main/resources/datasets/car_plant.csv"));
- // the "Electricity Usage" column provides the labels (target values)
- VectorDouble labels = new VectorDouble(dataSet, "Electricity Usage");
- // everything except the label column is kept as features
- Table reducedDataSet = dataSet.removeColumns("Electricity Usage").first(dataSet.rowCount());
- // two-column matrix; presumably the trailing 1.0 is the initial fill value so that
- // column 0 acts as the intercept column -- TODO confirm against DenseMatrixSet
- DenseMatrixSet denseMatrixSet = new DenseMatrixSet(RowType.Type.DOUBLE_VECTOR, new RowBuilder(), reducedDataSet.rowCount(), 2, 1.0);
- // column 1 holds the single feature
- denseMatrixSet.setColumn(1, reducedDataSet.doubleColumn(0));
- // the model to fit; its coefficients 0 and 1 are printed below as intercept and slope
- LinearVectorPolynomial hypothesis = new LinearVectorPolynomial(1);
- // gradient descent configuration
- GDInput gdInput = new GDInput();
- gdInput.showIterations = true;
- gdInput.eta=0.01;
- gdInput.errF = new MSEVectorFunction(hypothesis);
- // presumably (max iterations, exit tolerance) -- TODO confirm against the controller class
- gdInput.iterationContorller = new DefaultIterativeAlgorithmController(10000,1.0e-8);
- GradientDescent gdSolver = new GradientDescent(gdInput);
- IterativeAlgorithmResult result = gdSolver.optimize(denseMatrixSet, labels, hypothesis);
- System.out.println(result);
- System.out.println("Intercept: "+hypothesis.getCoeff(0)+" slope: "+hypothesis.getCoeff(1));
- }
diff --git a/src/main/java/examples/optimization/example1/example.md b/src/main/java/examples/optimization/example1/example.md
deleted file mode 100644
index 8fd0384..0000000
--- a/src/main/java/examples/optimization/example1/example.md
+++ /dev/null
@@ -1,122 +0,0 @@
-# Example 1: Gradient Descent Optimization
-## Contents
-* [Overview](#overview)
-* [Gradient Descent](#gradient_descent)
-* [Import files](#import_files)
-* [The main function](#m_func)
-* [Results](#results)
-* [Source Code](#source_code)
-## Overview
-## Gradient Descent
-## Import files
-package examples.optimization.example1;
-import utils.DefaultIterativeAlgorithmController;
-import utils.IterativeAlgorithmResult;
-import optimization.GradientDescent;
-import optimization.GDInput;
-import datasets.DenseMatrixSet;
-import datastructs.RowBuilder;
-import datasets.VectorDouble;
-import datastructs.RowType;
-import maths.functions.LinearVectorPolynomial;
-import maths.errorfunctions.MSEVectorFunction;
-import tech.tablesaw.api.Table;
-import utils.TableDataSetLoader;
-import java.io.File;
-import java.io.IOException;
-## The main function
-// Fits a one-feature linear model to the car plant data set with gradient descent.
-public class Example1 {
- public static void main(String[] args)throws IOException {
- // load the data
- Table dataSet = TableDataSetLoader.loadDataSet(new File("src/main/resources/datasets/car_plant.csv"));
- // the "Electricity Usage" column provides the labels (target values)
- VectorDouble labels = new VectorDouble(dataSet, "Electricity Usage");
- // everything except the label column is kept as features
- Table reducedDataSet = dataSet.removeColumns("Electricity Usage").first(dataSet.rowCount());
- // two-column matrix; presumably the trailing 1.0 is the initial fill value so that
- // column 0 acts as the intercept column -- TODO confirm against DenseMatrixSet
- DenseMatrixSet denseMatrixSet = new DenseMatrixSet(RowType.Type.DOUBLE_VECTOR, new RowBuilder(), reducedDataSet.rowCount(), 2, 1.0);
- // column 1 holds the single feature
- denseMatrixSet.setColumn(1, reducedDataSet.doubleColumn(0));
- // the model to fit; its coefficients 0 and 1 are printed below as intercept and slope
- LinearVectorPolynomial hypothesis = new LinearVectorPolynomial(1);
- // gradient descent configuration
- GDInput gdInput = new GDInput();
- gdInput.showIterations = true;
- gdInput.eta=0.01;
- gdInput.errF = new MSEVectorFunction(hypothesis);
- // presumably (max iterations, exit tolerance) -- TODO confirm against the controller class
- gdInput.iterationContorller = new DefaultIterativeAlgorithmController(10000,1.0e-8);
- // use the solver type that the import section of this document brings into scope
- GradientDescent gdSolver = new GradientDescent(gdInput);
- IterativeAlgorithmResult result = gdSolver.optimize(denseMatrixSet, labels, hypothesis);
- System.out.println(result);
- System.out.println("Intercept: "+hypothesis.getCoeff(0)+" slope: "+hypothesis.getCoeff(1));
- }
-## Results
-BatchGD: iteration: 1
- Jold: 8.224725 Jcur: 2.0346766011662285
- error |Jcur-Jold|: 6.1900483988337704
- exit tolerance: 1.0E-8
-BatchGD: iteration: 2
- Jold: 2.0346766011662285 Jcur: 0.5183480014814251
- error |Jcur-Jold|: 1.5163285996848033
- exit tolerance: 1.0E-8
-BatchGD: iteration: 3
- Jold: 0.5183480014814251 Jcur: 0.14690409557648973
- error |Jcur-Jold|: 0.37144390590493537
- exit tolerance: 1.0E-8
-BatchGD: iteration: 4
- Jold: 0.14690409557648973 Jcur: 0.055913665703255976
- error |Jcur-Jold|: 0.09099042987323375
- exit tolerance: 1.0E-8
-BatchGD: iteration: 5
- Jold: 0.055913665703255976 Jcur: 0.03362373732305984
- error |Jcur-Jold|: 0.02228992838019614
- exit tolerance: 1.0E-8
-BatchGD: iteration: 6
- Jold: 0.03362373732305984 Jcur: 0.028162835522061502
- error |Jcur-Jold|: 0.005460901800998334
- exit tolerance: 1.0E-8
-BatchGD: iteration: 7
- Jold: 0.028162835522061502 Jcur: 0.026824409586410466
- error |Jcur-Jold|: 0.0013384259356510365
- exit tolerance: 1.0E-8
-BatchGD: iteration: 8
- Jold: 0.026824409586410466 Jcur: 0.026495834873692763
- error |Jcur-Jold|: 3.2857471271770244E-4
- exit tolerance: 1.0E-8
-Converged: true
-Tolerance: 9.995007266283551E-9
-# Threads: 1
-Iterations: 7076
-Intercept: 0.37857734128519877 slope: 0.5049674670001678
-## Source Code
\ No newline at end of file
diff --git a/src/main/java/examples/optimization/example2/Example2.java b/src/main/java/examples/optimization/example2/Example2.java
deleted file mode 100644
index 47b8c53..0000000
--- a/src/main/java/examples/optimization/example2/Example2.java
+++ /dev/null
@@ -1,92 +0,0 @@
-package examples.optimization.example2;
-import datasets.VectorDouble;
-import optimization.GradientDescent;
-import utils.DefaultIterativeAlgorithmController;
-import utils.IterativeAlgorithmResult;
-import optimization.GDInput;
-import datasets.DenseMatrixSet;
-import datastructs.RowBuilder;
-import datastructs.RowType;
-import maths.functions.LinearVectorPolynomial;
-import maths.errorfunctions.MSEVectorFunction;
-import org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression;
-import tech.tablesaw.api.DoubleColumn;
-import tech.tablesaw.api.Table;
-import utils.*;
-import java.io.File;
-import java.io.IOException;
-import java.util.List;
-/** Category: Machine Learning
- * ID: Example6
- * Description: Batch Gradient Descent with two features
- * Taken From:
- * Details:
- * TODO
- */
-public class Example2 {
- /** Loads the CSV file, normalizes the label and the two feature columns and returns the (feature matrix, labels) pair. */
- public static Pair loadNormalizedDataSet(File file)throws IOException{
- // load the data
- Table dataSet = TableDataSetLoader.loadDataSet(file);
- DoubleColumn y = dataSet.doubleColumn("Electricity Usage");
- // the return value is ignored, so normalize presumably works in place -- TODO confirm
- ListMaths.normalize(y);
- VectorDouble labels = new VectorDouble(y);
- Table reducedDataSet = dataSet.removeColumns("Electricity Usage").first(dataSet.rowCount());
- ListMaths.normalize(reducedDataSet.doubleColumn(0));
- // the second feature column is parsed into doubles before it is normalized
- List coolingCol = ParseUtils.parseAsDouble(reducedDataSet.column(1));
- ListMaths.normalize(coolingCol);
- // three-column matrix; presumably the trailing 1.0 is the initial fill value so that
- // column 0 acts as the intercept column -- TODO confirm against DenseMatrixSet
- DenseMatrixSet denseMatrixSet = new DenseMatrixSet(RowType.Type.DOUBLE_VECTOR, new RowBuilder(), reducedDataSet.rowCount(), 3, 1.0);
- denseMatrixSet.setColumn(1, reducedDataSet.doubleColumn(0));
- denseMatrixSet.setColumn(2, coolingCol);
- return PairBuilder.makePair(denseMatrixSet, labels);
- }
- /** Fits the same data with Apache Commons Math OLS and prints the three estimated coefficients as a reference. */
- public static void apacheOLS(DenseMatrixSet denseMatrixSet, VectorDouble labels)throws IOException{
- // the object that will do the fitting for us
- OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression();
- // one row per data point, two columns for the features
- Double[][] x = new Double[denseMatrixSet.m()][2];
- // presumably copies the two feature columns (1 and 2) into x -- TODO confirm the argument meaning
- denseMatrixSet.getSubMatrix(x, 2, 1, 2);
- regression.newSampleData(ListUtils.toDoubleArray(labels.getRawData()), ArrayUtils.toArray(x));
- double[] coeffs = regression.estimateRegressionParameters();
- System.out.println("Apache OLS: ");
- System.out.println("Intercept: "+coeffs[0]+" slope1: "+coeffs[1]+" slope2: "+coeffs[2]);
- }
- /** Runs the Apache OLS reference fit and then the gradient descent fit on the normalized data set. */
- public static void main(String[] args)throws IOException {
- Pair dataSet = Example2.loadNormalizedDataSet(new File("src/main/resources/datasets/car_plant_multi.csv"));
- System.out.println(" ");
- // compute with Apache OLS for reference
- Example2.apacheOLS(dataSet.first, dataSet.second);
- // the model to fit; its coefficients 0, 1 and 2 are printed below
- LinearVectorPolynomial hypothesis = new LinearVectorPolynomial(2);
- // gradient descent configuration
- GDInput gdInput = new GDInput();
- gdInput.showIterations = false;
- gdInput.eta=0.01;
- gdInput.errF = new MSEVectorFunction(hypothesis);
- // presumably (max iterations, exit tolerance) -- TODO confirm against the controller class
- gdInput.iterationContorller = new DefaultIterativeAlgorithmController(10000,1.0e-8);
- GradientDescent gdSolver = new GradientDescent(gdInput);
- IterativeAlgorithmResult result = (IterativeAlgorithmResult) gdSolver.optimize(dataSet.first, dataSet.second, hypothesis);
- System.out.println(" ");
- System.out.println(result);
- System.out.println("Intercept: "+hypothesis.getCoeff(0)+" slope1: "+hypothesis.getCoeff(1)+" slope2: "+hypothesis.getCoeff(2));
- }
diff --git a/src/main/java/examples/optimization/example2/example.md b/src/main/java/examples/optimization/example2/example.md
deleted file mode 100644
index c61f26d..0000000
--- a/src/main/java/examples/optimization/example2/example.md
+++ /dev/null
@@ -1,128 +0,0 @@
-# Example 2: Gradient Descent Optimization With Two Features
-## Contents
-* [Overview](#overview)
-* [Gradient Descent](#gradient_descent)
-* [Import files](#import_files)
-* [The main function](#m_func)
-* [Results](#results)
-* [Source Code](#source_code)
-## Overview
-## Gradient Descent
-## Import files
-package examples.optimization.example2;
-import utils.DefaultIterativeAlgorithmController;
-import utils.IterativeAlgorithmResult;
-import optimization.GradientDescent;
-import optimization.GDInput;
-import datasets.DenseMatrixSet;
-import datastructs.RowBuilder;
-import datasets.VectorDouble;
-import datastructs.RowType;
-import maths.functions.LinearVectorPolynomial;
-import maths.errorfunctions.MSEVectorFunction;
-import org.apache.commons.math3.stat.regression.OLSMultipleLinearRegression;
-import tech.tablesaw.api.DoubleColumn;
-import tech.tablesaw.api.Table;
-import utils.*;
-import java.io.File;
-import java.io.IOException;
-import java.util.List;
-## The main function
-// Fits a two-feature linear model with gradient descent and compares it with an Apache Commons Math OLS fit.
-public class Example2 {
- /** Loads the CSV file, normalizes the label and the two feature columns and returns the (feature matrix, labels) pair. */
- public static Pair loadNormalizedDataSet(File file)throws IOException{
- // load the data
- Table dataSet = TableDataSetLoader.loadDataSet(file);
- DoubleColumn y = dataSet.doubleColumn("Electricity Usage");
- // the return value is ignored, so normalize presumably works in place -- TODO confirm
- ListMaths.normalize(y);
- // VectorDouble is the label type that the import section of this document brings into scope
- VectorDouble labels = new VectorDouble(y);
- Table reducedDataSet = dataSet.removeColumns("Electricity Usage").first(dataSet.rowCount());
- ListMaths.normalize(reducedDataSet.doubleColumn(0));
- // the second feature column is parsed into doubles before it is normalized
- List coolingCol = ParseUtils.parseAsDouble(reducedDataSet.column(1));
- ListMaths.normalize(coolingCol);
- // three-column matrix; presumably the trailing 1.0 is the initial fill value so that
- // column 0 acts as the intercept column -- TODO confirm against DenseMatrixSet
- DenseMatrixSet denseMatrixSet = new DenseMatrixSet(RowType.Type.DOUBLE_VECTOR, new RowBuilder(), reducedDataSet.rowCount(), 3, 1.0);
- denseMatrixSet.setColumn(1, reducedDataSet.doubleColumn(0));
- denseMatrixSet.setColumn(2, coolingCol);
- return PairBuilder.makePair(denseMatrixSet, labels);
- }
- /** Fits the same data with Apache Commons Math OLS and prints the three estimated coefficients as a reference. */
- public static void apacheOLS(DenseMatrixSet denseMatrixSet, VectorDouble labels)throws IOException{
- // the object that will do the fitting for us
- OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression();
- // one row per data point, two columns for the features
- Double[][] x = new Double[denseMatrixSet.m()][2];
- // presumably copies the two feature columns (1 and 2) into x -- TODO confirm the argument meaning
- denseMatrixSet.getSubMatrix(x, 2, 1, 2);
- regression.newSampleData(ListUtils.toDoubleArray(labels.getRawData()), ArrayUtils.toArray(x));
- double[] coeffs = regression.estimateRegressionParameters();
- System.out.println("Apache OLS: ");
- System.out.println("Intercept: "+coeffs[0]+" slope1: "+coeffs[1]+" slope2: "+coeffs[2]);
- }
- /** Runs the Apache OLS reference fit and then the gradient descent fit on the normalized data set. */
- public static void main(String[] args)throws IOException {
- Pair dataSet = Example2.loadNormalizedDataSet(new File("src/main/resources/datasets/car_plant_multi.csv"));
- System.out.println(" ");
- // compute with Apache OLS for reference
- Example2.apacheOLS(dataSet.first, dataSet.second);
- // the model to fit; its coefficients 0, 1 and 2 are printed below
- LinearVectorPolynomial hypothesis = new LinearVectorPolynomial(2);
- // gradient descent configuration
- GDInput gdInput = new GDInput();
- gdInput.showIterations = false;
- gdInput.eta=0.01;
- gdInput.errF = new MSEVectorFunction(hypothesis);
- // presumably (max iterations, exit tolerance) -- TODO confirm against the controller class
- gdInput.iterationContorller = new DefaultIterativeAlgorithmController(10000,1.0e-8);
- // use the solver type that the import section of this document brings into scope
- GradientDescent gdSolver = new GradientDescent(gdInput);
- IterativeAlgorithmResult result = (IterativeAlgorithmResult) gdSolver.optimize(dataSet.first, dataSet.second, hypothesis);
- System.out.println(" ");
- System.out.println(result);
- System.out.println("Intercept: "+hypothesis.getCoeff(0)+" slope1: "+hypothesis.getCoeff(1)+" slope2: "+hypothesis.getCoeff(2));
- }
-## Results
-Apache OLS:
-Intercept: -0.014061513598774061 slope1: 0.6550463939406858 slope2: 0.3541277607139755
-Converged: true
-Tolerance: 9.99538353734647E-9
-# Threads: 1
-Iterations: 4642
-Intercept: -0.009501183829303421 slope1: 0.643411046969411 slope2: 0.3609262733434958
-## Source Code
\ No newline at end of file
diff --git a/src/main/java/examples/plotting/example1/Example1.java b/src/main/java/examples/plotting/example1/Example1.java
deleted file mode 100644
index a6fe7ee..0000000
--- a/src/main/java/examples/plotting/example1/Example1.java
+++ /dev/null
@@ -1,29 +0,0 @@
-package examples.plotting.example1;
-import dataloader.CsvDataLoader;
-import tech.tablesaw.api.Table;
-import visualizations.BarChart;
-import java.io.File;
-import java.io.IOException;
-/**
- * Category: Plotting
- * ID: PlotABar
- * Description: Load a CSV file and plot a horizontal bar chart
- */
-public class Example1 {
- public static void main(String[] args) throws IOException {
- // parse the CSV file into a table
- File file = new File("data/humans_data.csv");
- Table table = CsvDataLoader.TableLoader.parseFile(file);
- // BarChartOptions is created through a BarChart instance (inner-class creation syntax)
- BarChart plotter = new BarChart();
- BarChart.BarChartOptions options = plotter.new BarChartOptions();
- options.chartTitle = "height by sex";
- // column names passed to the plot: "Sex" for grouping, "Height" for the numeric value
- options.groupColName = "Sex";
- options.numberColName = "Height";
- BarChart.plotHorizontalBar(options, table);
- }
diff --git a/src/main/java/examples/plotting/example1/example.md b/src/main/java/examples/plotting/example1/example.md
deleted file mode 100644
index 98cda9e..0000000
--- a/src/main/java/examples/plotting/example1/example.md
+++ /dev/null
@@ -1,56 +0,0 @@
-# Example 1: Bar Plot
-## Contents
-* [Overview](#overview)
-* [Import files](#include_files)
-* [The main function](#m_func)
-* [Results](#results)
-* [Source Code](#source_code)
-## Overview
-## Import files
-import dataloader.CsvDataLoader;
-import tech.tablesaw.api.Table;
-import visualizations.BarChart;
-import java.io.File;
-import java.io.IOException;
-## The main function
-// Loads a CSV file and plots a horizontal bar chart from it.
-public class Example1 {
- public static void main(String[] args) throws IOException {
- // parse the CSV file into a table
- File file = new File("data/humans_data.csv");
- Table table = CsvDataLoader.TableLoader.parseFile(file);
- // BarChartOptions is created through a BarChart instance (inner-class creation syntax)
- BarChart plotter = new BarChart();
- BarChart.BarChartOptions options = plotter.new BarChartOptions();
- options.chartTitle = "height by sex";
- // column names passed to the plot: "Sex" for grouping, "Height" for the numeric value
- options.groupColName = "Sex";
- options.numberColName = "Height";
- BarChart.plotHorizontalBar(options, table);
- }
-## Results
-## Source Code
\ No newline at end of file
diff --git a/src/main/java/examples/plotting/example2/Example2.java b/src/main/java/examples/plotting/example2/Example2.java
deleted file mode 100644
index 613a0fe..0000000
--- a/src/main/java/examples/plotting/example2/Example2.java
+++ /dev/null
@@ -1,34 +0,0 @@
-package examples.plotting.example2;
-import dataloader.CsvDataLoader;
-import datastructs.IVector;
-import tech.tablesaw.api.Table;
-import visualizations.Histograms;
-import java.io.File;
-import java.io.IOException;
-/**
- * Category: Plotting
- * ID: PlotAHistogram
- * Description: Load a CSV file and plot the distribution of a numeric sample
- */
-public class Example2 {
- public static void main(String[] args) throws IOException {
- // parse the CSV file into a table
- File file = new File("test_data/annual.csv");
- Table data = CsvDataLoader.TableLoader.parseFile(file);
- // Extract numeric samples from the data
- // NOTE(review): X is not used below; plotHistogram receives the whole table
- IVector X = CsvDataLoader.TableLoader.buildNumericSample(data, "Mean");
- // HistogramOptions is created through a Histograms instance (inner-class creation syntax)
- Histograms plotter = new Histograms();
- Histograms.HistogramOptions options = plotter.new HistogramOptions();
- options.chartTitle = "distribution of mean temperature";
- options.xAxisName = "Mean";
- Histograms.plotHistogram(options, data);
- }
diff --git a/src/main/java/examples/plotting/example2/example.md b/src/main/java/examples/plotting/example2/example.md
deleted file mode 100644
index e2719e0..0000000
--- a/src/main/java/examples/plotting/example2/example.md
+++ /dev/null
@@ -1,56 +0,0 @@
-# Example 2: Histogram Plot
-## Contents
-* [Overview](#overview)
-* [Import files](#include_files)
-* [The main function](#m_func)
-* [Results](#results)
-* [Source Code](#source_code)
-## Overview
-## Import files
-import dataloader.CsvDataLoader;
-import datastructs.NumericSample;
-import tech.tablesaw.api.Table;
-import visualizations.Histograms;
-import java.io.File;
-import java.io.IOException;
-## The main function
-// Loads a CSV file and plots a histogram from it.
-public class Example2 {
- public static void main(String[] args) throws IOException {
- // parse the CSV file into a table
- File file = new File("test_data/annual.csv");
- Table data = CsvDataLoader.TableLoader.parseFile(file);
- // Extract numeric samples from the data
- // NOTE(review): X is not used below; plotHistogram receives the whole table
- NumericSample X = CsvDataLoader.TableLoader.buildNumericSample(data, "Mean");
- // HistogramOptions is created through a Histograms instance (inner-class creation syntax)
- Histograms plotter = new Histograms();
- Histograms.HistogramOptions options = plotter.new HistogramOptions();
- options.chartTitle = "distribution of mean temperature";
- options.xAxisName = "Mean";
- Histograms.plotHistogram(options, data);
- }
-## Results
-## Source Code
\ No newline at end of file
diff --git a/src/main/java/examples/plotting/example3/Example3.java b/src/main/java/examples/plotting/example3/Example3.java
deleted file mode 100644
index 73b16d2..0000000
--- a/src/main/java/examples/plotting/example3/Example3.java
+++ /dev/null
@@ -1,39 +0,0 @@
-package examples.plotting.example3;
-import dataloader.CsvDataLoader;
-import datastructs.IVector;
-import tech.tablesaw.api.Table;
-import visualizations.LineChart;
-import java.io.File;
-import java.io.IOException;
-/**
- * Category: Plotting
- * ID: PlotALine
- * Description: Load a CSV file and plot two columns against each other
- */
-public class Example3 {
- public static void main(String[] args) throws IOException {
- // parse the CSV file into a table
- File file = new File("data/annual.csv");
- Table table = CsvDataLoader.TableLoader.parseFile(file);
- // extract numeric samples from the data
- // X is built from the "Year" column and Y from the "Mean" column
- IVector X = CsvDataLoader.TableLoader.buildNumericSample(table, "Year");
- IVector Y = CsvDataLoader.TableLoader.buildNumericSample(table, "Mean");
- // LineChartOptions is created through a LineChart instance (inner-class creation syntax)
- LineChart plotter = new LineChart();
- LineChart.LineChartOptions options = plotter.new LineChartOptions();
- options.chartTitle = "Per Year Mean";
- options.xAxisName = "Year";
- options.yAxisName = "Mean";
- LineChart.plotLine(X, Y, options);
- }
\ No newline at end of file
diff --git a/src/main/java/examples/plotting/example3/example.md b/src/main/java/examples/plotting/example3/example.md
deleted file mode 100644
index b6c6fc5..0000000
--- a/src/main/java/examples/plotting/example3/example.md
+++ /dev/null
@@ -1,61 +0,0 @@
-# Example 3: Line Plot
-## Contents
-* [Overview](#overview)
-* [Import files](#include_files)
-* [The main function](#m_func)
-* [Results](#results)
-* [Source Code](#source_code)
-## Overview
-## Import files
-import dataloader.CsvDataLoader;
-import datastructs.NumericSample;
-import tech.tablesaw.api.Table;
-import visualizations.LineChart;
-import java.io.File;
-import java.io.IOException;
-## The main function
-// Loads a CSV file and plots the "Mean" column against the "Year" column as a line chart.
-public class Example3 {
- public static void main(String[] args) throws IOException {
- // parse the CSV file into a table
- File file = new File("data/annual.csv");
- Table table = CsvDataLoader.TableLoader.parseFile(file);
- // extract numeric samples from the data
- // X is built from the "Year" column and Y from the "Mean" column
- NumericSample X = CsvDataLoader.TableLoader.buildNumericSample(table, "Year");
- NumericSample Y = CsvDataLoader.TableLoader.buildNumericSample(table, "Mean");
- // LineChartOptions is created through a LineChart instance (inner-class creation syntax)
- LineChart plotter = new LineChart();
- LineChart.LineChartOptions options = plotter.new LineChartOptions();
- options.chartTitle = "Per Year Mean";
- options.xAxisName = "Year";
- options.yAxisName = "Mean";
- LineChart.plotLine(X, Y, options);
- }
-## Results
-## Source Code
\ No newline at end of file
diff --git a/src/main/java/examples/plotting/example4/Example4.java b/src/main/java/examples/plotting/example4/Example4.java
deleted file mode 100644
index cc7f798..0000000
--- a/src/main/java/examples/plotting/example4/Example4.java
+++ /dev/null
@@ -1,30 +0,0 @@
-package examples.plotting.example4;
-import dataloader.CsvDataLoader;
-import tech.tablesaw.api.Table;
-import visualizations.PieChart;
-import java.io.File;
-import java.io.IOException;
-/**
- * Category: Plotting
- * ID: PlotAPie
- * Description: Load a CSV file and plot a pie chart
- */
-public class Example4 {
- public static void main(String[] args) throws IOException {
- // parse the CSV file into a table
- File file = new File("data/humans_data.csv");
- Table table = CsvDataLoader.TableLoader.parseFile(file);
- // PieChartOptions is created through a PieChart instance (inner-class creation syntax)
- PieChart plotter = new PieChart();
- PieChart.PieChartOptions options = plotter.new PieChartOptions();
- options.chartTitle = "height by sex";
- // column names passed to the plot: "Sex" for grouping, "Height" for the numeric value
- options.groupColName = "Sex";
- options.numericColName = "Height";
- PieChart.plotPie(options, table);
- }
diff --git a/src/main/java/examples/plotting/example4/example.md b/src/main/java/examples/plotting/example4/example.md
deleted file mode 100644
index d6a584e..0000000
--- a/src/main/java/examples/plotting/example4/example.md
+++ /dev/null
@@ -1,55 +0,0 @@
-# Example 4: Pie Plot
-## Contents
-* [Overview](#overview)
-* [Import files](#include_files)
-* [The main function](#m_func)
-* [Results](#results)
-* [Source Code](#source_code)
-## Overview
-## Import files
-import dataloader.CsvDataLoader;
-import tech.tablesaw.api.Table;
-import visualizations.PieChart;
-import java.io.File;
-import java.io.IOException;
-## The main function
-// Loads a CSV file and plots a pie chart from it.
-public class Example4 {
- public static void main(String[] args) throws IOException {
- // parse the CSV file into a table
- File file = new File("data/humans_data.csv");
- Table table = CsvDataLoader.TableLoader.parseFile(file);
- // PieChartOptions is created through a PieChart instance (inner-class creation syntax)
- PieChart plotter = new PieChart();
- PieChart.PieChartOptions options = plotter.new PieChartOptions();
- options.chartTitle = "height by sex";
- // column names passed to the plot: "Sex" for grouping, "Height" for the numeric value
- options.groupColName = "Sex";
- options.numericColName = "Height";
- PieChart.plotPie(options, table);
- }
-## Results
-## Source Code
\ No newline at end of file
diff --git a/src/main/java/examples/plotting/example5/Example5.java b/src/main/java/examples/plotting/example5/Example5.java
deleted file mode 100644
index 934efa9..0000000
--- a/src/main/java/examples/plotting/example5/Example5.java
+++ /dev/null
@@ -1,30 +0,0 @@
-package examples.plotting.example5;
-import dataloader.CsvDataLoader;
-import tech.tablesaw.api.Table;
-import visualizations.ScatterChart;
-import java.io.File;
-import java.io.IOException;
-/**
- * Category: Plotting
- * ID: PlotABar
- * Description: Load a CSV file and plot a scatter chart with 3 numeric variables
- */
-public class Example5 {
- public static void main(String[] args) throws IOException {
- // parse the CSV file into a table
- File file = new File("data/humans_data.csv");
- Table data = CsvDataLoader.TableLoader.parseFile(file);
- // ScatterChartOptions is created through a ScatterChart instance (inner-class creation syntax)
- ScatterChart plotter = new ScatterChart();
- ScatterChart.ScatterChartOptions options = plotter.new ScatterChartOptions();
- options.chartTitle = "weight by age and height";
- // three numeric columns are named: "Age" and "Height" for the axes, "Weight" as the size column
- options.xAxisName = "Age";
- options.yAxisName = "Height";
- options.sizeColName = "Weight";
- ScatterChart.plotScatter3D(options, data);
- }
diff --git a/src/main/java/examples/plotting/example5/example.md b/src/main/java/examples/plotting/example5/example.md
deleted file mode 100644
index e40a5c9..0000000
--- a/src/main/java/examples/plotting/example5/example.md
+++ /dev/null
@@ -1,54 +0,0 @@
-# Example 5: Scatter Plot
-## Contents
-* [Overview](#overview)
-* [Import files](#include_files)
-* [The main function](#m_func)
-* [Results](#results)
-* [Source Code](#source_code)
-## Overview
-## Import files
-import dataloader.CsvDataLoader;
-import tech.tablesaw.api.Table;
-import visualizations.ScatterChart;
-import java.io.File;
-import java.io.IOException;
-## The main function
-// Loads a CSV file and plots a scatter chart with three numeric variables.
-public class Example5 {
- public static void main(String[] args) throws IOException {
- // parse the CSV file into a table
- File file = new File("data/humans_data.csv");
- Table data = CsvDataLoader.TableLoader.parseFile(file);
- // ScatterChartOptions is created through a ScatterChart instance (inner-class creation syntax)
- ScatterChart plotter = new ScatterChart();
- ScatterChart.ScatterChartOptions options = plotter.new ScatterChartOptions();
- options.chartTitle = "weight by age and height";
- // three numeric columns are named: "Age" and "Height" for the axes, "Weight" as the size column
- options.xAxisName = "Age";
- options.yAxisName = "Height";
- options.sizeColName = "Weight";
- ScatterChart.plotScatter3D(options, data);
- }
-## Results
-## Source Code
\ No newline at end of file
diff --git a/src/main/java/examples/stats/example1/Example1.java b/src/main/java/examples/stats/example1/Example1.java
deleted file mode 100644
index 88ca1e9..0000000
--- a/src/main/java/examples/stats/example1/Example1.java
+++ /dev/null
@@ -1,67 +0,0 @@
-package examples.stats.example1;
-import datasets.VectorDouble;
-import datastructs.IVector;
-import org.apache.commons.math3.stat.StatUtils;
-import stats.utils.Resample;
-import org.apache.commons.math3.distribution.NormalDistribution;
-/**
- *
- * Category: Statistics
- * ID: MeanBootstrap
- * Illustration of basic bootstrap method for the mean
- * see also: https://machinelearningmastery.com/a-gentle-introduction-to-the-bootstrap-method/
- */
-public class Example1 {
- public static void main(String[] args){
- // the size of the sample
- final int SIZE = 100;
- // the size passed to Resample.resample for every resample
- final int RESAMPLE_SIZE = 20;
- // how many bootstrap iterations to perform
- final int BOOST_ITRS = 100;
- // parameters for normal distribution
- final double MU = 0.8;
- final double SD = 0.1;
- // create a sample
- VectorDouble sample = new VectorDouble(SIZE);
- // normal distribution:
- // see https://commons.apache.org/proper/commons-math/javadocs/api-3.5/org/apache/commons/math3/distribution/NormalDistribution.html
- NormalDistribution dist = new NormalDistribution(MU, SD);
- // NOTE(review): the next line looks truncated in this listing -- `means` and `itr` are used
- // below but never declared, so the code that fills the sample and opens the bootstrap loop
- // is presumably missing here; restore it from the original source before reusing this code
- for(int i=0; i resample = Resample.resample(sample, RESAMPLE_SIZE, 3);
- // keep the mean of this resample
- means[itr] = ((VectorDouble)resample).getMean();
- }
- // compute the mean of means
- double mean = StatUtils.mean(means);
- System.out.println("Mean of means: "+mean);
- }
diff --git a/src/main/java/examples/stats/example1/example.md b/src/main/java/examples/stats/example1/example.md
deleted file mode 100644
index 962090f..0000000
--- a/src/main/java/examples/stats/example1/example.md
+++ /dev/null
@@ -1,14 +0,0 @@
-# Example 1: Simple Bootstrap For The Mean
-## Contents
-* [Overview](#overview)
- * [Linear Regression](#linear_regression)
- * [How Good Is The Fit?](#how_good_is_the_fit)
- * [```R^2``` Coefficient](#r2_coefficient)
-* [Include files](#include_files)
-* [Program structure](#prg_struct)
-* [The main function](#m_func)
-* [Results](#results)
-* [Source Code](#source_code)
-## Overview
\ No newline at end of file
diff --git a/src/main/java/examples/stats/example2/Example2.java b/src/main/java/examples/stats/example2/Example2.java
deleted file mode 100644
index 500cb16..0000000
--- a/src/main/java/examples/stats/example2/Example2.java
+++ /dev/null
@@ -1,67 +0,0 @@
-package examples.stats.example2;
-import dataloader.CsvDataLoader;
-import datasets.VectorDouble;
-import datastructs.IVector;
-import org.apache.commons.math3.stat.inference.TestUtils;
-import utils.ListUtils;
-import java.io.File;
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
- *
- * Category: Statistics, Hypothesis Testing
- * ID: HypothesisTestingForMean
- * Description: Load a CSV file using the CsvDataLoader.MapLoader and create
- * a NumericSample from the data set. Use the sample for hypothesis testing
- */
-public class Example2 {
- public static void main(String[] args){
- try {
- // load data set
- Map> dataSet = CsvDataLoader.MapLoader.parseFile(new File("data/robot_state.csv"));
- IVector sample = CsvDataLoader.MapLoader.buildNumericSample(dataSet, "X");
- // the mean value we assume
- double mu = 2.85;
- double level = 0.05;
- boolean rejectH0 = TestUtils.tTest(mu, ListUtils.toDoubleArray(sample.toArray()), level );
- double pLevel = TestUtils.tTest(mu, ListUtils.toDoubleArray(sample.toArray()));
- System.out.println("p-level is: "+pLevel);
- if(rejectH0){
- System.out.println("H0 hypothesis: mu="+mu+" can be rejected with confidence: "+(1.0-level));
- }
- else{
- System.out.println("H0 hypothesis: mu="+mu+" can not be rejected");
- }
- // now we should not reject
- mu = ((VectorDouble)sample).getMean();
- rejectH0 = TestUtils.tTest(mu, ListUtils.toDoubleArray(sample.toArray()), level );
- pLevel = TestUtils.tTest(mu, ListUtils.toDoubleArray(sample.toArray()));
- System.out.println("p-level is: "+pLevel);
- if(rejectH0){
- System.out.println("H0 hypothesis: mu="+mu+" can be rejected with confidence: "+(1.0-level));
- }
- else{
- System.out.println("H0 hypothesis: mu="+mu+" can not be rejected");
- }
- }
- catch(IOException exception)
- {
- System.out.println(exception.toString());
- }
- }
diff --git a/src/main/java/examples/stats/example2/example.md b/src/main/java/examples/stats/example2/example.md
deleted file mode 100644
index 33e208d..0000000
--- a/src/main/java/examples/stats/example2/example.md
+++ /dev/null
@@ -1,14 +0,0 @@
-# Example 2: Hypothesis Testing For The Mean
-## Contents
-* [Overview](#overview)
- * [Linear Regression](#linear_regression)
- * [How Good Is The Fit?](#how_good_is_the_fit)
- * [```R^2``` Coefficient](#r2_coefficient)
-* [Include files](#include_files)
-* [Program structure](#prg_struct)
-* [The main function](#m_func)
-* [Results](#results)
-* [Source Code](#source_code)
-## Overview
\ No newline at end of file
diff --git a/src/main/java/examples/stats/example3/Example3.java b/src/main/java/examples/stats/example3/Example3.java
deleted file mode 100644
index a806acc..0000000
--- a/src/main/java/examples/stats/example3/Example3.java
+++ /dev/null
@@ -1,111 +0,0 @@
-package examples.stats.example3;
-import optimization.GradientDescent;
-import optimization.GDInput;
-import utils.DefaultIterativeAlgorithmController;
-import utils.IterativeAlgorithmResult;
-import datasets.DenseMatrixSet;
-import datastructs.RowBuilder;
-import datasets.VectorDouble;
-import datastructs.RowType;
-import maths.errorfunctions.MSEVectorFunction;
-import maths.errorfunctions.SSEVectorFunction;
-import maths.functions.LinearVectorPolynomial;
-import ml.regression.LinearRegressor;
-import tech.tablesaw.api.Table;
-import utils.ListMaths;
-import utils.TableDataSetLoader;
-import java.io.File;
-import java.io.IOException;
-/** Category: Statistics
- * ID: Example1
- * Description: Goodness of fit of regression line
- * Taken From:
- * Details:
- * TODO
- */
-public class Example3 {
- public static void main(String[] args)throws IOException {
- // load the data
- Table dataSet = TableDataSetLoader.loadDataSet(new File("src/main/resources/datasets/car_plant.csv"));
- VectorDouble labels = new VectorDouble(dataSet, "Electricity Usage");
- Table reducedDataSet = dataSet.removeColumns("Electricity Usage").first(dataSet.rowCount());
- DenseMatrixSet denseMatrixSet = new DenseMatrixSet(RowType.Type.DOUBLE_VECTOR, new RowBuilder(), reducedDataSet.rowCount(), 2, 1.0);
- denseMatrixSet.setColumn(1, reducedDataSet.doubleColumn(0));
- LinearVectorPolynomial hypothesis = new LinearVectorPolynomial(1);
- LinearRegressor regressor = new LinearRegressor(hypothesis);
- GDInput gdInput = new GDInput();
- gdInput.showIterations = false;
- gdInput.eta=0.01;
- gdInput.errF = new MSEVectorFunction(hypothesis);
- gdInput.iterationContorller = new DefaultIterativeAlgorithmController(10000,1.0e-8);
- GradientDescent gdSolver = new GradientDescent(gdInput);
- IterativeAlgorithmResult result = (IterativeAlgorithmResult) regressor.train(denseMatrixSet, labels, gdSolver);
- System.out.println(result);
- System.out.println("Intercept: "+hypothesis.getCoeff(0)+" slope: "+hypothesis.getCoeff(1));
- // let's see the max error over the dateset
- VectorDouble errors = regressor.getErrors(denseMatrixSet, labels);
- double maxError = ListMaths.max(errors.getRawData());
- System.out.println("Maximum error over dataset: "+maxError);
- // let's get an estimate of the error variance.
- //The error variance sigma^2 can be estimated by considering the deviations between the observed
- //data values y_i and their fitted values \hat(y)_i . Specifically, the sum of squares for error SSE is defined
- //to be the sum of the squares of these deviations
- VectorDouble yhat = regressor.predict(denseMatrixSet);
- double sseError = SSEVectorFunction.error(labels, yhat);
- double sigma2_hat = sseError/ (yhat.size()-2);
- System.out.println("Estimate of error variance: "+ sigma2_hat);
- // interval estimation
- double Sxx = ListMaths.sxx(denseMatrixSet.getColumn(1).getRawData());
- System.out.println("Estimate of Sxx: "+Sxx);
- // standard error for the slope
- double se_slope = Math.sqrt(sigma2_hat)/Math.sqrt(Sxx);
- System.out.println("Standard error for the slope: "+se_slope);
- // t-statistic
- double t = hypothesis.getCoeff(1)/se_slope;
- System.out.println("t-statistic: "+t);
- //The two-sided p-value is calculated as
- //p-value = 2 × P(X > 6.37) approx 0
- //where the random variable X has a t-distribution with 10 degrees of freedom. This low p-value
- //indicates that the null hypothesis is not plausible and so the slope parameter is known to be
- //nonzero. In other words, it has been established that the distribution of electricity usage does
- //depend on the level of production.
- // The proportion of the total variability in the dependent variable y that is accounted for by
- // the regression line is given by the coefficient of determination.
- // This coefficient takes a value between 0 and 1, and the closer it is to one the smaller is the
- // sum of squares for error SSE in relation to
- // the sum of squares for regression SSR. Thus, larger values of R^2 tend to indicate that the data
- // points are closer to the fitted regression line. Nevertheless, a low
- // value of R^2 should not necessarily be interpreted as implying that the fitted regression line is
- // not appropriate or is not useful. A fitted regression line may be accurate and informative even
- // though a small value of R^2 is obtained because of a large error variance sigma62.
- double sst = ListMaths.sse(labels.getRawData());
- double r_sqr = 1.0- sseError/sst;
- System.out.println("Coefficient of determination: "+r_sqr);
- }
diff --git a/src/main/java/examples/stats/example3/example.md b/src/main/java/examples/stats/example3/example.md
deleted file mode 100644
index a41a1e8..0000000
--- a/src/main/java/examples/stats/example3/example.md
+++ /dev/null
@@ -1,148 +0,0 @@
-# Example 6: Goodness of fit of regression line
- ## Contents
- * [Overview](#overview)
- * [Goodness of fit of regression line](#goodness_of_fit)
- * [Import files](#include_files)
- * [The main function](#m_func)
- * [Results](#results)
- * [Source Code](#source_code)
- ## Overview
- ### Goodness of fit of regression line
- ## Import files
- ```
-package examples.stats.example3;
-import optimization.GradientDescent;
-import optimization.GDInput;
-import utils.DefaultIterativeAlgorithmController;
-import utils.IterativeAlgorithmResult;
-import datasets.DenseMatrixSet;
-import datastructs.RowBuilder;
-import datasets.VectorDouble;
-import datastructs.RowType;
-import maths.errorfunctions.MSEVectorFunction;
-import maths.errorfunctions.SSEVectorFunction;
-import maths.functions.LinearVectorPolynomial;
-import ml.regression.LinearRegressor;
-import tech.tablesaw.api.Table;
-import utils.ListMaths;
-import utils.TableDataSetLoader;
-import java.io.File;
-import java.io.IOException;
- ```
- ## The main function
- ```
- public class Example3 {
- public static void main(String[] args)throws IOException {
- // load the data
- Table dataSet = TableDataSetLoader.loadDataSet(new File("src/main/resources/datasets/car_plant.csv"));
- Vector labels = new Vector(dataSet, "Electricity Usage");
- Table reducedDataSet = dataSet.removeColumns("Electricity Usage").first(dataSet.rowCount());
- DenseMatrixSet denseMatrixSet = new DenseMatrixSet(RowType.Type.DOUBLE_VECTOR, new RowBuilder(), reducedDataSet.rowCount(), 2, 1.0);
- denseMatrixSet.setColumn(1, reducedDataSet.doubleColumn(0));
- LinearVectorPolynomial hypothesis = new LinearVectorPolynomial(1);
- LinearRegressor regressor = new LinearRegressor(hypothesis);
- GDInput gdInput = new GDInput();
- gdInput.showIterations = false;
- gdInput.eta=0.01;
- gdInput.errF = new MSEVectorFunction(hypothesis);
- gdInput.iterationContorller = new DefaultIterativeAlgorithmController(10000,1.0e-8);
- BatchGradientDescent gdSolver = new BatchGradientDescent(gdInput);
- IterativeAlgorithmResult result = (IterativeAlgorithmResult) regressor.train(denseMatrixSet, labels, gdSolver);
- System.out.println(result);
- System.out.println("Intercept: "+hypothesis.getCoeff(0)+" slope: "+hypothesis.getCoeff(1));
- // let's see the max error over the dateset
- Vector errors = regressor.getErrors(denseMatrixSet, labels);
- double maxError = ListMaths.max(errors.getRawData());
- System.out.println("Maximum error over dataset: "+maxError);
- // let's get an estimate of the error variance.
- //The error variance sigma^2 can be estimated by considering the deviations between the observed
- //data values y_i and their fitted values \hat(y)_i . Specifically, the sum of squares for error SSE is defined
- //to be the sum of the squares of these deviations
- Vector yhat = regressor.predict(denseMatrixSet);
- double sseError = SSEVectorFunction.error(labels, yhat);
- double sigma2_hat = sseError/ (yhat.size()-2);
- System.out.println("Estimate of error variance: "+ sigma2_hat);
- // interval estimation
- double Sxx = ListMaths.sxx(denseMatrixSet.getColumn(1).getRawData());
- System.out.println("Estimate of Sxx: "+Sxx);
- // standard error for the slope
- double se_slope = Math.sqrt(sigma2_hat)/Math.sqrt(Sxx);
- System.out.println("Standard error for the slope: "+se_slope);
- // t-statistic
- double t = hypothesis.getCoeff(1)/se_slope;
- System.out.println("t-statistic: "+t);
- //The two-sided p-value is calculated as
- //p-value = 2 × P(X > 6.37) approx 0
- //where the random variable X has a t-distribution with 10 degrees of freedom. This low p-value
- //indicates that the null hypothesis is not plausible and so the slope parameter is known to be
- //nonzero. In other words, it has been established that the distribution of electricity usage does
- //depend on the level of production.
- // The proportion of the total variability in the dependent variable y that is accounted for by
- // the regression line is given by the coefficient of determination.
- // This coefficient takes a value between 0 and 1, and the closer it is to one the smaller is the
- // sum of squares for error SSE in relation to
- // the sum of squares for regression SSR. Thus, larger values of R^2 tend to indicate that the data
- // points are closer to the fitted regression line. Nevertheless, a low
- // value of R^2 should not necessarily be interpreted as implying that the fitted regression line is
- // not appropriate or is not useful. A fitted regression line may be accurate and informative even
- // though a small value of R^2 is obtained because of a large error variance sigma62.
- double sst = ListMaths.sse(labels.getRawData());
- double r_sqr = 1.0- sseError/sst;
- System.out.println("Coefficient of determination: "+r_sqr);
- }
- }
- ```
- ## Results
- ```
-Converged: true
-Tolerance: 9.995007266283551E-9
-# Threads: 1
-Iterations: 7076
-Intercept: 0.37857734128519877 slope: 0.5049674670001678
-Maximum error over dataset: 0.2780755638140122
-Estimate of error variance: 0.02992964035512725
-Estimate of Sxx: 4.8723000000000525
-Standard error for the slope: 0.07837611613854814
-t-statistic: 6.442874333138931
-Coefficient of determination: 0.8019860710106865
- ```
- ## Source Code
- Example3.java
\ No newline at end of file
diff --git a/src/main/java/examples/stats/example4/Example4.java b/src/main/java/examples/stats/example4/Example4.java
deleted file mode 100644
index ea09fcf..0000000
--- a/src/main/java/examples/stats/example4/Example4.java
+++ /dev/null
@@ -1,51 +0,0 @@
-package examples.stats.example4;
-import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
-import utils.ListMaths;
-import utils.ListUtils;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Random;
-public class Example4 {
- public static List getNormalSample(double mu, double sd, int n){
- List sample = new ArrayList<>(n);
- Random rnd = new Random();
- for(int i=0; i means = new ArrayList();
- for( int itr=0; itr < N_SIM; ++itr){
- List sample = Example4.getNormalSample(MU, SIGMA, N);
- double mean = ListMaths.sum(sample)/((double)sample.size());
- //System.out.println(mean);
- means.add(mean);
- }
- double[] vals = ListUtils.toDoubleArray(means);
- DescriptiveStatistics stats = new DescriptiveStatistics(vals );
- System.out.println("Standard deviation of means is: "+stats.getStandardDeviation());
- System.out.println("sigma/sqrt(N) is: " + SIGMA/Math.sqrt(N));
- }
diff --git a/src/main/java/examples/stats/example4/example.md b/src/main/java/examples/stats/example4/example.md
deleted file mode 100644
index c777769..0000000
--- a/src/main/java/examples/stats/example4/example.md
+++ /dev/null
@@ -1,85 +0,0 @@
-# Example 4: Simulate the standard error for the mean
- ## Contents
- * [Overview](#overview)
- * [Standard error for the mean](#standard_error_for_mean)
- * [Import files](#include_files)
- * [The main function](#m_func)
- * [Results](#results)
- * [Source Code](#source_code)
- ## Overview
- ### Standard error for the mean
- ## Import files
- ```
-package examples.stats.example4;
-import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
-import utils.ListMaths;
-import utils.ListUtils;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Random;
- ```
- ## The main function
- ```
- public class Example4 {
- public static List getNormalSample(double mu, double sd, int n){
- List sample = new ArrayList<>(n);
- Random rnd = new Random();
- for(int i=0; i means = new ArrayList();
- for( int itr=0; itr < N_SIM; ++itr){
- List sample = Example4.getNormalSample(MU, SIGMA, N);
- double mean = ListMaths.sum(sample)/((double)sample.size());
- //System.out.println(mean);
- means.add(mean);
- }
- double[] vals = ListUtils.toDoubleArray(means);
- DescriptiveStatistics stats = new DescriptiveStatistics(vals );
- System.out.println("Standard deviation of means is: "+stats.getStandardDeviation());
- System.out.println("sigma/sqrt(N) is: " + SIGMA/Math.sqrt(N));
- }
- }
- ```
- ## Results
- ```
-Standard deviation of means is: 0.3081356365887988
-sigma/sqrt(N) is: 0.31622776601683794
- ```
- ## Source Code
- Example4.java
\ No newline at end of file
diff --git a/src/main/java/examples/stats/example5/Example5.java b/src/main/java/examples/stats/example5/Example5.java
deleted file mode 100644
index 0217c2f..0000000
--- a/src/main/java/examples/stats/example5/Example5.java
+++ /dev/null
@@ -1,26 +0,0 @@
-package examples.stats.example5;
-import org.apache.commons.math3.distribution.AbstractRealDistribution;
-import org.apache.commons.math3.distribution.NormalDistribution;
-import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
-import utils.ListUtils;
-import java.util.List;
-public class Example5 {
- public static void main(String[] args){
- final int SIZE = 20;
- final double MU = 1.0;
- final double STD = 0.3;
- AbstractRealDistribution normalDistribution = new NormalDistribution(MU, STD);
- List values = ListUtils.randomSample(SIZE, normalDistribution);
- DescriptiveStatistics stats = new DescriptiveStatistics(ListUtils.toDoubleArray(values));
- System.out.println("Mean is: "+ stats.getMean());
- System.out.println("Median is: "+stats.getPercentile(50));
- System.out.println("Min is: "+stats.getMin());
- System.out.println("Max is: "+stats.getMax());
- }
diff --git a/src/main/java/examples/stats/example5/example.md b/src/main/java/examples/stats/example5/example.md
deleted file mode 100644
index 3a0aee8..0000000
--- a/src/main/java/examples/stats/example5/example.md
+++ /dev/null
@@ -1,62 +0,0 @@
-# Calculate Descriptive Statistics Metrics
-## Contents
- * [Overview](#overview)
- * [Import files](#include_files)
- * [The main function](#m_func)
- * [Results](#results)
- * [Source Code](#source_code)
-## Overview
-## Import files
- ```
-package examples.stats.example5;
-import org.apache.commons.math3.distribution.AbstractRealDistribution;
-import org.apache.commons.math3.distribution.NormalDistribution;
-import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics;
-import utils.ListUtils;
-import java.util.List;
- ```
-## The main function
-public class Example5 {
- public static void main(String[] args){
- final int SIZE = 20;
- final double MU = 1.0;
- final double STD = 0.3;
- /// populate the list using the NormalDistribution
- AbstractRealDistribution normalDistribution = new NormalDistribution(MU, STD);
- List values = ListUtils.randomSample(SIZE, normalDistribution);
- DescriptiveStatistics stats = new DescriptiveStatistics(ListUtils.toDoubleArray(values));
- System.out.println("Mean is: "+ stats.getMean());
- System.out.println("Median is: "+stats.getPercentile(50));
- System.out.println("Min is: "+stats.getMin());
- System.out.println("Max is: "+stats.getMax());
- }
-## Results
-Mean is: 1.0282365100654927
-Median is: 1.016467404051775
-Min is: 0.520894109764956
-Max is: 1.6848924957278655
- ## Source Code
- Example5.java
\ No newline at end of file
diff --git a/src/main/java/examples/stats/example6/Example6.java b/src/main/java/examples/stats/example6/Example6.java
deleted file mode 100644
index ab2a5f0..0000000
--- a/src/main/java/examples/stats/example6/Example6.java
+++ /dev/null
@@ -1,88 +0,0 @@
-package examples.stats.example6;
-import org.apache.commons.math3.distribution.AbstractRealDistribution;
-import org.apache.commons.math3.distribution.NormalDistribution;
-import utils.ListMaths;
-import utils.ListUtils;
-import java.util.List;
-public class Example6 {
- public static void main(String[] arg){
- final int SIZE = 20;
- final double MU = 1.0;
- final double STD = 0.3;
- final double ALPHA = 0.05;
- AbstractRealDistribution normalDistribution = new NormalDistribution(MU, STD);
- List values = ListUtils.randomSample(SIZE, normalDistribution);
- AbstractRealDistribution standardNormalDistribution = new NormalDistribution();
- //compute the z-score
- double xbar = ListMaths.sum(values)/(double)values.size();
- double z = (xbar - MU)/(0.3/Math.sqrt((double) SIZE));
- System.out.println("Sample mean: " + xbar);
- System.out.println("z-score computed: "+z);
- System.out.println("Significance level: "+ALPHA);
- // test the hypothesis
- // H_0: \mu = 1.0
- // H_a: \mu > 1.0
- double p_value = 1.0 - standardNormalDistribution.cumulativeProbability(z);
- System.out.println();
- System.out.println("Upper tail test");
- // we reject H_0 if p-value < ALPHA
- if(p_value < ALPHA){
- System.out.println("The null hypothesis is rejected " +
- "with p-value " + p_value + " and significance level " + ALPHA);
- }
- else{
- System.out.println("With p-value " + p_value + " and significance level " + ALPHA + " cannot reject H0");
- }
- System.out.println();
- // test the hypothesis
- // H_0: \mu = 1.0
- // H_a: \mu < 1.0
- p_value = standardNormalDistribution.cumulativeProbability(z);
- System.out.println("Lower tail test");
- // we reject H_0 if p-value < ALPHA
- if(p_value < ALPHA){
- System.out.println("The null hypothesis is rejected " +
- "with p-value " + p_value + " and significance level " + ALPHA);
- }
- else{
- System.out.println("With p-value " + p_value + " and significance level " + ALPHA + " cannot reject H0");
- }
- System.out.println();
- System.out.println("Two sided test");
- // test the hypothesis
- // H_0: \mu = 1.0
- // H_a: \mu != 1.0
- p_value = 2.0*(1.0 - standardNormalDistribution.cumulativeProbability(Math.abs(z)));
- // we reject H_0 if p-value < ALPHA
- if(p_value < ALPHA){
- System.out.println("The null hypothesis is rejected " +
- "with p-value " + p_value + " and significance level " + ALPHA);
- }
- else{
- System.out.println("With p-value " + p_value + " and significance level " + ALPHA + " cannot reject H0");
- }
- }
diff --git a/src/main/java/examples/stats/example6/example.md b/src/main/java/examples/stats/example6/example.md
deleted file mode 100644
index 04eccfd..0000000
--- a/src/main/java/examples/stats/example6/example.md
+++ /dev/null
@@ -1,129 +0,0 @@
-# Hypothesis tests on the mean of a normal distribution with known variance
-## Contents
- * [Overview](#overview)
- * [Import files](#include_files)
- * [The main function](#m_func)
- * [Results](#results)
- * [Source Code](#source_code)
-## Overview
-## Import files
- ```
-package examples.stats.example6;
-import org.apache.commons.math3.distribution.AbstractRealDistribution;
-import org.apache.commons.math3.distribution.NormalDistribution;
-import utils.ListMaths;
-import utils.ListUtils;
-import java.util.List;
- ```
-## The main function
-public class Example6 {
- public static void main(String[] arg){
- final int SIZE = 20;
- final double MU = 1.0;
- final double STD = 0.3;
- final double ALPHA = 0.05;
- AbstractRealDistribution normalDistribution = new NormalDistribution(MU, STD);
- List values = ListUtils.randomSample(SIZE, normalDistribution);
- AbstractRealDistribution standardNormalDistribution = new NormalDistribution();
- //compute the z-score
- double xbar = ListMaths.sum(values)/(double)values.size();
- double z = (xbar - MU)/(0.3/Math.sqrt((double) SIZE));
- System.out.println("Sample mean: " + xbar);
- System.out.println("z-score computed: "+z);
- System.out.println("Significance level: "+ALPHA);
- // test the hypothesis
- // H_0: \mu = 1.0
- // H_a: \mu > 1.0
- double p_value = 1.0 - standardNormalDistribution.cumulativeProbability(z);
- System.out.println();
- System.out.println("Upper tail test");
- // we reject H_0 if p-value < ALPHA
- if(p_value < ALPHA){
- System.out.println("The null hypothesis is rejected " +
- "with p-value " + p_value + " and significance level " + ALPHA);
- }
- else{
- System.out.println("With p-value " + p_value + " and significance level " + ALPHA + " cannot reject H0");
- }
- System.out.println();
- // test the hypothesis
- // H_0: \mu = 1.0
- // H_a: \mu < 1.0
- p_value = standardNormalDistribution.cumulativeProbability(z);
- System.out.println("Lower tail test");
- // we reject H_0 if p-value < ALPHA
- if(p_value < ALPHA){
- System.out.println("The null hypothesis is rejected " +
- "with p-value " + p_value + " and significance level " + ALPHA);
- }
- else{
- System.out.println("With p-value " + p_value + " and significance level " + ALPHA + " cannot reject H0");
- }
- System.out.println();
- System.out.println("Two sided test");
- // test the hypothesis
- // H_0: \mu = 1.0
- // H_a: \mu != 1.0
- p_value = 2.0*(1.0 - standardNormalDistribution.cumulativeProbability(Math.abs(z)));
- // we reject H_0 if p-value < ALPHA
- if(p_value < ALPHA){
- System.out.println("The null hypothesis is rejected " +
- "with p-value " + p_value + " and significance level " + ALPHA);
- }
- else{
- System.out.println("With p-value " + p_value + " and significance level " + ALPHA + " cannot reject H0");
- }
- }
-## Results
-Sample mean: 0.9828392170518672
-z-score computed: -0.2558178481276285
-Significance level: 0.05
-Upper tail test
-With p-value 0.6009542542169531 and significance level 0.05 cannot reject H0
-Lower tail test
-With p-value 0.39904574578304686 and significance level 0.05 cannot reject H0
-Two sided test
-With p-value 0.7980914915660937 and significance level 0.05 cannot reject H0
- ## Source Code
- Example6.java
\ No newline at end of file
diff --git a/src/main/java/examples/stats/example7/Example7.java b/src/main/java/examples/stats/example7/Example7.java
deleted file mode 100644
index 3d7dff9..0000000
--- a/src/main/java/examples/stats/example7/Example7.java
+++ /dev/null
@@ -1,22 +0,0 @@
-package examples.stats.example7;
-import org.apache.commons.math3.distribution.AbstractRealDistribution;
-import org.apache.commons.math3.distribution.NormalDistribution;
-public class Example7 {
- public static void main(String[] args){
- final double ALPHA = 0.05;
- final double SIGMA = 2.0;
- int n = 25;
- final double z_ALPHA_HALF = 1.96;
- final double DELTA = 1.0;
- AbstractRealDistribution normalDistribution = new NormalDistribution();
- double beta = normalDistribution.cumulativeProbability(z_ALPHA_HALF - Math.sqrt(n)/SIGMA)-
- normalDistribution.cumulativeProbability(-z_ALPHA_HALF - Math.sqrt(n)/SIGMA);
- System.out.println("beta is "+beta);
- System.out.println("Power of test: " + (1.0- beta));
- }
diff --git a/src/main/java/examples/stats/example7/example.md b/src/main/java/examples/stats/example7/example.md
deleted file mode 100644
index 6182e5b..0000000
--- a/src/main/java/examples/stats/example7/example.md
+++ /dev/null
@@ -1,52 +0,0 @@
-# Type II error and the sample size
-## Contents
- * [Overview](#overview)
- * [Import files](#include_files)
- * [The main function](#m_func)
- * [Results](#results)
- * [Source Code](#source_code)
-## Overview
-## Import files
- ```
-package examples.stats.example7;
-import org.apache.commons.math3.distribution.AbstractRealDistribution;
-import org.apache.commons.math3.distribution.NormalDistribution;
- ```
-## The main function
-public class Example7 {
- public static void main(String[] args){
- final double ALPHA = 0.05;
- final double SIGMA = 2.0;
- int n = 25;
- final double z_ALPHA_HALF = 1.96;
- final double DELTA = 1.0;
- AbstractRealDistribution normalDistribution = new NormalDistribution();
- double beta = normalDistribution.cumulativeProbability(z_ALPHA_HALF - Math.sqrt(n)/SIGMA)-
- normalDistribution.cumulativeProbability(-z_ALPHA_HALF - Math.sqrt(n)/SIGMA);
- System.out.println("beta is "+beta);
- System.out.println("Power of test: " + (1.0- beta));
- }
-## Results
-beta is 0.29459441823305144
-Power of test: 0.7054055817669486
- ## Source Code
- Example7.java
diff --git a/src/main/java/examples/stats/example8/Example8.java b/src/main/java/examples/stats/example8/Example8.java
deleted file mode 100644
index 9ff23ed..0000000
--- a/src/main/java/examples/stats/example8/Example8.java
+++ /dev/null
@@ -1,41 +0,0 @@
-package examples.stats.example8;
-import io.CSVFileWriter;
-import org.apache.commons.math3.distribution.UniformIntegerDistribution;
-import stats.Statistics;
-import utils.ListUtils;
-import java.nio.file.Paths;
-import java.util.ArrayList;
-import java.util.List;
-public class Example8 {
- public static void main(String[] args){
- String path = Paths.get("src/main/java/examples/stats/example8")
- .toAbsolutePath()
- .toString();
- System.out.println("Path is: "+path);
- final int N_SIMS = 1000;
- List avg = new ArrayList<>(N_SIMS);
- for(int i =0; i sample = ListUtils.randomSample(i+1,
- new UniformIntegerDistribution(1,6));
- double mean = Statistics.calculate(ListUtils.toDoubleArray(sample),
- Statistics.Metrics.MEAN);
- avg.add(mean);
- if(i==10 || i == 100 || i == 500 || i==999) {
- CSVFileWriter writer = new CSVFileWriter(new String(path + "/" + "averages" + i + ".csv"));
- writer.writeDoubleRow(avg);
- }
- }
- }
diff --git a/src/main/java/examples/stats/example8/example.md b/src/main/java/examples/stats/example8/example.md
deleted file mode 100644
index f369df0..0000000
--- a/src/main/java/examples/stats/example8/example.md
+++ /dev/null
@@ -1,37 +0,0 @@
-# Example 8: Simulate The Central Limit Theorem
- ## Contents
- * [Overview](#overview)
- * [Central Limit Theorem](#goodness_of_fit)
- * [Import files](#include_files)
- * [The main function](#m_func)
- * [Results](#results)
- * [Source Code](#source_code)
- ## Overview
- ### Central Limit Theorem
- ## Import files
- ```
- ```
- ## The main function
- ```
- ```
- ## Results
- ```
- ```
- ## Source Code
- Example8.java
\ No newline at end of file