Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added Python module isolation-forest-onnx which can convert an isolation forest model to ONNX format. #53

Merged
merged 2 commits into from
Sep 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 53 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,10 +1,60 @@
.gradle
# Gradle and Build directories
.gradle/
build/

# macOS system files
.DS_Store
.idea
spark-warehouse

# IDE configurations
.idea/
*.ipr
*.iml
*.iws
.vscode/
.sublime-*

# Python artifacts and caches
*.egg
*.egg-info/
*.pyc
*.pyo
__pycache__/
.cache/
.tox*
.venv*
.env
.envrc
.direnv/
.mypy_cache/
pinned.txt
/*/*pinned.txt

# Test-related files
.coverage
TEST-*.xml
coverage.xml
/htmlcov/

# Python build artifacts
dist/
/build/
/*/dist/
/*/build/
/MANIFEST
/*/MANIFEST
/*/activate

# Configuration files
product-spec.json
/*/product-spec.json
config/
/config/external/
/*/config

# Miscellaneous project files
spark-warehouse
version.txt

# Project-specific paths
isolation-forest/bin
isolation-forest-onnx/venv
4 changes: 2 additions & 2 deletions LICENSE
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ Outlier Detection Datasets (ODDS)
This project includes datasets from the Outlier Detection Datasets (ODDS)
repository (http://odds.cs.stonybrook.edu/).

- Dataset at isolation-forest/src/test/resources/mammography.csv
- Dataset at isolation-forest/src/test/resources/shuttle.csv
- Dataset at isolation-forest/src/test/resources/mammography.csv and isolation-forest-onnx/test/resources/mammography.csv
- Dataset at isolation-forest/src/test/resources/shuttle.csv and isolation-forest-onnx/test/resources/shuttle.csv

Your use of these datasets is subject to the citation requirements
outlined here: http://odds.cs.stonybrook.edu/about-odds/
Expand Down
8 changes: 4 additions & 4 deletions NOTICE
Original file line number Diff line number Diff line change
Expand Up @@ -30,22 +30,22 @@ terms than as set forth above. In addition, such third party code may also
depend on and load multiple tiers of dependencies.

========================================================================
Dataset at isolation-forest/src/test/resources/mammography.csv
Dataset at isolation-forest/src/test/resources/mammography.csv and isolation-forest-onnx/test/resources/mammography.csv
========================================================================
Shebuti Rayana (2016). ODDS Library [http://odds.cs.stonybrook.edu].
Stony Brook, NY: Stony Brook University, Department of Computer Science.
Dataset from http://odds.cs.stonybrook.edu/mammography-dataset/
Dataset from https://odds.cs.stonybrook.edu/mammography-dataset/

The original Mammography (Woods et al., 1993) data set was made available by the courtesy of
Aleksandar Lazarevic. This dataset is publicly available in openML (https://www.openml.org/d/310)
under Public Domain Mark 1.0.

========================================================================
Dataset at isolation-forest/src/test/resources/shuttle.csv
Dataset at isolation-forest/src/test/resources/shuttle.csv and isolation-forest-onnx/test/resources/shuttle.csv
========================================================================
Shebuti Rayana (2016). ODDS Library [http://odds.cs.stonybrook.edu].
Stony Brook, NY: Stony Brook University, Department of Computer Science.
Dataset from http://odds.cs.stonybrook.edu/shuttle-dataset/
Dataset from https://odds.cs.stonybrook.edu/shuttle-dataset/

Dua, D. and Graff, C. (2019). UCI Machine Learning Repository [http://archive.ics.uci.edu/ml].
Irvine, CA: University of California, School of Information and Computer Science.
Expand Down
6 changes: 0 additions & 6 deletions build.gradle
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
buildscript {
repositories {
jcenter()
maven {
url "https://plugins.gradle.org/m2/"
}
Expand All @@ -24,11 +23,6 @@ allprojects {
group = "com.linkedin.isolation-forest"

repositories {
jcenter()
mavenCentral()
}
}

task clean(type: Delete) {
delete "build"
}
Binary file modified gradle/wrapper/gradle-wrapper.jar
Binary file not shown.
37 changes: 17 additions & 20 deletions gradlew
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
Expand Down Expand Up @@ -82,6 +82,7 @@ esac

CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar


# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
Expand Down Expand Up @@ -125,10 +126,11 @@ if $darwin; then
GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\""
fi

# For Cygwin, switch paths to Windows format before running java
if $cygwin ; then
# For Cygwin or MSYS, switch paths to Windows format before running java
if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then
APP_HOME=`cygpath --path --mixed "$APP_HOME"`
CLASSPATH=`cygpath --path --mixed "$CLASSPATH"`

JAVACMD=`cygpath --unix "$JAVACMD"`

# We build the pattern for arguments to be converted via cygpath
Expand All @@ -154,19 +156,19 @@ if $cygwin ; then
else
eval `echo args$i`="\"$arg\""
fi
i=$((i+1))
i=`expr $i + 1`
done
case $i in
(0) set -- ;;
(1) set -- "$args0" ;;
(2) set -- "$args0" "$args1" ;;
(3) set -- "$args0" "$args1" "$args2" ;;
(4) set -- "$args0" "$args1" "$args2" "$args3" ;;
(5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
(6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
(7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
(8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
(9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
0) set -- ;;
1) set -- "$args0" ;;
2) set -- "$args0" "$args1" ;;
3) set -- "$args0" "$args1" "$args2" ;;
4) set -- "$args0" "$args1" "$args2" "$args3" ;;
5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
esac
fi

Expand All @@ -175,14 +177,9 @@ save () {
for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
echo " "
}
APP_ARGS=$(save "$@")
APP_ARGS=`save "$@"`

# Collect all arguments for the java command, following the shell quoting and substitution rules
eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"

# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong
if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then
cd "$(dirname "$0")"
fi

exec "$JAVACMD" "$@"
27 changes: 8 additions & 19 deletions gradlew.bat
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem http://www.apache.org/licenses/LICENSE-2.0
@rem https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
Expand All @@ -29,6 +29,9 @@ if "%DIRNAME%" == "" set DIRNAME=.
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%

@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi

@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"

Expand All @@ -37,7 +40,7 @@ if defined JAVA_HOME goto findJavaFromJavaHome

set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if "%ERRORLEVEL%" == "0" goto init
if "%ERRORLEVEL%" == "0" goto execute

echo.
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
Expand All @@ -51,7 +54,7 @@ goto fail
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe

if exist "%JAVA_EXE%" goto init
if exist "%JAVA_EXE%" goto execute

echo.
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
Expand All @@ -61,28 +64,14 @@ echo location of your Java installation.

goto fail

:init
@rem Get command-line arguments, handling Windows variants

if not "%OS%" == "Windows_NT" goto win9xME_args

:win9xME_args
@rem Slurp the command line arguments.
set CMD_LINE_ARGS=
set _SKIP=2

:win9xME_args_slurp
if "x%~1" == "x" goto execute

set CMD_LINE_ARGS=%*

:execute
@rem Setup the command line

set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar


@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS%
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*

:end
@rem End local scope for the variables with windows NT shell
Expand Down
1 change: 1 addition & 0 deletions isolation-forest-onnx/MANIFEST.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
include version.txt
124 changes: 124 additions & 0 deletions isolation-forest-onnx/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
// The 'base' plugin supplies the lifecycle tasks (clean, check, build) that
// this script configures further down.
plugins {
    id('base')
}

// Location of the Python virtual environment, resolved relative to this project.
def venvDir = file('venv')

// Writes the Gradle project version to version.txt in the project directory.
tasks.create('createVersionFile') {
    description = 'Create version.txt file for package version used in setup.py'
    doLast {
        def target = file("${projectDir}/version.txt")
        // Make sure the containing directory exists before writing.
        target.parentFile.mkdirs()
        target.text = project.version
    }
}

// Runs `python3 -m venv <venvDir>` to create the virtual environment.
tasks.create('createVenv', Exec) {
    description = 'Create a Python virtual environment'
    executable('python3')
    args('-m', 'venv', venvDir)
}

// Installs everything listed in requirements-dev.txt using the venv's own pip.
tasks.create('installDependencies', Exec) {
    description = 'Install Python dependencies into the virtual environment'
    dependsOn(createVenv)
    executable("${venvDir}/bin/pip")
    args('install', '-r', 'requirements-dev.txt')
}

// Builds the sdist and wheel via setup.py with the venv's interpreter.
// Requires the dev dependencies and version.txt to be in place first.
tasks.create('buildPythonPackage', Exec) {
    description = 'Build the Python source distribution and wheel'
    dependsOn(installDependencies, createVersionFile)
    commandLine(["${venvDir}/bin/python", 'setup.py', 'sdist', 'bdist_wheel'])
}

// Task to publish the Python package to PyPI.
// Authenticates with an API token: the username is the literal '__token__'
// and the password is read from the PYPI_TOKEN environment variable.
task publishPythonPackage(type: Exec) {
    description = 'Upload the Python package to PyPI'
    group = 'upload'

    dependsOn buildPythonPackage

    def pypiToken = System.getenv('PYPI_TOKEN')

    // Fail with a clear message instead of invoking twine without a usable
    // credential when the token variable is missing or empty.
    doFirst {
        if (!pypiToken) {
            throw new GradleException(
                'PYPI_TOKEN environment variable must be set to publish to PyPI.')
        }
    }

    environment 'TWINE_USERNAME', '__token__'
    // Never hand a null value to the process environment; the doFirst check
    // above aborts the task before twine runs when no token is available.
    environment 'TWINE_PASSWORD', pypiToken ?: ''
    commandLine "${venvDir}/bin/twine", 'upload', 'dist/*'
}

// Task to publish the Python package to Test PyPI.
// Authenticates with an API token: the username is the literal '__token__'
// and the password is read from the TEST_PYPI_TOKEN environment variable.
task publishPythonPackageToTestPyPI(type: Exec) {
    description = 'Upload the Python package to Test PyPI'
    group = 'upload'

    dependsOn buildPythonPackage

    def testPypiToken = System.getenv('TEST_PYPI_TOKEN')

    // Fail with a clear message instead of invoking twine without a usable
    // credential when the token variable is missing or empty.
    doFirst {
        if (!testPypiToken) {
            throw new GradleException(
                'TEST_PYPI_TOKEN environment variable must be set to publish to Test PyPI.')
        }
    }

    environment 'TWINE_USERNAME', '__token__'
    // Never hand a null value to the process environment; the doFirst check
    // above aborts the task before twine runs when no token is available.
    environment 'TWINE_PASSWORD', testPypiToken ?: ''
    commandLine "${venvDir}/bin/twine", 'upload', '--repository-url', 'https://test.pypi.org/legacy/', 'dist/*'
}

// Runs pytest over the 'test' directory with src/ on PYTHONPATH and writes a
// JUnit-style XML report under build/reports/tests.
tasks.create('test', Exec) {
    group = 'verification'
    description = 'Run Python tests using pytest'

    dependsOn(installDependencies)

    // Declared inputs/outputs used by Gradle's up-to-date checking.
    inputs.dir(file('src'))
    inputs.dir(file('test'))
    outputs.dir(file('build/reports/tests'))

    environment('PYTHONPATH', "${projectDir}/src")
    executable("${venvDir}/bin/python")
    args('-m', 'pytest', 'test', '--junitxml=build/reports/tests/TEST-pytest.xml')
}

// Runs pytest under coverage.py inside the virtual environment, then writes a
// text report and an HTML report next to the coverage data file.
task runCoverage(type: Exec) {
    description = 'Run tests with coverage'
    group = 'verification'

    dependsOn installDependencies
    environment 'PYTHONPATH', "${projectDir}/src"

    // All coverage artifacts live under <buildDir>/reports/coverage.
    def coverageReportDir = file("${buildDir}/reports/coverage")
    def coverageFile = file("${coverageReportDir}/.coverage")
    def coverageReportFile = file("${coverageReportDir}/coverage_report.txt")

    // Builds a `bash -c` command line that activates the venv before running
    // the given command. Paths are single-quoted so that a checkout location
    // containing spaces does not split the shell words.
    def inVenv = { String command ->
        ['bash', '-c', ". '${venvDir}/bin/activate' && ${command}".toString()]
    }

    // Create the report directory if it doesn't exist (no-op when it does).
    doFirst {
        coverageReportDir.mkdirs()
    }

    commandLine inVenv("coverage run --data-file='${coverageFile}' -m pytest")
    doLast {
        // Generate the coverage report and output it to the specified file
        exec {
            commandLine inVenv("coverage report --data-file='${coverageFile}' > '${coverageReportFile}'")
        }
        // Generate HTML coverage report
        exec {
            commandLine inVenv("coverage html -d '${coverageReportDir}/html_report' --data-file='${coverageFile}'")
        }
    }
}

// Runs flake8 from inside the activated virtual environment.
task runFlake8(type: Exec) {
    description = 'Run flake8 linting'
    group = 'verification'

    dependsOn installDependencies
    // The activate script path is single-quoted so a checkout location
    // containing spaces does not split the shell words.
    commandLine 'bash', '-c', ". '${venvDir}/bin/activate' && flake8"
}

// Runs mypy over src/ from inside the activated virtual environment.
task runMypy(type: Exec) {
    description = 'Run mypy type checks'
    group = 'verification'

    dependsOn installDependencies
    // The activate script path is single-quoted so a checkout location
    // containing spaces does not split the shell words.
    commandLine 'bash', '-c', ". '${venvDir}/bin/activate' && mypy src"
    // Declared inputs/outputs used by Gradle's up-to-date checking.
    // NOTE(review): the command itself is not told to write into
    // build/reports/mypy — confirm this output declaration is intentional.
    inputs.dir file("src")
    outputs.dir file("build/reports/mypy")
}

// Extend the clean task to also remove Python-side artifacts:
//   venv        - virtual environment for Python
//   dist        - Python distribution directory
//   version.txt - version file used for setup.py
clean {
    delete('venv', 'dist', 'version.txt')
}

// Hook the Python tasks into the lifecycle tasks:
// `check` runs tests, linting, coverage and type checks; `build` also
// produces the Python package.
check.dependsOn(test, runFlake8, runCoverage, runMypy)
build.dependsOn(buildPythonPackage)
Loading
Loading