JuliaAI · ericphanson · Feb 2, 2023 · Oct 18, 2022 · Oct 18, 2022 · Nov 21, 2022
diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml
@@ -26,26 +26,28 @@ jobs:
       fail-fast: false
       matrix:
         version:
-          - '1'
+          - '1.6'
+          - '1' 
+          - 'nightly'
         os:
           - ubuntu-latest
         arch:
           - x64
+        include:
+          - os: windows-latest
+            version: '1'
+            arch: x64
     env:
       PYTHON: ''
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
         with:
           fetch-depth: 0
       - uses: julia-actions/setup-julia@v1
         with:
           version: ${{ matrix.version }}
           arch: ${{ matrix.arch }}
-      - uses: actions/cache@v2
-        with:
-          path: ~/.julia/artifacts
-          key: ${{ runner.os }}-test-artifacts-${{ hashFiles('**/Project.toml') }}
-          restore-keys: ${{ runner.os }}-test-artifacts
+      - uses: julia-actions/cache@v1 # https://github.com/julia-actions/cache
       - uses: julia-actions/julia-buildpkg@v1
       - uses: julia-actions/julia-runtest@v1
       - uses: julia-actions/julia-processcoverage@v1

diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
@@ -18,15 +18,11 @@ jobs:
   Documentation:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v3
       - uses: julia-actions/setup-julia@latest
         with:
           version: 1.6 # earliest supported version
-      - uses: actions/cache@v2
-        with:
-          path: ~/.julia/artifacts
-          key: ${{ runner.os }}-docs-artifacts-${{ hashFiles('**/Project.toml') }}
-          restore-keys: ${{ runner.os }}-docs-artifacts
+      - uses: julia-actions/cache@v1 # https://github.com/julia-actions/cache
       - uses: julia-actions/julia-docdeploy@releases/v1
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # For authentication with GitHub Actions token

diff --git a/.github/workflows/format_check.yml b/.github/workflows/format_check.yml
@@ -16,7 +16,7 @@ jobs:
       - uses: julia-actions/setup-julia@latest
         with:
           version: 1.6.0
-      - uses: actions/checkout@v1
+      - uses: actions/checkout@v3
       - name: Instantiate `format` environment and format
         run: |
           julia  --project=format -e 'using Pkg; Pkg.instantiate()'

diff --git a/Project.toml b/Project.toml
@@ -4,22 +4,25 @@ authors = ["Beacon Biosignals, Inc."]
 version = "0.2.0"
 
 [deps]
-DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
+MLJModelInterface = "e80e1ace-859a-464e-9ed9-23947d8ae3ea"
 OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
 PythonCall = "6099a3de-0909-46bc-b1f4-468b9a2dfc0d"
 Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
 
 [compat]
 Aqua = "0.5"
-DataFrames = "0.22, 1"
+MLJModelInterface = "1"
 OrderedCollections = "1.4"
 PythonCall = "0.9"
 Tables = "1.4"
 julia = "1.6"
 
 [extras]
 Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
+DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
+MLJBase = "a7f614a8-145f-11e9-1d2a-a57a1082229d"
+MLJTestInterface = "72560011-54dd-4dc2-94f3-c5de45b75ecd"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["Aqua", "Test"]
+test = ["Aqua", "DataFrames", "MLJBase", "MLJTestInterface", "Test"]
diff --git a/README.md b/README.md
@@ -16,6 +16,7 @@ Julia interface to [CatBoost](https://catboost.ai/).
 module Regression
 
 using CatBoost
+using PythonCall
 
 train_data = PyList([[1, 4, 5, 6], [4, 5, 6, 7], [30, 40, 50, 60]])
 eval_data = PyList([[2, 4, 6, 8], [1, 4, 50, 60]])
@@ -32,3 +33,28 @@ preds = predict(model, eval_data)
 
 end # module
 ```
+
+## MLJ Example
+```julia
+module Regression
+
+# The MLJ model types are provided by the `MLJCatBoostInterface` submodule
+# (the top-level `CatBoost.CatBoostRegressor` is the Python wrapper type).
+using CatBoost.MLJCatBoostInterface
+using DataFrames
+using MLJBase
+
+# Each inner vector is one feature column; each row is one observation.
+train_data = DataFrame([[1,4,30], [4,5,40], [5,6,50], [6,7,60]], :auto)
+eval_data = DataFrame([[2,1], [4,4], [6,50], [8,60]], :auto)
+train_labels = [10.0, 20.0, 30.0]
+
+# Initialize MLJ Machine
+model = CatBoostRegressor(iterations = 2, learning_rate = 1, depth = 2)
+mach = machine(model, train_data, train_labels)
+
+# Fit model
+MLJBase.fit!(mach)
+
+# Get predictions from the fitted machine (not from the bare, unfitted model)
+preds = MLJBase.predict(mach, eval_data)
+
+end # module
+```
diff --git a/docs/make.jl b/docs/make.jl
@@ -2,7 +2,8 @@ using CatBoost
 using Documenter
 
 makedocs(; modules=[CatBoost], sitename="CatBoost.jl", authors="Beacon Biosignals, Inc.",
-         pages=["API Documentation" => "index.md"])
+         pages=["Introduction" => "index.md", "Wrapper" => "wrapper.md",
+                "MLJ API" => "mlj_api.md"])
 
 deploydocs(; repo="github.com/beacon-biosignals/CatBoost.jl.git", push_preview=true,
            devbranch="main")
diff --git a/docs/src/index.md b/docs/src/index.md
@@ -1,13 +1,17 @@
-# API Documentation
+# CatBoost.jl
 
-Below is the API documentation for CatBoost.jl.
+Julia interface to [CatBoost](https://catboost.ai/). This library is a wrapper around CatBoost's Python package via [PythonCall.jl](https://github.com/cjdoris/PythonCall.jl).
 
 For a nice introduction to the package, see the [examples](https://github.com/beacon-biosignals/CatBoost.jl/blob/main/examples/).
 
-```@meta
-CurrentModule = CatBoost
+# Installation
+
+This package is available in the Julia General Registry. You can install it with either of the following commands:
+
+```
+pkg> add CatBoost
 ```
 
-```@autodocs
-Modules = [CatBoost]
+```julia
+julia> using Pkg; Pkg.add("CatBoost")
 ```
diff --git a/docs/src/mlj_api.md b/docs/src/mlj_api.md
@@ -0,0 +1,8 @@
+# MLJ API
+
+Below is the MLJ API documentation for CatBoost.jl.
+
+```@docs
+CatBoost.MLJCatBoostInterface.CatBoostClassifier
+CatBoost.MLJCatBoostInterface.CatBoostRegressor
+```
diff --git a/docs/src/wrapper.md b/docs/src/wrapper.md
@@ -0,0 +1,15 @@
+# Python Wrapper
+
+Below is the Python wrapper documentation for CatBoost.jl.
+
+```@docs
+Pool
+CatBoost.CatBoostClassifier
+CatBoost.CatBoostRegressor
+cv
+to_catboost
+to_pandas
+pandas_to_tbl
+feature_importance
+load_dataset
+```
diff --git a/examples/mlj/binary.jl b/examples/mlj/binary.jl
@@ -0,0 +1,30 @@
+# Example: binary classification through the MLJ interface to CatBoost.
+# Builds a tiny training table, fits a `CatBoostClassifier` wrapped in an MLJ
+# machine, and produces both class predictions and per-class probabilities.
+module Binary
+
+using CatBoost.MLJCatBoostInterface
+using DataFrames
+using MLJBase
+using PythonCall
+
+# Initialize data
+# Each inner vector is one feature column (three training rows, two eval rows).
+# The first two columns are coerced to `Multiclass`, the third to
+# `OrderedFactor`; the remaining three are plain integer vectors.
+train_data = DataFrame([coerce(["a", "a", "c"], Multiclass),
+                        coerce(["b", "b", "d"], Multiclass),
+                        coerce([0, 0, 1], OrderedFactor), [4, 5, 40], [5, 6, 50],
+                        [6, 7, 60]], :auto)
+# Two distinct label values (1 and -1), coerced to `OrderedFactor`.
+train_labels = coerce([1, 1, -1], OrderedFactor)
+# Evaluation table uses the same column order and coercions as `train_data`.
+eval_data = DataFrame([coerce(["a", "a"], Multiclass), coerce(["b", "d"], Multiclass),
+                       coerce([0, 0], OrderedFactor), [4, 4], [6, 50], [8, 60]], :auto)
+
+# Initialize CatBoostClassifier
+model = CatBoostClassifier(; iterations=2, learning_rate=1.0, depth=2)
+# Bind the model to the training data; nothing is fitted until `fit!` is called.
+mach = machine(model, train_data, train_labels)
+
+# Fit model
+MLJBase.fit!(mach)
+
+# Get predicted classes
+preds_class = MLJBase.predict_mode(mach, eval_data)
+
+# Get predicted probabilities for each class
+preds_proba = MLJBase.predict(mach, eval_data)
+
+end # module
diff --git a/examples/mlj/multiclass.jl b/examples/mlj/multiclass.jl
@@ -0,0 +1,31 @@
+# Example: multiclass classification through the MLJ interface to CatBoost.
+# Scitypes are written fully qualified (`MLJBase.Multiclass`) because this
+# module is itself named `Multiclass`.
+module Multiclass
+
+using CatBoost.MLJCatBoostInterface
+using DataFrames
+using MLJBase
+using PythonCall
+
+# Initialize data
+# Each inner vector is one feature column (three training rows, two eval rows).
+train_data = DataFrame([coerce(["a", "a", "c"], MLJBase.Multiclass),
+                        coerce(["b", "b", "d"], MLJBase.Multiclass),
+                        coerce([0, 0, 1], MLJBase.OrderedFactor), [4, 5, 40], [5, 6, 50],
+                        [6, 7, 60]], :auto)
+# Three distinct label values (1, 0 and -1), coerced to `Multiclass`.
+train_labels = coerce([1, 0, -1], MLJBase.Multiclass)
+# The evaluation table must follow the same column schema as `train_data`:
+# the third column is an `OrderedFactor` there, so it is coerced here as well
+# (using values that occur in the training column).
+eval_data = DataFrame([coerce(["a", "a"], MLJBase.Multiclass),
+                       coerce(["b", "d"], MLJBase.Multiclass),
+                       coerce([0, 0], MLJBase.OrderedFactor), [4, 4], [6, 50],
+                       [8, 60]], :auto)
+
+# Initialize CatBoostClassifier
+model = CatBoostClassifier(; iterations=2, learning_rate=1, depth=2)
+# Bind the model to the training data; nothing is fitted until `fit!` is called.
+mach = machine(model, train_data, train_labels)
+
+# Fit model
+MLJBase.fit!(mach)
+
+# Get predicted classes
+preds_class = MLJBase.predict_mode(mach, eval_data)
+
+# Get predicted probabilities for each class
+preds_proba = MLJBase.predict(mach, eval_data)
+
+end # module
diff --git a/examples/mlj/regression.jl b/examples/mlj/regression.jl
@@ -0,0 +1,23 @@
+# Example: regression through the MLJ interface to CatBoost.
+# Builds a tiny numeric training table, fits a `CatBoostRegressor` wrapped in
+# an MLJ machine, and predicts on a separate evaluation table.
+module Regression
+
+using CatBoost.MLJCatBoostInterface
+using DataFrames
+using MLJBase
+using PythonCall
+
+# Initialize data
+# Each inner vector is one feature column (three training rows, two eval rows).
+train_data = DataFrame([[1, 4, 30], [4, 5, 40], [5, 6, 50], [6, 7, 60]], :auto)
+train_labels = [10.0, 20.0, 30.0]
+eval_data = DataFrame([[2, 1], [4, 4], [6, 50], [8, 60]], :auto)
+
+# Initialize CatBoostRegressor
+model = CatBoostRegressor(; iterations=2, learning_rate=1.0, depth=2)
+# Bind the model to the training data; nothing is fitted until `fit!` is called.
+mach = machine(model, train_data, train_labels)
+
+# Fit model
+MLJBase.fit!(mach)
+
+# Get predictions
+# NOTE(review): the name `preds_class` mirrors the classifier examples; the
+# values here are regression predictions, not classes.
+preds_class = MLJBase.predict(mach, eval_data)
+
+end # module
diff --git a/examples/binary.jl → examples/wrapper/binary.jl b/examples/binary.jl → examples/wrapper/binary.jl
@@ -13,6 +13,7 @@ eval_data = DataFrame([["a", "a"], ["b", "d"], [2, 1], [4, 4], [6, 50], [8, 60]]
 
 # Initialize CatBoostClassifier
 model = CatBoostClassifier(; iterations=2, learning_rate=1, depth=2)
+
 # Fit model
 fit!(model, train_data, train_labels, cat_features)
 

diff --git a/examples/cross_validation.jl → examples/wrapper/cross_validation.jl b/examples/cross_validation.jl → examples/wrapper/cross_validation.jl
diff --git a/examples/datasets_api.jl → examples/wrapper/datasets_api.jl b/examples/datasets_api.jl → examples/wrapper/datasets_api.jl
diff --git a/examples/learning_to_rank.jl → examples/wrapper/learning_to_rank.jl b/examples/learning_to_rank.jl → examples/wrapper/learning_to_rank.jl
diff --git a/examples/multiclass.jl → examples/wrapper/multiclass.jl b/examples/multiclass.jl → examples/wrapper/multiclass.jl
diff --git a/examples/regression.jl → examples/wrapper/regression.jl b/examples/regression.jl → examples/wrapper/regression.jl
@@ -8,7 +8,7 @@ eval_data = PyList([[2, 4, 6, 8], [1, 4, 50, 60]])
 train_labels = PyList([10, 20, 30])
 
 # Initialize CatBoostRegressor
-model = CatBoostRegressor(; iterations=2, learning_rate=1, depth=2)
+model = CatBoostRegressor(; iterations=2, learning_rate=1, depth=2, verbose=false)
 
 # Fit model
 fit!(model, train_data, train_labels)

diff --git a/examples/return_best.jl → examples/wrapper/return_best.jl b/examples/return_best.jl → examples/wrapper/return_best.jl
diff --git a/format/README.md b/format/README.md
@@ -1,6 +1,6 @@
 # format
 
-Run `julia --project=format format/run.jl` with Julia 1.5 to run JuliaFormatter.
+Run `julia --project=format format/run.jl` with Julia 1.6 to run JuliaFormatter.
 
 If you update the version of Julia used to generate the `Manifest.toml` make sure to also
 update the version in `.github/workflows/format_check.yml` to match.