From 00bb922722680e065117cff612e86ecaf289a3f6 Mon Sep 17 00:00:00 2001
From: Johanni Brea <jbrea@users.noreply.github.com>
Date: Tue, 31 Aug 2021 21:51:57 +0200
Subject: [PATCH 1/4] save downloaded arff file as artifact

---
 Project.toml  |  1 +
 src/OpenML.jl |  1 +
 src/data.jl   | 18 +++++++++++-------
 test/data.jl  |  9 +++++++++
 4 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/Project.toml b/Project.toml
index a99ef16..d3a525f 100644
--- a/Project.toml
+++ b/Project.toml
@@ -8,6 +8,7 @@ ARFFFiles = "da404889-ca92-49ff-9e8b-0aa6b4d38dc8"
 HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
 JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
 Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
+Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
 ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81"
 
 [compat]
diff --git a/src/OpenML.jl b/src/OpenML.jl
index 4a915e7..ad03dc4 100644
--- a/src/OpenML.jl
+++ b/src/OpenML.jl
@@ -5,6 +5,7 @@ using JSON
 import ARFFFiles
 import ScientificTypes: Continuous, Count, Textual, Multiclass, coerce, autotype
 using Markdown
+using Pkg.Artifacts
 
 export OpenML
 
diff --git a/src/data.jl b/src/data.jl
index 572039d..7ce5c19 100644
--- a/src/data.jl
+++ b/src/data.jl
@@ -5,10 +5,6 @@ const API_URL = "https://www.openml.org/api/v1/json"
 # https://github.com/openml/OpenML/tree/master/openml_OS/views/pages/api_new/v1/xsd
 # https://www.openml.org/api_docs#!/data/get_data_id
 
-# TODO:
-# - Use e.g. DataDeps to cache data locally
-# - Put the ARFF parser to a separate package or use ARFFFiles when
-#   https://github.com/cjdoris/ARFFFiles.jl/issues/4 is fixed.
 
 """
 Returns information about a dataset. The information includes the name,
@@ -58,9 +54,17 @@ df = DataFrame(table);
 ```
 """
 function load(id::Int; parser = :arff)
-    response = load_Dataset_Description(id)
-    arff_file = HTTP.request("GET", response["data_set_description"]["url"])
-    data = ARFFFiles.load(IOBuffer(arff_file.body))
+    dir = first(Artifacts.artifacts_dirs())
+    toml = joinpath(dir, "OpenMLArtifacts.toml")
+    hash = artifact_hash(string(id), toml)
+    if hash === nothing || !artifact_exists(hash)
+        hash = Artifacts.create_artifact() do artifact_dir
+            url = load_Dataset_Description(id)["data_set_description"]["url"]
+            download(url, joinpath(artifact_dir, "$id.arff"))
+        end
+        bind_artifact!(toml, string(id), hash)
+    end
+    data = ARFFFiles.load(joinpath(artifact_path(hash), "$id.arff"))
     if parser == :auto
         return coerce(data, autotype(data))
     else
diff --git a/test/data.jl b/test/data.jl
index 1da9430..b4eec2c 100644
--- a/test/data.jl
+++ b/test/data.jl
@@ -4,6 +4,7 @@ using Test
 using HTTP
 using OpenML
 import Tables.istable
+using Pkg.Artifacts
 
 response_test = OpenML.load_Dataset_Description(61)
 ntp_test = OpenML.load(61)
@@ -40,5 +41,13 @@ end
     @test length(filters_test["data"]["dataset"][1]) == offset
 end
 
+@testset "artifacts" begin
+    dir = first(Artifacts.artifacts_dirs())
+    toml = joinpath(dir, "OpenMLArtifacts.toml")
+    hash = artifact_hash("61", toml)
+    @test artifact_exists(hash)
+end
+
+
 end
 true

From 8b3d15b81c2b6d19414c55ef023053fac8f909df Mon Sep 17 00:00:00 2001
From: Johanni Brea <jbrea@users.noreply.github.com>
Date: Wed, 1 Sep 2021 16:10:20 +0200
Subject: [PATCH 2/4] compat with v1.0

---
 src/OpenML.jl |  4 +++-
 src/data.jl   | 25 ++++++++++++++++---------
 test/data.jl  | 15 ++++++++-------
 3 files changed, 27 insertions(+), 17 deletions(-)

diff --git a/src/OpenML.jl b/src/OpenML.jl
index ad03dc4..ae0b1c3 100644
--- a/src/OpenML.jl
+++ b/src/OpenML.jl
@@ -5,7 +5,9 @@ using JSON
 import ARFFFiles
 import ScientificTypes: Continuous, Count, Textual, Multiclass, coerce, autotype
 using Markdown
-using Pkg.Artifacts
+if VERSION > v"1.3.0"
+    using Pkg.Artifacts
+end
 
 export OpenML
 
diff --git a/src/data.jl b/src/data.jl
index 7ce5c19..9bee2b3 100644
--- a/src/data.jl
+++ b/src/data.jl
@@ -54,17 +54,24 @@ df = DataFrame(table);
 ```
 """
 function load(id::Int; parser = :arff)
-    dir = first(Artifacts.artifacts_dirs())
-    toml = joinpath(dir, "OpenMLArtifacts.toml")
-    hash = artifact_hash(string(id), toml)
-    if hash === nothing || !artifact_exists(hash)
-        hash = Artifacts.create_artifact() do artifact_dir
-            url = load_Dataset_Description(id)["data_set_description"]["url"]
-            download(url, joinpath(artifact_dir, "$id.arff"))
+    if VERSION > v"1.3.0"
+        dir = first(Artifacts.artifacts_dirs())
+        toml = joinpath(dir, "OpenMLArtifacts.toml")
+        hash = artifact_hash(string(id), toml)
+        if hash === nothing || !artifact_exists(hash)
+            hash = Artifacts.create_artifact() do artifact_dir
+                url = load_Dataset_Description(id)["data_set_description"]["url"]
+                download(url, joinpath(artifact_dir, "$id.arff"))
+            end
+            bind_artifact!(toml, string(id), hash)
         end
-        bind_artifact!(toml, string(id), hash)
+        filename = joinpath(artifact_path(hash), "$id.arff")
+    else
+        url = load_Dataset_Description(id)["data_set_description"]["url"]
+        filename = tempname()
+        download(url, filename)
     end
-    data = ARFFFiles.load(joinpath(artifact_path(hash), "$id.arff"))
+    data = ARFFFiles.load(filename)
     if parser == :auto
         return coerce(data, autotype(data))
     else
diff --git a/test/data.jl b/test/data.jl
index b4eec2c..f77b155 100644
--- a/test/data.jl
+++ b/test/data.jl
@@ -4,7 +4,6 @@ using Test
 using HTTP
 using OpenML
 import Tables.istable
-using Pkg.Artifacts
 
 response_test = OpenML.load_Dataset_Description(61)
 ntp_test = OpenML.load(61)
@@ -41,13 +40,15 @@ end
     @test length(filters_test["data"]["dataset"][1]) == offset
 end
 
-@testset "artifacts" begin
-    dir = first(Artifacts.artifacts_dirs())
-    toml = joinpath(dir, "OpenMLArtifacts.toml")
-    hash = artifact_hash("61", toml)
-    @test artifact_exists(hash)
+if VERSION > v"1.3.0"
+    using Pkg.Artifacts
+    @testset "artifacts" begin
+        dir = first(Artifacts.artifacts_dirs())
+        toml = joinpath(dir, "OpenMLArtifacts.toml")
+        hash = artifact_hash("61", toml)
+        @test artifact_exists(hash)
+    end
 end
 
-
 end
 true

From 965b330539701f74eb9f7bf2a0d527f74a091a94 Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom" <anthony.blaom@gmail.com>
Date: Thu, 9 Sep 2021 08:34:34 +1200
Subject: [PATCH 3/4] update doc string for load

---
 src/data.jl | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/data.jl b/src/data.jl
index 9bee2b3..6065896 100644
--- a/src/data.jl
+++ b/src/data.jl
@@ -43,6 +43,8 @@ With `parser = :arff` (default) the ARFFFiles.jl parser is used.
 With `parser = :auto` the output of the ARFFFiles parser is coerced to
 automatically detected scientific types.
 
+Datasets are saved as Julia artifacts so that they persist locally once loaded.
+
 Returns a table.
 
 # Examples

From 1a11b2a4aa761ff029e721e45d748c6282b750d6 Mon Sep 17 00:00:00 2001
From: "Anthony D. Blaom" <anthony.blaom@gmail.com>
Date: Thu, 9 Sep 2021 08:35:07 +1200
Subject: [PATCH 4/4] bump version = "0.1.1"

---
 Project.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index d3a525f..c1f01ff 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "OpenML"
 uuid = "8b6db2d4-7670-4922-a472-f9537c81ab66"
 authors = ["Diego Arenas <darenasc@gmail.com>", "Anthony D. Blaom <anthony.blaom@gmail.com>"]
-version = "0.1.0"
+version = "0.1.1"
 
 [deps]
 ARFFFiles = "da404889-ca92-49ff-9e8b-0aa6b4d38dc8"