Skip to content

Commit

Permalink
Merge pull request #9 from JuliaAI/dev
Browse files Browse the repository at this point in the history
For a 0.1.1 release
  • Loading branch information
ablaom authored Oct 10, 2021
2 parents b5dd7dd + 1a11b2a commit 818a825
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 8 deletions.
3 changes: 2 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
name = "OpenML"
uuid = "8b6db2d4-7670-4922-a472-f9537c81ab66"
authors = ["Diego Arenas <[email protected]>", "Anthony D. Blaom <[email protected]>"]
version = "0.1.0"
version = "0.1.1"

[deps]
ARFFFiles = "da404889-ca92-49ff-9e8b-0aa6b4d38dc8"
HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3"
JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"
Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81"

[compat]
Expand Down
3 changes: 3 additions & 0 deletions src/OpenML.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ using JSON
import ARFFFiles
import ScientificTypes: Continuous, Count, Textual, Multiclass, coerce, autotype
using Markdown
# The artifact-based dataset cache is only enabled on Julia versions newer
# than 1.3.0; older versions never touch the Artifacts API, so it is not loaded.
@static if VERSION > v"1.3.0"
    using Pkg.Artifacts
end

export OpenML

Expand Down
27 changes: 20 additions & 7 deletions src/data.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,6 @@ const API_URL = "https://www.openml.org/api/v1/json"
# https://github.com/openml/OpenML/tree/master/openml_OS/views/pages/api_new/v1/xsd
# https://www.openml.org/api_docs#!/data/get_data_id

# TODO:
# - Use e.g. DataDeps to cache data locally
# - Put the ARFF parser to a separate package or use ARFFFiles when
# https://github.com/cjdoris/ARFFFiles.jl/issues/4 is fixed.

"""
Returns information about a dataset. The information includes the name,
Expand Down Expand Up @@ -47,6 +43,8 @@ With `parser = :arff` (default) the ARFFFiles.jl parser is used.
With `parser = :auto` the output of the ARFFFiles parser is coerced to
automatically detected scientific types.
Datasets are saved as Julia artifacts so that they persist locally once loaded.
Returns a table.
# Examples
Expand All @@ -58,9 +56,24 @@ df = DataFrame(table);
```
"""
function load(id::Int; parser = :arff)
response = load_Dataset_Description(id)
arff_file = HTTP.request("GET", response["data_set_description"]["url"])
data = ARFFFiles.load(IOBuffer(arff_file.body))
if VERSION > v"1.3.0"
dir = first(Artifacts.artifacts_dirs())
toml = joinpath(dir, "OpenMLArtifacts.toml")
hash = artifact_hash(string(id), toml)
if hash === nothing || !artifact_exists(hash)
hash = Artifacts.create_artifact() do artifact_dir
url = load_Dataset_Description(id)["data_set_description"]["url"]
download(url, joinpath(artifact_dir, "$id.arff"))
end
bind_artifact!(toml, string(id), hash)
end
filename = joinpath(artifact_path(hash), "$id.arff")
else
url = load_Dataset_Description(id)["data_set_description"]["url"]
filename = tempname()
download(url, filename)
end
data = ARFFFiles.load(filename)
if parser == :auto
return coerce(data, autotype(data))
else
Expand Down
10 changes: 10 additions & 0 deletions test/data.jl
Original file line number Diff line number Diff line change
Expand Up @@ -40,5 +40,15 @@ end
@test length(filters_test["data"]["dataset"][1]) == offset
end

# Check that a dataset is cached as a Julia artifact. The version guard matches
# the `VERSION > v"1.3.0"` condition under which artifact caching is used.
# NOTE(review): this assumes dataset 61 was already fetched with `load(61)`
# earlier in the test run, so that its artifact is bound — confirm against the
# preceding testsets.
if VERSION > v"1.3.0"
    using Pkg.Artifacts
    @testset "artifacts" begin
        # Bindings are looked up in "OpenMLArtifacts.toml" inside the first
        # artifacts directory.
        artifacts_dir = first(Artifacts.artifacts_dirs())
        toml = joinpath(artifacts_dir, "OpenMLArtifacts.toml")
        # `artifact_hash` returns `nothing` when the name is not bound in `toml`.
        # Named `tree_hash` to avoid shadowing `Base.hash`.
        tree_hash = artifact_hash("61", toml)
        @test tree_hash !== nothing
        # Short-circuit so a missing binding is reported as a test failure
        # rather than a `MethodError` from `artifact_exists(nothing)`.
        @test tree_hash !== nothing && artifact_exists(tree_hash)
    end
end

end
true

0 comments on commit 818a825

Please sign in to comment.