From 539921af31a39ebd2c5b97dd53623728dfd0e97b Mon Sep 17 00:00:00 2001 From: Johanni Brea Date: Sun, 31 Oct 2021 22:00:29 +0100 Subject: [PATCH 1/2] rm ScientificTypes; improve error messages --- Project.toml | 4 +--- src/OpenML.jl | 3 +-- src/data.jl | 35 +++++++++++++++++++---------------- 3 files changed, 21 insertions(+), 21 deletions(-) diff --git a/Project.toml b/Project.toml index c1f01ff..8ec59b9 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "OpenML" uuid = "8b6db2d4-7670-4922-a472-f9537c81ab66" authors = ["Diego Arenas ", "Anthony D. Blaom "] -version = "0.1.1" +version = "0.2.0" [deps] ARFFFiles = "da404889-ca92-49ff-9e8b-0aa6b4d38dc8" @@ -9,13 +9,11 @@ HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" Markdown = "d6f4376e-aef5-505a-96c1-9c027394607a" Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" -ScientificTypes = "321657f4-b219-11e9-178b-2701a2544e81" [compat] ARFFFiles = "1.3" HTTP = "0.8, 0.9" JSON = "0.21" -ScientificTypes = "2" julia = "1" [extras] diff --git a/src/OpenML.jl b/src/OpenML.jl index ae0b1c3..b1fda67 100644 --- a/src/OpenML.jl +++ b/src/OpenML.jl @@ -1,9 +1,8 @@ -module OpenML +module OpenML using HTTP using JSON import ARFFFiles -import ScientificTypes: Continuous, Count, Textual, Multiclass, coerce, autotype using Markdown if VERSION > v"1.3.0" using Pkg.Artifacts diff --git a/src/data.jl b/src/data.jl index 6065896..a68782e 100644 --- a/src/data.jl +++ b/src/data.jl @@ -28,22 +28,21 @@ function load_Dataset_Description(id::Int; api_key::String="") println("No access granted. This dataset is not shared with you.") end catch e - println("Error occurred : $e") + println("Error occurred. Check if there exists a dataset with id $id.") + println("See e.g. 
OpenML.list_datasets()\n") + println(e) return nothing end return nothing end """ - OpenML.load(id; parser = :arff) + OpenML.load(id) Load the OpenML dataset with specified `id`, from those listed by [`list_datasets`](@ref) or on the [OpenML site](https://www.openml.org/search?type=data). -With `parser = :arff` (default) the ARFFFiles.jl parser is used. -With `parser = :auto` the output of the ARFFFiles parser is coerced to -automatically detected scientific types. -Datasets are saved as julia artifacts so that they persist locally once loaded. +Datasets are saved as julia artifacts so that they persist locally once loaded. Returns a table. @@ -51,11 +50,13 @@ Returns a table. ```julia using DataFrames -table = OpenML.load(61); -df = DataFrame(table); +table = OpenML.load(61) +df = DataFrame(table) # transform to a DataFrame +using ScientificTypes +df2 = coerce(df, autotype(df)) # coerce to automatically detected scientific types ``` """ -function load(id::Int; parser = :arff) +function load(id::Int) if VERSION > v"1.3.0" dir = first(Artifacts.artifacts_dirs()) toml = joinpath(dir, "OpenMLArtifacts.toml") @@ -73,12 +74,7 @@ function load(id::Int; parser = :arff) filename = tempname() download(url, filename) end - data = ARFFFiles.load(filename) - if parser == :auto - return coerce(data, autotype(data)) - else - return data - end + ARFFFiles.load(filename) end @@ -321,7 +317,14 @@ julia> OpenML.describe_dataset(6) cited above for more details. ``` """ -describe_dataset(id) = Markdown.parse(load_Dataset_Description(id)["data_set_description"]["description"]) +function describe_dataset(id) + description = load_Dataset_Description(id)["data_set_description"]["description"] + if isa(description, AbstractString) + Markdown.parse(description) + else + "No description found." 
+ end +end # Flow API From 92750498949b6f4e6d96f0dc238599d41db6291c Mon Sep 17 00:00:00 2001 From: Johanni Brea Date: Wed, 10 Nov 2021 12:30:37 +0100 Subject: [PATCH 2/2] fix test --- test/data.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/data.jl b/test/data.jl index f77b155..9f33b7c 100644 --- a/test/data.jl +++ b/test/data.jl @@ -41,12 +41,12 @@ end end if VERSION > v"1.3.0" - using Pkg.Artifacts + using Pkg @testset "artifacts" begin - dir = first(Artifacts.artifacts_dirs()) + dir = first(Pkg.Artifacts.artifacts_dirs()) toml = joinpath(dir, "OpenMLArtifacts.toml") - hash = artifact_hash("61", toml) - @test artifact_exists(hash) + hash = Pkg.Artifacts.artifact_hash("61", toml) + @test Pkg.Artifacts.artifact_exists(hash) end end