Skip to content

Commit

Permalink
Merge pull request #113 from JuliaML/cl/text
Browse files Browse the repository at this point in the history
Redesign Text datasets
  • Loading branch information
CarloLucibello authored Apr 16, 2022
2 parents 24a5611 + 41d7d82 commit 864ba5f
Show file tree
Hide file tree
Showing 15 changed files with 432 additions and 399 deletions.
23 changes: 17 additions & 6 deletions src/MLDatasets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,12 @@ include("datasets_vision/svhn2.jl")
export SVHN2

# Text
include("datasets_text/PTBLM/PTBLM.jl")
include("datasets_text/UD_English/UD_English.jl")
include("datasets_text/SMSSpamCollection/SMSSpamCollection.jl")
include("datasets_text/ptblm.jl")
export PTBLM
include("datasets_text/udenglish.jl")
export UD_English
include("datasets_text/smsspamcollection.jl")
export SMSSpamCollection

# Graphs
include("datasets_graph/planetoid.jl")
Expand All @@ -87,12 +90,20 @@ include("datasets_graph/KarateClub/KarateClub.jl")
export KarateClub

function __init__()
# TODO automatically find and execute __init__xxx functions

# graph
__init__ogbdataset()
__init__tudataset()

# misc
__init__iris()
__init__mutagenesis()
__init__ogbdataset()
__init__tudataset()

# text
__init__ptblm()
__init__smsspam()
__init__udenglish()

# vision
__init__cifar10()
Expand All @@ -103,4 +114,4 @@ function __init__()
__init__svhn2()
end

end
end #module
15 changes: 15 additions & 0 deletions src/abstract_datasets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,21 @@ Base.length(d::SupervisedDataset) = numobs((d.features, d.targets))
Base.getindex(d::SupervisedDataset) = getobs((; d.features, d.targets))
Base.getindex(d::SupervisedDataset, i) = getobs((; d.features, d.targets), i)

"""
abstract type UnsupervisedDataset <: AbstractDataset end
An abstract dataset type for unsupervised or self-supervised learning tasks.
Concrete dataset types inheriting from it must provide
a `features` field.
"""
abstract type UnsupervisedDataset <: AbstractDataset end


# Length of an unsupervised dataset: whatever `numobs` reports for its
# `features` field.
function Base.length(d::UnsupervisedDataset)
    return numobs(d.features)
end

# Indexing an unsupervised dataset forwards to `getobs` on its `features` field:
# `d[]` calls `getobs(d.features)` and `d[i]` calls `getobs(d.features, i)`.
function Base.getindex(d::UnsupervisedDataset)
    return getobs(d.features)
end

function Base.getindex(d::UnsupervisedDataset, i)
    return getobs(d.features, i)
end


### DOCSTRING TEMPLATES ######################

Expand Down
89 changes: 0 additions & 89 deletions src/datasets_text/PTBLM/PTBLM.jl

This file was deleted.

154 changes: 0 additions & 154 deletions src/datasets_text/SMSSpamCollection/SMSSpamCollection.jl

This file was deleted.

Loading

0 comments on commit 864ba5f

Please sign in to comment.