
Add ImageNet #146

Open: adrhill wants to merge 26 commits into master from ah/imagenet

Commits:
7852e48
Add Images and ImageMagick deps using LazyModules
adrhill Jun 23, 2022
20be9a6
Add Image preprocessing script
adrhill Jun 23, 2022
59afe92
Add ImageNet dataset
adrhill Jun 23, 2022
1f4dfaf
Rename ImageNetReader file to match struct name
adrhill Jun 23, 2022
dfdeaa5
Formatting fixes
adrhill Jun 23, 2022
d2ded7e
Remove lowpass on image before resizing
adrhill Jun 23, 2022
4809296
Use `FileDataset` and replace ImageMagick with JpegTurbo
adrhill Jun 23, 2022
cac14d2
Add missing reference URL to comment
adrhill Jun 23, 2022
1e850ba
Remove use of `imresize`
adrhill Jun 23, 2022
2aa0170
Replace Images dependency by ImageCore
adrhill Jun 23, 2022
06ad214
Use StackViews.jl for batching
adrhill Jun 23, 2022
3097302
Load ImageCore and StackViews non-lazily
adrhill Jun 24, 2022
02e966d
Bake `Tx` into FileDataset's `loadfn`
adrhill Jun 24, 2022
9fb811c
Fix indexing bug in `center_crop_view`
adrhill Jun 24, 2022
4e0e8d4
Move installation guide into separate markdown file
adrhill Jul 8, 2022
0daca90
Include feedback from code review
adrhill Jul 8, 2022
09feb3d
Support custom preprocessing functions
adrhill Feb 2, 2023
df14fea
Sort classes by WordNet ID
adrhill Feb 2, 2023
c92ae00
Update docstring
adrhill Feb 2, 2023
8637ebe
Merge branch 'master' into ah/imagenet
adrhill Feb 2, 2023
944bd83
Update docstrings
adrhill Feb 2, 2023
fe38d43
Remove StackViews dependency
adrhill Feb 7, 2023
6af86c6
Remove normalization constants
adrhill Feb 7, 2023
95b13d9
Add more metadata
adrhill Feb 7, 2023
09d5be4
Add `img_size` argument
adrhill Feb 8, 2023
ae1929d
Format to SciML code style, matching #205
adrhill Feb 22, 2023
6 changes: 6 additions & 0 deletions Project.toml
@@ -13,9 +13,11 @@ FixedPointNumbers = "53c48c17-4a7d-5ca2-90c5-79b7896eea93"
GZip = "92fee26a-97fe-5a0c-ad85-20a5f3185b63"
Glob = "c27321d9-0574-5035-807b-f59d2c89b15c"
HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
ImageCore = "a09fc81d-aa75-5fe9-8630-4744c3626534"
ImageShow = "4e3cecfd-b093-5904-9786-8bbb286a6a31"
JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
JpegTurbo = "b835a17e-a41a-41e7-81f0-2f016b05efe0"
LazyModules = "8cdb02fc-e678-4876-92c5-9defec4f444e"
MAT = "23992714-dd62-5051-b70f-ba57cb901cac"
MLUtils = "f1d291b0-491e-4a28-83b9-f70985020b54"
@@ -24,6 +26,7 @@ Pickle = "fbb45041-c46e-462f-888f-7c521cafbc2c"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
Requires = "ae029012-a4dd-5104-9daa-d747884805df"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
StackViews = "cae243ae-269e-4f55-b966-ac2d0dc13c15"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"

[compat]
@@ -36,15 +39,18 @@ FixedPointNumbers = "0.8"
GZip = "0.5"
Glob = "1.3"
HDF5 = "0.16.2"
ImageCore = "0.9"
ImageShow = "0.3"
JLD2 = "0.4.21"
JSON3 = "1"
JpegTurbo = "0.1"
LazyModules = "0.3"
MAT = "0.10"
MLUtils = "0.2.0, 0.3, 0.4"
NPZ = "0.4.1"
Pickle = "0.3"
Requires = "1"
StackViews = "0.1"
Tables = "1.6"
julia = "1.6"

62 changes: 62 additions & 0 deletions docs/src/datasets/imagenet_installation.md
@@ -0,0 +1,62 @@
# Installing ImageNet
The ImageNet 2012 Classification Dataset (ILSVRC 2012-2017) can be downloaded at
[image-net.org](https://image-net.org/) after signing up and accepting the terms of access.
You therefore need to download this dataset manually.

## Existing installation
The dataset structure is assumed to look as follows:
```
ImageNet
├── train
├── val
│ ├── n01440764
│ │ ├── ILSVRC2012_val_00000293.JPEG
│ │ ├── ILSVRC2012_val_00002138.JPEG
│ │ └── ...
│ ├── n01443537
│ └── ...
├── test
└── devkit
├── data
│ ├── meta.mat
│ └── ...
└── ...
```
If your existing copy of the ImageNet dataset uses another file structure,
we recommend creating symbolic links, e.g. using `ln` on Unix-like operating
systems:
```bash
cd ~/.julia/datadeps
mkdir -p ImageNet/val
ln -s my/path/to/imagenet/val ImageNet/val
mkdir -p ImageNet/devkit/data
ln -s my/path/to/imagenet/devkit/data ImageNet/devkit/data
```

## New installation
Download the following files from the [ImageNet website](https://image-net.org/):
* `ILSVRC2012_devkit_t12.tar.gz`
* `ILSVRC2012_img_train.tar`, only required for `:train` split
* `ILSVRC2012_img_val.tar`, only required for `:val` split

After downloading the data, move and extract the training and validation images into
labeled subfolders by running the following shell script:
```bash
# Extract the training data:
mkdir -p ImageNet/train && tar -xvf ILSVRC2012_img_train.tar -C ImageNet/train
# Unpack all 1000 compressed tar-files, one for each category:
cd ImageNet/train
find . -name "*.tar" | while read NAME ; do mkdir -p "${NAME%.tar}"; tar -xvf "${NAME}" -C "${NAME%.tar}"; rm -f "${NAME}"; done

# Extract the validation data:
cd ../..
mkdir -p ImageNet/val && tar -xvf ILSVRC2012_img_val.tar -C ImageNet/val

# Run the valprep.sh script from soumith to create all class directories and move the images into them:
cd ImageNet/val
wget -qO- https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh | bash

# Extract metadata from the devkit:
cd ../..
mkdir -p ImageNet/devkit && tar -xzvf ILSVRC2012_devkit_t12.tar.gz -C ImageNet/devkit
```
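
Once all archives are in place, loading the dataset should work out of the box. A quick sanity check (a sketch, assuming the default `~/.julia/datadeps` location and an installed `:val` split):
```julia
using MLDatasets: ImageNet

dataset = ImageNet(:val)  # expects ImageNet/val and ImageNet/devkit as above
length(dataset)           # 50_000 validation images
```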
1 change: 1 addition & 0 deletions docs/src/datasets/vision.md
@@ -24,6 +24,7 @@ CIFAR10
CIFAR100
EMNIST
FashionMNIST
ImageNet
MNIST
Omniglot
SVHN2
7 changes: 7 additions & 0 deletions src/MLDatasets.jl
@@ -10,6 +10,7 @@ using Printf
using Glob
using DelimitedFiles: readdlm
using FileIO
using StackViews: StackView
import CSV
using LazyModules: @lazy

@@ -32,6 +33,8 @@ include("require.jl") # export @require
@lazy import HDF5="f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
# @lazy import JLD2

@lazy import JpegTurbo="b835a17e-a41a-41e7-81f0-2f016b05efe0" # Required for ImageNet

export getobs, numobs # From MLUtils.jl

include("abstract_datasets.jl")
@@ -91,6 +94,9 @@ include("datasets/vision/omniglot.jl")
export Omniglot
include("datasets/vision/svhn2.jl")
export SVHN2
include("datasets/vision/imagenet_reader/ImageNetReader.jl")
include("datasets/vision/imagenet.jl")
export ImageNet


## Text
@@ -162,6 +168,7 @@ function __init__()
__init__cifar100()
__init__emnist()
__init__fashionmnist()
__init__imagenet()
__init__mnist()
__init__omniglot()
__init__svhn2()
155 changes: 155 additions & 0 deletions src/datasets/vision/imagenet.jl
@@ -0,0 +1,155 @@
const IMAGENET_WEBSITE = "https://image-net.org/"

function __init__imagenet()
DEPNAME = "ImageNet"
return register(
ManualDataDep(
DEPNAME,
"""The ImageNet 2012 Classification Dataset (ILSVRC 2012-2017) can be downloaded at
$(IMAGENET_WEBSITE) after signing up and accepting the terms of access.
You therefore need to download this dataset manually.

Please follow the instructions at
https://github.com/JuliaML/MLDatasets.jl/blob/master/docs/src/datasets/imagenet_installation.md.
""",
),
)
end

"""
ImageNet(; Tx=Float32, split=:train, dir=nothing)
ImageNet([Tx, split])

The ImageNet 2012 Classification Dataset (ILSVRC 2012-2017).
This is the most widely used subset of ImageNet. It spans 1000 object classes and contains
1,281,167 training images, 50,000 validation images and 100,000 test images.
With the default preprocessing, each image is returned as a 224x224x3 array in RGB color space.

# Arguments

$ARGUMENTS_SUPERVISED_ARRAY
- `split`: selects the data partition. Can take the values `:train`, `:val` or `:test`. Defaults to `:train`.

# Fields

$FIELDS_SUPERVISED_ARRAY
- `split`.

# Methods

$METHODS_SUPERVISED_ARRAY
- [`convert2image`](@ref) converts features to `RGB` images.

# Examples

```julia-repl
julia> using MLDatasets: ImageNet

julia> dataset = ImageNet(:val);

julia> dataset[1:5].targets
5-element Vector{Int64}:
1
1
1
1
1

julia> X, y = dataset[1:5];

julia> size(X)
(224, 224, 3, 5)

julia> X, y = dataset[2000];

julia> convert2image(dataset, X)

julia> dataset.metadata
Dict{String, Any} with 4 entries:
"class_WNIDs" => ["n01440764", "n01443537", "n01484850", "n01491361", "n01494475", …
"class_description" => ["freshwater dace-like game fish of Europe and western Asia noted …
"class_names" => Vector{SubString{String}}[["tench", "Tinca tinca"], ["goldfish", "…
"wnid_to_label" => Dict("n07693725"=>932, "n03775546"=>660, "n01689811"=>45, "n021008…

julia> dataset.metadata["class_names"][y]
3-element Vector{SubString{String}}:
"common iguana"
"iguana"
"Iguana iguana"
```

# References

[1]: [Russakovsky et al., ImageNet Large Scale Visual Recognition Challenge](https://arxiv.org/abs/1409.0575)
"""
struct ImageNet <: SupervisedDataset
metadata::Dict{String,Any}
split::Symbol
dataset::FileDataset
targets::Vector{Int}
inverse_preprocess::Function
end

ImageNet(; split=:train, Tx=Float32, kws...) = ImageNet(Tx, split; kws...)
ImageNet(split::Symbol; kws...) = ImageNet(; split, kws...)
ImageNet(Tx::Type; kws...) = ImageNet(; Tx, kws...)

function ImageNet(
Tx::Type,
split::Symbol;
preprocess=ImageNetReader.default_preprocess,
inverse_preprocess=ImageNetReader.default_inverse_preprocess,
dir=nothing,
train_dir="train",
val_dir="val",
test_dir="test",
devkit_dir="devkit",
)
@assert split ∈ (:train, :val, :test)

DEPNAME = "ImageNet"
METADATA_FILENAME = joinpath(devkit_dir, "data", "meta.mat")

TRAINSET_SIZE = 1_281_167
VALSET_SIZE = 50_000
TESTSET_SIZE = 100_000

# Load metadata
file_path = datafile(DEPNAME, METADATA_FILENAME, dir)
metadata = ImageNetReader.read_metadata(file_path)

root_dir = @datadep_str DEPNAME
if split == :train
dataset = ImageNetReader.get_file_dataset(
Tx, preprocess, joinpath(root_dir, train_dir)
)
@assert length(dataset) == TRAINSET_SIZE
elseif split == :val
dataset = ImageNetReader.get_file_dataset(
Tx, preprocess, joinpath(root_dir, val_dir)
)
@assert length(dataset) == VALSET_SIZE
else
dataset = ImageNetReader.get_file_dataset(
Tx, preprocess, joinpath(root_dir, test_dir)
)
@assert length(dataset) == TESTSET_SIZE
end
targets = [
metadata["wnid_to_label"][wnid] for wnid in ImageNetReader.get_wnids(dataset)
]
return ImageNet(metadata, split, dataset, targets, inverse_preprocess)
end

convert2image(d::ImageNet, x::AbstractArray) = d.inverse_preprocess(x)

Base.length(d::ImageNet) = length(d.dataset)

const IMAGENET_MEM_WARNING = """Loading the entire ImageNet dataset into memory might not be possible.
If you are sure you want to load all of ImageNet, use `dataset[1:end]` instead of `dataset[:]`.
"""
Base.getindex(::ImageNet, ::Colon) = throw(ArgumentError(IMAGENET_MEM_WARNING))
Base.getindex(d::ImageNet, i::Integer) = (features=d.dataset[i], targets=d.targets[i])
function Base.getindex(d::ImageNet, is::AbstractVector)
return (features=StackView(d.dataset[is]), targets=d.targets[is])
end
48 changes: 48 additions & 0 deletions src/datasets/vision/imagenet_reader/ImageNetReader.jl
@@ -0,0 +1,48 @@
module ImageNetReader
using ImageCore: channelview, colorview, AbstractRGB, RGB

import ..FileDataset
import ..read_mat
import ..@lazy

@lazy import JpegTurbo = "b835a17e-a41a-41e7-81f0-2f016b05efe0"

const NCLASSES = 1000
const IMGSIZE = (224, 224)

include("preprocess.jl")

function get_file_dataset(Tx::Type{<:Real}, preprocess::Function, dir::AbstractString)
# Construct a function that loads an image from a FileDataset path,
# applies preprocessing, and converts the result to type Tx.
function load_image(file::AbstractString)
im = JpegTurbo.jpeg_decode(RGB{Tx}, file; preferred_size=IMGSIZE)
return Tx.(preprocess(im))
end
return FileDataset(load_image, dir, "*.JPEG")
end

function read_metadata(file::AbstractString)
meta = read_mat(file)["synsets"]

# Only leaf nodes in WordNet metadata correspond to classes
is_child = iszero.(meta["num_children"])
@assert meta["ILSVRC2012_ID"][is_child] == 1:NCLASSES

# Sort classes by WNID for Metalhead compatibility
I = sortperm(meta["WNID"][is_child])

metadata = Dict{String,Any}()
metadata["class_WNIDs"] = Vector{String}(meta["WNID"][is_child][I]) # WordNet IDs
metadata["class_names"] = split.(meta["words"][is_child][I], ", ")
metadata["class_description"] = Vector{String}(meta["gloss"][is_child][I])
metadata["wnid_to_label"] = Dict(metadata["class_WNIDs"] .=> 1:NCLASSES)
return metadata
end

# Get WordNet ID from path
get_wnids(d::FileDataset) = get_wnids(d.paths)
get_wnids(paths::AbstractVector{<:AbstractString}) = path_to_wnid.(paths)
path_to_wnid(path::AbstractString) = split(path, "/")[end - 1]

end # ImageNetReader module
34 changes: 34 additions & 0 deletions src/datasets/vision/imagenet_reader/preprocess.jl
@@ -0,0 +1,34 @@
# Image preprocessing for ImageNet models.
# Code adapted from Metalhead 0.5.3's utils.jl

# Coefficients taken from PyTorch's ImageNet normalization code
const PYTORCH_MEAN = [0.485f0, 0.456f0, 0.406f0]
const PYTORCH_STD = [0.229f0, 0.224f0, 0.225f0]

normalize_pytorch(x) = (x .- PYTORCH_MEAN) ./ PYTORCH_STD
inv_normalize_pytorch(x) = x .* PYTORCH_STD .+ PYTORCH_MEAN

Review comment:
I would drop the pytorch prefix/suffix and use something else. The comments can stay. If PyTorch isn't the only library that does this preprocessing, then it makes sense to represent that with more general names. If different libraries are providing different preprocessing functionality for ImageNet (or not providing any), then I'd argue there is no canonical default set of ImageNet transformations and this code (aside from maybe the descriptive stats) shouldn't be in MLDatasets.

Reply (adrhill, author):
Good point. Since this is just an internal function used by default_preprocess, I would suggest either _normalize or default_normalize. The appeal of using these coefficients as defaults is that they should work out of the box with pre-trained vision models from Metalhead.jl.

Reply:
Wait, so do other libraries provide this functionality in their ImageNet dataset APIs? I checked https://www.tensorflow.org/datasets/catalog/imagenet2012 and it has no mention of preprocessing, so is PyTorch the only library that does this? If so, I would vote to remove the preprocessing functions as mentioned above.

Reply (collaborator):
If I am not wrong, these normalization values depend on the model you are using. Also, none of the existing vision datasets have preprocessing functions. These functions are ideally handled by data preprocessing libraries/modules.

Reply:
The norm values should not be model-specific. They're derived directly from the data before any model is involved.
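
(For illustration, a minimal sketch of how such per-channel statistics could be computed from the raw images; `channel_means` and `images` are hypothetical names, not part of this PR:)

```julia
using ImageCore: channelview

# Per-channel mean over an iterable of RGB images (sketch).
function channel_means(images)
    sums = zeros(Float32, 3)
    npixels = 0
    for im in images
        chw = Float32.(channelview(im))        # 3×H×W array
        sums .+= vec(sum(chw; dims = (2, 3)))  # accumulate per-channel sums
        npixels += length(im)
    end
    return sums ./ npixels  # over the ImageNet train set, roughly [0.485, 0.456, 0.406]
end
```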

Reply:
In the pytorch case, notice however that although the transformations are stored in the "model weights", the mean and std are the same across models (see e.g. the mobilenet model).

In a similar spirit, I would definitely defend the decision of shipping the set of transformations (cropping, interpolation, linear transformation, etc) as part of the dataset. However I agree with the very first point that the name transformation_pytorch isn't really precise, although I think it is fair to link to the corresponding transformations for tensorflow, pytorch, and/or the timm library in a related comment.

Reply (ToucheSir, Feb 3, 2023):
PyTorch also lumps code for pretrained models, data augmentations and datasets into one library; I don't think we need to follow their every example :)

> In a similar spirit, I would definitely defend the decision of shipping the set of transformations (cropping, interpolation, linear transformation, etc) as part of the dataset.

This is precisely why I asked about what other libraries are doing. If nobody else is shipping the same set of transformations, then they can hardly be considered canonical for ImageNet. That doesn't mean we should never ship helpers to create common augmentation pipelines, but that it is better served by packages which have access to efficient augmentation libraries (e.g. Augmentor, DataAugmentation) and not by some unoptimized implementation which is simultaneously more general (because it's applicable to other datasets) and less general (because many papers using ImageNet do not use these augmentations) than the dataset it's been attached to.

Reply (member):
Let's just apply the channelview and permute transformation by default here,
and make the (permuted) mean and std values be part of the type
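
(A rough sketch of that suggestion; the struct and names below are hypothetical, not part of this diff:)

```julia
# Hypothetical: keep the (permuted, WHC-broadcastable) statistics on a type.
struct NormStats{T<:Real}
    mean::Array{T,3}  # 1×1×3, broadcasts over W×H×C arrays
    std::Array{T,3}
end

default_stats() = NormStats(
    reshape([0.485f0, 0.456f0, 0.406f0], 1, 1, 3),
    reshape([0.229f0, 0.224f0, 0.225f0], 1, 1, 3),
)

normalize(x::AbstractArray, s::NormStats) = (x .- s.mean) ./ s.std
```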

Reply (adrhill, author):
I have also taken a look at Keras' ImageNet utilities. While these normalization constants are used in many places throughout torchvision and PyTorch, it looks like TensorFlow and Keras do indeed use their own constants.

I agree with @ToucheSir's sentiment

> If nobody else is shipping the same set of transformations, then they can hardly be considered canonical for ImageNet.

However, this point can be drawn even further, as nothing about ImageNet is truly canonical.
To give some examples (some of which have previously been discussed):

1. There is no canonical reason why images have to be loaded in 224 x 224 format.
2. There is no canonical reason to apply the resizing algorithm JpegTurbo.jl uses when calling `jpeg_decode` with a `preferred_size`.
3. There is no canonical way of sorting class labels. Some sort by WordNet ID (e.g. PyTorch), others don't.

Getting this merged

So to make this dataloader as "unopinionated" as possible, we could just make it a very thin wrapper around FileDataset which only loads metadata. This would require the user to pass a loadfn which handles the transformation from file path to array (a rough sketch follows below). Class ordering could be handled using a sort_by_wnid=true keyword argument and all new dependencies introduced in this PR could be removed (ImageCore, JpegTurbo and StackViews).
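
(A minimal sketch of such a thin wrapper, reusing `FileDataset` as it appears in this PR; `my_loadfn` is a hypothetical user-supplied function:)

```julia
using MLDatasets: FileDataset
import JpegTurbo

# User-supplied loadfn: file path -> array. Here it only decodes the JPEG;
# resizing, cropping and normalization would be the user's responsibility.
my_loadfn(path::AbstractString) = JpegTurbo.jpeg_decode(path)

dataset = FileDataset(my_loadfn, "ImageNet/val", "*.JPEG")
img = dataset[1]  # first decoded image
```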

Future work

However, I do strongly feel like some package in the wider Julia ML / Deep Learning ecosystem should export loadfns that are usable with Metalhead's PyTorch models out of the box. @lorenzoh previously proposed adding such functionality to DataAugmentation.jl in FluxML/Metalhead.jl#117.
Once this functionality is available somewhere, ImageNet's docstring in MLDatasets should be updated to showcase this common use-case.

Until this functionality exists, I would suggest adding a "Home" => "Tutorials" => "ImageNet" page to the MLDatasets docs which implements the current load function.

Reply:
> it looks like TensorFlow and Keras do indeed use their own constants.

Nice find. I was not expecting that mode == "torch" conditional.

> However, this point can be drawn even further, as nothing about ImageNet is truly canonical. To give some examples (some of which have previously been discussed):
>
> 1. There is no canonical reason why images have to be loaded in 224 x 224 format.
> 2. There is no canonical reason to apply the resizing algorithm JpegTurbo.jl uses when calling `jpeg_decode` with a `preferred_size`.
> 3. There is no canonical way of sorting class labels. Some sort by WordNet ID (e.g. PyTorch), others don't.

The difference here is that all three of those points can have a decent fallback without depending on external packages. Another argument is that more people will rely on these defaults than won't. I'm not sure augmentations pass that threshold.
I'm not saying users shouldn't be able to pass in a transformation function, but identity or some such seems a more defensible default. Indeed, the torchvision ImageNet class does not do any additional transforms by default, so we'd be deviating from every other library if we stuck with this default centre crop.


function default_preprocess(im::AbstractMatrix{<:AbstractRGB})
# Similar to the validation dataset loader in PyTorch's ImageNet example
# https://github.com/pytorch/examples/blob/91ccd7a21be6fa687000beef82fc1e5d7d64e4bd/imagenet/main.py#L223-L230
im = center_crop(im)
im = normalize_pytorch(channelview(im))
return PermutedDimsArray(im, (3, 2, 1)) # Convert from Images.jl's CHW to Flux's WHC
end

function default_inverse_preprocess(x::AbstractArray{T,N}) where {T,N}
@assert N == 3 || N == 4
x = PermutedDimsArray(x, (3, 2, 1, 4:N...)) # Convert from WHC[N] to CHW[N]
return colorview(RGB, inv_normalize_pytorch(x))
end

# Take rectangle of pixels of shape `outsize` at the center of image `im`
function center_crop(im::AbstractMatrix, outsize=IMGSIZE)
h2, w2 = div.(outsize, 2) # half height, half width of view
h_adjust, w_adjust = _adjust.(outsize)
return @view im[
((div(end, 2) - h2):(div(end, 2) + h2 - h_adjust)) .+ 1,
((div(end, 2) - w2):(div(end, 2) + w2 - w_adjust)) .+ 1,
]
end
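# For even target sizes, `_adjust` shifts the upper crop bound down by one pixel,
# since a symmetric range would otherwise span 2*h2 + 1 (an odd number of) indices.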
_adjust(i::Integer) = ifelse(iszero(i % 2), 1, 0)
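
For illustration, a quick sanity check of the crop logic on a dummy image (a sketch; the random image is a stand-in for real data, and `center_crop` is the function defined above):

```julia
using ImageCore: RGB

im = rand(RGB{Float32}, 256, 384)  # dummy image larger than 224×224
cropped = center_crop(im)          # view into the central 224×224 region
@assert size(cropped) == (224, 224)
```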