From 6cc4c3ba5a0581c63c6b829af4e30fcbc3efa1b6 Mon Sep 17 00:00:00 2001 From: Jonas Klotz Date: Wed, 4 Jun 2025 17:52:13 +0200 Subject: [PATCH 1/2] Fix caltech256 download link (#9097) --- torchvision/datasets/caltech.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/torchvision/datasets/caltech.py b/torchvision/datasets/caltech.py index adcb49ff1c2..c34342ab86e 100644 --- a/torchvision/datasets/caltech.py +++ b/torchvision/datasets/caltech.py @@ -231,9 +231,8 @@ def __len__(self) -> int: def download(self) -> None: if self._check_integrity(): return - download_and_extract_archive( - "https://drive.google.com/file/d/1r6o0pSROcV1_VwT4oSjA2FBUSCWGuxLK", + "https://data.caltech.edu/records/nyy15-4j048/files/256_ObjectCategories.tar", self.root, filename="256_ObjectCategories.tar", md5="67b4f42ca05d46448c6bb8ecd2220f6d", From e48d450712ce80489b23e720e38adba4130a429e Mon Sep 17 00:00:00 2001 From: Jonas Klotz Date: Wed, 4 Jun 2025 17:52:46 +0200 Subject: [PATCH 2/2] Fix caltech101 download method (#9097) --- torchvision/datasets/caltech.py | 40 ++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 11 deletions(-) diff --git a/torchvision/datasets/caltech.py b/torchvision/datasets/caltech.py index c34342ab86e..26e00008b4e 100644 --- a/torchvision/datasets/caltech.py +++ b/torchvision/datasets/caltech.py @@ -1,11 +1,12 @@ import os import os.path +import shutil from pathlib import Path from typing import Any, Callable, Optional, Union from PIL import Image -from .utils import download_and_extract_archive, verify_str_arg +from .utils import download_and_extract_archive, extract_archive, verify_str_arg from .vision import VisionDataset @@ -131,19 +132,36 @@ def __len__(self) -> int: def download(self) -> None: if self._check_integrity(): return - + # Download and unzip the single ZIP containing both .tar files download_and_extract_archive( - "https://drive.google.com/file/d/137RyRjvTBkBiIfeYBNZBtViDHQ6_Ewsp", + "https://data.caltech.edu/records/mzrjq-6wc02/files/caltech-101.zip?download=1", self.root, - filename="101_ObjectCategories.tar.gz", - md5="b224c7392d521a49829488ab0f1120d9", - ) - download_and_extract_archive( - "https://drive.google.com/file/d/175kQy3UsZ0wUEHZjqkUDdNVssr7bgh_m", - self.root, - filename="Annotations.tar", - md5="6f83eeb1f24d99cab4eb377263132c91", + filename="caltech-101.zip", + md5="3138e1922a9193bfa496528edbbc45d0", ) + archive_folder = os.path.join(self.root, "caltech-101") + category_archive = os.path.join(archive_folder, "101_ObjectCategories.tar.gz") + annotation_archive = os.path.join(archive_folder, "Annotations.tar") + macos_meta = os.path.join(archive_folder, "__MACOSX") + + # Remove macOS metadata folder if it exists + if os.path.isdir(macos_meta): + shutil.rmtree(macos_meta) + + # Extract '101_ObjectCategories.tar.gz' into self.root + extract_archive(category_archive, self.root, remove_finished=True) + + # Extract 'Annotations.tar' into self.root + extract_archive(annotation_archive, self.root, remove_finished=True) + + # Delete the 'caltech-101' folder (which may now be empty or contain only other hidden files) + if os.path.isdir(archive_folder): + shutil.rmtree(archive_folder) + + # Remove the ZIP file itself + zip_path = os.path.join(self.root, "caltech-101.zip") + if os.path.isfile(zip_path): + os.remove(zip_path) def extra_repr(self) -> str: return "Target type: {target_type}".format(**self.__dict__)