Azure · hakotesova · Oct 6, 2025 · Oct 6, 2025 · Oct 6, 2025 · Oct 6, 2025
@@ -4,6 +4,7 @@
 import os
 import urllib
 from zipfile import ZipFile
+from pathlib import Path
 
 from azure.identity import DefaultAzureCredential
 from azure.ai.ml import MLClient
@@ -103,28 +104,20 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
     # Create directory, if it does not exist
     os.makedirs(dataset_parent_dir, exist_ok=True)
 
-    # Download data
-    print("Downloading data.")
-    download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip"
+    # Local data path
+    repo_root = Path(__file__).resolve().parents[6]
+    local_data_path = repo_root / "sample-data" / "image-classification" / "fridgeObjects.zip"
 
-    # Extract current dataset name from dataset url
-    dataset_name = os.path.basename(download_url).split(".")[0]
+    # Extract current dataset name from dataset path
+    dataset_name = os.path.basename(local_data_path).split(".")[0]
     # Get dataset path for later use
     dataset_dir = os.path.join(dataset_parent_dir, dataset_name)
 
-    # Get the name of zip file
-    data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")
-
-    # Download data from public url
-    urllib.request.urlretrieve(download_url, filename=data_file)
-
     # Extract files
-    with ZipFile(data_file, "r") as zip:
+    with ZipFile(local_data_path, "r") as zip:
         print("extracting files...")
         zip.extractall(path=dataset_parent_dir)
         print("done")
-    # Delete zip file
-    os.remove(data_file)
 
     # Upload data and create a data asset URI folder
     print("Uploading data to blob storage")

@@ -2,6 +2,7 @@
 import base64
 import json
 import os
+from pathlib import Path
 import urllib
 from zipfile import ZipFile
 
@@ -106,28 +107,20 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
     # Create directory, if it does not exist
     os.makedirs(dataset_parent_dir, exist_ok=True)
 
-    # Download data
-    print("Downloading data.")
-    download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/multilabelFridgeObjects.zip"
+    # Use local data path
+    repo_root = Path(__file__).resolve().parents[6]
+    local_data_path = repo_root / "sample-data" / "image-classification" / "multilabelFridgeObjects.zip"
 
-    # Extract current dataset name from dataset url
-    dataset_name = os.path.basename(download_url).split(".")[0]
+    # Extract current dataset name from dataset file
+    dataset_name = os.path.basename(local_data_path).split(".")[0]
     # Get dataset path for later use
     dataset_dir = os.path.join(dataset_parent_dir, dataset_name)
 
-    # Get the name of zip file
-    data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")
-
-    # Download data from public url
-    urllib.request.urlretrieve(download_url, filename=data_file)
-
-    # Extract files
-    with ZipFile(data_file, "r") as zip:
+    # Extract files directly from the local path
+    with ZipFile(local_data_path, "r") as zip:
         print("extracting files...")
         zip.extractall(path=dataset_parent_dir)
         print("done")
-    # Delete zip file
-    os.remove(data_file)
 
     # Upload data and create a data asset URI folder
     print("Uploading data to blob storage")

@@ -2,6 +2,7 @@
 import base64
 import json
 import os
+from pathlib import Path
 import subprocess
 import sys
 import urllib
@@ -156,27 +157,20 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
     # Create data folder if it doesnt exist.
     os.makedirs(dataset_parent_dir, exist_ok=True)
 
-    # Download data
-    download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-instance-segmentation/odFridgeObjectsMask.zip"
+    # Local data
+    repo_root = Path(__file__).resolve().parents[5]
+    local_data_path = repo_root / "sample-data" / "image-instance-segmentation" / "odFridgeObjectsMask.zip"
 
-    # Extract current dataset name from dataset url
-    dataset_name = os.path.basename(download_url).split(".")[0]
+    # Extract current dataset name from the local archive path
+    dataset_name = os.path.basename(local_data_path).split(".")[0]
     # Get dataset path for later use
     dataset_dir = os.path.join(dataset_parent_dir, dataset_name)
 
-    # Get the data zip file path
-    data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")
-
-    # Download the dataset
-    urllib.request.urlretrieve(download_url, filename=data_file)
-
-    # Extract files
-    with ZipFile(data_file, "r") as zip:
+    # Extract files directly from the local archive (no downloaded copy exists)
+    with ZipFile(local_data_path, "r") as zip:
         print("extracting files...")
         zip.extractall(path=dataset_parent_dir)
         print("done")
-    # Delete zip file
-    os.remove(data_file)
 
     # Upload data and create a data asset URI folder
     print("Uploading data to blob storage")

@@ -2,6 +2,7 @@
 import base64
 import json
 import os
+from pathlib import Path
 import urllib
 import xml.etree.ElementTree as ET
 
@@ -159,27 +160,20 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
     # create data folder if it doesnt exist.
     os.makedirs(dataset_parent_dir, exist_ok=True)
 
-    # download data
-    download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip"
+    # local data
+    repo_root = Path(__file__).resolve().parents[5]
+    local_data_path = repo_root / "sample-data" / "image-object-detection" / "odFridgeObjects.zip"
 
     # Extract current dataset name from dataset url
-    dataset_name = os.path.basename(download_url).split(".")[0]
+    dataset_name = os.path.basename(local_data_path).split(".")[0]
     # Get dataset path for later use
     dataset_dir = os.path.join(dataset_parent_dir, dataset_name)
 
-    # Get the data zip file path
-    data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")
-
-    # Download the dataset
-    urllib.request.urlretrieve(download_url, filename=data_file)
-
     # extract files
-    with ZipFile(data_file, "r") as zip:
+    with ZipFile(local_data_path, "r") as zip:
         print("extracting files...")
         zip.extractall(path=dataset_parent_dir)
         print("done")
-    # delete zip file
-    os.remove(data_file)
 
     # Upload data and create a data asset URI folder
     print("Uploading data to blob storage")

@@ -2,6 +2,7 @@
 import base64
 import json
 import os
+from pathlib import Path
 import shutil
 import urllib.request
 import pandas as pd
@@ -19,34 +20,29 @@ def download_and_unzip(dataset_parent_dir: str, is_multilabel_dataset: int) -> N
     # Create directory, if it does not exist
     os.makedirs(dataset_parent_dir, exist_ok=True)
 
-    # download data
+    # local data
     if is_multilabel_dataset == 0:
-        download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip"
+        repo_root = Path(__file__).resolve().parents[5]
+        local_data_path = repo_root / "sample-data" / "image-classification" / "fridgeObjects.zip"
     else:
-        download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/multilabelFridgeObjects.zip"
-    print(f"Downloading data from {download_url}")
+        repo_root = Path(__file__).resolve().parents[5]
+        local_data_path = repo_root / "sample-data" / "image-classification" / "multilabelFridgeObjects.zip"
+    print(f"Pulling data from {local_data_path}")
 
     # Extract current dataset name from dataset url
-    dataset_name = os.path.basename(download_url).split(".")[0]
+    dataset_name = os.path.basename(local_data_path).split(".")[0]
     # Get dataset path for later use
     dataset_dir = os.path.join(dataset_parent_dir, dataset_name)
 
     if os.path.exists(dataset_dir):
         shutil.rmtree(dataset_dir)
 
-    # Get the name of zip file
-    data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")
-
-    # Download data from public url
-    urllib.request.urlretrieve(download_url, filename=data_file)
-
     # extract files
-    with ZipFile(data_file, "r") as zip:
+    with ZipFile(local_data_path, "r") as zip:
         print("extracting files...")
         zip.extractall(path=dataset_parent_dir)
         print("done")
-    # delete zip file
-    os.remove(data_file)
+
     return dataset_dir
 
 

@@ -2,6 +2,7 @@
 import base64
 import json
 import os
+from pathlib import Path
 import shutil
 import urllib.request
 import pandas as pd
@@ -19,31 +20,24 @@ def download_and_unzip(dataset_parent_dir: str) -> None:
     # Create directory, if it does not exist
     os.makedirs(dataset_parent_dir, exist_ok=True)
 
-    # download data
-    download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip"
-    print(f"Downloading data from {download_url}")
+    # local data
+    repo_root = Path(__file__).resolve().parents[5]
+    local_data_path = repo_root / "sample-data" / "image-classification" / "fridgeObjects.zip"
 
     # Extract current dataset name from dataset url
-    dataset_name = os.path.basename(download_url).split(".")[0]
+    dataset_name = os.path.basename(local_data_path).split(".")[0]
     # Get dataset path for later use
     dataset_dir = os.path.join(dataset_parent_dir, dataset_name)
 
     if os.path.exists(dataset_dir):
         shutil.rmtree(dataset_dir)
 
-    # Get the name of zip file
-    data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")
-
-    # Download data from public url
-    urllib.request.urlretrieve(download_url, filename=data_file)
-
     # extract files
-    with ZipFile(data_file, "r") as zip:
+    with ZipFile(local_data_path, "r") as zip:
         print("extracting files...")
         zip.extractall(path=dataset_parent_dir)
         print("done")
-    # delete zip file
-    os.remove(data_file)
+
     return dataset_dir
 
 

@@ -2,6 +2,7 @@
 import base64
 import json
 import os
+from pathlib import Path
 import shutil
 import urllib.request
 import pandas as pd
@@ -14,35 +15,25 @@ def download_and_unzip(dataset_parent_dir: str) -> None:
     :param dataset_parent_dir: dataset parent directory to which dataset will be downloaded
     :type dataset_parent_dir: str
     """
-    # Create directory, if it does not exist
-    os.makedirs(dataset_parent_dir, exist_ok=True)
 
-    # download data
-
-    download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip"
-    print(f"Downloading data from {download_url}")
+    # local data
+    repo_root = Path(__file__).resolve().parents[5]
+    local_data_path = repo_root / "sample-data" / "image-object-detection" / "odFridgeObjects.zip"
 
     # Extract current dataset name from dataset url
-    dataset_name = os.path.basename(download_url).split(".")[0]
+    dataset_name = os.path.basename(local_data_path).split(".")[0]
     # Get dataset path for later use
     dataset_dir = os.path.join(dataset_parent_dir, dataset_name)
 
     if os.path.exists(dataset_dir):
         shutil.rmtree(dataset_dir)
 
-    # Get the name of zip file
-    data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")
-
-    # Download data from public url
-    urllib.request.urlretrieve(download_url, filename=data_file)
-
     # extract files
-    with ZipFile(data_file, "r") as zip:
+    with ZipFile(local_data_path, "r") as zip:
         print("extracting files...")
         zip.extractall(path=dataset_parent_dir)
         print("done")
-    # delete zip file
-    os.remove(data_file)
+
     return dataset_dir
 
 

@@ -2,6 +2,7 @@
 import base64
 import json
 import os
+from pathlib import Path
 import shutil
 import urllib.request
 import pandas as pd
@@ -19,12 +20,12 @@ def download_and_unzip(dataset_parent_dir: str) -> None:
     # Create directory, if it does not exist
     os.makedirs(dataset_parent_dir, exist_ok=True)
 
-    # download data
-    download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip"
-    print(f"Downloading data from {download_url}")
+    # local data
+    repo_root = Path(__file__).resolve().parents[5]
+    local_data_path = repo_root / "sample-data" / "image-classification" / "fridgeObjects.zip"
 
     # Extract current dataset name from dataset url
-    dataset_name = os.path.basename(download_url).split(".")[0]
+    dataset_name = os.path.basename(local_data_path).split(".")[0]
     # Get dataset path for later use
     dataset_dir = os.path.join(dataset_parent_dir, dataset_name)
 
@@ -34,16 +35,9 @@ def download_and_unzip(dataset_parent_dir: str) -> None:
-    # Get the name of zip file
-    data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")
-
-    # Download data from public url
-    urllib.request.urlretrieve(download_url, filename=data_file)
-
-    # extract files
-    with ZipFile(data_file, "r") as zip:
+    # extract files directly from the local archive (no downloaded copy exists)
+    with ZipFile(local_data_path, "r") as zip:
         print("extracting files...")
         zip.extractall(path=dataset_parent_dir)
         print("done")
-    # delete zip file
-    os.remove(data_file)
+
     return dataset_dir