Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os
import urllib
from zipfile import ZipFile
from pathlib import Path

from azure.identity import DefaultAzureCredential
from azure.ai.ml import MLClient
Expand Down Expand Up @@ -103,28 +104,20 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
# Create directory, if it does not exist
os.makedirs(dataset_parent_dir, exist_ok=True)

# Download data
print("Downloading data.")
download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip"
# Local data path
repo_root = Path(__file__).resolve().parents[6]
local_data_path = repo_root / "sample-data" / "image-classification" / "fridgeObjects.zip"

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
# Extract current dataset name from dataset path
dataset_name = os.path.basename(local_data_path).split(".")[0]
# Get dataset path for later use
dataset_dir = os.path.join(dataset_parent_dir, dataset_name)

# Get the name of zip file
data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")

# Download data from public url
urllib.request.urlretrieve(download_url, filename=data_file)

# Extract files
with ZipFile(data_file, "r") as zip:
with ZipFile(local_data_path, "r") as zip:
print("extracting files...")
zip.extractall(path=dataset_parent_dir)
print("done")
# Delete zip file
os.remove(data_file)

# Upload data and create a data asset URI folder
print("Uploading data to blob storage")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import base64
import json
import os
from pathlib import Path
import urllib
from zipfile import ZipFile

Expand Down Expand Up @@ -106,28 +107,20 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
# Create directory, if it does not exist
os.makedirs(dataset_parent_dir, exist_ok=True)

# Download data
print("Downloading data.")
download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/multilabelFridgeObjects.zip"
# Use local data path
repo_root = Path(__file__).resolve().parents[6]
local_data_path = repo_root / "sample-data" / "image-classification" / "multilabelFridgeObjects.zip"

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
# Extract current dataset name from dataset file
dataset_name = os.path.basename(local_data_path).split(".")[0]
# Get dataset path for later use
dataset_dir = os.path.join(dataset_parent_dir, dataset_name)

# Get the name of zip file
data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")

# Download data from public url
urllib.request.urlretrieve(download_url, filename=data_file)

# Extract files
with ZipFile(data_file, "r") as zip:
# Extract files directly from the local path
with ZipFile(local_data_path, "r") as zip:
print("extracting files...")
zip.extractall(path=dataset_parent_dir)
print("done")
# Delete zip file
os.remove(data_file)

# Upload data and create a data asset URI folder
print("Uploading data to blob storage")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import base64
import json
import os
from pathlib import Path
import subprocess
import sys
import urllib
Expand Down Expand Up @@ -156,27 +157,23 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
# Create data folder if it doesn't exist.
os.makedirs(dataset_parent_dir, exist_ok=True)

# Download data
download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-instance-segmentation/odFridgeObjectsMask.zip"
# Local data
repo_root = Path(__file__).resolve().parents[5]
local_data_path = repo_root / "sample-data" / "image-instance-segmentation" / "odFridgeObjectsMask.zip"

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
dataset_name = os.path.basename(local_data_path).split(".")[0]
# Get dataset path for later use
dataset_dir = os.path.join(dataset_parent_dir, dataset_name)

# Get the data zip file path
data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")

# Download the dataset
urllib.request.urlretrieve(download_url, filename=data_file)

# Extract files
with ZipFile(data_file, "r") as zip:
print("extracting files...")
zip.extractall(path=dataset_parent_dir)
print("done")
# Delete zip file
os.remove(data_file)

# Upload data and create a data asset URI folder
print("Uploading data to blob storage")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import base64
import json
import os
from pathlib import Path
import urllib
import xml.etree.ElementTree as ET

Expand Down Expand Up @@ -159,27 +160,20 @@ def upload_data_and_create_jsonl_mltable_files(ml_client, dataset_parent_dir):
# create data folder if it doesn't exist.
os.makedirs(dataset_parent_dir, exist_ok=True)

# download data
download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip"
# local data
repo_root = Path(__file__).resolve().parents[5]
local_data_path = repo_root / "sample-data" / "image-object-detection" / "odFridgeObjects.zip"

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
dataset_name = os.path.basename(local_data_path).split(".")[0]
# Get dataset path for later use
dataset_dir = os.path.join(dataset_parent_dir, dataset_name)

# Get the data zip file path
data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")

# Download the dataset
urllib.request.urlretrieve(download_url, filename=data_file)

# extract files
with ZipFile(data_file, "r") as zip:
with ZipFile(local_data_path, "r") as zip:
print("extracting files...")
zip.extractall(path=dataset_parent_dir)
print("done")
# delete zip file
os.remove(data_file)

# Upload data and create a data asset URI folder
print("Uploading data to blob storage")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import base64
import json
import os
from pathlib import Path
import shutil
import urllib.request
import pandas as pd
Expand All @@ -19,34 +20,29 @@ def download_and_unzip(dataset_parent_dir: str, is_multilabel_dataset: int) -> N
# Create directory, if it does not exist
os.makedirs(dataset_parent_dir, exist_ok=True)

# download data
# local data
if is_multilabel_dataset == 0:
download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip"
repo_root = Path(__file__).resolve().parents[5]
local_data_path = repo_root / "sample-data" / "image-classification" / "fridgeObjects.zip"
else:
download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/multilabelFridgeObjects.zip"
print(f"Downloading data from {download_url}")
repo_root = Path(__file__).resolve().parents[5]
local_data_path = repo_root / "sample-data" / "image-classification" / "multilabelFridgeObjects.zip"
print(f"Pulling data from {local_data_path}")

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
dataset_name = os.path.basename(local_data_path).split(".")[0]
# Get dataset path for later use
dataset_dir = os.path.join(dataset_parent_dir, dataset_name)

if os.path.exists(dataset_dir):
shutil.rmtree(dataset_dir)

# Get the name of zip file
data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")

# Download data from public url
urllib.request.urlretrieve(download_url, filename=data_file)

# extract files
with ZipFile(data_file, "r") as zip:
with ZipFile(local_data_path, "r") as zip:
print("extracting files...")
zip.extractall(path=dataset_parent_dir)
print("done")
# delete zip file
os.remove(data_file)

return dataset_dir


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import base64
import json
import os
from pathlib import Path
import shutil
import urllib.request
import pandas as pd
Expand All @@ -19,31 +20,24 @@ def download_and_unzip(dataset_parent_dir: str) -> None:
# Create directory, if it does not exist
os.makedirs(dataset_parent_dir, exist_ok=True)

# download data
download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip"
print(f"Downloading data from {download_url}")
# local data
repo_root = Path(__file__).resolve().parents[5]
local_data_path = repo_root / "sample-data" / "image-classification" / "fridgeObjects.zip"

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
dataset_name = os.path.basename(local_data_path).split(".")[0]
# Get dataset path for later use
dataset_dir = os.path.join(dataset_parent_dir, dataset_name)

if os.path.exists(dataset_dir):
shutil.rmtree(dataset_dir)

# Get the name of zip file
data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")

# Download data from public url
urllib.request.urlretrieve(download_url, filename=data_file)

# extract files
with ZipFile(data_file, "r") as zip:
with ZipFile(local_data_path, "r") as zip:
print("extracting files...")
zip.extractall(path=dataset_parent_dir)
print("done")
# delete zip file
os.remove(data_file)

return dataset_dir


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import base64
import json
import os
from pathlib import Path
import shutil
import urllib.request
import pandas as pd
Expand All @@ -14,35 +15,25 @@ def download_and_unzip(dataset_parent_dir: str) -> None:
:param dataset_parent_dir: dataset parent directory to which dataset will be downloaded
:type dataset_parent_dir: str
"""
# Create directory, if it does not exist
os.makedirs(dataset_parent_dir, exist_ok=True)

# download data

download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-object-detection/odFridgeObjects.zip"
print(f"Downloading data from {download_url}")
# local data
repo_root = Path(__file__).resolve().parents[5]
local_data_path = repo_root / "sample-data" / "image-object-detection" / "odFridgeObjects.zip"

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
dataset_name = os.path.basename(local_data_path).split(".")[0]
# Get dataset path for later use
dataset_dir = os.path.join(dataset_parent_dir, dataset_name)

if os.path.exists(dataset_dir):
shutil.rmtree(dataset_dir)

# Get the name of zip file
data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")

# Download data from public url
urllib.request.urlretrieve(download_url, filename=data_file)

# extract files
with ZipFile(data_file, "r") as zip:
with ZipFile(local_data_path, "r") as zip:
print("extracting files...")
zip.extractall(path=dataset_parent_dir)
print("done")
# delete zip file
os.remove(data_file)

return dataset_dir


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import base64
import json
import os
from pathlib import Path
import shutil
import urllib.request
import pandas as pd
Expand All @@ -19,12 +20,12 @@ def download_and_unzip(dataset_parent_dir: str) -> None:
# Create directory, if it does not exist
os.makedirs(dataset_parent_dir, exist_ok=True)

# download data
download_url = "https://automlsamplenotebookdata-adcuc7f7bqhhh8a4.b02.azurefd.net/image-classification/fridgeObjects.zip"
print(f"Downloading data from {download_url}")
# local data
repo_root = Path(__file__).resolve().parents[5]
local_data_path = repo_root / "sample-data" / "image-classification" / "fridgeObjects.zip"

# Extract current dataset name from dataset url
dataset_name = os.path.basename(download_url).split(".")[0]
dataset_name = os.path.basename(local_data_path).split(".")[0]
# Get dataset path for later use
dataset_dir = os.path.join(dataset_parent_dir, dataset_name)

Expand All @@ -34,16 +35,12 @@ def download_and_unzip(dataset_parent_dir: str) -> None:
# Get the name of zip file
data_file = os.path.join(dataset_parent_dir, f"{dataset_name}.zip")

# Download data from public url
urllib.request.urlretrieve(download_url, filename=data_file)

# extract files
with ZipFile(data_file, "r") as zip:
print("extracting files...")
zip.extractall(path=dataset_parent_dir)
print("done")
# delete zip file
os.remove(data_file)

return dataset_dir


Expand Down
Loading
Loading