Skip to content

Commit 455944b

Browse files
committed
Update script
1 parent 5439aa0 commit 455944b

File tree

1 file changed

+32
-28
lines changed

1 file changed

+32
-28
lines changed

tests/create_xenium_filtered_points.py

Lines changed: 32 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -8,44 +8,48 @@
88
from spatialdata import read_zarr, SpatialData
99

1010

11-
data_dir = "data"
12-
zip_filepath = join(data_dir, "xenium_rep1_io.spatialdata.zarr.zip")
13-
spatialdata_filepath = join(data_dir, "xenium_rep1_io.spatialdata.zarr")
11+
def create_xenium_filtered_points():
12+
# 1. Download and extract the Xenium dataset if not already present
13+
data_dir = "data"
14+
zip_filepath = join(data_dir, "xenium_rep1_io.spatialdata.zarr.zip")
15+
spatialdata_filepath = join(data_dir, "xenium_rep1_io.spatialdata.zarr")
1416

1517

16-
if not isdir(spatialdata_filepath):
17-
if not isfile(zip_filepath):
18-
os.makedirs(data_dir, exist_ok=True)
19-
urlretrieve('https://s3.embl.de/spatialdata/spatialdata-sandbox/xenium_rep1_io.zip', zip_filepath)
20-
with zipfile.ZipFile(zip_filepath, "r") as zip_ref:
21-
zip_ref.extractall(data_dir)
22-
os.rename(join(data_dir, "data.zarr"), spatialdata_filepath)
18+
if not isdir(spatialdata_filepath):
19+
if not isfile(zip_filepath):
20+
os.makedirs(data_dir, exist_ok=True)
21+
# zip_url = 'https://s3.embl.de/spatialdata/spatialdata-sandbox/xenium_rep1_io.zip'
22+
zip_url = 'https://s3.embl.de/spatialdata/spatialdata-sandbox/xenium_rep1_io_spatialdata_0.7.1.zip'
23+
urlretrieve(zip_url, zip_filepath)
24+
with zipfile.ZipFile(zip_filepath, "r") as zip_ref:
25+
zip_ref.extractall(data_dir)
26+
os.rename(join(data_dir, "data.zarr"), spatialdata_filepath)
2327

24-
# This Xenium dataset has an AnnData "raw" element.
25-
# Reference: https://github.com/giovp/spatialdata-sandbox/issues/55
26-
raw_dir = join(spatialdata_filepath, "tables", "table", "raw")
27-
if isdir(raw_dir):
28-
shutil.rmtree(raw_dir)
28+
# This Xenium dataset has an AnnData "raw" element.
29+
# Reference: https://github.com/giovp/spatialdata-sandbox/issues/55
30+
raw_dir = join(spatialdata_filepath, "tables", "table", "raw")
31+
if isdir(raw_dir):
32+
shutil.rmtree(raw_dir)
2933

30-
sdata = read_zarr(spatialdata_filepath)
34+
sdata = read_zarr(spatialdata_filepath)
3135

32-
ddf = sdata.points["transcripts"]
36+
ddf = sdata.points["transcripts"]
3337

34-
# 2. Define a function to take every 200th row from a partition
38+
# 2. Define a function to take every 200th row from a partition
3539

3640

37-
def select_every_200th(partition):
38-
# Each 'partition' is a Pandas DataFrame
39-
# .iloc[::200] is the efficient pandas way to get every 200th row
40-
return partition.iloc[::200]
41+
def select_every_200th(partition):
42+
# Each 'partition' is a Pandas DataFrame
43+
# .iloc[::200] is the efficient pandas way to get every 200th row
44+
return partition.iloc[::200]
4145

4246

43-
# 3. Apply this function to every partition in the Dask DataFrame
44-
result = ddf.map_partitions(select_every_200th)
47+
# 3. Apply this function to every partition in the Dask DataFrame
48+
result = ddf.map_partitions(select_every_200th)
4549

46-
# 4. Keep only the columns needed for the points-only dataset
47-
filtered_ddf = result[["x", "y", "z", "feature_name", "cell_id"]]
50+
# 4. Keep only the columns needed for the points-only dataset
51+
filtered_ddf = result[["x", "y", "z", "feature_name", "cell_id"]]
4852

49-
small_sdata = SpatialData(points={"transcripts": filtered_ddf})
53+
small_sdata = SpatialData(points={"transcripts": filtered_ddf})
5054

51-
small_sdata.write("xenium_rep1_io.points_only.spatialdata.zarr", overwrite=True)
55+
small_sdata.write("xenium_rep1_io.points_only.spatialdata.zarr", overwrite=True)

0 commit comments

Comments
 (0)