Skip to content
This repository has been archived by the owner on Jun 11, 2024. It is now read-only.

Commit

Permalink
move notebooks to notebooks folder
Browse files (browse the repository at this point in the history)
  • Loading branch information
zakwatts committed Jul 19, 2023
2 parents 7480c15 + ed1125f commit 3f358da
Show file tree
Hide file tree
Showing 6 changed files with 66 additions and 28 deletions.
17 changes: 8 additions & 9 deletions nwp/excarta/merge_excarta.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,20 @@
# import libs
import xarray as xr
import pandas as pd
import numpy as np
import datetime
import os
import pathlib as Path
from datetime import datetime
import zarr
import ocf_blosc2

import xarray as xr



def merge_zarr_files(zarr_path, merged_zarr_path):
# Collect paths of Zarr files in the specified directory
zarr_files = [
<<<<<<< HEAD
os.path.join(zarr_path, file)
for file in os.listdir(zarr_path)
if file.endswith(".zarr")
=======
os.path.join(zarr_path, file) for file in os.listdir(zarr_path) if file.endswith(".zarr")
>>>>>>> ed1125f2aadcc4f6ea53290fe7b2f87e027a025d
]

print("1")
Expand All @@ -30,7 +29,7 @@ def merge_zarr_files(zarr_path, merged_zarr_path):

# Iterate over the remaining Zarr files and merge them into the initial dataset
for file in zarr_files[1:]:
ds = xr.open_zarr(file)
xr.open_zarr(file)
print(file)

# ds_filt = ds.sel(x=slice(*x_range), y=slice(*y_range))
Expand Down
25 changes: 22 additions & 3 deletions nwp/excarta/parse_excarta_monthly.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
# Low memory script
<<<<<<< HEAD
=======
import argparse
>>>>>>> ed1125f2aadcc4f6ea53290fe7b2f87e027a025d
import os
import pathlib
from datetime import datetime

import pandas as pd
import xarray as xr
import argparse
import pathlib



def _parse_args():
Expand All @@ -21,6 +26,7 @@ def data_loader(folder_path, month_to_process):
Only process files for the month 'YYYYMM' given by month_to_process
"""
month_to_process = datetime.strptime(month_to_process, "%Y%m")
<<<<<<< HEAD
column_names = [
"DateTimeUTC",
"LocationId",
Expand All @@ -30,6 +36,9 @@ def data_loader(folder_path, month_to_process):
"dhi",
"ghi",
]
=======
column_names = ["DateTimeUTC", "LocationId", "Latitude", "Longitude", "dni", "dhi", "ghi"]
>>>>>>> ed1125f2aadcc4f6ea53290fe7b2f87e027a025d
files = os.listdir(folder_path)
datasets = []

Expand All @@ -42,10 +51,14 @@ def data_loader(folder_path, month_to_process):
):
file_path = os.path.join(folder_path, filename)
df = pd.read_csv(
<<<<<<< HEAD
file_path,
header=None,
names=column_names,
parse_dates=["DateTimeUTC"],
=======
file_path, header=None, names=column_names, parse_dates=["DateTimeUTC"]
>>>>>>> ed1125f2aadcc4f6ea53290fe7b2f87e027a025d
)

df["step"] = (
Expand Down Expand Up @@ -80,8 +93,12 @@ def pdtocdf(datasets):
"""

datasets = [
<<<<<<< HEAD
ds.set_index(index=["init_time", "step", "Latitude", "Longitude"])
for ds in datasets
=======
ds.set_index(index=["init_time", "step", "Latitude", "Longitude"]) for ds in datasets
>>>>>>> ed1125f2aadcc4f6ea53290fe7b2f87e027a025d
]

ds = xr.concat(datasets, dim="index")
Expand Down Expand Up @@ -109,7 +126,9 @@ def main():
raise RuntimeError(f'Output file "{args.output}" already exist')

PATH = "/mnt/storage_b/data/ocf/solar_pv_nowcasting/experimental/Excarta/sr_UK_Malta_full/solar_data"
month_to_process = f"{args.year}{args.month:02d}" # combine year and month arguments into the required format
month_to_process = (
f"{args.year}{args.month:02d}" # combine year and month arguments into the required format
)
datasets = load_data_from_all_years(PATH, month_to_process)
ds = pdtocdf(datasets)

Expand Down
16 changes: 12 additions & 4 deletions nwp/excarta/parse_excarta_to_output.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import xarray as xr
import pandas as pd
import numpy as np
import argparse
import datetime
import os
import pathlib
from datetime import datetime
import argparse

import pandas as pd
import xarray as xr


def _parse_args():
Expand All @@ -18,6 +18,7 @@ def data_loader(folder_path):
"""
Loads and transforms data from CSV files in the given folder_path.
"""
<<<<<<< HEAD
column_names = [
"DateTimeUTC",
"LocationId",
Expand All @@ -27,6 +28,9 @@ def data_loader(folder_path):
"dhi",
"ghi",
]
=======
column_names = ["DateTimeUTC", "LocationId", "Latitude", "Longitude", "dni", "dhi", "ghi"]
>>>>>>> ed1125f2aadcc4f6ea53290fe7b2f87e027a025d
files = os.listdir(folder_path)
dfs = []

Expand Down Expand Up @@ -71,9 +75,13 @@ def pdtocdf(dfs):
merged_df = pd.concat(dfs, ignore_index=True)

ds = xr.Dataset.from_dataframe(merged_df)
<<<<<<< HEAD
ds = ds.set_index(index=["init_time", "step", "Latitude", "Longitude"]).unstack(
"index"
)
=======
ds = ds.set_index(index=["init_time", "step", "Latitude", "Longitude"]).unstack("index")
>>>>>>> ed1125f2aadcc4f6ea53290fe7b2f87e027a025d
ds = ds.drop_vars(["LocationId", "DateTimeUTC"])

var_names = ds.data_vars
Expand Down
16 changes: 14 additions & 2 deletions nwp/excarta/parse_excarta_to_output_low_mem.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
# Low memory script
<<<<<<< HEAD
=======
import argparse
>>>>>>> ed1125f2aadcc4f6ea53290fe7b2f87e027a025d
import os
import pathlib
from datetime import datetime

import pandas as pd
import xarray as xr
import argparse
import pathlib


def _parse_args():
Expand All @@ -17,6 +21,7 @@ def data_loader(folder_path):
"""
Loads and transforms data from CSV files in the given folder_path and directly converts each DataFrame into an xarray Dataset.
"""
<<<<<<< HEAD
column_names = [
"DateTimeUTC",
"LocationId",
Expand All @@ -26,6 +31,9 @@ def data_loader(folder_path):
"dhi",
"ghi",
]
=======
column_names = ["DateTimeUTC", "LocationId", "Latitude", "Longitude", "dni", "dhi", "ghi"]
>>>>>>> ed1125f2aadcc4f6ea53290fe7b2f87e027a025d
files = os.listdir(folder_path)
datasets = []

Expand Down Expand Up @@ -70,8 +78,12 @@ def pdtocdf(datasets):
# ds = xr.merge(datasets)

datasets = [
<<<<<<< HEAD
ds.set_index(index=["init_time", "step", "Latitude", "Longitude"])
for ds in datasets
=======
ds.set_index(index=["init_time", "step", "Latitude", "Longitude"]) for ds in datasets
>>>>>>> ed1125f2aadcc4f6ea53290fe7b2f87e027a025d
]

ds = xr.concat(datasets, dim="index")
Expand Down
20 changes: 10 additions & 10 deletions scripts/convert_icon_archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@
"""

import multiprocessing as mp
import os
import subprocess
from glob import glob
from pathlib import Path

import xarray as xr
import zarr
Expand All @@ -18,10 +21,7 @@
EU_VAR2D_LIST,
EU_VAR3D_LIST,
)
import subprocess

from pathlib import Path
import multiprocessing as mp

def decompress(full_bzip_filename: Path, temp_pth: Path) -> str:
"""
Expand All @@ -38,7 +38,7 @@ def decompress(full_bzip_filename: Path, temp_pth: Path) -> str:
base_nat_filename = os.path.splitext(base_bzip_filename)[0]
full_nat_filename = os.path.join(temp_pth, base_nat_filename)
if os.path.exists(full_nat_filename):
return full_nat_filename # Don't decompress a second time
return full_nat_filename # Don't decompress a second time
with open(full_nat_filename, "wb") as nat_file_handler:
process = subprocess.run(
["pbzip2", "--decompress", "--keep", "--stdout", full_bzip_filename],
Expand Down Expand Up @@ -179,8 +179,8 @@ def upload_to_hf(dataset_xr, folder, model="eu", run="00", token=None):
encoding = {var: {"compressor": Blosc2("zstd", clevel=9)} for var in dataset_xr.data_vars}
encoding["time"] = {"units": "nanoseconds since 1970-01-01"}
with zarr.ZipStore(
zarr_path,
mode="w",
zarr_path,
mode="w",
) as store:
dataset_xr.chunk(chunking).to_zarr(store, encoding=encoding, compute=True)
done = False
Expand All @@ -189,10 +189,10 @@ def upload_to_hf(dataset_xr, folder, model="eu", run="00", token=None):
api.upload_file(
path_or_fileobj=zarr_path,
path_in_repo=f"data/{dataset_xr.time.dt.year.values}/"
f"{dataset_xr.time.dt.month.values}/"
f"{dataset_xr.time.dt.day.values}/"
f"{dataset_xr.time.dt.year.values}{str(dataset_xr.time.dt.month.values).zfill(2)}{str(dataset_xr.time.dt.day.values).zfill(2)}"
f"_{str(dataset_xr.time.dt.hour.values).zfill(2)}.zarr.zip",
f"{dataset_xr.time.dt.month.values}/"
f"{dataset_xr.time.dt.day.values}/"
f"{dataset_xr.time.dt.year.values}{str(dataset_xr.time.dt.month.values).zfill(2)}{str(dataset_xr.time.dt.day.values).zfill(2)}"
f"_{str(dataset_xr.time.dt.hour.values).zfill(2)}.zarr.zip",
repo_id="openclimatefix/dwd-icon-global"
if model == "global"
else "openclimatefix/dwd-icon-eu",
Expand Down

0 comments on commit 3f358da

Please sign in to comment.