This repository has been archived by the owner on Jun 11, 2024. It is now read-only.

Commit ed1125f

[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
pre-commit-ci[bot] committed Jul 19, 2023
1 parent 22f4def commit ed1125f
Showing 6 changed files with 92 additions and 85 deletions.
31 changes: 11 additions & 20 deletions nwp/excarta/merge_excarta.py
@@ -1,22 +1,19 @@
# import libs
import xarray as xr
import pandas as pd
import numpy as np
import datetime
import os
import pathlib as Path
from datetime import datetime
import zarr
import ocf_blosc2

import xarray as xr


def merge_zarr_files(zarr_path, merged_zarr_path):
# Collect paths of Zarr files in the specified directory
zarr_files = [os.path.join(zarr_path, file) for file in os.listdir(zarr_path) if file.endswith('.zarr')]
zarr_files = [
os.path.join(zarr_path, file) for file in os.listdir(zarr_path) if file.endswith(".zarr")
]

print("1")
# Open the first Zarr file to create the initial dataset
merged_ds = xr.open_zarr(zarr_files[0])

print("2")

# Define the specific range of x and y coordinates
@@ -25,30 +22,25 @@ def merge_zarr_files(zarr_path, merged_zarr_path):

# Iterate over the remaining Zarr files and merge them into the initial dataset
for file in zarr_files[1:]:
ds = xr.open_zarr(file)
xr.open_zarr(file)
print(file)

# ds_filt = ds.sel(x=slice(*x_range), y=slice(*y_range))
merged_ds = merged_ds.combine_first(ds_filt)

print("3")

# Rechunk the merged dataset
merged_ds = merged_ds.chunk(chunks={"init_time": 10, "x": 100, "y": 100})

print("4")


print("4")


print(merged_ds)

# Save the merged dataset as a new Zarr file
merged_ds.to_zarr(merged_zarr_path)

print("5")




# Specify the path where the independent Zarr files are located
@@ -59,4 +51,3 @@ def merge_zarr_files(zarr_path, merged_zarr_path):

# Merge the Zarr files
merge_zarr_files(zarr_path, merged_zarr_path)
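
As the diff above shows, the spatial-filter line inside the loop is commented out while `ds_filt` is still passed to `combine_first`, and the autofix additionally dropped the `ds = xr.open_zarr(file)` assignment, so the merged script raises a NameError. A minimal corrected sketch of the loop, assuming the commented-out filter was intended (the x/y range values are taken from the example comments elsewhere in this commit):

```python
# Corrected sketch of merge_zarr_files, assuming the commented-out
# spatial filter was intended; x_range/y_range values are illustrative.
import os

import xarray as xr

def merge_zarr_files(zarr_path, merged_zarr_path, x_range=(-10, 2), y_range=(49, 59)):
    zarr_files = [
        os.path.join(zarr_path, f) for f in os.listdir(zarr_path) if f.endswith(".zarr")
    ]
    # Open the first Zarr file to seed the merged dataset
    merged_ds = xr.open_zarr(zarr_files[0])
    for file in zarr_files[1:]:
        ds = xr.open_zarr(file)  # keep the assignment the autofix removed
        ds_filt = ds.sel(x=slice(*x_range), y=slice(*y_range))
        merged_ds = merged_ds.combine_first(ds_filt)
    # Rechunk before writing so the output Zarr has uniform chunks
    merged_ds = merged_ds.chunk(chunks={"init_time": 10, "x": 100, "y": 100})
    merged_ds.to_zarr(merged_zarr_path)
```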

51 changes: 29 additions & 22 deletions nwp/excarta/parse_excarta_monthly.py
@@ -1,10 +1,12 @@
#Low memory script
# Low memory script
import argparse
import os
import pathlib
from datetime import datetime

import pandas as pd
import xarray as xr
import argparse
import pathlib


def _parse_args():
parser = argparse.ArgumentParser()
@@ -14,28 +16,32 @@ def _parse_args():
return parser.parse_args()



def data_loader(folder_path, month_to_process):
"""
    Loads and transforms data from CSV files in the given folder_path, converting each DataFrame directly into an xarray Dataset.
    Only processes files for the month 'YYYYMM' given by month_to_process.
"""
month_to_process = datetime.strptime(month_to_process, "%Y%m")
column_names = ['DateTimeUTC', 'LocationId', 'Latitude', 'Longitude', 'dni', 'dhi', 'ghi']
column_names = ["DateTimeUTC", "LocationId", "Latitude", "Longitude", "dni", "dhi", "ghi"]
files = os.listdir(folder_path)
datasets = []

for filename in files:
if filename.endswith(".csv") and not filename.startswith("._"):
file_datetime = datetime.strptime(filename[:-4], "%Y%m%d%H")

if (file_datetime.year == month_to_process.year) and (file_datetime.month == month_to_process.month):

if (file_datetime.year == month_to_process.year) and (
file_datetime.month == month_to_process.month
):
file_path = os.path.join(folder_path, filename)
df = pd.read_csv(file_path, header=None, names=column_names, parse_dates=['DateTimeUTC'])

df['step'] = (df['DateTimeUTC'] - file_datetime).dt.total_seconds() / 3600 # convert timedelta to hours
df['init_time'] = file_datetime
df = pd.read_csv(
file_path, header=None, names=column_names, parse_dates=["DateTimeUTC"]
)

df["step"] = (
df["DateTimeUTC"] - file_datetime
).dt.total_seconds() / 3600 # convert timedelta to hours
df["init_time"] = file_datetime

# Convert the dataframe to an xarray Dataset and append to the list
ds = xr.Dataset.from_dataframe(df)
@@ -62,26 +68,25 @@ def pdtocdf(datasets):
"""
Processes the xarray Datasets and merges them.
"""

datasets = [ds.set_index(index=['init_time', 'step', 'Latitude', 'Longitude']) for ds in datasets]

ds = xr.concat(datasets, dim='index')
datasets = [
ds.set_index(index=["init_time", "step", "Latitude", "Longitude"]) for ds in datasets
]

ds = xr.concat(datasets, dim="index")

# # Define the specific range of x and y coordinates to filter the data on
# x_range = (-10, 2) # Example x coordinate range
# y_range = (49, 59) # Example y coordinate range

ds = ds.rename({"Latitude": "y", "Longitude": "x"})



var_names = ds.data_vars
d2 = xr.concat([ds[v] for v in var_names], dim="variable")
d2 = d2.assign_coords(variable=("variable", var_names))
ds = xr.Dataset(dict(value=d2))
ds = ds.sortby('step')
ds = ds.sortby('init_time')

ds = ds.sortby("step")
ds = ds.sortby("init_time")

return ds

@@ -93,7 +98,9 @@ def main():
        raise RuntimeError(f'Output file "{args.output}" already exists')

PATH = "/mnt/storage_b/data/ocf/solar_pv_nowcasting/experimental/Excarta/sr_UK_Malta_full/solar_data"
month_to_process = f"{args.year}{args.month:02d}" # combine year and month arguments into the required format
month_to_process = (
f"{args.year}{args.month:02d}" # combine year and month arguments into the required format
)
datasets = load_data_from_all_years(PATH, month_to_process)
ds = pdtocdf(datasets)

Expand All @@ -103,7 +110,7 @@ def main():
# ds = ds.sel(x=slice(float(-10), float(2)), y=slice(float(49), float(59)))

print(ds)
ds = ds.unstack('index')
ds = ds.unstack("index")

ds_filt = ds.sel(x=slice(float(13), float(15)), y=slice(float(35), float(37)))

@@ -118,4 +125,4 @@

# Check if script is being run directly
if __name__ == "__main__":
main()
main()
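
The pdtocdf/main pair in this script relies on xarray's pandas-style multi-index: each per-file Dataset is indexed by (init_time, step, Latitude, Longitude), all Datasets are concatenated along the flat "index" dimension, and main later unstacks that index into real dimensions. A toy sketch of the pattern with made-up values:

```python
# Toy sketch of the set_index -> concat -> unstack pattern above
# (illustrative values, not the Excarta data).
import pandas as pd
import xarray as xr

def to_indexed_ds(init, steps, ghi):
    df = pd.DataFrame(
        {
            "init_time": pd.to_datetime([init] * len(steps)),
            "step": steps,
            "Latitude": [35.5] * len(steps),
            "Longitude": [14.0] * len(steps),
            "ghi": ghi,
        }
    )
    ds = xr.Dataset.from_dataframe(df)  # one flat "index" dimension
    return ds.set_index(index=["init_time", "step", "Latitude", "Longitude"])

datasets = [
    to_indexed_ds("2022-07-01 00:00", [0.0, 1.0], [0.0, 120.0]),
    to_indexed_ds("2022-07-01 12:00", [0.0, 1.0], [450.0, 430.0]),
]
ds = xr.concat(datasets, dim="index")  # still one long flat index
ds = ds.unstack("index")  # dims become init_time, step, Latitude, Longitude
print(ds.sizes)
```

Assuming the elided _parse_args hunk defines --year, --month and --output flags (only args.year, args.month and args.output are visible here), a run would look like `python nwp/excarta/parse_excarta_monthly.py --year 2022 --month 7 --output excarta_202207.zarr`.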
29 changes: 16 additions & 13 deletions nwp/excarta/parse_excarta_to_output.py
@@ -1,11 +1,11 @@
import xarray as xr
import pandas as pd
import numpy as np
import argparse
import datetime
import os
import pathlib
from datetime import datetime
import argparse

import pandas as pd
import xarray as xr


def _parse_args():
@@ -18,20 +18,24 @@ def data_loader(folder_path):
"""
Loads and transforms data from CSV files in the given folder_path.
"""
column_names = ['DateTimeUTC', 'LocationId', 'Latitude', 'Longitude', 'dni', 'dhi', 'ghi']
column_names = ["DateTimeUTC", "LocationId", "Latitude", "Longitude", "dni", "dhi", "ghi"]
files = os.listdir(folder_path)
dfs = []

for filename in files:
if filename.endswith(".csv") and not filename.startswith("._"):
file_path = os.path.join(folder_path, filename)
df = pd.read_csv(file_path, header=None, names=column_names, parse_dates=['DateTimeUTC'])
df = pd.read_csv(
file_path, header=None, names=column_names, parse_dates=["DateTimeUTC"]
)

datetime_str = filename[:-4]
datetime_str = filename[:-4]
datetime_obj = datetime.strptime(datetime_str, "%Y%m%d%H")

df['step'] = (df['DateTimeUTC'] - datetime_obj).dt.total_seconds() / 3600 # convert timedelta to hours
df['init_time'] = datetime_obj
df["step"] = (
df["DateTimeUTC"] - datetime_obj
).dt.total_seconds() / 3600 # convert timedelta to hours
df["init_time"] = datetime_obj
dfs.append(df)

return dfs
@@ -43,7 +47,6 @@ def load_data_from_all_years(parent_folder_path):
"""
all_dataframes = []


    # Actual date range is 2018 to 2022 (use range(2018, 2023) to cover it)
for year in range(2018, 2019):
folder_path = os.path.join(parent_folder_path, str(year))
@@ -60,15 +63,15 @@ def pdtocdf(dfs):
merged_df = pd.concat(dfs, ignore_index=True)

ds = xr.Dataset.from_dataframe(merged_df)
ds = ds.set_index(index=['init_time', 'step','Latitude','Longitude']).unstack('index')
ds = ds.set_index(index=["init_time", "step", "Latitude", "Longitude"]).unstack("index")
ds = ds.drop_vars(["LocationId", "DateTimeUTC"])

var_names = ds.data_vars
d2 = xr.concat([ds[v] for v in var_names], dim="variable")
d2 = d2.assign_coords(variable=("variable", var_names))
ds = xr.Dataset(dict(value=d2))
ds = ds.sortby('step')
ds = ds.sortby('init_time')
ds = ds.sortby("step")
ds = ds.sortby("init_time")
ds = ds.rename({"Latitude": "y", "Longitude": "x"})

return ds
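
All three parse scripts share this variable-stacking step: every data variable (dni, dhi, ghi per the column names above) is folded into a single "value" array carrying a new "variable" dimension, which keeps the output Zarr schema uniform across variables. The step in isolation, as a sketch:

```python
# Sketch of the shared variable-stacking step: fold all data variables
# into one "value" array indexed by a new "variable" dimension.
import xarray as xr

def stack_variables(ds: xr.Dataset) -> xr.Dataset:
    var_names = list(ds.data_vars)  # e.g. ["dni", "dhi", "ghi"]
    stacked = xr.concat([ds[v] for v in var_names], dim="variable")
    stacked = stacked.assign_coords(variable=("variable", var_names))
    return xr.Dataset({"value": stacked})
```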
40 changes: 23 additions & 17 deletions nwp/excarta/parse_excarta_to_output_low_mem.py
@@ -1,10 +1,11 @@
#Low memory script
# Low memory script
import argparse
import os
import pathlib
from datetime import datetime

import pandas as pd
import xarray as xr
import argparse
import pathlib


def _parse_args():
@@ -17,19 +18,23 @@ def data_loader(folder_path):
"""
    Loads and transforms data from CSV files in the given folder_path, converting each DataFrame directly into an xarray Dataset.
"""
column_names = ['DateTimeUTC', 'LocationId', 'Latitude', 'Longitude', 'dni', 'dhi', 'ghi']
column_names = ["DateTimeUTC", "LocationId", "Latitude", "Longitude", "dni", "dhi", "ghi"]
files = os.listdir(folder_path)
datasets = []

for filename in files:
if filename.endswith(".csv") and not filename.startswith("._"):
file_path = os.path.join(folder_path, filename)

df = pd.read_csv(file_path, header=None, names=column_names, parse_dates=['DateTimeUTC'])
df = pd.read_csv(
file_path, header=None, names=column_names, parse_dates=["DateTimeUTC"]
)
datetime_str = filename[:-4]
datetime_obj = datetime.strptime(datetime_str, "%Y%m%d%H")
df['step'] = (df['DateTimeUTC'] - datetime_obj).dt.total_seconds() / 3600 # convert timedelta to hours
df['init_time'] = datetime_obj
df["step"] = (
df["DateTimeUTC"] - datetime_obj
).dt.total_seconds() / 3600 # convert timedelta to hours
df["init_time"] = datetime_obj

# Convert the dataframe to an xarray Dataset and append to the list
ds = xr.Dataset.from_dataframe(df)
@@ -55,26 +60,29 @@ def pdtocdf(datasets):
Processes the xarray Datasets and merges them.
"""
print(datasets)
# ds = xr.merge(datasets)
# ds = xr.merge(datasets)

datasets = [ds.set_index(index=['init_time', 'step', 'Latitude', 'Longitude']) for ds in datasets]
datasets = [
ds.set_index(index=["init_time", "step", "Latitude", "Longitude"]) for ds in datasets
]

ds = xr.concat(datasets, dim='index')
ds = xr.concat(datasets, dim="index")

# Going to unstack and then combine in a different script
# Get rid of the index dimension and just keep the desired ones
# ds = ds.unstack('index')

var_names = ds.data_vars
d2 = xr.concat([ds[v] for v in var_names], dim="variable")
d2 = d2.assign_coords(variable=("variable", var_names))
ds = xr.Dataset(dict(value=d2))
ds = ds.sortby('step')
ds = ds.sortby('init_time')
ds = ds.sortby("step")
ds = ds.sortby("init_time")
ds = ds.rename({"Latitude": "y", "Longitude": "x"})

return ds


def main():
args = _parse_args()

@@ -87,13 +95,11 @@ def main():

print(ds)

ds = ds.unstack('index')
ds = ds.unstack("index")

ds.to_zarr(args.output)




# Check if script is being run directly
if __name__ == "__main__":
main()
main()
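
The init_time/step derivation used by every data_loader here comes straight from the file naming scheme: the forecast init time is parsed from the "YYYYMMDDHH" file name, and step is the lead time in hours between each row's DateTimeUTC and that init time. A small sketch with a hypothetical file name:

```python
# Sketch of the init_time/step derivation (hypothetical file name).
from datetime import datetime

import pandas as pd

filename = "2018070112.csv"  # init time 2018-07-01 12:00 UTC
init_time = datetime.strptime(filename[:-4], "%Y%m%d%H")

valid_times = pd.to_datetime(["2018-07-01 13:00", "2018-07-01 15:00"])
step_hours = (valid_times - init_time).total_seconds() / 3600
print(list(step_hours))  # [1.0, 3.0]
```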
6 changes: 3 additions & 3 deletions nwp/icon/app.py
@@ -33,13 +33,13 @@ def download_model_files(runs=None, parent_folder=None, model="global"):
var_2d_list = GLOBAL_VAR2D_LIST
invariant = GLOBAL_INVARIENT_LIST
pressure_levels = GLOBAL_PRESSURE_LEVELS
f_steps = list(range(0, 79)) + list(range(81, 99, 3)) # 4 days
f_steps = list(range(0, 79)) + list(range(81, 99, 3)) # 4 days
else:
var_3d_list = EU_VAR3D_LIST
var_2d_list = EU_VAR2D_LIST
invariant = None
pressure_levels = EU_PRESSURE_LEVELS
f_steps = list(range(0, 79)) + list(range(81, 123, 3)) # 5 days
f_steps = list(range(0, 79)) + list(range(81, 123, 3)) # 5 days
for run in runs:
run_folder = os.path.join(parent_folder, run)
if not os.path.exists(run_folder):
@@ -87,7 +87,7 @@ def process_model_files(
)
lons = lon_ds.tlon.values
lats = lat_ds.tlat.values
f_steps = list(range(0, 79)) + list(range(81, 99, 3)) # 4 days
f_steps = list(range(0, 79)) + list(range(81, 99, 3)) # 4 days
else:
var_base = "icon-eu_europe_regular-lat-lon"
var_3d_list = EU_VAR3D_LIST
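
The two f_steps lines touched in this file encode the forecast-step grids the comments describe: hourly steps out to 78 h, then 3-hourly steps to the end of the run (96 h for the 4-day global model, 120 h for the 5-day EU model). A quick check of those grids:

```python
# The forecast-step grids from app.py: hourly to 78 h, then 3-hourly.
f_steps_global = list(range(0, 79)) + list(range(81, 99, 3))  # ends at 96 h
f_steps_eu = list(range(0, 79)) + list(range(81, 123, 3))  # ends at 120 h
assert f_steps_global[-1] == 96 and f_steps_eu[-1] == 120
```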
