From 26d388cf7a67b1102fb820bb4c76769b221c0226 Mon Sep 17 00:00:00 2001 From: Quarto GHA Workflow Runner Date: Mon, 17 Jul 2023 07:11:38 +0000 Subject: [PATCH] Built site for gh-pages --- .nojekyll | 2 +- aemo_data.html | 4 + contributing.html | 4 + index.html | 4 + opennem_facilities.html | 745 +++++++++++++++++++++++ search.json | 14 + sitemap.xml | 10 +- snippets/aemo_data/opennem_facilities.py | 149 +++++ 8 files changed, 928 insertions(+), 4 deletions(-) create mode 100644 opennem_facilities.html create mode 100644 snippets/aemo_data/opennem_facilities.py diff --git a/.nojekyll b/.nojekyll index dad09d5..fa4fe01 100644 --- a/.nojekyll +++ b/.nojekyll @@ -1 +1 @@ -0b22b2a7 \ No newline at end of file +f903c905 \ No newline at end of file diff --git a/aemo_data.html b/aemo_data.html index f25f533..abbf93f 100644 --- a/aemo_data.html +++ b/aemo_data.html @@ -133,6 +133,10 @@
  • AEMO Data Snippets +
  • +
  • + + openNEM facility data
  • diff --git a/contributing.html b/contributing.html index ba6a7fb..c7719ca 100644 --- a/contributing.html +++ b/contributing.html @@ -97,6 +97,10 @@
  • AEMO Data Snippets +
  • +
  • + + openNEM facility data
  • diff --git a/index.html b/index.html index 71bedd1..1d2536a 100644 --- a/index.html +++ b/index.html @@ -97,6 +97,10 @@
  • AEMO Data Snippets +
  • +
  • + + openNEM facility data
  • diff --git a/opennem_facilities.html b/opennem_facilities.html new file mode 100644 index 0000000..77233f2 --- /dev/null +++ b/opennem_facilities.html @@ -0,0 +1,745 @@ + + + + + + + + + +CEEM Gists - openNEM facility data + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    +
    + +
    + +
    + + + + +
    + +
    +
    +

    openNEM facility data

    +
    + + + +
    + + + + +
    + + +
    + +
    +

    Very basic module for downloading and parsing openNEM facility data

    +

    This is a simple set of functions for downloading and parsing station and DUID metadata from openNEM.

    +

    Essentially works as follows:

    +
      +
    • gets the master list of stations from openNEM
    • +
    • iteratively downloads and saves the json data for each of the stations within this list (about 400)
    • +
    • parses the downloaded data into a flat dataframe
    • +
    +

    The json data is stored locally, to avoid having to re-download the data for every station each time you want to adapt the parser and/or change the data you want to record.

    +

    The json is validated with pydantic (to deal with missing fields, and other irregularities in the openNEM json). There is probably a smarter way to flatten the validated data to pandas than what I have now, but it does the job.

    +

    Note there are two stations (MWPS and SLDCBLK, explicitly skipped in the code) that are missing or have another issue.

    +
    +

    Requirements

    +

    Written using Python 3.11. Uses pandas, requests, simplejson and pydantic (for json data validation).

    +
    +
    +

    Usage

    +

    Before using the module, there is a global variable (LOCALDIR) that needs to be set to specify where the station json data is stored.

    +

    To download all the station json:

    +
    import opennem_facilities
    +opennem_facilities.download_all_stations()
    +

    To parse the station data:

    +
    import opennem_facilities
    +df = opennem_facilities.parse_station_data()
    +

    This should return a dataframe as follows (where the code here is DUID)

    + +++++++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
    network_regioncodefueltechcapacity_registeredlatlonstation_namestation_code
    0NSW1APPINgas_wcmg55-34.2109150.793AppinAPPIN
    1NSW1AVLSF1solar_utility245-34.9191146.61AvonlieAVLSF
    2NSW1AWABAREFbioenergy_biogas1-33.0233151.551AwabaAWABAREF
    3NSW1BANGOWF2wind84.8-34.7672148.921BangoBANGOWF
    +
    +
    +

    Extending / adapting

    +

    To parse additional details / metadata - you would have to adapt the Station pydantic model (i.e. add the fields you want to parse), and also adapt the function to flatten the data to pandas as appropriate.

    +
    +
    +

    code

    +

    The code can be downloaded from here: opennem_facilities.py, and is shown below as well:

    +
    # Basic python script to download and restructure DUID and station data
    +# from the openNEM facilities dataset
    +#
    +# Copyright (C) 2023 Dylan McConnell
    +#
    +# This program is free software: you can redistribute it and/or modify
    +# it under the terms of the GNU General Public License as published by
    +# the Free Software Foundation, either version 3 of the License, or
    +# (at your option) any later version.
    +#
    +# This program is distributed in the hope that it will be useful,
    +# but WITHOUT ANY WARRANTY; without even the implied warranty of
    +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    +# GNU General Public License for more details.
    +#
    +# You should have received a copy of the GNU General Public License
    +# along with this program.  If not, see <https://www.gnu.org/licenses/>.
    +
    +import os
    +from typing import List, Optional
    +
    +import pandas as pd
    +import requests
    +import simplejson
    +from pydantic import BaseModel
    +
    +# URL of the openNEM master geojson file (fetched by get_master)
    +GEOJSON = "https://data.opennem.org.au/v3/geo/au_facilities.json"
    +# Directory the per-station json files are written to and read from.
    +# Placeholder value: set this to a real directory before using the module.
    +LOCALDIR = "/path/to/local/dir/"
    +# URL template for a single station; "{}" is filled with the station code
    +STATION_URL = "https://api.opennem.org.au/station/au/NEM/{}"
    +
    +
    +def get_master():
    +    """
    +    Download master geojson file from openNEM, returning JSON
    +    """
    +    response = requests.get(GEOJSON)
    +    return simplejson.loads(response.content)
    +
    +
    +def get_station(station_code: str = "LIDDELL"):
    +    """
    +    Download and store station json from openNEM
    +    """
    +    response = requests.get(STATION_URL.format(station_code))
    +    json = simplejson.loads(response.content)
    +
    +    filename = station_filename(json["code"])
    +    with open(os.path.join(LOCALDIR, filename), "w") as f:
    +        simplejson.dump(json, f, indent=2)
    +
    +
    +def station_filename(code: str):
    +    """
    +    Simple function to replace problematic characters in station codes
    +    and return a filename
    +    """
    +    clean_code = code.replace("/", "_")
    +    return f"{clean_code}.json"
    +
    +
    +def load_station(station_code: str):
    +    """
    +    Load station json from local directory
    +    """
    +    filename = station_filename(station_code)
    +    with open(os.path.join(LOCALDIR, filename), "r") as f:
    +        return simplejson.load(f)
    +
    +
    +def station_generator(master_json):
    +    """
    +    Generator that yields the station code for every station in the NEM
    +    """
    +    for station in master_json["features"]:
    +        if station["properties"]["network"] == "NEM":
    +            yield station["properties"]["station_code"]
    +
    +
    +def download_all_stations():
    +    """
    +    Downloads all the station json data from the master list.
    +    """
    +    master_json = get_master()
    +    for station_code in station_generator(master_json):
    +        if station_code != "SLDCBLK":
    +            try:
    +                load_station(station_code)
    +            except FileNotFoundError:
    +                print("downloading ", station_code)
    +                get_station(station_code)
    +
    +
    +"""
    +Some pydantic models for validating openNEM data
    +"""
    +
    +
    +class DispatchUnit(BaseModel):
    +    """
    +    One entry of a station's "facilities" list (see Station.facilities).
    +    """
    +
    +    network_region: str
    +    # per the accompanying documentation, this code is the DUID
    +    code: str
    +    fueltech: str
    +    # optional: None when not supplied
    +    capacity_registered: Optional[float] = None
    +
    +
    +class Location(BaseModel):
    +    """
    +    Coordinates of a station (see Station.location).
    +    """
    +
    +    # both optional: None when not supplied
    +    lat: Optional[float] = None
    +    # note the field is "lng" here; flatten_station stores it as "lon"
    +    lng: Optional[float] = None
    +
    +
    +class Station(BaseModel):
    +    """
    +    Top-level model used to validate a station json file
    +    (built in parse_station_data from the output of load_station).
    +    """
    +
    +    name: str
    +    code: str
    +    location: Location
    +    # one DispatchUnit per entry; each becomes a row in flatten_station
    +    facilities: List[DispatchUnit]
    +
    +
    +def parse_station_data():
    +    """
    +    Parses all station data from the master list.
    +    Assumes all station json already downloaded.
    +    """
    +    master_json = get_master()
    +    data = []
    +
    +    for station_code in station_generator(master_json):
    +        if station_code not in ["MWPS", "SLDCBLK"]:
    +            station_json = load_station(station_code)
    +            valid_station = Station(**station_json)
    +            data.append(flatten_station(valid_station))
    +
    +    return pd.concat(data).reset_index(drop=True)
    +
    +
    +def flatten_station(valid_station: Station):
    +    """
    +    Simple function to convert a validated station to pandas dataframe
    +    (probably could be done neater / cleaner with pd.normalize_json)
    +    """
    +    d = []
    +    station_dict = valid_station.dict()
    +    for du in valid_station.facilities:
    +        data = du.dict()
    +        data["lat"] = station_dict["location"]["lat"]
    +        data["lon"] = station_dict["location"]["lng"]
    +        data["station_name"] = station_dict["name"]
    +        data["station_code"] = station_dict["code"]
    +        d.append(data)
    +
    +    return pd.DataFrame(d)
    + + +
    +
    + +
    + +
    + + + + + \ No newline at end of file diff --git a/search.json b/search.json index eeaebcc..f7208a1 100644 --- a/search.json +++ b/search.json @@ -13,6 +13,20 @@ "section": "", "text": "This script can be run via the command line to divide a large AEMO data CSV (e.g. from the Monthly Data Archive, such as rebids in BIDPEROFFER) into Parquet partitions. This is advantageous for using packages such as Dask or polars to analyse such data.\nPartitions are generated based on the chunksize parameter, which specifies a number of line (default \\(10^6\\) lines per chunk). However, this code could be modified to partition data another way (e.g. by date, or by unit ID).\nIt also assumes that the first row of the table is the header (i.e. columns) for a single data table.\n\n\nWritten using Python 3.11. Uses pandas and tqdm (progress bar).\nAlso uses standard librarypathlib and type annotations, so probably need at least Python > 3.5.\n\n\n\ncreate_parquet_partitions.py [-h] -file FILE -output_dir OUTPUT_DIR [-chunksize CHUNKSIZE]\n\n\npython create_parquet_partitions.py -file PUBLIC_DVD_BIDPEROFFER_202107010000.CSV -output_dir BIDPEROFFER -chunksize 1000000\n\n\n\n\ncreate_parquet_partitions.py" }, + { + "objectID": "opennem_facilities.html", + "href": "opennem_facilities.html", + "title": "openNEM facility data", + "section": "", + "text": "This is a simple set of functions for downloading and parsing station and duid meta data from openNEM.\nEssentially works as follows:\n\ngets the master list of stations from openNEM\niteratively downloads and saves the json data for each of the stations within this list (about 400)\nparses the downloaded data into a flat dataframe\n\nThe json data is stored locally, to prevent having to re-download the the every station each time you might want to adapt the parser and/or change the data you want to record.\nThe json is validated with pydantic (to deal with missing fields, and other irreularities in the openNEM json). 
There is probably a smarter way to flatten the validated data to pandas than what I have now, but it does the job.\nNote there are two stations (commented out in the code) that are missing or have another issue.\n\n\nWritten using Python 3.11. Uses pandas, requests, simplejson and pydantic (for json data validation).\n\n\n\nBefore using the module, there is global variable (LOCALDIR) that needs to be set to specifify where the station json data is stored.\nTo download all the station json:\nimport opennem_facilities\nopennem_facilities.download_all_stations()\nTop parse the station data:\nimport opennem_facilities\ndf = opennem_facilities.parse_station_data()\nThis should return a dataframe as follows (where the code here is DUID)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nnetwork_region\ncode\nfueltech\ncapacity_registered\nlat\nlon\nstation_name\nstation_code\n\n\n\n\n0\nNSW1\nAPPIN\ngas_wcmg\n55\n-34.2109\n150.793\nAppin\nAPPIN\n\n\n1\nNSW1\nAVLSF1\nsolar_utility\n245\n-34.9191\n146.61\nAvonlie\nAVLSF\n\n\n2\nNSW1\nAWABAREF\nbioenergy_biogas\n1\n-33.0233\n151.551\nAwaba\nAWABAREF\n\n\n3\nNSW1\nBANGOWF2\nwind\n84.8\n-34.7672\n148.921\nBango\nBANGOWF\n\n\n…\n…\n…\n…\n…\n…\n…\n…\n…\n\n\n\n\n\n\nTo parse additional details / metadata - you would have to adapt the Station pydantic model (i.e. 
add the fields you want to parse), and also adapt the function to flatten the data to pandas as appropriate.\n\n\n\nThe code csan be downloaded from here: ’opennem_faciulities.py, and is shown below as well:\n# Basic python script to download and restructure DUID and station data\n# from the openNEM facilities dataset\n#\n# Copyright (C) 2023 Dylan McConnell\n#\n# This program is free software: you can redistribute it and/or modify\n# it under the terms of the GNU General Public License as published by\n# the Free Software Foundation, either version 3 of the License, or\n# (at your option) any later version.\n#\n# This program is distributed in the hope that it will be useful,\n# but WITHOUT ANY WARRANTY; without even the implied warranty of\n# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n# GNU General Public License for more details.\n#\n# You should have received a copy of the GNU General Public License\n# along with this program. If not, see <https://www.gnu.org/licenses/>.\n\nimport os\nfrom typing import List, Optional\n\nimport pandas as pd\nimport requests\nimport simplejson\nfrom pydantic import BaseModel\n\nGEOJSON = \"https://data.opennem.org.au/v3/geo/au_facilities.json\"\nLOCALDIR = \"/path/to/local/dir/\"\nSTATION_URL = \"https://api.opennem.org.au/station/au/NEM/{}\"\n\n\ndef get_master():\n \"\"\"\n Download master geojson file from openNEM, returning JSON\n \"\"\"\n response = requests.get(GEOJSON)\n return simplejson.loads(response.content)\n\n\ndef get_station(station_code: str = \"LIDDELL\"):\n \"\"\"\n Download and store station json from openNEM\n \"\"\"\n response = requests.get(STATION_URL.format(station_code))\n json = simplejson.loads(response.content)\n\n filename = station_filename(json[\"code\"])\n with open(os.path.join(LOCALDIR, filename), \"w\") as f:\n simplejson.dump(json, f, indent=2)\n\n\ndef station_filename(code: str):\n \"\"\"\n Simple function to replace problematic characters in station codes\n and return a 
filename\n \"\"\"\n clean_code = code.replace(\"/\", \"_\")\n return f\"{clean_code}.json\"\n\n\ndef load_station(station_code: str):\n \"\"\"\n Load station json from local directory\n \"\"\"\n filename = station_filename(station_code)\n with open(os.path.join(LOCALDIR, filename), \"r\") as f:\n return simplejson.load(f)\n\n\ndef station_generator(master_json):\n \"\"\"\n Generator that yields the station code for every station in the NEM\n \"\"\"\n for station in master_json[\"features\"]:\n if station[\"properties\"][\"network\"] == \"NEM\":\n yield station[\"properties\"][\"station_code\"]\n\n\ndef download_all_stations():\n \"\"\"\n Downloads all the station json data from the master list.\n \"\"\"\n master_json = get_master()\n for station_code in station_generator(master_json):\n if station_code != \"SLDCBLK\":\n try:\n load_station(station_code)\n except FileNotFoundError:\n print(\"downloading \", station_code)\n get_station(station_code)\n\n\n\"\"\"\nSome pydantic models for validating openNEM data\n\"\"\"\n\n\nclass DispatchUnit(BaseModel):\n network_region: str\n code: str\n fueltech: str\n capacity_registered: Optional[float] = None\n\n\nclass Location(BaseModel):\n lat: Optional[float] = None\n lng: Optional[float] = None\n\n\nclass Station(BaseModel):\n name: str\n code: str\n location: Location\n facilities: List[DispatchUnit]\n\n\ndef parse_station_data():\n \"\"\"\n Parses all station data from the master list.\n Assumes all station json already downloaded.\n \"\"\"\n master_json = get_master()\n data = []\n\n for station_code in station_generator(master_json):\n if station_code not in [\"MWPS\", \"SLDCBLK\"]:\n station_json = load_station(station_code)\n valid_station = Station(**station_json)\n data.append(flatten_station(valid_station))\n\n return pd.concat(data).reset_index(drop=True)\n\n\ndef flatten_station(valid_station: Station):\n \"\"\"\n Simple function to convert a validated station to pandas dataframe\n (probably could be done neater 
/ cleaner with pd.normalize_json)\n \"\"\"\n d = []\n station_dict = valid_station.dict()\n for du in valid_station.facilities:\n data = du.dict()\n data[\"lat\"] = station_dict[\"location\"][\"lat\"]\n data[\"lon\"] = station_dict[\"location\"][\"lng\"]\n data[\"station_name\"] = station_dict[\"name\"]\n data[\"station_code\"] = station_dict[\"code\"]\n d.append(data)\n\n return pd.DataFrame(d)" + }, + { + "objectID": "opennem_facilities.html#very-basic-module-for-downloading-and-parsing-opennem-facility-data", + "href": "opennem_facilities.html#very-basic-module-for-downloading-and-parsing-opennem-facility-data", + "title": "openNEM facility data", + "section": "", + "text": "This is a simple set of functions for downloading and parsing station and duid meta data from openNEM.\nEssentially works as follows:\n\ngets the master list of stations from openNEM\niteratively downloads and saves the json data for each of the stations within this list (about 400)\nparses the downloaded data into a flat dataframe\n\nThe json data is stored locally, to prevent having to re-download the the every station each time you might want to adapt the parser and/or change the data you want to record.\nThe json is validated with pydantic (to deal with missing fields, and other irreularities in the openNEM json). There is probably a smarter way to flatten the validated data to pandas than what I have now, but it does the job.\nNote there are two stations (commented out in the code) that are missing or have another issue.\n\n\nWritten using Python 3.11. 
Uses pandas, requests, simplejson and pydantic (for json data validation).\n\n\n\nBefore using the module, there is global variable (LOCALDIR) that needs to be set to specifify where the station json data is stored.\nTo download all the station json:\nimport opennem_facilities\nopennem_facilities.download_all_stations()\nTop parse the station data:\nimport opennem_facilities\ndf = opennem_facilities.parse_station_data()\nThis should return a dataframe as follows (where the code here is DUID)\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nnetwork_region\ncode\nfueltech\ncapacity_registered\nlat\nlon\nstation_name\nstation_code\n\n\n\n\n0\nNSW1\nAPPIN\ngas_wcmg\n55\n-34.2109\n150.793\nAppin\nAPPIN\n\n\n1\nNSW1\nAVLSF1\nsolar_utility\n245\n-34.9191\n146.61\nAvonlie\nAVLSF\n\n\n2\nNSW1\nAWABAREF\nbioenergy_biogas\n1\n-33.0233\n151.551\nAwaba\nAWABAREF\n\n\n3\nNSW1\nBANGOWF2\nwind\n84.8\n-34.7672\n148.921\nBango\nBANGOWF\n\n\n…\n…\n…\n…\n…\n…\n…\n…\n…\n\n\n\n\n\n\nTo parse additional details / metadata - you would have to adapt the Station pydantic model (i.e. add the fields you want to parse), and also adapt the function to flatten the data to pandas as appropriate.\n\n\n\nThe code csan be downloaded from here: ’opennem_faciulities.py, and is shown below as well:\n# Basic python script to download and restructure DUID and station data\n# from the openNEM facilities dataset\n#\n# Copyright (C) 2023 Dylan McConnell\n#\n# This program is free software: you can redistribute it and/or modify\n# it under the terms of the GNU General Public License as published by\n# the Free Software Foundation, either version 3 of the License, or\n# (at your option) any later version.\n#\n# This program is distributed in the hope that it will be useful,\n# but WITHOUT ANY WARRANTY; without even the implied warranty of\n# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the\n# GNU General Public License for more details.\n#\n# You should have received a copy of the GNU General Public License\n# along with this program. If not, see <https://www.gnu.org/licenses/>.\n\nimport os\nfrom typing import List, Optional\n\nimport pandas as pd\nimport requests\nimport simplejson\nfrom pydantic import BaseModel\n\nGEOJSON = \"https://data.opennem.org.au/v3/geo/au_facilities.json\"\nLOCALDIR = \"/path/to/local/dir/\"\nSTATION_URL = \"https://api.opennem.org.au/station/au/NEM/{}\"\n\n\ndef get_master():\n \"\"\"\n Download master geojson file from openNEM, returning JSON\n \"\"\"\n response = requests.get(GEOJSON)\n return simplejson.loads(response.content)\n\n\ndef get_station(station_code: str = \"LIDDELL\"):\n \"\"\"\n Download and store station json from openNEM\n \"\"\"\n response = requests.get(STATION_URL.format(station_code))\n json = simplejson.loads(response.content)\n\n filename = station_filename(json[\"code\"])\n with open(os.path.join(LOCALDIR, filename), \"w\") as f:\n simplejson.dump(json, f, indent=2)\n\n\ndef station_filename(code: str):\n \"\"\"\n Simple function to replace problematic characters in station codes\n and return a filename\n \"\"\"\n clean_code = code.replace(\"/\", \"_\")\n return f\"{clean_code}.json\"\n\n\ndef load_station(station_code: str):\n \"\"\"\n Load station json from local directory\n \"\"\"\n filename = station_filename(station_code)\n with open(os.path.join(LOCALDIR, filename), \"r\") as f:\n return simplejson.load(f)\n\n\ndef station_generator(master_json):\n \"\"\"\n Generator that yields the station code for every station in the NEM\n \"\"\"\n for station in master_json[\"features\"]:\n if station[\"properties\"][\"network\"] == \"NEM\":\n yield station[\"properties\"][\"station_code\"]\n\n\ndef download_all_stations():\n \"\"\"\n Downloads all the station json data from the master list.\n \"\"\"\n master_json = get_master()\n for station_code in station_generator(master_json):\n if 
station_code != \"SLDCBLK\":\n try:\n load_station(station_code)\n except FileNotFoundError:\n print(\"downloading \", station_code)\n get_station(station_code)\n\n\n\"\"\"\nSome pydantic models for validating openNEM data\n\"\"\"\n\n\nclass DispatchUnit(BaseModel):\n network_region: str\n code: str\n fueltech: str\n capacity_registered: Optional[float] = None\n\n\nclass Location(BaseModel):\n lat: Optional[float] = None\n lng: Optional[float] = None\n\n\nclass Station(BaseModel):\n name: str\n code: str\n location: Location\n facilities: List[DispatchUnit]\n\n\ndef parse_station_data():\n \"\"\"\n Parses all station data from the master list.\n Assumes all station json already downloaded.\n \"\"\"\n master_json = get_master()\n data = []\n\n for station_code in station_generator(master_json):\n if station_code not in [\"MWPS\", \"SLDCBLK\"]:\n station_json = load_station(station_code)\n valid_station = Station(**station_json)\n data.append(flatten_station(valid_station))\n\n return pd.concat(data).reset_index(drop=True)\n\n\ndef flatten_station(valid_station: Station):\n \"\"\"\n Simple function to convert a validated station to pandas dataframe\n (probably could be done neater / cleaner with pd.normalize_json)\n \"\"\"\n d = []\n station_dict = valid_station.dict()\n for du in valid_station.facilities:\n data = du.dict()\n data[\"lat\"] = station_dict[\"location\"][\"lat\"]\n data[\"lon\"] = station_dict[\"location\"][\"lng\"]\n data[\"station_name\"] = station_dict[\"name\"]\n data[\"station_code\"] = station_dict[\"code\"]\n d.append(data)\n\n return pd.DataFrame(d)" + }, { "objectID": "index.html", "href": "index.html", diff --git a/sitemap.xml b/sitemap.xml index b5e29a4..3530cb4 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -2,14 +2,18 @@ https://github.com/UNSW-CEEM/CEEM-Gists/aemo_data.html - 2023-07-11T04:26:17.617Z + 2023-07-17T07:11:38.678Z + + + https://github.com/UNSW-CEEM/CEEM-Gists/opennem_facilities.html + 2023-07-17T07:11:36.562Z 
https://github.com/UNSW-CEEM/CEEM-Gists/index.html - 2023-07-11T04:26:16.041Z + 2023-07-17T07:11:34.306Z https://github.com/UNSW-CEEM/CEEM-Gists/contributing.html - 2023-07-11T04:26:16.965Z + 2023-07-17T07:11:37.786Z diff --git a/snippets/aemo_data/opennem_facilities.py b/snippets/aemo_data/opennem_facilities.py new file mode 100644 index 0000000..5b5c1c8 --- /dev/null +++ b/snippets/aemo_data/opennem_facilities.py @@ -0,0 +1,149 @@ +# Basic python script to download and restructure DUID and station data +# from the openNEM facilities dataset +# +# Copyright (C) 2023 Dylan McConnell +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ +import os +from typing import List, Optional + +import pandas as pd +import requests +import simplejson +from pydantic import BaseModel + +GEOJSON = "https://data.opennem.org.au/v3/geo/au_facilities.json" +LOCALDIR = "/path/to/local/dir/" +STATION_URL = "https://api.opennem.org.au/station/au/NEM/{}" + + +def get_master(): + """ + Download master geojson file from openNEM, returning JSON + """ + response = requests.get(GEOJSON) + return simplejson.loads(response.content) + + +def get_station(station_code: str = "LIDDELL"): + """ + Download and store station json from openNEM + """ + response = requests.get(STATION_URL.format(station_code)) + json = simplejson.loads(response.content) + + filename = station_filename(json["code"]) + with open(os.path.join(LOCALDIR, filename), "w") as f: + simplejson.dump(json, f, indent=2) + + +def station_filename(code: str): + """ + Simple function to replace problematic characters in station codes + and return a filename + """ + clean_code = code.replace("/", "_") + return f"{clean_code}.json" + + +def load_station(station_code: str): + """ + Load station json from local directory + """ + filename = station_filename(station_code) + with open(os.path.join(LOCALDIR, filename), "r") as f: + return simplejson.load(f) + + +def station_generator(master_json): + """ + Generator that yields the station code for every station in the NEM + """ + for station in master_json["features"]: + if station["properties"]["network"] == "NEM": + yield station["properties"]["station_code"] + + +def download_all_stations(): + """ + Downloads all the station json data from the master list. 
+ """ + master_json = get_master() + for station_code in station_generator(master_json): + if station_code != "SLDCBLK": + try: + load_station(station_code) + except FileNotFoundError: + print("downloading ", station_code) + get_station(station_code) + + +""" +Some pydantic models for validating openNEM data +""" + + +class DispatchUnit(BaseModel): + network_region: str + code: str + fueltech: str + capacity_registered: Optional[float] = None + + +class Location(BaseModel): + lat: Optional[float] = None + lng: Optional[float] = None + + +class Station(BaseModel): + name: str + code: str + location: Location + facilities: List[DispatchUnit] + + +def parse_station_data(): + """ + Parses all station data from the master list. + Assumes all station json already downloaded. + """ + master_json = get_master() + data = [] + + for station_code in station_generator(master_json): + if station_code not in ["MWPS", "SLDCBLK"]: + station_json = load_station(station_code) + valid_station = Station(**station_json) + data.append(flatten_station(valid_station)) + + return pd.concat(data).reset_index(drop=True) + + +def flatten_station(valid_station: Station): + """ + Simple function to convert a validated station to pandas dataframe + (probably could be done neater / cleaner with pd.normalize_json) + """ + d = [] + station_dict = valid_station.dict() + for du in valid_station.facilities: + data = du.dict() + data["lat"] = station_dict["location"]["lat"] + data["lon"] = station_dict["location"]["lng"] + data["station_name"] = station_dict["name"] + data["station_code"] = station_dict["code"] + d.append(data) + + return pd.DataFrame(d)