diff --git a/apps/owidbot/cli.py b/apps/owidbot/cli.py index c21a950c239..ee51a7ff418 100644 --- a/apps/owidbot/cli.py +++ b/apps/owidbot/cli.py @@ -138,8 +138,8 @@ def create_comment_body(branch: str, services: Dict[str, str], start_time: float body = f""" Quick links (staging server): -[Site](http://{container_name}/) | [Admin](http://{container_name}/admin/login) | [Wizard](http://{container_name}/etl/wizard/) -|--------------------------------|---|---| +[Site](http://{container_name}/) | [Admin](http://{container_name}/admin/login) | [Wizard](http://{container_name}/etl/wizard/) | [Docs](http://{container_name}/etl/docs/) +|--------------------------------|---|---|---| **Login**: `ssh owid@{container_name}` diff --git a/docs/api/index.md b/docs/api/index.md index e42c9aaa9bc..fdb9841cfa3 100644 --- a/docs/api/index.md +++ b/docs/api/index.md @@ -1,12 +1,117 @@ -# Public data API +# Our World In Data - Data APIs -Our mission is to make research and data on the world's biggest problems accessible and understandable to the public. As part of this work, we provide an experimental API to the datasets. +Our World in Data offers a curated collection of charts on our website, with data and metadata accessible via our Public Chart API. The API provides data in CSV and JSON formats over HTTP, enabling seamless integration with any programming language. It is specifically designed to support the creation of interactive charts. -When using the API, you have access to the public catalog of data processed by our data team. The catalog indexes _tables_ of data, rather than datasets or individual indicators. To learn more, read about our [data model](../architecture/design/common-format.md). +We also maintain a larger data catalog within our ETL system, where we fetch, process, and prepare data used for our charts. This catalog contains significantly more data, though its level of curation varies across different sections. 
It also has an API, albeit one that is currently less accessible; at this time, we only offer a Python client for interacting with it. Unlike the Public Chart API, which exclusively provides time series data by time (typically year) and entity (typically country), our ETL catalog includes larger datasets with additional dimensions, such as age group and gender breakdowns. -At the moment, we only support [Python](python.ipynb). +This documentation briefly describes both of these APIs. +# Chart data API -!!! warning "Our API is in beta" +Our chart API is structured around charts on our website, i.e. at https://ourworldindata.org/grapher/* . You can find charts by searching our data catalog at https://ourworldindata.org/data. - We currently only provide a python API. Our hope is to extend this to other languages in the future. Please [report any issue](https://github.com/owid/etl) that you may find. +Once you've found the chart with the data you need, simply append ".csv" to the URL to download the data or ".metadata.json" to retrieve the metadata. You can also add ".zip" to download a ZIP file that includes both files, along with a README in markdown format describing the data. + +An example for our life expectancy chart: +- https://ourworldindata.org/grapher/life-expectancy - the page on our website where you can see the chart +- https://ourworldindata.org/grapher/life-expectancy.csv - the data for this chart (see below for options) +- https://ourworldindata.org/grapher/life-expectancy.metadata.json - the metadata for this chart, like the chart title, the units, how to cite the data sources +- https://ourworldindata.org/grapher/life-expectancy.zip - the above two plus a readme as zip file archive + +## Options + +The following options can be specified for all of these endpoints: + +**csvType** +- `full` (default): Get the full data, i.e. all time points and all entities +- `filtered`: Get only the data needed to display the visible chart. 
For a map chart this will be only data for a single year but all countries, for a line chart it will be the selected time range and visible entities, ... + +Note that if you use `filtered`, the other query parameters in the URL will change what is downloaded. E.g. if you navigate to our life-expectancy chart and then visually select the country "Italy" and change the time range to 1980-2000 you will see that the URL in the browser is modified to include `?time=1980..2000&country=~ITA`. When you make a request to any of the endpoints above you can include any of these modifications to get exactly that data: + +``` +https://ourworldindata.org/grapher/life-expectancy.csv?csvType=filtered&time=1980..2000&country=~ITA +``` + +**useColumnShortNames** +- `false` (default): Column names are long, use capitalization and whitespace - e.g. `Period life expectancy at birth - Sex: all - Age: 0` +- `true`: Column names are short and don't use whitespace - e.g. `life_expectancy_0__sex_all__age_0` + +``` +https://ourworldindata.org/grapher/life-expectancy.csv?useColumnShortNames=true +``` + +## Example notebooks + +Check out this list of public example notebooks that demonstrate the use of our chart API: +- https://colab.research.google.com/drive/1HDcqCy6ZZ05IznXzaaP9Blvvp3qoPnP8?usp=sharing +- https://observablehq.com/@owid/recreating-the-life-expectancy-chart + +## CSV structure + +Each row in the CSV file corresponds to an observation for an entity (most often a country or region) at a specific time point (generally a year). For example, the first three rows of data from our life expectancy chart appear as follows: + +> Entity,Code,Year,Period life expectancy at birth - Sex: all - Age: 0 +> Afghanistan,AFG,1950,27.7275 +> Afghanistan,AFG,1951,27.9634 + +The first two columns in the CSV file are "Entity" and "Code." "Entity" is the name of the entity, typically a country, such as "United States." "Code" is the OWID internal entity code used for countries or regions. 
For standard countries, this matches the [ISO alpha-3 code](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-3) (e.g., "USA"); for non-standard or historical countries, we use custom codes. Country and region codes are standardized across all Our World in Data datasets, allowing you to join multiple datasets using either of these columns. + +The third column is either "Year" or "Day". If the data is annual, this is "Year" and contains only the year as an integer. If the column is "Day", the column contains a date string in the form "YYYY-MM-DD". + +The final columns are the data columns, which are the time series that power the chart. For simple line charts there is only a single data column, whereas more complex charts can have more columns. + +## Metadata structure + +The .metadata.json file contains metadata about the data package. The "chart" key contains information to recreate the chart, like the title, subtitle, etc. The "columns" key contains information about each of the columns in the CSV, like the unit, timespan covered, citation for the data, etc. Here is a (slightly shortened) example of the metadata for the life-expectancy chart: + +```json +{ + "chart": { + "title": "Life expectancy", + "subtitle": "The [period life expectancy](#dod:period-life-expectancy) at birth, in a given year.", + "citation": "UN WPP (2022); HMD (2023); Zijdeman et al. (2015); Riley (2005)", + "originalChartUrl": "https://ourworldindata.org/grapher/life-expectancy", + "selection": ["World", "Americas", "Europe", "Africa", "Asia", "Oceania"] + }, + "columns": { + "Period life expectancy at birth - Sex: all - Age: 0": { + "titleShort": "Life expectancy at birth", + "titleLong": "Life expectancy at birth - Various sources – period tables", + "descriptionShort": "The period life expectancy at birth, in a given year.", + "descriptionKey": [ + "Period life expectancy is a metric that summarizes death rates across all age groups in one particular year.", + "..." 
+ ], + "shortUnit": "years", + "unit": "years", + "timespan": "1543-2021", + "type": "Numeric", + "owidVariableId": 815383, + "shortName": "life_expectancy_0__sex_all__age_0", + "lastUpdated": "2023-10-10", + "nextUpdate": "2024-11-30", + "citationShort": "UN WPP (2022); HMD (2023); Zijdeman et al. (2015); Riley (2005) – with minor processing by Our World in Data", + "citationLong": "UN WPP (2022); HMD (2023); Zijdeman et al. (2015); Riley (2005) – ...", + "fullMetadata": "https://api.ourworldindata.org/v1/indicators/815383.metadata.json" + } + }, + "dateDownloaded": "2024-10-30" +} +``` + +# ETL catalog API + +The ETL catalog API makes it possible to access the dataframes our data scientists use to prepare the data for our public charts. + +When using this API, you have access to the public catalog of data processed by our data team. The catalog indexes _tables_ of data, rather than datasets or individual indicators. To learn more, read about our [data model](../architecture/design/common-format.md). + +At the moment, this API only supports [Python](python.ipynb). + + +!!! warning "Our ETL API is in beta" + + We currently only provide a python API for our ETL catalog. Our hope is to extend this to other languages in the future. Please [report any issue](https://github.com/owid/etl) that you may find. 
+ +=== "Python" + + (see [example notebook](python.ipynb)) diff --git a/docs/ignore/generate_dynamic_docs.py b/docs/ignore/generate_dynamic_docs.py index f06dd352d12..5a6a96c0355 100644 --- a/docs/ignore/generate_dynamic_docs.py +++ b/docs/ignore/generate_dynamic_docs.py @@ -2,7 +2,7 @@ import mkdocs_gen_files from etl.docs import render_dataset, render_indicator, render_origin, render_table -from etl.paths import LIB_DIR +from etl.paths import BASE_DIR, LIB_DIR header_metadata = """--- tags: @@ -58,29 +58,23 @@ ############################################################ # owid-catalog ############################################################ -docs_api = """# Public data API +# Load index.md and concatenate with catalog README.md +with open(BASE_DIR / "docs/api/index.md", "r") as f2: + docs_api = f2.readlines() -Our mission is to make research and data on the world's biggest problems accessible and understandable to the public. As part of this work, we provide an experimental API to the datasets. +with open(LIB_DIR / "catalog/README.md", "r") as f2: + docs_catalog = f2.readlines() -When using the API, you have access to the public catalog of data processed by our data team. The catalog indexes _tables_ of data, rather than datasets or individual indicators. To learn more, read about our [data model](../architecture/design/common-format.md). +docs_catalog = " ".join(docs_catalog) +docs_api = "".join(docs_api) -At the moment, we only support Python. +docs = """ +{docs_api} -!!! warning "Our API is in beta" +{docs_catalog} +""".format(docs_catalog=f" {docs_catalog}", docs_api=docs_api) - We currently only provide a python API. Our hope is to extend this to other languages in the future. Please [report any issue](https://github.com/owid/etl) that you may find. 
- -=== "Python" - - (see [example notebook](python.ipynb)) - -{docs_api_python} - -""" +# Dynamically create the API documentation with mkdocs_gen_files.open("api/index.md", "w") as f: - with open(LIB_DIR / "catalog/README.md", "r") as f2: - docs = f2.readlines() - docs = " ".join(docs) - docs = docs_api.format(docs_api_python=f" {docs}") print(docs, file=f)