Skip to content

Commit

Permalink
Merge pull request #1413 from owid/add-uk-egg-statistics
Browse files Browse the repository at this point in the history
Add uk egg statistics
  • Loading branch information
pabloarosado authored Aug 2, 2023
2 parents 0996d96 + 44cfb54 commit 53c8908
Show file tree
Hide file tree
Showing 9 changed files with 236 additions and 2 deletions.
11 changes: 10 additions & 1 deletion dag/animal_welfare.yml
Original file line number Diff line number Diff line change
@@ -1,10 +1,19 @@
steps:
#
# Global hen inventory (Welfare Footprint Project, 2023).
# Global hen inventory (Welfare Footprint Project, 2022).
#
data://meadow/animal_welfare/2023-08-01/global_hen_inventory:
- snapshot://animal_welfare/2023-08-01/global_hen_inventory.csv
data://garden/animal_welfare/2023-08-01/global_hen_inventory:
- data://meadow/animal_welfare/2023-08-01/global_hen_inventory
data://grapher/animal_welfare/2023-08-01/global_hen_inventory:
- data://garden/animal_welfare/2023-08-01/global_hen_inventory
#
# UK egg statistics (Defra, 2023).
#
data://meadow/animal_welfare/2023-08-01/uk_egg_statistics:
- snapshot://animal_welfare/2023-08-01/uk_egg_statistics.ods
data://garden/animal_welfare/2023-08-01/uk_egg_statistics:
- data://meadow/animal_welfare/2023-08-01/uk_egg_statistics
data://grapher/animal_welfare/2023-08-01/uk_egg_statistics:
- data://garden/animal_welfare/2023-08-01/uk_egg_statistics
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
dataset:
title: "UK egg statistics (Defra, 2023)"

tables:
uk_egg_statistics:
variables:
number_of_eggs_from_barns:
title: Number of eggs from hens in barns
unit: eggs
short_unit: ''
number_of_eggs_from_enriched_cages:
title: Number of eggs from hens in (enriched) cages
unit: eggs
short_unit: ''
number_of_eggs_from_non_organic_free_range_farms:
title: Number of eggs from hens in non-organic, free-range farms
unit: eggs
short_unit: ''
number_of_eggs_from_organic_free_range_farms:
title: Number of eggs from hens in organic, free-range farms
unit: eggs
short_unit: ''
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"""Load a meadow dataset and create a garden dataset."""

from owid.catalog import Dataset

from etl.helpers import PathFinder, create_dataset

# Get paths and naming conventions for current step.
# Built from this file's location; run() uses it to load the step's dependencies.
paths = PathFinder(__file__)

# Meadow columns kept by this step, mapped to the names they take in the garden table.
# Every egg-count column shares the same prefix; only the housing system differs.
_EGG_COUNT_PREFIX = "number_of_eggs_from_"
COLUMNS = {
    "year": "year",
    "enriched": f"{_EGG_COUNT_PREFIX}enriched_cages",
    "barn": f"{_EGG_COUNT_PREFIX}barns",
    "free_range": f"{_EGG_COUNT_PREFIX}non_organic_free_range_farms",
    "organic": f"{_EGG_COUNT_PREFIX}organic_free_range_farms",
}


def run(dest_dir: str) -> None:
    """Build the garden dataset of UK egg statistics from the meadow dataset.

    Keeps the columns listed in ``COLUMNS`` under their garden names, converts
    the egg counts from million dozens to individual eggs, adds a country
    column, and saves the resulting dataset in ``dest_dir``.
    """
    #
    # Load inputs.
    #
    # Fetch the meadow dataset and read its main table with a flat index.
    ds_meadow: Dataset = paths.load_dependency("uk_egg_statistics")
    tb = ds_meadow["uk_egg_statistics"].reset_index()

    #
    # Process data.
    #
    # Keep only the columns of interest, then give them their garden names.
    tb = tb[list(COLUMNS)]
    tb = tb.rename(columns=COLUMNS, errors="raise")

    # Every column other than the year holds million dozens of eggs;
    # scale by 12e6 (a dozen times a million) to express them in eggs.
    egg_columns = [column for column in tb.columns if column != "year"]
    for egg_column in egg_columns:
        tb[egg_column] *= 12e6

    # All figures refer to a single country.
    tb["country"] = "United Kingdom"

    # Index by country and year (failing on duplicates), then sort rows and columns.
    tb = tb.set_index(["country", "year"], verify_integrity=True)
    tb = tb.sort_index()
    tb = tb.sort_index(axis=1)

    #
    # Save outputs.
    #
    # The garden dataset inherits its default metadata from the meadow dataset.
    ds_garden = create_dataset(dest_dir, tables=[tb], default_metadata=ds_meadow.metadata)

    # Write the new garden dataset.
    ds_garden.save()
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""Load a garden dataset and create a grapher dataset."""

from owid.catalog import Dataset

from etl.helpers import PathFinder, create_dataset, grapher_checks

# Get paths and naming conventions for current step.
# Built from this file's location; run() uses it to load the step's dependencies.
paths = PathFinder(__file__)


def run(dest_dir: str) -> None:
    """Create the grapher dataset of UK egg statistics from the garden dataset.

    The garden table is passed on unchanged; the new dataset is checked with
    ``grapher_checks`` and then saved in ``dest_dir``.
    """
    #
    # Load inputs.
    #
    # Fetch the garden dataset and pick its main table.
    garden_dataset: Dataset = paths.load_dependency("uk_egg_statistics")
    table = garden_dataset["uk_egg_statistics"]

    #
    # Save outputs.
    #
    # The grapher dataset inherits its default metadata from the garden dataset.
    grapher_dataset = create_dataset(
        dest_dir,
        tables=[table],
        default_metadata=garden_dataset.metadata,
    )

    #
    # Checks.
    #
    grapher_checks(grapher_dataset)

    # Write the new grapher dataset.
    grapher_dataset.save()
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"""Load a snapshot and create a meadow dataset."""

import owid.catalog.processing as pr

from etl.helpers import PathFinder, create_dataset
from etl.snapshot import Snapshot

# Get paths and naming conventions for current step.
# Built from this file's location; run() uses it to load the step's dependencies.
paths = PathFinder(__file__)

# Columns to select from the sheet, mapped to their snake_case names
# (lower-cased, with spaces replaced by underscores).
COLUMNS = {
    header: header.lower().replace(" ", "_")
    for header in ("Year", "Enriched", "Barn", "Free Range", "Organic")
}


def run(dest_dir: str) -> None:
    """Read the UK egg statistics snapshot and create the meadow dataset.

    Loads the "Packers_Annual" sheet, keeps the columns listed in ``COLUMNS``
    under their new names, drops the footnote rows, indexes the table by year,
    and saves the resulting dataset in ``dest_dir``.
    """
    #
    # Load inputs.
    #
    # Retrieve snapshot.
    snap: Snapshot = paths.load_dependency("uk_egg_statistics.ods")

    # Read the annual sheet, skipping the two rows above the header.
    tb = pr.read_excel(
        snap.path,
        sheet_name="Packers_Annual",
        skiprows=2,
        metadata=snap.to_table_metadata(),
    )

    #
    # Process data.
    #
    # Keep only the columns of interest, then rename them.
    tb = tb[list(COLUMNS)]
    tb = tb.rename(columns=COLUMNS, errors="raise")

    # Remove spurious rows at the bottom of the file, containing footnotes.
    # A row is kept if its year starts with four digits, or if the string match
    # yields a missing value (na=True).
    # NOTE(review): presumably numeric (non-string) years fall in the latter case,
    # which is why na=True is needed; confirm against the actual sheet.
    is_data_row = tb["year"].str.match(r"\d{4}", na=True)
    tb = tb[is_data_row].reset_index(drop=True)

    # Index by year (failing on duplicates), then sort rows and columns.
    tb = tb.set_index(["year"], verify_integrity=True)
    tb = tb.sort_index()
    tb = tb.sort_index(axis=1)

    #
    # Save outputs.
    #
    # The meadow dataset inherits its default metadata from the snapshot.
    ds_meadow = create_dataset(dest_dir, tables=[tb], default_metadata=snap.metadata)

    # Write the new meadow dataset.
    ds_meadow.save()
15 changes: 14 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ fsspec = "2022.11.0"
openai = "^0.27.7"
pdfplumber = "^0.9.0"
wbgapi = "^1.0.12"
odfpy = "^1.4.1"

[tool.poetry.group.dev.dependencies]
pytest = ">=7.1.2"
Expand Down
24 changes: 24 additions & 0 deletions snapshots/animal_welfare/2023-08-01/uk_egg_statistics.ods.dvc
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
meta:
name: UK egg statistics
publication_year: 2023

publication_date: '2023-07-27'
source_name: Department for Environment, Food & Rural Affairs of the United Kingdom
source_published_by: Quarterly UK statistics on Egg Packing Station Throughput and
Prices, by the Department for Environment, Food & Rural Affairs of the United
Kingdom
url: https://www.gov.uk/government/statistics/egg-statistics
source_data_url:
https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/1173898/egg-packers-27july23.ods
# NOTE(review): this value is identical to source_data_url (the .ods data file);
# presumably it should point to the licence page instead. Confirm.
license_url:
https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/1173898/egg-packers-27july23.ods
license_name: Public domain
date_accessed: 2023-08-01
is_public: true
description: |
DEFRA runs a quarterly survey of registered UK egg packing stations. It is a voluntary sample survey of 27 respondents that collects information on throughput by production type and prices of graded eggs and sales of ungraded eggs. The response rate is typically 100 per cent and the survey accounts for 75 per cent of eggs packed in the UK. The survey figures are raised up to give UK estimates using information on the number of commercial laying hens, average egg yields, average mortality rates, the proportion of UK eggs that go through packing stations. Throughput by egg type for packing stations not surveyed is calculated using data provided by packing stations responding to the survey. The raised figures are published in this statistics notice and the associated datasets. The figures in this notice therefore represent all Class A eggs passed over a grader in the UK, including seconds.
wdir: ../../../data/snapshots/animal_welfare/2023-08-01
outs:
- md5: fa90508145ab75c9fddb46c8d6a560fe
size: 33319
path: uk_egg_statistics.ods
32 changes: 32 additions & 0 deletions snapshots/animal_welfare/2023-08-01/uk_egg_statistics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""Script to create a snapshot of dataset 'UK egg statistics'."""

from pathlib import Path

import click

from etl.snapshot import Snapshot

# Version for current snapshot dataset, taken from the name of the folder
# that contains this script.
SNAPSHOT_VERSION = Path(__file__).parent.name


# No docstring on purpose: click would show it as the command's help text.
@click.command()
@click.option("--upload/--skip-upload", default=True, type=bool, help="Upload dataset to Snapshot")
def main(upload: bool) -> None:
    # Identify the snapshot by namespace, version and file name.
    snapshot_uri = f"animal_welfare/{SNAPSHOT_VERSION}/uk_egg_statistics.ods"
    snapshot = Snapshot(snapshot_uri)

    # Fetch the data file from the source.
    snapshot.download_from_source()

    # Register the file in DVC; it is uploaded unless --skip-upload was given.
    snapshot.dvc_add(upload=upload)


if __name__ == "__main__":
    main()

0 comments on commit 53c8908

Please sign in to comment.