Skip to content

Commit

Permalink
Merge pull request #51 from USEPA/bib
Browse files Browse the repository at this point in the history
adds parsing of bibtex files to olca-schema `Source` objects
  • Loading branch information
bl-young authored Apr 19, 2024
2 parents fef05a7 + 532b61f commit 01d1682
Show file tree
Hide file tree
Showing 6 changed files with 132 additions and 5 deletions.
9 changes: 6 additions & 3 deletions .github/workflows/python-app.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,13 @@ jobs:
python -m pip install --upgrade pip setuptools wheel
pip install pytest pytest-cov flake8
# install package & dependencies
- name: Install package and dependencies
run: |
pip install .
if: ${{ matrix.py-version == '3.7' || matrix.py-version == '3.8' }}
run: pip install .

- name: Install package and dependencies
if: ${{ !(matrix.py-version == '3.7' || matrix.py-version == '3.8') }}
run: pip install .[bib]

# linting & pytest
- name: Lint with flake8
Expand Down
99 changes: 99 additions & 0 deletions esupy/bibtex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
# bibtex.py (esupy)
# !/usr/bin/env python3
# coding=utf-8

"""
Module to support generating sources within the olca_schema.
"""

from pathlib import Path
import logging as log

from esupy.util import make_uuid

def generate_sources(bib_path: Path,
                     bibids: dict
                     ) -> list:
    """
    Generates a list of olca_schema.Source based on requested bib_ids.

    The optional ``bibtexparser`` and ``olca_schema`` packages are imported
    lazily; if either one is unavailable a warning is logged and an empty
    list is returned.

    :param bib_path: Path object to a .bib file containing source information
    :param bibids: dictionary in the format of {bib_id: Name}, where bib_id
        is the citation key of an entry in the .bib file and Name is the name
        displayed in the openLCA dashboard,
        e.g. {'bare_traci_2011': 'TRACI 2.1'}
    :return: list of olca_schema.Source, one per requested bib_id that is
        present in the .bib file; bib_ids that are not found are skipped
        with a warning
    """
    try:
        import bibtexparser
        from bibtexparser.bparser import BibTexParser
    except ImportError:
        log.warning("Writing sources requires bibtexparser package")
        return []
    try:
        import olca_schema as o
    except ImportError:
        log.warning("Writing sources requires olca_schema package")
        return []

    def customizations(record):
        """Use some functions delivered by the library
        :param record: a record
        :returns: -- customized record
        """
        #record = bibtexparser.customization.author(record)
        # add_plaintext_fields supplies the `plain_*` keys that
        # parse_for_olca reads below.
        record = bibtexparser.customization.add_plaintext_fields(record)
        record = bibtexparser.customization.doi(record)

        return record

    parser = BibTexParser(common_strings=True)
    parser.ignore_nonstandard_types = False
    parser.homogenize_fields = True
    parser.customization = customizations

    def read_bib_file(path):
        """Parse the .bib file at *path* and return its entries keyed by
        citation key."""
        # Explicit encoding so parsing does not depend on the platform's
        # default locale encoding.
        with open(path, encoding='utf-8') as bibtex_file:
            bib_database = parser.parse_file(bibtex_file)

        return bib_database.entries_dict

    def parse_for_olca(bibids, d):
        """Build one olca_schema.Source per requested bib_id found in *d*.

        :param bibids: dictionary in the format of {bib_id: Name}
        :param d: parsed bib entries keyed by bib_id
        :return: list of olca_schema.Source
        """
        # Maps each Source field to the record key it is read from. A list
        # means the available record values are joined with ', '. A record
        # key that is absent (including the '' placeholder used for
        # 'textReference') yields an empty string.
        key_dict = {'description': ['plain_author',
                                    'plain_publisher',
                                    'plain_title',
                                    'plain_journal',
                                    'year'],
                    'textReference': '',
                    'year': 'plain_year',
                    'url': 'url',
                    }
        s = []
        for bibid, name in bibids.items():
            try:
                record = d[bibid]
            except KeyError:
                # Report through the module logger, like the import
                # failures above, instead of printing to stdout.
                log.warning('%s not found', bibid)
                continue
            source = {'name': name}
            for key, value in key_dict.items():
                if isinstance(value, list):
                    source[key] = ', '.join(
                        record[v] for v in value if v in record)
                else:
                    source[key] = record.get(value, '')
            # The id is derived from the description text.
            source['@id'] = make_uuid(source['description'])
            s.append(o.Source.from_dict(source))
        return s

    d = read_bib_file(bib_path)
    source_list = parse_for_olca(bibids, d)
    return source_list

if __name__ == "__main__":
    # Manual smoke run: convert the sample entry shipped with the tests.
    sample_bib = Path(__file__).parents[1] / 'tests' / 'test.bib'
    requested = {'bare_traci_2011': 'TRACI 2.1'}
    source_list = generate_sources(bib_path=sample_bib, bibids=requested)
2 changes: 1 addition & 1 deletion esupy/processed_data_mgmt.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ def parse_data_commons_index(df):
df['ext'] = df['file_name'].str.rsplit('.', n=1, expand=True)[1]
df['file'] = df['file_name'].str.rsplit('.', n=1, expand=True)[0]
df['git_hash'] = df['file'].str.rsplit('_', n=1, expand=True)[1]
df['git_hash'].fillna('', inplace=True)
df['git_hash'] = df['git_hash'].fillna('')
df.loc[df['git_hash'].map(len) != 7, 'git_hash'] = ''
try:
df['version'] = (df['file']
Expand Down
4 changes: 3 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
'boto3>=1.23.0',
],
extras_require={"urban_rural": ['geopandas>=0.13.2',
'shapely>=2.0.1']},
'shapely>=2.0.1'],
"bib": ['olca_schema>=0.0.11',
'bibtexparser>=1.2']},
url='http://github.com/usepa/esupy',
license='CC0',
author='Wesley Ingwersen',
Expand Down
10 changes: 10 additions & 0 deletions tests/test.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
@article{bare_traci_2011,
title = {{TRACI} 2.0: the tool for the reduction and assessment of chemical and other environmental impacts 2.0},
volume = {13},
pages = {687--696},
number = {5},
journal = {Clean Technologies and Environmental Policy},
author = {Bare, Jane},
doi = {10.1007/s10098-010-0338-9},
year = {2011}
}
13 changes: 13 additions & 0 deletions tests/test_esupy.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
"""Test functions"""

import pytest
import sys
from pathlib import Path

import esupy.processed_data_mgmt as es_dt
import esupy.bibtex as bibtex


def test_data_commons_access():
Expand All @@ -21,3 +26,11 @@ def test_data_commons_access():
df2 = es_dt.load_preprocessed_output(meta, path)

assert(df1 is not None and df2 is None)


@pytest.mark.skipif(sys.version_info < (3, 9), reason="bibliographies require python3.9 or higher")
def test_source_generation():
    """The sample .bib entry should be converted into exactly one source."""
    sample_bib = Path(__file__).parents[1] / 'tests' / 'test.bib'
    requested = {'bare_traci_2011': 'TRACI 2.1'}
    sources = bibtex.generate_sources(bib_path=sample_bib, bibids=requested)
    assert len(sources) == 1

0 comments on commit 01d1682

Please sign in to comment.