Skip to content

Commit ccd893d

Browse files
authored
fix: fix RxNorm product to ingredient mappings (#72)
* chore: code format export sql files
* fix: RxNorm mappings via RxNav paths
* chore: proper capitalization in flake.nix
* feat: add cli, changelog
* chore: move HTML path table
* chore: delete fix-ingredients
* chore: bump package version
1 parent 444924f commit ccd893d

18 files changed

+2211
-425
lines changed

CHANGELOG.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Changelog
2+
3+
## [3.1.0] - 2025-05-08
4+
5+
### Added
6+
7+
- RxNorm product-to-ingredient paths that follow the paths defined by RxNav
8+
- Command line interface for generating the ZIP archive
9+
10+
### Fixed
11+
12+
- Erroneous product-to-ingredient mapping that led to products having thousands of ingredients

README.md

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -197,11 +197,10 @@ Simple answer?
197197

198198
### Releasing
199199

200-
Here's how the v3.0.0 archive was created.
200+
Here's how the v3.1.0 archive was created.
201201

202202
```bash
203-
cd database
204-
zip -r ../onsides-v3.0.0.zip annotations csv database_scripts schema
203+
build-zip --version v3.1.0
205204
```
206205

207206
---

flake.nix

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
test_tabula() {
4141
python -c "import tabula" >/dev/null 2>&1
4242
if [ $? -eq 0 ]; then
43-
echo "Onsides development environment activated!"
43+
echo "OnSIDES development environment activated!"
4444
else
4545
echo "There was an error installing tabula. Please check your installation."
4646
fi

pyproject.toml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "onsides"
3-
version = "3.0.0"
3+
version = "3.1.0"
44
description = "Database of drug adverse events from product labels"
55
readme = "README.md"
66
authors = [
@@ -12,6 +12,7 @@ requires-python = ">=3.12"
1212
dependencies = [
1313
"beautifulsoup4>=4.13.3",
1414
"connectorx>=0.4.2",
15+
"duckdb>=1.2.2",
1516
"fastexcel>=0.13.0",
1617
"httpx>=0.28.1",
1718
"lxml>=5.3.1",
@@ -41,3 +42,6 @@ build-backend = "hatchling.build"
4142
dev = [
4243
"pytest>=8.3.5",
4344
]
45+
46+
[project.scripts]
47+
build-zip = "onsides.cli:main"

snakemake/onsides/export/Snakefile

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import polars as pl
55
from rich.progress import track
66

77
from onsides import rxnorm
8+
from onsides.rxnorm_ingredients import generate_rxnorm_map
89

910
tables = [
1011
"product_adverse_effect",
@@ -178,16 +179,26 @@ rule combine_results_to_sqlite:
178179
input.us_sql,
179180
input.uk_sql,
180181
input.eu_sql,
182+
]
183+
for file in duckdb_scripts:
184+
print(f"Running: 'duckdb duck.db < {file}'")
185+
shell("duckdb duck.db < {file}")
186+
187+
generate_rxnorm_map("snakemake/onsides/export/rxnav-path-table.html")
188+
189+
vocab_scripts = [
181190
input.vocab_sql,
182191
input.filter_sql,
183192
]
184-
for file in duckdb_scripts:
193+
for file in vocab_scripts:
194+
print(f"Running: 'duckdb duck.db < {file}'")
185195
shell("duckdb duck.db < {file}")
186196

187197
sqlite_scripts = [
188198
input.threshold_sql,
189199
]
190200
for file in sqlite_scripts:
201+
print(f"Running: 'sqlite3 database/onsides.db < {file}'")
191202
shell("sqlite3 database/onsides.db < {file}")
192203

193204

snakemake/onsides/export/eu_mapping.sql

Lines changed: 62 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -1,43 +1,43 @@
1-
create table eu_preds as
2-
select
3-
text_id as effect_meddra_id,
4-
split_part(text_id, '.', 1) as source_product_id,
1+
CREATE TABLE eu_preds AS
2+
SELECT
3+
text_id AS effect_meddra_id,
4+
split_part(text_id, '.', 1) AS source_product_id,
55
term_id,
66
pred0,
77
pred1
8-
from
8+
FROM
99
'_onsides/combined/label_english_preds.parquet'
10-
where
10+
WHERE
1111
ends_with(text_id, 'EU');
1212

13-
create table eu_meta as
14-
select
15-
name as source_product_name,
16-
code as source_product_id,
17-
page_url as source_label_url
18-
from
13+
CREATE TABLE eu_meta AS
14+
SELECT
15+
name AS source_product_name,
16+
code AS source_product_id,
17+
page_url AS source_label_url
18+
FROM
1919
'_onsides/eu/label_text.parquet';
2020

21-
create table eu_rxnorm as
22-
select
23-
code as source_product_id,
24-
rxcui as rxnorm_product_id
25-
from
21+
CREATE TABLE eu_rxnorm AS
22+
SELECT
23+
code AS source_product_id,
24+
rxcui AS rxnorm_product_id
25+
FROM
2626
'_onsides/eu/labels_to_rxnorm.parquet';
2727

28-
create table eu_final as
29-
select
28+
CREATE TABLE eu_final AS
29+
SELECT
3030
source_product_name,
3131
source_product_id,
3232
source_label_url,
3333
term_id,
3434
rxnorm_product_id,
3535
pred0,
3636
pred1
37-
from
37+
FROM
3838
eu_preds
39-
inner join eu_meta using (source_product_id)
40-
inner join eu_rxnorm using (source_product_id);
39+
INNER JOIN eu_meta USING (source_product_id)
40+
INNER JOIN eu_rxnorm USING (source_product_id);
4141

4242
--------------------------------------------------------------------------------
4343
--------------------------------------------------------------------------------
@@ -49,57 +49,53 @@ LOAD sqlite;
4949
ATTACH IF NOT EXISTS 'database/onsides.db' AS db (TYPE sqlite);
5050

5151
-- Product label
52-
insert into
52+
INSERT INTO
5353
db.product_label (
5454
source,
5555
source_product_name,
5656
source_product_id,
5757
source_label_url
58+
) WITH eu_inner AS (
59+
SELECT
60+
DISTINCT source_product_name,
61+
source_product_id,
62+
source_label_url
63+
FROM
64+
eu_final
5865
)
59-
with
60-
eu_inner as (
61-
select distinct
62-
source_product_name,
63-
source_product_id,
64-
source_label_url
65-
from
66-
eu_final
67-
)
68-
select
69-
'EU' as source,
66+
SELECT
67+
'EU' AS source,
7068
source_product_name,
7169
source_product_id,
7270
source_label_url
73-
from
71+
FROM
7472
eu_inner;
7573

7674
-- Product to RxNorm
7775
INSERT INTO
78-
db.product_to_rxnorm (label_id, rxnorm_product_id)
79-
WITH
80-
new_labels AS (
81-
SELECT
82-
label_id,
83-
source_product_id
84-
FROM
85-
db.product_label
86-
WHERE
87-
source = 'EU'
88-
),
89-
joined_labels as (
90-
SELECT DISTINCT
91-
label_id,
92-
rxnorm_product_id
93-
FROM
94-
new_labels
95-
INNER JOIN eu_final USING (source_product_id)
96-
)
97-
select
76+
db.product_to_rxnorm (label_id, rxnorm_product_id) WITH new_labels AS (
77+
SELECT
78+
label_id,
79+
source_product_id
80+
FROM
81+
db.product_label
82+
WHERE
83+
source = 'EU'
84+
),
85+
joined_labels AS (
86+
SELECT
87+
DISTINCT label_id,
88+
rxnorm_product_id
89+
FROM
90+
new_labels
91+
INNER JOIN eu_final USING (source_product_id)
92+
)
93+
SELECT
9894
label_id,
9995
rxnorm_product_id
100-
from
96+
FROM
10197
joined_labels
102-
where
98+
WHERE
10399
rxnorm_product_id != '';
104100

105101
-- Adverse effect
@@ -111,20 +107,18 @@ INSERT INTO
111107
match_method,
112108
pred0,
113109
pred1
110+
) WITH new_labels AS (
111+
SELECT
112+
label_id,
113+
source_product_id
114+
FROM
115+
db.product_label
116+
WHERE
117+
source = 'EU'
114118
)
115-
WITH
116-
new_labels AS (
117-
SELECT
118-
label_id,
119-
source_product_id
120-
FROM
121-
db.product_label
122-
WHERE
123-
source = 'EU'
124-
)
125119
SELECT
126120
label_id AS product_label_id,
127-
'NA' as label_section,
121+
'NA' AS label_section,
128122
term_id AS effect_meddra_id,
129123
'PMB' AS match_method,
130124
pred0,

0 commit comments

Comments
 (0)