Skip to content

Commit

Permalink
Add journal metadata table for recording impact factor.
Browse files Browse the repository at this point in the history
  • Loading branch information
yjcyxky committed Jul 10, 2024
1 parent 5aba7f4 commit efe64fd
Show file tree
Hide file tree
Showing 17 changed files with 22,559 additions and 496 deletions.
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,6 @@ assets/*
.VSCodeCounter
neo4j-data
neo4j-import
data/drugbank/*
data/aact_*/*
data/entity_attr/drugbank/*
data/entity_attr/aact_*/*
frontend
34 changes: 0 additions & 34 deletions data/README.md
Original file line number Diff line number Diff line change
@@ -1,34 +0,0 @@
## Requirements

- Python 3.11+
- json2parquet

```bash
# macOS
brew install domoritz/homebrew-tap/json2parquet

# Linux
cargo install json2parquet
```

- Required Python packages: `pip install click duckdb`

## Prepare additional data for each entity and relation

### Compound

Get additional data for each compound from [DrugBank](https://www.drugbank.ca/). You might need to request access to the DrugBank data. If you have access, download the DrugBank XML file and save it to the `data/drugbank` directory (the location the commands below read from). We assume the file is named `drugbank_5.1_2024-01-03.xml`.

```bash
# Convert the DrugBank XML file to TSV
python3 data/drugbank.py tojson --input-file data/drugbank/drugbank_5.1_2024-01-03.xml --output-dir data/drugbank --format tsv && zip data/drugbank/drugbank_5.1_2024-01-03.tsv.zip data/drugbank/drugbank_5.1_2024-01-03.tsv

# Convert the DrugBank XML file to JSON
python3 data/drugbank.py tojson --input-file data/drugbank/drugbank_5.1_2024-01-03.xml --output-dir data/drugbank

# Convert the DrugBank XML file to line-delimited JSON, then convert that file to Parquet
python3 data/drugbank.py tojson --input-file data/drugbank/drugbank_5.1_2024-01-03.xml --output-dir data/drugbank --format linejson
json2parquet data/drugbank/drugbank_5.1_2024-01-03.jsonl data/drugbank/drugbank_5.1_2024-01-03.parquet
```

### Gene
34 changes: 34 additions & 0 deletions data/entity_attr/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
## Requirements

- Python 3.11+
- json2parquet

```bash
# macOS
brew install domoritz/homebrew-tap/json2parquet

# Linux
cargo install json2parquet
```

- Required Python packages: `pip install click duckdb`

## Prepare additional data for each entity and relation

### Compound

Get additional data for each compound from [DrugBank](https://www.drugbank.ca/). You might need to request access to the DrugBank data. If you have access, download the DrugBank XML file and save it to the `data/entity_attr/drugbank` directory. We assume the file is named `drugbank_5.1_2024-01-03.xml` and that the commands below are run from the repository root.

```bash
# Convert the DrugBank XML file to TSV
python3 data/entity_attr/drugbank.py tojson --input-file data/entity_attr/drugbank/drugbank_5.1_2024-01-03.xml --output-dir data/entity_attr/drugbank --format tsv && zip data/entity_attr/drugbank/drugbank_5.1_2024-01-03.tsv.zip data/entity_attr/drugbank/drugbank_5.1_2024-01-03.tsv

# Convert the DrugBank XML file to JSON
python3 data/entity_attr/drugbank.py tojson --input-file data/entity_attr/drugbank/drugbank_5.1_2024-01-03.xml --output-dir data/entity_attr/drugbank

# Convert the DrugBank XML file to line-delimited JSON, then convert that file to Parquet
python3 data/entity_attr/drugbank.py tojson --input-file data/entity_attr/drugbank/drugbank_5.1_2024-01-03.xml --output-dir data/entity_attr/drugbank --format linejson
json2parquet data/entity_attr/drugbank/drugbank_5.1_2024-01-03.jsonl data/entity_attr/drugbank/drugbank_5.1_2024-01-03.parquet
```
```

### Gene
File renamed without changes.
21,801 changes: 21,801 additions & 0 deletions data/entity_attr/impact_factor_2024.tsv

Large diffs are not rendered by default.

File renamed without changes.
2 changes: 2 additions & 0 deletions migrations/20240709_add_journal_metadata_table.down.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
-- Down migration: drop the journal metadata table when rolling back the migration.
-- IF EXISTS lets the statement succeed even when the table is already absent.
DROP TABLE IF EXISTS biomedgps_journal_metadata;
18 changes: 18 additions & 0 deletions migrations/20240709_add_journal_metadata_table.up.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
-- The biomedgps_journal_metadata table stores metadata for journals, such as the journal name, its ISSNs, its impact factors and its JCR category ranking.

-- Uniqueness of journal_name, abbr_name, issn and eissn is declared once each,
-- through the named table constraints at the end of the definition, rather than
-- also repeating an anonymous inline UNIQUE on every column.
--
-- NOTE(review): issn and eissn are both NOT NULL and UNIQUE; confirm that every
-- row of the data to be imported carries both identifiers.
-- NOTE(review): category is limited to 32 characters; confirm that the longest
-- category name in the imported data fits.
CREATE TABLE IF NOT EXISTS biomedgps_journal_metadata (
    journal_name VARCHAR(255) NOT NULL,  -- The name of the journal
    abbr_name VARCHAR(255) NOT NULL,     -- The abbreviated name of the journal
    issn VARCHAR(32) NOT NULL,           -- The print ISSN of the journal
    eissn VARCHAR(32) NOT NULL,          -- The electronic ISSN of the journal
    impact_factor DECIMAL(6, 3),         -- The impact factor of the journal
    impact_factor_5_year DECIMAL(6, 3),  -- The 5-year impact factor of the journal
    category VARCHAR(32),                -- The category of the journal, such as Medicine, Biology, etc.
    jcr_quartile VARCHAR(8),             -- Journal Citation Reports (JCR) quartile, such as Q1, Q2, etc.
    rank INTEGER,                        -- The rank of the journal in the category
    total_num_of_journals INTEGER,       -- The total number of journals in the category
    CONSTRAINT biomedgps_journal_metadata_journal_name_uniq_key UNIQUE (journal_name),
    CONSTRAINT biomedgps_journal_metadata_abbr_name_uniq_key UNIQUE (abbr_name),
    CONSTRAINT biomedgps_journal_metadata_issn_uniq_key UNIQUE (issn),
    CONSTRAINT biomedgps_journal_metadata_eissn_uniq_key UNIQUE (eissn)
);
4 changes: 2 additions & 2 deletions src/api/route.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ use crate::model::core::{
Entity, Entity2D, EntityMetadata, KnowledgeCuration, RecordResponse, Relation, RelationCount,
RelationMetadata, Statistics, Subgraph,
};
use crate::model::entity_attr::EntityAttr;
use crate::model::entity_attr::{CompoundAttr, EntityAttrRecordResponse};
use crate::model::entity::compound::CompoundAttr;
use crate::model::entity_attr::{EntityAttr, EntityAttrRecordResponse};
use crate::model::graph::Graph;
use crate::model::init_db::get_kg_score_table_name;
use crate::model::kge::DEFAULT_MODEL_NAME;
Expand Down
13 changes: 10 additions & 3 deletions src/bin/biomedgps-cli.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
extern crate log;

use biomedgps::model::entity_attr::CompoundAttr;
use biomedgps::model::entity::compound::CompoundAttr;
use biomedgps::model::init_db::create_kg_score_table;
use biomedgps::model::kge::{init_kge_models, DEFAULT_MODEL_NAME};
use biomedgps::model::{
Expand All @@ -11,7 +11,8 @@ use biomedgps::model::{
util::read_annotation_file,
};
use biomedgps::{
build_index, connect_graph_db, import_data, import_kge, init_logger, run_migrations, change_emb_dimension
build_index, change_emb_dimension, connect_graph_db, import_data, import_kge, init_logger,
run_migrations,
};
use log::*;
use regex::Regex;
Expand Down Expand Up @@ -700,7 +701,13 @@ async fn main() {
if table_name == "biomedgps" && arguments.dimension != 400 {
if arguments.force {
warn!("The dimension of the embedding is not 400, but the table name is biomedgps. We will change the dimension of the embedding as you specified.");
match change_emb_dimension(&database_url, table_name.as_str(), arguments.dimension).await {
match change_emb_dimension(
&database_url,
table_name.as_str(),
arguments.dimension,
)
.await
{
Ok(_) => {
info!("Change the dimension of the embedding successfully.");
}
Expand Down
Loading

0 comments on commit efe64fd

Please sign in to comment.