Skip to content

Commit

Permalink
Update static database action according to migration to Rust and LZ4
Browse files Browse the repository at this point in the history
  • Loading branch information
pverscha committed Mar 14, 2024
1 parent 9d3c735 commit a4177ba
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 11 deletions.
25 changes: 14 additions & 11 deletions .github/workflows/static_database.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,22 @@ jobs:
generate_static_database:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/setup-java@v1
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
- uses: Swatinem/rust-cache@v2
id: cache
with:
java-version: '11'
java-package: jdk
architecture: x64
shared-key: ${{ env.CACHE_KEY }}
- run: cd scripts/helper_scripts/unipept-database-rs && cargo fetch
if: ${{ !steps.cache.outputs.cache-hit }}
- run: ./scripts/build_binaries.sh
- name: Get current date
id: date
run: echo "::set-output name=date::$(date +'%Y-%m-%d')"
- name: Install required utilities
run: |
sudo apt-get update
sudo apt-get -y install git maven curl unzip gawk sqlite3 libsqlite3-dev pv nodejs wget
sudo apt-get -y install git curl unzip gawk sqlite3 libsqlite3-dev pv nodejs wget uuid-runtime pigz lz4 parallel
- name: Download Taxdmp file
shell: bash
run: wget https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdmp.zip
Expand All @@ -36,11 +39,11 @@ jobs:
sqlite3 output.db < workflows/static_database/structure.sql
# Read all generated data into this database
zcat output/ec_numbers.tsv.gz | sqlite3 -csv -separator ' ' output.db '.import /dev/stdin ec_numbers'
zcat output/go_terms.tsv.gz | sqlite3 -csv -separator ' ' output.db '.import /dev/stdin go_terms'
zcat output/interpro_entries.tsv.gz | sqlite3 -csv -separator ' ' output.db '.import /dev/stdin interpro_entries'
zcat output/taxons.tsv.gz | sqlite3 -csv -separator ' ' output.db '.import /dev/stdin taxons'
zcat output/lineages.tsv.gz | sqlite3 -csv -separator ' ' output.db '.import /dev/stdin lineages'
lz4cat output/ec_numbers.tsv.lz4 | sqlite3 -csv -separator ' ' output.db '.import /dev/stdin ec_numbers'
lz4cat output/go_terms.tsv.lz4 | sqlite3 -csv -separator ' ' output.db '.import /dev/stdin go_terms'
lz4cat output/interpro_entries.tsv.lz4 | sqlite3 -csv -separator ' ' output.db '.import /dev/stdin interpro_entries'
lz4cat output/taxons.tsv.lz4 | sqlite3 -csv -separator ' ' output.db '.import /dev/stdin taxons'
lz4cat output/lineages.tsv.lz4 | sqlite3 -csv -separator ' ' output.db '.import /dev/stdin lineages'
# Create virtual tables
sqlite3 output.db < workflows/static_database/init_virtual_tables.sql
Expand Down
3 changes: 3 additions & 0 deletions scripts/build_database.sh
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,10 @@ download_taxdmp() {
# Use the GitHub API to check whether our self-hosted version is available
LATEST_RELEASE_URL="https://api.github.com/repos/unipept/unipept-database/releases/latest"
TAXDMP_RELEASE_ASSET_RE="unipept/unipept-database/releases/download/[^/]+/ncbi-taxdmp.zip"
# Temporarily disable the errexit and pipefail checks (because egrep exits with code 1 if nothing is found).
set +eo pipefail
SELF_HOSTED_URL=$(curl -s "$LATEST_RELEASE_URL" | egrep -o "$TAXDMP_RELEASE_ASSET_RE")
set -eo pipefail

if [ "$SELF_HOSTED_URL" ]
then
Expand Down

0 comments on commit a4177ba

Please sign in to comment.