Static Database #86
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow header: display name and the events that start a run.
name: Database

on:
  # Allows the workflow to be started manually.
  workflow_dispatch:
  schedule:
    # At 00:00 on the first day of every month (GitHub evaluates cron in UTC).
    # `*` is a special character in YAML, so the expression has to be quoted.
    - cron: '0 0 1 * *'
jobs:
  # Generates the tsv.gz data files, loads them into a SQLite database and
  # publishes the zipped database plus the downloaded NCBI taxdmp archive as
  # assets of a new, date-stamped GitHub release.
  generate_static_database:
    runs-on: ubuntu-latest
    # Pushing the tag and creating the release need write access to the
    # repository contents; nothing else in this job uses the token.
    permissions:
      contents: write
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-java@v4
        with:
          # Since v2 of this action the distribution must be named explicitly;
          # `zulu` matches what v1 installed.
          distribution: zulu
          java-version: '11'
          java-package: jdk
          architecture: x64
      - name: Get current date
        id: date
        # Exposes today's date (YYYY-MM-DD) as `steps.date.outputs.date`.
        # Written to $GITHUB_OUTPUT because the `::set-output` workflow
        # command is deprecated.
        run: echo "date=$(date +'%Y-%m-%d')" >> "$GITHUB_OUTPUT"
      - name: Install required utilities
        run: |
          sudo apt-get update
          sudo apt-get -y install git maven curl unzip gawk sqlite3 libsqlite3-dev pv nodejs wget
      - name: Download Taxdmp file
        shell: bash
        run: wget https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdmp.zip
      - name: Generate tsv.gz files
        shell: bash
        run: |
          # Both UniProt sources live in the same directory; the script receives
          # them as one comma-separated argument.
          uniprot_base="https://ftp.expasy.org/databases/uniprot/current_release/knowledgebase/complete"
          ./scripts/build_database.sh static-database "swissprot,trembl" \
            "${uniprot_base}/uniprot_sprot.xml.gz,${uniprot_base}/uniprot_trembl.xml.gz" \
            "output"
      - name: Build SQLite database from generated files
        shell: bash
        run: |
          # Initialize the database
          sqlite3 output.db < workflows/static_database/structure.sql
          # Read all generated data into this database. The inputs are .tsv
          # files, so the column separator has to be a tab; $'\t' (bash ANSI-C
          # quoting) keeps it visible instead of relying on a literal tab
          # character surviving in this file.
          for table in ec_numbers go_terms interpro_entries taxons lineages; do
            zcat "output/${table}.tsv.gz" | sqlite3 -csv -separator $'\t' output.db ".import /dev/stdin ${table}"
          done
          # Create virtual tables
          sqlite3 output.db < workflows/static_database/init_virtual_tables.sql
          # Compress the database before uploading it to a Github release
          zip output.zip output.db
      - name: Create new tag
        uses: rickstaa/action-create-tag@v1
        id: "tag_create"
        with:
          tag: database-${{ steps.date.outputs.date }}
          message: "Static information database built on ${{ steps.date.outputs.date }}"
      # NOTE(review): actions/create-release and actions/upload-release-asset
      # are archived upstream. They are kept here because the upload steps rely
      # on the `upload_url` output of `create_release`; consider migrating all
      # three steps together.
      - name: Create Release
        id: create_release
        uses: actions/create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          # Must match the tag created by the previous step.
          tag_name: database-${{ steps.date.outputs.date }}
          release_name: Static database ${{ steps.date.outputs.date }}
          draft: false
          prerelease: false
      - name: Upload Static Database Release Asset
        id: upload-database-release-asset
        uses: actions/upload-release-asset@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          upload_url: ${{ steps.create_release.outputs.upload_url }}
          asset_path: ./output.zip
          asset_name: unipept-static-db-${{ steps.date.outputs.date }}.zip
          asset_content_type: application/zip
      - name: Upload NCBI Taxdmp Release Asset
        id: upload-taxdmp-release-asset
        uses: actions/upload-release-asset@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          upload_url: ${{ steps.create_release.outputs.upload_url }}
          asset_path: ./taxdmp.zip
          asset_name: ncbi-taxdmp.zip
          asset_content_type: application/zip