From 2187ff55e485e5035febc1a68909eb33f3fa8fb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Al=C3=A1n=20F=2E=20Mu=C3=B1oz?= Date: Fri, 2 Aug 2024 16:42:11 -0400 Subject: [PATCH] Adds automatic versioning of manifest Uploads manifest to Zenodo when manifests/profile_index.csv is committed. --- .github/workflows/nix-github-actions.yml | 31 ++ README.md | 16 +- flake.lock | 576 +++++++++++++++++++++++ flake.nix | 35 ++ manifests/profile_index.csv | 7 + manifests/src/README.md | 23 + manifests/src/update_etags.sh | 12 + manifests/src/upload_index.sh | 99 ++++ profile_index.csv | 7 - 9 files changed, 791 insertions(+), 15 deletions(-) create mode 100644 .github/workflows/nix-github-actions.yml create mode 100644 flake.lock create mode 100644 flake.nix create mode 100644 manifests/profile_index.csv create mode 100644 manifests/src/README.md create mode 100644 manifests/src/update_etags.sh create mode 100644 manifests/src/upload_index.sh delete mode 100644 profile_index.csv diff --git a/.github/workflows/nix-github-actions.yml b/.github/workflows/nix-github-actions.yml new file mode 100644 index 0000000..f862e8a --- /dev/null +++ b/.github/workflows/nix-github-actions.yml @@ -0,0 +1,31 @@ +name: Publish to Zenodo + +on: # Runs on every push to main/master; upload_index.sh exits early when the manifest is unchanged + push: + branches: + - main + - master + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - name: Checkout Code + uses: actions/checkout@v4 # v3 runs on the deprecated Node 16 runtime + + - name: Install Nix + uses: DeterminateSystems/nix-installer-action@main + with: + logger: pretty + log-directives: nix_installer=trace + backtrace: full + + - name: Nix cache + uses: DeterminateSystems/magic-nix-cache-action@main + + - name: Run command in flake environment + run: | + nix develop . 
--accept-flake-config --impure --command bash manifests/src/upload_index.sh + env: + ZENODO_TOKEN: ${{ secrets.ZENODO_TOKEN }} diff --git a/README.md b/README.md index a0fb9ac..cf72e4e 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ Currently, this collection comprises 4 datasets: - Most data components (images, raw CellProfiler output, single-cell profiles, aggregated CellProfiler profiles) from 12 sources for the principal dataset. Each source corresponds to a unique data generating center (except `source_7` and `source_13`, which were from the same center). - First draft of [metadata](metadata/README.md) files. - A [notebook](https://github.com/jump-cellpainting/datasets/blob/update-readme/sample_notebook.ipynb) to load and inspect the data currently available in the principal dataset. -- A [tutorial](https://broadinstitute.github.io/2023_12_JUMP_data_only_vignettes/howto/tutorial_basic.html) to load the different subsets of data in the principal dataset, each available as a single dataframe. The URLs to the subsets are [here](https://github.com/jump-cellpainting/datasets/blob/main/profiles_index.csv). Snakemake workflows for producing these assembled profiles are available [here](https://github.com/broadinstitute/jump-profiling-recipe/releases/tag/v0.1.0). +- A [tutorial](https://broadinstitute.github.io/2023_12_JUMP_data_only_vignettes/howto/tutorial_basic.html) to load the different subsets of data in the principal dataset, each available as a single dataframe. The URLs to the subsets are [here](https://github.com/jump-cellpainting/datasets/blob/main/manifests/profile_index.csv) and indexed [here](https://zenodo.org/records/13146273/latest) on Zenodo; [ETags](https://docs.aws.amazon.com/AmazonS3/latest/API/API_Object.html) are included to enable integrity checks. Snakemake workflows for producing these assembled profiles are available [here](https://github.com/broadinstitute/jump-profiling-recipe/releases/tag/v0.1.0). 
**Please note: At present in the principal dataset (`cpg0016`), some compounds will be missing replicates, and a full QC of the dataset is pending. We don’t recommend performing any analysis with the principal dataset the full QC of the dataset is complete. The other datasets are complete.** @@ -47,7 +47,7 @@ To get set up to run the notebook, first install the python dependencies and act See the typical [folder structure](https://github.com/broadinstitute/cellpainting-gallery/blob/main/folder_structure.md) for datasets in the Cell Painting Gallery. Please [note](README.md#whats-available-now) that not all components are currently available. -This new resource https://broad.io/jump will include vignettes demonstrating how to work with JUMP data. Currently, it contains one [tutorial](https://broadinstitute.github.io/2023_12_JUMP_data_only_vignettes/howto/tutorial_basic.html) which demonstrates how to load the different subsets of data within `cpg0016`. +This new resource <https://broad.io/jump> will include vignettes demonstrating how to work with JUMP data. Currently, it contains one [tutorial](https://broadinstitute.github.io/2023_12_JUMP_data_only_vignettes/howto/tutorial_basic.html) which demonstrates how to load the different subsets of data within `cpg0016`. ## Citation/license @@ -57,18 +57,18 @@ All the data is released with CC0 1.0 Universal (CC0 1.0). Still, professional ethics require that you cite the associated publication. 
Please use the following format to cite this resource as a whole: -_We used the JUMP Cell Painting datasets (Chandrasekaran et al., 2023), available from the Cell Painting Gallery on the Registry of Open Data on AWS ([https://registry.opendata.aws/cellpainting-gallery/](https://registry.opendata.aws/cellpainting-gallery/))._ - -_Chandrasekaran et al., 2023: doi:10.1101/2023.03.23.534023_ +> _We used the JUMP Cell Painting datasets (Chandrasekaran et al., 2023), available from the Cell Painting Gallery on the Registry of Open Data on AWS ([https://registry.opendata.aws/cellpainting-gallery/](https://registry.opendata.aws/cellpainting-gallery/))._ +> +> _Chandrasekaran et al., 2023: doi:10.1101/2023.03.23.534023_ ### Citing individual JUMP datasets To cite individual JUMP Cell Painting datasets, please follow the guidelines in the Cell Painting Gallery citation [guide](https://github.com/broadinstitute/cellpainting-gallery/#citationlicense). Examples are as follows: -_We used the dataset cpg0001 (Cimini et al., 2022), available from the Cell Painting Gallery on the Registry of Open Data on AWS (<https://registry.opendata.aws/cellpainting-gallery/>)._ - -_We used the dataset cpg0000 (Chandrasekaran et al., 2022), available from the Cell Painting Gallery on the Registry of Open Data on AWS (<https://registry.opendata.aws/cellpainting-gallery/>)._ +> _We used the dataset cpg0001 (Cimini et al., 2022), available from the Cell Painting Gallery on the Registry of Open Data on AWS (<https://registry.opendata.aws/cellpainting-gallery/>)._ +> +> _We used the dataset cpg0000 (Chandrasekaran et al., 2022), available from the Cell Painting Gallery on the Registry of Open Data on AWS (<https://registry.opendata.aws/cellpainting-gallery/>)._ ## Gratitude diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..d6cb668 --- /dev/null +++ b/flake.lock @@ -0,0 +1,576 @@ +{ + "nodes": { + "cachix": { + "inputs": { + "devenv": "devenv_2", + "flake-compat": [ + "devenv", + "flake-compat" + ], + "nixpkgs": [ + "devenv", + "nixpkgs" + ], + "pre-commit-hooks": [ + "devenv", + "pre-commit-hooks" + ] + }, + "locked": { + "lastModified": 1712055811, + "narHash": 
"sha256-7FcfMm5A/f02yyzuavJe06zLa9hcMHsagE28ADcmQvk=", + "owner": "cachix", + "repo": "cachix", + "rev": "02e38da89851ec7fec3356a5c04bc8349cae0e30", + "type": "github" + }, + "original": { + "owner": "cachix", + "repo": "cachix", + "type": "github" + } + }, + "devenv": { + "inputs": { + "cachix": "cachix", + "flake-compat": "flake-compat_2", + "nix": "nix_2", + "nixpkgs": "nixpkgs_2", + "pre-commit-hooks": "pre-commit-hooks" + }, + "locked": { + "lastModified": 1721817837, + "narHash": "sha256-vZYHahW5w9nMbDV0YFC+HE8bwjkDjJ2kauDQWKjRGtY=", + "owner": "cachix", + "repo": "devenv", + "rev": "44bfc26843694ab17ebae1d4922065e48d93f501", + "type": "github" + }, + "original": { + "owner": "cachix", + "repo": "devenv", + "type": "github" + } + }, + "devenv_2": { + "inputs": { + "flake-compat": [ + "devenv", + "cachix", + "flake-compat" + ], + "nix": "nix", + "nixpkgs": "nixpkgs", + "poetry2nix": "poetry2nix", + "pre-commit-hooks": [ + "devenv", + "cachix", + "pre-commit-hooks" + ] + }, + "locked": { + "lastModified": 1708704632, + "narHash": "sha256-w+dOIW60FKMaHI1q5714CSibk99JfYxm0CzTinYWr+Q=", + "owner": "cachix", + "repo": "devenv", + "rev": "2ee4450b0f4b95a1b90f2eb5ffea98b90e48c196", + "type": "github" + }, + "original": { + "owner": "cachix", + "ref": "python-rewrite", + "repo": "devenv", + "type": "github" + } + }, + "dream2nix": { + "inputs": { + "nixpkgs": "nixpkgs_3", + "purescript-overlay": "purescript-overlay", + "pyproject-nix": "pyproject-nix" + }, + "locked": { + "lastModified": 1722011555, + "narHash": "sha256-4GEt1/zxLZ6PKy6S3iYkNrOdinfgAXsCRI6Z1Kwb3Zg=", + "owner": "nix-community", + "repo": "dream2nix", + "rev": "f7a39eac6c5a2b652358385e377e8494ce2e14eb", + "type": "github" + }, + "original": { + "owner": "nix-community", + "repo": "dream2nix", + "type": "github" + } + }, + "flake-compat": { + "flake": false, + "locked": { + "lastModified": 1673956053, + "narHash": "sha256-4gtG9iQuiKITOjNQQeQIpoIB6b16fm+504Ch3sNKLd8=", + "owner": "edolstra", + "repo": 
"flake-compat", + "rev": "35bb57c0c8d8b62bbfd284272c928ceb64ddbde9", + "type": "github" + }, + "original": { + "owner": "edolstra", + "repo": "flake-compat", + "type": "github" + } + }, + "flake-compat_2": { + "flake": false, + "locked": { + "lastModified": 1696426674, + "narHash": "sha256-kvjfFW7WAETZlt09AgDn1MrtKzP7t90Vf7vypd3OL1U=", + "owner": "edolstra", + "repo": "flake-compat", + "rev": "0f9255e01c2351cc7d116c072cb317785dd33b33", + "type": "github" + }, + "original": { + "owner": "edolstra", + "repo": "flake-compat", + "type": "github" + } + }, + "flake-utils": { + "inputs": { + "systems": "systems" + }, + "locked": { + "lastModified": 1689068808, + "narHash": "sha256-6ixXo3wt24N/melDWjq70UuHQLxGV8jZvooRanIHXw0=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "919d646de7be200f3bf08cb76ae1f09402b6f9b4", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "flake-utils_2": { + "inputs": { + "systems": "systems_2" + }, + "locked": { + "lastModified": 1710146030, + "narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "flake-utils_3": { + "inputs": { + "systems": "systems_3" + }, + "locked": { + "lastModified": 1710146030, + "narHash": "sha256-SZ5L6eA7HJ/nmkzGG7/ISclqe6oZdOZTNoesiInkXPQ=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "b1d9ab70662946ef0850d488da1c9019f3a9752a", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "gitignore": { + "inputs": { + "nixpkgs": [ + "devenv", + "pre-commit-hooks", + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1709087332, + "narHash": "sha256-HG2cCnktfHsKV0s4XW83gU3F57gaTljL9KNSuG6bnQs=", + "owner": "hercules-ci", + "repo": "gitignore.nix", + 
"rev": "637db329424fd7e46cf4185293b9cc8c88c95394", + "type": "github" + }, + "original": { + "owner": "hercules-ci", + "repo": "gitignore.nix", + "type": "github" + } + }, + "nix": { + "inputs": { + "flake-compat": "flake-compat", + "nixpkgs": [ + "devenv", + "cachix", + "devenv", + "nixpkgs" + ], + "nixpkgs-regression": "nixpkgs-regression" + }, + "locked": { + "lastModified": 1712911606, + "narHash": "sha256-BGvBhepCufsjcUkXnEEXhEVjwdJAwPglCC2+bInc794=", + "owner": "domenkozar", + "repo": "nix", + "rev": "b24a9318ea3f3600c1e24b4a00691ee912d4de12", + "type": "github" + }, + "original": { + "owner": "domenkozar", + "ref": "devenv-2.21", + "repo": "nix", + "type": "github" + } + }, + "nix-github-actions": { + "inputs": { + "nixpkgs": [ + "devenv", + "cachix", + "devenv", + "poetry2nix", + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1688870561, + "narHash": "sha256-4UYkifnPEw1nAzqqPOTL2MvWtm3sNGw1UTYTalkTcGY=", + "owner": "nix-community", + "repo": "nix-github-actions", + "rev": "165b1650b753316aa7f1787f3005a8d2da0f5301", + "type": "github" + }, + "original": { + "owner": "nix-community", + "repo": "nix-github-actions", + "type": "github" + } + }, + "nix_2": { + "inputs": { + "flake-compat": [ + "devenv", + "flake-compat" + ], + "nixpkgs": [ + "devenv", + "nixpkgs" + ], + "nixpkgs-regression": "nixpkgs-regression_2" + }, + "locked": { + "lastModified": 1712911606, + "narHash": "sha256-BGvBhepCufsjcUkXnEEXhEVjwdJAwPglCC2+bInc794=", + "owner": "domenkozar", + "repo": "nix", + "rev": "b24a9318ea3f3600c1e24b4a00691ee912d4de12", + "type": "github" + }, + "original": { + "owner": "domenkozar", + "ref": "devenv-2.21", + "repo": "nix", + "type": "github" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1692808169, + "narHash": "sha256-x9Opq06rIiwdwGeK2Ykj69dNc2IvUH1fY55Wm7atwrE=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "9201b5ff357e781bf014d0330d18555695df7ba8", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixpkgs-unstable", 
+ "repo": "nixpkgs", + "type": "github" + } + }, + "nixpkgs-regression": { + "locked": { + "lastModified": 1643052045, + "narHash": "sha256-uGJ0VXIhWKGXxkeNnq4TvV3CIOkUJ3PAoLZ3HMzNVMw=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "215d4d0fd80ca5163643b03a33fde804a29cc1e2", + "type": "github" + }, + "original": { + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "215d4d0fd80ca5163643b03a33fde804a29cc1e2", + "type": "github" + } + }, + "nixpkgs-regression_2": { + "locked": { + "lastModified": 1643052045, + "narHash": "sha256-uGJ0VXIhWKGXxkeNnq4TvV3CIOkUJ3PAoLZ3HMzNVMw=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "215d4d0fd80ca5163643b03a33fde804a29cc1e2", + "type": "github" + }, + "original": { + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "215d4d0fd80ca5163643b03a33fde804a29cc1e2", + "type": "github" + } + }, + "nixpkgs-stable": { + "locked": { + "lastModified": 1710695816, + "narHash": "sha256-3Eh7fhEID17pv9ZxrPwCLfqXnYP006RKzSs0JptsN84=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "614b4613980a522ba49f0d194531beddbb7220d3", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-23.11", + "repo": "nixpkgs", + "type": "github" + } + }, + "nixpkgs_2": { + "locked": { + "lastModified": 1713361204, + "narHash": "sha256-TA6EDunWTkc5FvDCqU3W2T3SFn0gRZqh6D/hJnM02MM=", + "owner": "cachix", + "repo": "devenv-nixpkgs", + "rev": "285676e87ad9f0ca23d8714a6ab61e7e027020c6", + "type": "github" + }, + "original": { + "owner": "cachix", + "ref": "rolling", + "repo": "devenv-nixpkgs", + "type": "github" + } + }, + "nixpkgs_3": { + "locked": { + "lastModified": 1720181791, + "narHash": "sha256-i4vJL12/AdyuQuviMMd1Hk2tsGt02hDNhA0Zj1m16N8=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "4284c2b73c8bce4b46a6adf23e16d9e2ec8da4bb", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixpkgs-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "poetry2nix": { + "inputs": { + "flake-utils": "flake-utils", + 
"nix-github-actions": "nix-github-actions", + "nixpkgs": [ + "devenv", + "cachix", + "devenv", + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1692876271, + "narHash": "sha256-IXfZEkI0Mal5y1jr6IRWMqK8GW2/f28xJenZIPQqkY0=", + "owner": "nix-community", + "repo": "poetry2nix", + "rev": "d5006be9c2c2417dafb2e2e5034d83fabd207ee3", + "type": "github" + }, + "original": { + "owner": "nix-community", + "repo": "poetry2nix", + "type": "github" + } + }, + "pre-commit-hooks": { + "inputs": { + "flake-compat": [ + "devenv", + "flake-compat" + ], + "flake-utils": "flake-utils_2", + "gitignore": "gitignore", + "nixpkgs": [ + "devenv", + "nixpkgs" + ], + "nixpkgs-stable": "nixpkgs-stable" + }, + "locked": { + "lastModified": 1713775815, + "narHash": "sha256-Wu9cdYTnGQQwtT20QQMg7jzkANKQjwBD9iccfGKkfls=", + "owner": "cachix", + "repo": "pre-commit-hooks.nix", + "rev": "2ac4dcbf55ed43f3be0bae15e181f08a57af24a4", + "type": "github" + }, + "original": { + "owner": "cachix", + "repo": "pre-commit-hooks.nix", + "type": "github" + } + }, + "purescript-overlay": { + "inputs": { + "nixpkgs": [ + "dream2nix", + "nixpkgs" + ], + "slimlock": "slimlock" + }, + "locked": { + "lastModified": 1696022621, + "narHash": "sha256-eMjFmsj2G1E0Q5XiibUNgFjTiSz0GxIeSSzzVdoN730=", + "owner": "thomashoneyman", + "repo": "purescript-overlay", + "rev": "047c7933abd6da8aa239904422e22d190ce55ead", + "type": "github" + }, + "original": { + "owner": "thomashoneyman", + "repo": "purescript-overlay", + "type": "github" + } + }, + "pyproject-nix": { + "flake": false, + "locked": { + "lastModified": 1702448246, + "narHash": "sha256-hFg5s/hoJFv7tDpiGvEvXP0UfFvFEDgTdyHIjDVHu1I=", + "owner": "davhau", + "repo": "pyproject.nix", + "rev": "5a06a2697b228c04dd2f35659b4b659ca74f7aeb", + "type": "github" + }, + "original": { + "owner": "davhau", + "ref": "dream2nix", + "repo": "pyproject.nix", + "type": "github" + } + }, + "root": { + "inputs": { + "devenv": "devenv", + "dream2nix": "dream2nix", + "flake-utils": 
"flake-utils_3", + "nixpkgs": [ + "dream2nix", + "nixpkgs" + ], + "systems": "systems_4" + } + }, + "slimlock": { + "inputs": { + "nixpkgs": [ + "dream2nix", + "purescript-overlay", + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1688610262, + "narHash": "sha256-Wg0ViDotFWGWqKIQzyYCgayeH8s4U1OZcTiWTQYdAp4=", + "owner": "thomashoneyman", + "repo": "slimlock", + "rev": "b5c6cdcaf636ebbebd0a1f32520929394493f1a6", + "type": "github" + }, + "original": { + "owner": "thomashoneyman", + "repo": "slimlock", + "type": "github" + } + }, + "systems": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + }, + "systems_2": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + }, + "systems_3": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + }, + "systems_4": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file 
mode 100644 index 0000000..5cfcad0 --- /dev/null +++ b/flake.nix @@ -0,0 +1,35 @@ +# Development shell with the tools needed to upload profile_index.csv to Zenodo. +{ + inputs = { + dream2nix.url = "github:nix-community/dream2nix"; + nixpkgs.follows = "dream2nix/nixpkgs"; + flake-utils.url = "github:numtide/flake-utils"; + systems.url = "github:nix-systems/default"; + devenv.url = "github:cachix/devenv"; + }; + + outputs = { self, nixpkgs, devenv, systems, dream2nix, ... } @ inputs: + inputs.flake-utils.lib.eachDefaultSystem (system: + let + # Package set for the current system, taken from the nixpkgs that dream2nix pins. + pkgs = import nixpkgs { + inherit system; + config.allowUnfree = true; + }; + + in { + devShells = { + # Shell entered by `nix develop`. + default = pkgs.mkShell { + packages = with pkgs; [ + coreutils + jq + curl + gawk + moreutils + ]; + }; + }; + } + ); +} diff --git a/manifests/profile_index.csv b/manifests/profile_index.csv new file mode 100644 index 0000000..2e2d44f --- /dev/null +++ b/manifests/profile_index.csv @@ -0,0 +1,7 @@ +"subset","url","etag" +"orf","https://cellpainting-gallery.s3.amazonaws.com/cpg0016-jump-assembled/source_all/workspace/profiles/jump-profiling-recipe_2024_a917fa7/ORF/profiles_wellpos_cc_var_mad_outlier_featselect_sphering_harmony/profiles_wellpos_cc_var_mad_outlier_featselect_sphering_harmony.parquet","c05a241135dcedda4e9cc639480b3f8e-44" +"crispr","https://cellpainting-gallery.s3.amazonaws.com/cpg0016-jump-assembled/source_all/workspace/profiles/jump-profiling-recipe_2024_a917fa7/CRISPR/profiles_wellpos_cc_var_mad_outlier_featselect_sphering_harmony_PCA_corrected/profiles_wellpos_cc_var_mad_outlier_featselect_sphering_harmony_PCA_corrected.parquet","4c59782c0dd5244f67d14323e8325828-10" +"compound","https://cellpainting-gallery.s3.amazonaws.com/cpg0016-jump-assembled/source_all/workspace/profiles/jump-profiling-recipe_2024_a917fa7/COMPOUND/profiles_var_mad_int_featselect_harmony/profiles_var_mad_int_featselect_harmony.parquet","1368a48ddbd4c44b1bfbc084591aaf10-338" 
+"orf_interpretable","https://cellpainting-gallery.s3.amazonaws.com/cpg0016-jump-assembled/source_all/workspace/profiles/jump-profiling-recipe_2024_a917fa7/ORF/profiles_wellpos_cc_var_mad_outlier_featselect_sphering_harmony/profiles_wellpos_cc_var_mad_outlier.parquet","97b0c31d7d678ca2a5e2353df5799fd8-217" +"crispr_interpretable","https://cellpainting-gallery.s3.amazonaws.com/cpg0016-jump-assembled/source_all/workspace/profiles/jump-profiling-recipe_2024_a917fa7/CRISPR/profiles_wellpos_cc_var_mad_outlier_featselect_sphering_harmony_PCA_corrected/profiles_wellpos_cc_var_mad_outlier.parquet","90b08b824c06bcf16dfc5e788e74f099-135" +"compound_interpretable","https://cellpainting-gallery.s3.amazonaws.com/cpg0016-jump-assembled/source_all/workspace/profiles/jump-profiling-recipe_2024_a917fa7/COMPOUND/profiles_var_mad_int_featselect_harmony/profiles_var_mad_int.parquet","b638fa24310db569bc869af92e16f69c-1444" diff --git a/manifests/src/README.md b/manifests/src/README.md new file mode 100644 index 0000000..db6992a --- /dev/null +++ b/manifests/src/README.md @@ -0,0 +1,23 @@ +# Automated versioning with Zenodo + +The scripts in this folder are used for automated versioning by uploading the manifest file (`manifests/profile_index.csv`, currently the only manifest) to Zenodo. +In the future, additional manifest files will be added and updated in this repository, triggering the same automated versioning process. + +## Releasing new versions + +To release a new set of assembled JUMP profiles, manually update the URLs in `manifests/profile_index.csv` to point to the new location. +If necessary, update the associated names for new dataset types. + +## Update ETags to reflect new URLs + +After updating a URL, the ETag (provided by S3) will no longer match. 
To update the ETags, run the following command from the repository root: + +```bash +bash manifests/src/update_etags.sh manifests/profile_index.csv | sponge manifests/profile_index.csv +``` + +Note: If using Nix, all dependencies are already included in the flake at the root folder. Simply run `nix develop` before the above command. + +## Commit changes + +Add and commit the updated `manifests/profile_index.csv`. This should trigger an update on Zenodo. Once the update is complete, the csv files in the repository and on Zenodo should match. diff --git a/manifests/src/update_etags.sh b/manifests/src/update_etags.sh new file mode 100644 index 0000000..c21bf9f --- /dev/null +++ b/manifests/src/update_etags.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +# Prints the CSV given as $1 with a refreshed third column: the current ETag of each URL in the second column, after the first two columns. +cat "$1" | + tail -n +2 | # Remove headers + cut -f2 -d',' | # Select url column + xargs -I {} -- curl -I --silent "{}" | # Fetch remote metadata (HEAD request) + grep -i "^etag:" | # Select the ETag response header + awk '{print $2}' | # Remove prefix + sed 's/\r$//' | # Remove trailing carriage return + sed 1i'"etag"' | # add header + paste -d',' - "$1" | # Merge with original file + awk -F ',' '{print $2","$3","$1}' # Print in the right order diff --git a/manifests/src/upload_index.sh b/manifests/src/upload_index.sh new file mode 100644 index 0000000..f890732 --- /dev/null +++ b/manifests/src/upload_index.sh @@ -0,0 +1,99 @@ +# Publishes FILE_TO_VERSION to Zenodo as a new version when its contents differ from the latest one. Requires ZENODO_TOKEN. +ZENODO_ENDPOINT="https://zenodo.org" +DEPOSITION_PREFIX="${ZENODO_ENDPOINT}/api/deposit/depositions" +ORIGINAL_ID="13146273" +FILE_TO_VERSION="manifests/profile_index.csv" + +echo "Checking that S3 ETags match their local counterpart" +S3_ETAGS=$(cat "${FILE_TO_VERSION}" | tail -n +2 | cut -f2 -d',' | xargs -I {} -- curl -I --silent "{}" | grep -i "^etag:" | awk '{print $2}' | sed 's/\r$//' | md5sum | cut -f1 -d" ") +LOCAL_ETAGS=$(cat "${FILE_TO_VERSION}" | tail -n +2 | cut -f3 -d',' | md5sum | cut -f1 -d" ") + +echo "Remote ${S3_ETAGS} vs Local 
${LOCAL_ETAGS} values" +if [ "${S3_ETAGS}" != "${LOCAL_ETAGS}" ]; then + echo "At least one ETag does not match their url." + exit 1 +fi + +if [ -z "${ZENODO_TOKEN}" ]; then # Fail early: every Zenodo API call below needs the token + echo "Access token not available" + exit 1 +else + echo "Access token found." +fi + + +if [ -z "${ORIGINAL_ID}" ]; then # Only get latest id when provided an original one + echo "Creating new deposition" + DEPOSITION_ENDPOINT="${DEPOSITION_PREFIX}" +else # Update existing dataset + echo "Previous ID Exists" + LATEST_ID=$(curl "${ZENODO_ENDPOINT}/records/${ORIGINAL_ID}/latest" | + grep records | sed 's/.*href=".*\.org\/records\/\(.*\)".*/\1/') + REMOTE_HASH=$(curl -H "Content-Type: application/json" -X GET --data "{}" \ + "${DEPOSITION_PREFIX}/${LATEST_ID}/files?access_token=${ZENODO_TOKEN}" | + jq ".[] .links .download" | xargs curl | md5sum | cut -f1 -d" ") + LOCAL_HASH=$(md5sum "${FILE_TO_VERSION}" | cut -f1 -d" ") + + echo "Checking for changes in file contents: Remote ${REMOTE_HASH} vs Local ${LOCAL_HASH}" + if [ "${REMOTE_HASH}" = "${LOCAL_HASH}" ]; then + echo "The urls and md5sums have not changed" + exit 0 + fi + + echo "Creating new version" + DEPOSITION_ENDPOINT="${DEPOSITION_PREFIX}/${LATEST_ID}/actions/newversion" +fi + + +# Create new deposition +DEPOSITION=$(curl -H "Content-Type: application/json" \ + -X POST\ + --data "{}" \ + "${DEPOSITION_ENDPOINT}?access_token=${ZENODO_TOKEN}"\ + | jq .id) +echo "New deposition ID is ${DEPOSITION}" + +# Look up the upload bucket URL of the new deposition +BUCKET_DATA=$(curl "${DEPOSITION_PREFIX}/$DEPOSITION?access_token=$ZENODO_TOKEN") +BUCKET=$(echo "${BUCKET_DATA}" | jq --raw-output .links.bucket) + +if [ "${BUCKET}" = "null" ]; then + echo "Could not find URL for upload. 
Response from server:" + echo "${BUCKET_DATA}" + exit 1 +fi + +# Upload file +echo "Uploading file to bucket ${BUCKET}" +curl -o /dev/null \ + --upload-file "${FILE_TO_VERSION}" \ + "${BUCKET}/${FILE_TO_VERSION}?access_token=${ZENODO_TOKEN}" + + +# Upload Metadata +echo -e '{"metadata": { + "title": "The Joint Undertaking for Morphological Profiling (JUMP) Consortium Datasets Index", + "creators": [ + { + "name": "The JUMP Cell Painting Consortium" + } + ], + "upload_type": "dataset", + "access_right": "open" +}}' > metadata.json + +NEW_DEPOSITION_ENDPOINT="${DEPOSITION_PREFIX}/${DEPOSITION}" +echo "Uploading metadata to ${NEW_DEPOSITION_ENDPOINT}" +curl -H "Content-Type: application/json" \ + -X PUT\ + --data @metadata.json \ + "${NEW_DEPOSITION_ENDPOINT}?access_token=${ZENODO_TOKEN}" + +# Publish +echo "Publishing to ${NEW_DEPOSITION_ENDPOINT}" +curl -H "Content-Type: application/json" \ + -X POST\ + --data "{}"\ + "${NEW_DEPOSITION_ENDPOINT}/actions/publish?access_token=${ZENODO_TOKEN}"\ + | jq .id + diff --git a/profile_index.csv b/profile_index.csv deleted file mode 100644 index e722593..0000000 --- a/profile_index.csv +++ /dev/null @@ -1,7 +0,0 @@ -"subset","url" -"orf", "https://cellpainting-gallery.s3.amazonaws.com/cpg0016-jump-assembled/source_all/workspace/profiles/jump-profiling-recipe_2024_a917fa7/ORF/profiles_wellpos_cc_var_mad_outlier_featselect_sphering_harmony/profiles_wellpos_cc_var_mad_outlier_featselect_sphering_harmony.parquet" -"crispr", "https://cellpainting-gallery.s3.amazonaws.com/cpg0016-jump-assembled/source_all/workspace/profiles/jump-profiling-recipe_2024_a917fa7/CRISPR/profiles_wellpos_cc_var_mad_outlier_featselect_sphering_harmony_PCA_corrected/profiles_wellpos_cc_var_mad_outlier_featselect_sphering_harmony_PCA_corrected.parquet" -"compound", 
"https://cellpainting-gallery.s3.amazonaws.com/cpg0016-jump-assembled/source_all/workspace/profiles/jump-profiling-recipe_2024_a917fa7/COMPOUND/profiles_var_mad_int_featselect_harmony/profiles_var_mad_int_featselect_harmony.parquet" -"orf_interpretable", "https://cellpainting-gallery.s3.amazonaws.com/cpg0016-jump-assembled/source_all/workspace/profiles/jump-profiling-recipe_2024_a917fa7/ORF/profiles_wellpos_cc_var_mad_outlier_featselect_sphering_harmony/profiles_wellpos_cc_var_mad_outlier.parquet" -"crispr_interpretable", "https://cellpainting-gallery.s3.amazonaws.com/cpg0016-jump-assembled/source_all/workspace/profiles/jump-profiling-recipe_2024_a917fa7/CRISPR/profiles_wellpos_cc_var_mad_outlier_featselect_sphering_harmony_PCA_corrected/profiles_wellpos_cc_var_mad_outlier.parquet" -"compound_interpretable", "https://cellpainting-gallery.s3.amazonaws.com/cpg0016-jump-assembled/source_all/workspace/profiles/jump-profiling-recipe_2024_a917fa7/COMPOUND/profiles_var_mad_int_featselect_harmony/profiles_var_mad_int.parquet"