From fc899330ecec1e5da15a5052fbf196e18c744cfe Mon Sep 17 00:00:00 2001 From: Joshua Shapiro Date: Fri, 20 Dec 2024 10:16:39 -0500 Subject: [PATCH 01/12] Initial changelog for 0.2 --- .../create-a-semantic-version-release.md | 2 +- CHANGELOG.md | 29 ++++++++++++++++++- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/create-a-semantic-version-release.md b/.github/ISSUE_TEMPLATE/create-a-semantic-version-release.md index bee31cedc..11f5f56e5 100644 --- a/.github/ISSUE_TEMPLATE/create-a-semantic-version-release.md +++ b/.github/ISSUE_TEMPLATE/create-a-semantic-version-release.md @@ -23,5 +23,5 @@ If issues must be resolved before creating a release, mark them as blockers in Z - Are there any _major_ shifts in project dependencies? For example, is there a package that was used throughout Docker environments that has been replaced? - Have there been any changes in repo file organization? -- [ Create a release on GitHub ](https://github.com/AlexsLemonade/OpenScPCA-analysis/releases/new) +- [ ] [Create a release on GitHub](https://github.com/AlexsLemonade/OpenScPCA-analysis/releases/new) - populate the contents with the release notes added to the changelog. diff --git a/CHANGELOG.md b/CHANGELOG.md index 7630f835b..20f159e58 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,33 @@ Add new release notes in reverse numerical order (newest first) below this comme You may want to add temporary notes here for tracking as features are added, before a new release is ready. --> +## v0.2.0 + +This release adds the first set of community-contribyted analyses to the repository. 
+These modules are focused on cell type identification and annotation for specific ScPCA datasets: + +- `cell-type-dsrct` +- `cell-type-ETO-ALL-03` +- `cell-type-nonETP-ALL-03` +- `cell-type-wilms-tumor-06` +- `cell-type-wilms0tumor-14` +- `cell-type-glioblastoma` + + +This release also adds the following new modules developed by the Data Lab team: + +- `hello-clusters`: a demonstration module for clustering analysis using the [`rOpenScPCA` package](https://github.com/AlexsLemonade/rOpenScPCA) +- `seurat-conversion`: a module for converting `SingleCellExperiment` objects to Seurat objects, also using the `rOpenScPCA` package +- `metacells`: a module that begins to explore the utility of metacell analysis within the ScPCA project + +Other updates in this release include: + +- a new `sync-results.py` script for simplifying uploading analysis results from an analysis module to an S3 bucket + + + + + ## v0.1.0 @@ -17,7 +44,7 @@ The repository at this stage should be generally complete with respect to infras With respect to infrastructure, the repository contains the following components: -- detailed documentation in the `docs` directory explaining how to interact with the OpenScPCA project and how to set up and run analyses (published at https://openscpca.readthedocs.io) +- detailed documentation in the `docs` directory explaining how to interact with the OpenScPCA project and how to set up and run analyses (published at https://openscpca.readthedocs.io) - a `create-analysis-module.py` script for setting up new analysis modules - `download-data.py` and `download-results.py` scripts to download data and results from the OpenScPCA project - template notebooks, scripts, environment files, and Docker images for analysis modules From c92fd7a2c5b20e16e3e20481d6d08128d9c0435e Mon Sep 17 00:00:00 2001 From: Joshua Shapiro Date: Fri, 20 Dec 2024 11:01:41 -0500 Subject: [PATCH 02/12] Only skip push step --- .github/workflows/build-push-docker-module.yml | 4 ++-- 1 file changed, 
2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build-push-docker-module.yml b/.github/workflows/build-push-docker-module.yml index 305aa0d5d..0d3e7f886 100644 --- a/.github/workflows/build-push-docker-module.yml +++ b/.github/workflows/build-push-docker-module.yml @@ -20,7 +20,7 @@ permissions: jobs: build-push: name: Build and Push Docker Image - if: inputs.push-ecr && github.repository_owner == 'AlexsLemonade' + if: github.repository_owner == 'AlexsLemonade' environment: prod runs-on: openscpca-22.04-big-disk @@ -80,7 +80,7 @@ jobs: env: DOCKER_BUILD_SUMMARY: false with: - push: true + push: ${{ inputs.push-ecr }} context: "{{defaultContext}}:analyses/${{ inputs.module }}" tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} From 9ea64fefa9e2a294839fafbb0db552ebf0f6fc40 Mon Sep 17 00:00:00 2001 From: Joshua Shapiro Date: Fri, 20 Dec 2024 11:44:46 -0500 Subject: [PATCH 03/12] more changelog updates --- CHANGELOG.md | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 20f159e58..18d77ca3d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,18 +11,19 @@ You may want to add temporary notes here for tracking as features are added, bef ## v0.2.0 -This release adds the first set of community-contribyted analyses to the repository. -These modules are focused on cell type identification and annotation for specific ScPCA datasets: +This release adds the first set of community-contributed analyses to the repository. +These modules are focused on cell type identification and annotation for specific ScPCA datasets. +Note that many of these modules are still in development at this stage, and may not yet be fully functional. 
- `cell-type-dsrct` - `cell-type-ETO-ALL-03` - `cell-type-nonETP-ALL-03` - `cell-type-wilms-tumor-06` - `cell-type-wilms0tumor-14` -- `cell-type-glioblastoma` +- `celltype-glioblastoma` -This release also adds the following new modules developed by the Data Lab team: +This release also adds the following new modules developed by the Data Lab: - `hello-clusters`: a demonstration module for clustering analysis using the [`rOpenScPCA` package](https://github.com/AlexsLemonade/rOpenScPCA) - `seurat-conversion`: a module for converting `SingleCellExperiment` objects to Seurat objects, also using the `rOpenScPCA` package @@ -30,11 +31,11 @@ This release also adds the following new modules developed by the Data Lab team: Other updates in this release include: -- a new `sync-results.py` script for simplifying uploading analysis results from an analysis module to an S3 bucket - - - +- a new `sync-results.py` script to simplify uploading (and downloading) analysis results from an analysis module to a user's S3 bucket +- changes from miniconda to miniforge for conda usage throughout the project +While not part of this repository, we do want to also note that we have created the [`rOpenScPCA` package](https://github.com/AlexsLemonade/rOpenScPCA), which will house utility functions commonly used by analysis modules here. +The goal is to centralize common functions used across analyses to make it easier to share code across modules and to maintain consistency in the analyses. 
## v0.1.0 From d165bb0040302ce9aecd4306b2362497fd33e89c Mon Sep 17 00:00:00 2001 From: Joshua Shapiro Date: Fri, 20 Dec 2024 11:47:30 -0500 Subject: [PATCH 04/12] rename celltype-glioblastoma --- ....yml => docker_cell-type-glioblastoma.yml} | 24 +- ...oma.yml => run_cell-type-glioblastoma.yml} | 12 +- CHANGELOG.md | 2 +- .../.dockerignore | 0 .../.gitignore | 0 .../Dockerfile | 0 .../README.md | 0 .../environment.yml | 2 +- .../notebook-template.ipynb | 310 ++++++++++++++++++ .../plots/.gitkeep | 0 .../results/README.md | 0 .../scratch/.gitkeep | 0 .../script-template.py | 4 +- .../scripts/.gitkeep | 0 .../notebook-template.ipynb | 310 ------------------ 15 files changed, 332 insertions(+), 332 deletions(-) rename .github/workflows/{docker_celltype-glioblastoma.yml => docker_cell-type-glioblastoma.yml} (66%) rename .github/workflows/{run_celltype-glioblastoma.yml => run_cell-type-glioblastoma.yml} (82%) rename analyses/{celltype-glioblastoma => cell-type-glioblastoma}/.dockerignore (100%) rename analyses/{celltype-glioblastoma => cell-type-glioblastoma}/.gitignore (100%) rename analyses/{celltype-glioblastoma => cell-type-glioblastoma}/Dockerfile (100%) rename analyses/{celltype-glioblastoma => cell-type-glioblastoma}/README.md (100%) rename analyses/{celltype-glioblastoma => cell-type-glioblastoma}/environment.yml (85%) create mode 100644 analyses/cell-type-glioblastoma/notebook-template.ipynb rename analyses/{celltype-glioblastoma => cell-type-glioblastoma}/plots/.gitkeep (100%) rename analyses/{celltype-glioblastoma => cell-type-glioblastoma}/results/README.md (100%) rename analyses/{celltype-glioblastoma => cell-type-glioblastoma}/scratch/.gitkeep (100%) rename analyses/{celltype-glioblastoma => cell-type-glioblastoma}/script-template.py (95%) rename analyses/{celltype-glioblastoma => cell-type-glioblastoma}/scripts/.gitkeep (100%) delete mode 100644 analyses/celltype-glioblastoma/notebook-template.ipynb diff --git 
a/.github/workflows/docker_celltype-glioblastoma.yml b/.github/workflows/docker_cell-type-glioblastoma.yml similarity index 66% rename from .github/workflows/docker_celltype-glioblastoma.yml rename to .github/workflows/docker_cell-type-glioblastoma.yml index 77f5421f1..ea4179ffa 100644 --- a/.github/workflows/docker_celltype-glioblastoma.yml +++ b/.github/workflows/docker_cell-type-glioblastoma.yml @@ -1,11 +1,11 @@ -# This is a workflow to build the docker image for the celltype-glioblastoma module +# This is a workflow to build the docker image for the cell-type-glioblastoma module # # Docker modules are run on pull requests when code for files that affect the Docker image have changed. # If other files are used during the Docker build, they should be added to `paths` # # At module initialization, this workflow is inactive, and needs to be activated manually -name: Build docker image for celltype-glioblastoma +name: Build docker image for cell-type-glioblastoma concurrency: # only one run per branch at a time @@ -17,18 +17,18 @@ on: # branches: # - main # paths: - # - "analyses/celltype-glioblastoma/Dockerfile" - # - "analyses/celltype-glioblastoma/.dockerignore" - # - "analyses/celltype-glioblastoma/renv.lock" - # - "analyses/celltype-glioblastoma/conda-lock.yml" + # - "analyses/cell-type-glioblastoma/Dockerfile" + # - "analyses/cell-type-glioblastoma/.dockerignore" + # - "analyses/cell-type-glioblastoma/renv.lock" + # - "analyses/cell-type-glioblastoma/conda-lock.yml" # push: # branches: # - main # paths: - # - "analyses/celltype-glioblastoma/Dockerfile" - # - "analyses/celltype-glioblastoma/.dockerignore" - # - "analyses/celltype-glioblastoma/renv.lock" - # - "analyses/celltype-glioblastoma/conda-lock.yml" + # - "analyses/cell-type-glioblastoma/Dockerfile" + # - "analyses/cell-type-glioblastoma/.dockerignore" + # - "analyses/cell-type-glioblastoma/renv.lock" + # - "analyses/cell-type-glioblastoma/conda-lock.yml" workflow_dispatch: inputs: push-ecr: @@ -49,7 
+49,7 @@ jobs: - name: Build image uses: docker/build-push-action@v5 with: - context: "{{defaultContext}}:analyses/celltype-glioblastoma" + context: "{{defaultContext}}:analyses/cell-type-glioblastoma" push: false cache-from: type=gha cache-to: type=gha,mode=max @@ -59,5 +59,5 @@ jobs: if: github.repository_owner == 'AlexsLemonade' && (github.event_name == 'push' || inputs.push-ecr) uses: ./.github/workflows/build-push-docker-module.yml with: - module: "celltype-glioblastoma" + module: "cell-type-glioblastoma" push-ecr: true diff --git a/.github/workflows/run_celltype-glioblastoma.yml b/.github/workflows/run_cell-type-glioblastoma.yml similarity index 82% rename from .github/workflows/run_celltype-glioblastoma.yml rename to .github/workflows/run_cell-type-glioblastoma.yml index 3bffc5f93..b48d066fe 100644 --- a/.github/workflows/run_celltype-glioblastoma.yml +++ b/.github/workflows/run_cell-type-glioblastoma.yml @@ -1,4 +1,4 @@ -# This is a workflow to run the celltype-glioblastoma module +# This is a workflow to run the cell-type-glioblastoma module # # Analysis modules are run based on three triggers: # - Manual trigger @@ -7,9 +7,9 @@ # # At initialization, only the manual trigger is active -name: Run celltype-glioblastoma analysis module +name: Run cell-type-glioblastoma analysis module env: - MODULE_PATH: analyses/celltype-glioblastoma + MODULE_PATH: analyses/cell-type-glioblastoma AWS_DEFAULT_REGION: us-east-2 concurrency: @@ -24,9 +24,9 @@ on: # branches: # - main # paths: - # - analyses/celltype-glioblastoma/** - # - "!analyses/celltype-glioblastoma/Dockerfile" - # - "!analyses/celltype-glioblastoma/.dockerignore" - # - .github/workflows/run_celltype-glioblastoma.yml + # - analyses/cell-type-glioblastoma/** + # - "!analyses/cell-type-glioblastoma/Dockerfile" + # - "!analyses/cell-type-glioblastoma/.dockerignore" + # - .github/workflows/run_cell-type-glioblastoma.yml jobs: diff --git a/CHANGELOG.md b/CHANGELOG.md index 18d77ca3d..ca2fdb4e8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,10 +17,10 @@ Note that 
many of these modules are still in development at this stage, and may - `cell-type-dsrct` - `cell-type-ETO-ALL-03` +- `cell-type-glioblastoma` - `cell-type-nonETP-ALL-03` - `cell-type-wilms-tumor-06` - `cell-type-wilms0tumor-14` -- `celltype-glioblastoma` This release also adds the following new modules developed by the Data Lab: diff --git a/analyses/celltype-glioblastoma/.dockerignore b/analyses/cell-type-glioblastoma/.dockerignore similarity index 100% rename from analyses/celltype-glioblastoma/.dockerignore rename to analyses/cell-type-glioblastoma/.dockerignore diff --git a/analyses/celltype-glioblastoma/.gitignore b/analyses/cell-type-glioblastoma/.gitignore similarity index 100% rename from analyses/celltype-glioblastoma/.gitignore rename to analyses/cell-type-glioblastoma/.gitignore diff --git a/analyses/celltype-glioblastoma/Dockerfile b/analyses/cell-type-glioblastoma/Dockerfile similarity index 100% rename from analyses/celltype-glioblastoma/Dockerfile rename to analyses/cell-type-glioblastoma/Dockerfile diff --git a/analyses/celltype-glioblastoma/README.md b/analyses/cell-type-glioblastoma/README.md similarity index 100% rename from analyses/celltype-glioblastoma/README.md rename to analyses/cell-type-glioblastoma/README.md diff --git a/analyses/celltype-glioblastoma/environment.yml b/analyses/cell-type-glioblastoma/environment.yml similarity index 85% rename from analyses/celltype-glioblastoma/environment.yml rename to analyses/cell-type-glioblastoma/environment.yml index 5e8844ce9..6c683643a 100644 --- a/analyses/celltype-glioblastoma/environment.yml +++ b/analyses/cell-type-glioblastoma/environment.yml @@ -1,4 +1,4 @@ -name: "openscpca-celltype-glioblastoma" +name: "openscpca-cell-type-glioblastoma" channels: - conda-forge - bioconda diff --git a/analyses/cell-type-glioblastoma/notebook-template.ipynb b/analyses/cell-type-glioblastoma/notebook-template.ipynb new file mode 100644 index 000000000..b0e85e1f4 --- /dev/null +++ 
b/analyses/cell-type-glioblastoma/notebook-template.ipynb @@ -0,0 +1,310 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "5ecf8d71-c970-49ad-a908-fb94f43835e9", + "metadata": {}, + "source": [ + "# cell-type-glioblastoma Analysis\n", + "\n", + "_Author Name_\n", + "\n", + "_Date_\n" + ] + }, + { + "cell_type": "markdown", + "id": "abd58442", + "metadata": {}, + "source": [ + "## Introduction\n", + "\n", + "This is a template for an analysis notebook using Jupyter.\n", + "\n", + "It is a good idea to start with a brief introduction to the analysis, including the purpose of the analysis, the data used, and the methods applied.\n", + "Replace this text with your own introduction, and be sure to update the _Title_, _Author Name_, and _Date_ at the top of the document.\n", + "\n", + "Don't forget to rename this file as well!\n" + ] + }, + { + "cell_type": "markdown", + "id": "f7e42626", + "metadata": {}, + "source": [ + "## Setup\n" + ] + }, + { + "cell_type": "markdown", + "id": "b3d030d5", + "metadata": {}, + "source": [ + "### Load modules\n", + "\n", + "Load required Python modules in the following cell.\n", + "We have included the standard `pathlib` module and the `session_info` module that we will be using at the bottom of this notebook to record the versions of the modules used in this analysis.\n", + "\n", + "Do not install modules here; only load them with `import` statements.\n", + "Avoid renaming modules with `as` statements, unless you are performing a standard renaming (e.g., `import pandas as pd`).\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "922376d5-331e-4966-910c-1f5540fdc230", + "metadata": {}, + "outputs": [], + "source": [ + "# import required modules\n", + "import pathlib\n", + "\n", + "import session_info" + ] + }, + { + "cell_type": "markdown", + "id": "7f5a9354", + "metadata": {}, + "source": [ + "### Paths\n", + "\n", + "Setting paths to directories and files for input and output at the start of a notebook can 
be helpful for organization and reproducibility.\n", + "You may not know all of the paths you need when you start a notebook, but once you do, keeping them all in one place is helpful, both for yourself in the future and for other users of the notebook.\n" + ] + }, + { + "cell_type": "markdown", + "id": "86d81532", + "metadata": {}, + "source": [ + "#### Base directories\n", + "\n", + "In the cell below, we include some common directories that you might need in your analysis.\n", + "We find the repository root by looking for a (hidden) `.git` directory somewhere in the file path that contains this file, then set the data directory and module directories relative to that.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d124fed2", + "metadata": {}, + "outputs": [], + "source": [ + "# Find the repository root directory\n", + "repo_root = pathlib.Path.cwd()\n", + "while not (repo_root / \".git\").is_dir(): # search for the .git directory\n", + " repo_root = repo_root.parent\n", + " if repo_root == repo_root.parent:\n", + " raise FileNotFoundError(\"Could not find the repository root directory\")\n", + "\n", + "# set module path (using pathlib)\n", + "module_root = repo_root / \"analyses\" / \"cell-type-glioblastoma\"\n", + "\n", + "# set current data directory\n", + "data_dir = repo_root / \"data\" / \"current\"\n", + "\n", + "# set results and plots directories (using the analysis project file to find root)\n", + "results_dir = module_root / \"results\"\n", + "plots_dir = module_root / \"plots\"" + ] + }, + { + "cell_type": "markdown", + "id": "b40fb5e9", + "metadata": {}, + "source": [ + "#### Input and output files\n", + "\n", + "Set paths to input and output directories and files in the cell below.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "49c1e1c1", + "metadata": {}, + "outputs": [], + "source": [ + "# Input files\n", + "\n", + "# Output files\n" + ] + }, + { + "cell_type": "markdown", + "id": "f1fbe4b6", + "metadata": { 
+ "jp-MarkdownHeadingCollapsed": true + }, + "source": [ + "## Analysis content\n", + "\n", + "Organize the remainder of your content into sections and subsections as appropriate for your analysis.\n", + "\n", + "Add new Code and Markdown cells as needed using the Jupyter interface or your favorite keyboard shortcuts.\n", + "For example, you can use **Alt+Enter** on Windows or Linux or **Opt+Return** on a Mac to execute the current cell and insert a new one below.\n", + "\n", + "Be sure to comment your code and explain your steps as you go.\n", + "\n", + "It is usually a good idea to rerun the entire notebook before sharing any output, to ensure that the notebook and output files are fully up to date.\n", + "You can do this in the Jupyter Lab or Notebook interface by clicking on the _Kernel_ menu and choosing _Restart Kernel and Run All Cells..._\n" + ] + }, + { + "cell_type": "markdown", + "id": "d1b4d0c9", + "metadata": {}, + "source": [ + "## Session Info\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "eb4d84a4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "Click to view session information\n", + "
\n",
+              "-----\n",
+              "session_info        1.0.0\n",
+              "-----\n",
+              "
\n", + "
\n", + "Click to view modules imported as dependencies\n", + "
\n",
+              "CoreFoundation              NA\n",
+              "Foundation                  NA\n",
+              "PyObjCTools                 NA\n",
+              "anyio                       NA\n",
+              "appnope                     0.1.4\n",
+              "arrow                       1.3.0\n",
+              "asttokens                   NA\n",
+              "attr                        23.2.0\n",
+              "attrs                       23.2.0\n",
+              "babel                       2.14.0\n",
+              "brotli                      1.1.0\n",
+              "certifi                     2024.02.02\n",
+              "charset_normalizer          3.3.2\n",
+              "colorama                    0.4.6\n",
+              "comm                        0.2.2\n",
+              "cython_runtime              NA\n",
+              "dateutil                    2.8.2\n",
+              "debugpy                     1.8.1\n",
+              "decorator                   5.1.1\n",
+              "executing                   2.0.1\n",
+              "fastjsonschema              NA\n",
+              "fqdn                        NA\n",
+              "idna                        3.6\n",
+              "ipykernel                   6.29.3\n",
+              "isoduration                 NA\n",
+              "jedi                        0.19.1\n",
+              "jinja2                      3.1.3\n",
+              "json5                       0.9.24\n",
+              "jsonpointer                 2.4\n",
+              "jsonschema                  4.21.1\n",
+              "jsonschema_specifications   NA\n",
+              "jupyter_events              0.10.0\n",
+              "jupyter_server              2.13.0\n",
+              "jupyterlab_server           2.25.4\n",
+              "markupsafe                  2.1.5\n",
+              "nbformat                    5.10.3\n",
+              "objc                        10.2\n",
+              "overrides                   NA\n",
+              "packaging                   24.0\n",
+              "parso                       0.8.3\n",
+              "pexpect                     4.9.0\n",
+              "pickleshare                 0.7.5\n",
+              "platformdirs                4.2.0\n",
+              "prometheus_client           NA\n",
+              "prompt_toolkit              3.0.38\n",
+              "psutil                      5.9.8\n",
+              "ptyprocess                  0.7.0\n",
+              "pure_eval                   0.2.2\n",
+              "pydev_ipython               NA\n",
+              "pydevconsole                NA\n",
+              "pydevd                      2.9.5\n",
+              "pydevd_file_utils           NA\n",
+              "pydevd_plugins              NA\n",
+              "pydevd_tracing              NA\n",
+              "pygments                    2.17.2\n",
+              "pythonjsonlogger            NA\n",
+              "referencing                 NA\n",
+              "requests                    2.31.0\n",
+              "rfc3339_validator           0.1.4\n",
+              "rfc3986_validator           0.1.1\n",
+              "rpds                        NA\n",
+              "ruamel                      NA\n",
+              "send2trash                  NA\n",
+              "six                         1.16.0\n",
+              "sniffio                     1.3.1\n",
+              "socks                       1.7.1\n",
+              "stack_data                  0.6.2\n",
+              "tornado                     6.4\n",
+              "traitlets                   5.14.2\n",
+              "uri_template                NA\n",
+              "urllib3                     1.26.18\n",
+              "wcwidth                     0.2.13\n",
+              "webcolors                   1.13\n",
+              "websocket                   1.7.0\n",
+              "yaml                        6.0.1\n",
+              "zmq                         25.1.2\n",
+              "
\n", + "
\n", + "
\n",
+              "-----\n",
+              "IPython             8.17.2\n",
+              "jupyter_client      8.6.1\n",
+              "jupyter_core        5.7.2\n",
+              "jupyterlab          4.1.5\n",
+              "notebook            7.1.2\n",
+              "-----\n",
+              "Python 3.11.8 | packaged by conda-forge | (main, Feb 16 2024, 20:49:36) [Clang 16.0.6 ]\n",
+              "macOS-14.3.1-arm64-arm-64bit\n",
+              "-----\n",
+              "Session information updated at 2024-03-20 18:04\n",
+              "
\n", + "
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# record the versions of the modules used in this analysis and other environment information\n", + "session_info.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/analyses/celltype-glioblastoma/plots/.gitkeep b/analyses/cell-type-glioblastoma/plots/.gitkeep similarity index 100% rename from analyses/celltype-glioblastoma/plots/.gitkeep rename to analyses/cell-type-glioblastoma/plots/.gitkeep diff --git a/analyses/celltype-glioblastoma/results/README.md b/analyses/cell-type-glioblastoma/results/README.md similarity index 100% rename from analyses/celltype-glioblastoma/results/README.md rename to analyses/cell-type-glioblastoma/results/README.md diff --git a/analyses/celltype-glioblastoma/scratch/.gitkeep b/analyses/cell-type-glioblastoma/scratch/.gitkeep similarity index 100% rename from analyses/celltype-glioblastoma/scratch/.gitkeep rename to analyses/cell-type-glioblastoma/scratch/.gitkeep diff --git a/analyses/celltype-glioblastoma/script-template.py b/analyses/cell-type-glioblastoma/script-template.py similarity index 95% rename from analyses/celltype-glioblastoma/script-template.py rename to analyses/cell-type-glioblastoma/script-template.py index 9365e3cde..8b7cd680a 100644 --- a/analyses/celltype-glioblastoma/script-template.py +++ b/analyses/cell-type-glioblastoma/script-template.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -# celltype-glioblastoma Analysis Script +# cell-type-glioblastoma Analysis Script # Author Name 
# Date # @@ -55,7 +55,7 @@ def find_git_root(): repo_root = find_git_root() # set module path (using pathlib) -module_root = repo_root / "analyses" / "celltype-glioblastoma" +module_root = repo_root / "analyses" / "cell-type-glioblastoma" # set current data directory data_dir = repo_root / "data" / "current" diff --git a/analyses/celltype-glioblastoma/scripts/.gitkeep b/analyses/cell-type-glioblastoma/scripts/.gitkeep similarity index 100% rename from analyses/celltype-glioblastoma/scripts/.gitkeep rename to analyses/cell-type-glioblastoma/scripts/.gitkeep diff --git a/analyses/celltype-glioblastoma/notebook-template.ipynb b/analyses/celltype-glioblastoma/notebook-template.ipynb deleted file mode 100644 index cb37c0446..000000000 --- a/analyses/celltype-glioblastoma/notebook-template.ipynb +++ /dev/null @@ -1,310 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "5ecf8d71-c970-49ad-a908-fb94f43835e9", - "metadata": {}, - "source": [ - "# celltype-glioblastoma Analysis\n", - "\n", - "_Author Name_\n", - "\n", - "_Date_\n" - ] - }, - { - "cell_type": "markdown", - "id": "abd58442", - "metadata": {}, - "source": [ - "## Introduction\n", - "\n", - "This a template for an analysis notebook using Jupyter.\n", - "\n", - "It is a good idea to start with a brief introduction to the analysis, including the purpose of the analysis, the data used, and the methods applied.\n", - "Replace this text with your own introduction, and be sure to update the _Title_, _Author Name_, and _Date_ at the top of the document.\n", - "\n", - "Don't forget to rename this file as well!\n" - ] - }, - { - "cell_type": "markdown", - "id": "f7e42626", - "metadata": {}, - "source": [ - "## Setup\n" - ] - }, - { - "cell_type": "markdown", - "id": "b3d030d5", - "metadata": {}, - "source": [ - "### Load modules\n", - "\n", - "Load required Python modules in the following cell.\n", - "We have included the standard `pathlib` module and the `session_info` module that we will be using at the 
bottom of this notebook to record the versions of the modules used in this analysis.\n", - "\n", - "Do not install modules here; only load them with `import` statements.\n", - "Avoid renaming modules with `as` statements, unless you are performing a standard renaming (e.g., `import pandas as pd`).\n" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "922376d5-331e-4966-910c-1f5540fdc230", - "metadata": {}, - "outputs": [], - "source": [ - "# import required modules\n", - "import pathlib\n", - "\n", - "import session_info" - ] - }, - { - "cell_type": "markdown", - "id": "7f5a9354", - "metadata": {}, - "source": [ - "### Paths\n", - "\n", - "Setting paths to directories and files for input and output at the start of a notebook can be helpful for organization and reproducibility.\n", - "You may not know all of the paths you need when you start a notebook, but once you do, keeping them all in one place is helpful, both for yourself in the future and for other users of the notebook.\n" - ] - }, - { - "cell_type": "markdown", - "id": "86d81532", - "metadata": {}, - "source": [ - "#### Base directories\n", - "\n", - "In the cell below, we include some common directories that you might need in your analysis.\n", - "We find the repository root by looking for a (hidden) `.git` directory somewhere in the file path that contains this file, then set the data directory and module directories relative to that.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "d124fed2", - "metadata": {}, - "outputs": [], - "source": [ - "# Find the repository root directory\n", - "repo_root = pathlib.Path.cwd()\n", - "while not (repo_root / \".git\").is_dir(): # search for the .git directory\n", - " repo_root = repo_root.parent\n", - " if repo_root == \"/\":\n", - " raise FileNotFoundError(\"Could not find the repository root directory\")\n", - "\n", - "# set module path (using pathlib)\n", - "module_root = repo_root / \"analyses\" / 
\"celltype-glioblastoma\"\n", - "\n", - "# set current data directory\n", - "data_dir = repo_root / \"data\" / \"current\"\n", - "\n", - "# set results and plots directories (using the analysis project file to find root)\n", - "results_dir = module_root / \"results\"\n", - "plots_dir = module_root / \"plots\"" - ] - }, - { - "cell_type": "markdown", - "id": "b40fb5e9", - "metadata": {}, - "source": [ - "#### Input and output files\n", - "\n", - "Set paths to input and output directories and files in the cell below.\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "49c1e1c1", - "metadata": {}, - "outputs": [], - "source": [ - "# Input files\n", - "\n", - "# Output files\n" - ] - }, - { - "cell_type": "markdown", - "id": "f1fbe4b6", - "metadata": { - "jp-MarkdownHeadingCollapsed": true - }, - "source": [ - "## Analysis content\n", - "\n", - "Organize the remainder of your content into sections and subsections as appropriate for your analysis.\n", - "\n", - "Add new Code and Markdown cells as needed using the Jupyter interface or your favorite keyboard shortcuts.\n", - "For example, you can use **Alt+Enter** on Windows or Linux or **Opt+Return** on a Mac to execute the current cell and insert a new one below.\n", - "\n", - "Be sure to comment your code and explain your steps as you go.\n", - "\n", - "It is usually a good idea to rerun the entire notebook before sharing any output, to ensure that the notebook and output files are fully up to date.\n", - "You can do this in the Jupyter Lab or Notebook interface by clicking on the _Kernel_ menu and choosing _Restart Kernel and Run All Cells..._\n" - ] - }, - { - "cell_type": "markdown", - "id": "d1b4d0c9", - "metadata": {}, - "source": [ - "## Session Info\n" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "eb4d84a4", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "Click to view session information\n", - "
\n",
-       "-----\n",
-       "session_info        1.0.0\n",
-       "-----\n",
-       "
\n", - "
\n", - "Click to view modules imported as dependencies\n", - "
\n",
-       "CoreFoundation              NA\n",
-       "Foundation                  NA\n",
-       "PyObjCTools                 NA\n",
-       "anyio                       NA\n",
-       "appnope                     0.1.4\n",
-       "arrow                       1.3.0\n",
-       "asttokens                   NA\n",
-       "attr                        23.2.0\n",
-       "attrs                       23.2.0\n",
-       "babel                       2.14.0\n",
-       "brotli                      1.1.0\n",
-       "certifi                     2024.02.02\n",
-       "charset_normalizer          3.3.2\n",
-       "colorama                    0.4.6\n",
-       "comm                        0.2.2\n",
-       "cython_runtime              NA\n",
-       "dateutil                    2.8.2\n",
-       "debugpy                     1.8.1\n",
-       "decorator                   5.1.1\n",
-       "executing                   2.0.1\n",
-       "fastjsonschema              NA\n",
-       "fqdn                        NA\n",
-       "idna                        3.6\n",
-       "ipykernel                   6.29.3\n",
-       "isoduration                 NA\n",
-       "jedi                        0.19.1\n",
-       "jinja2                      3.1.3\n",
-       "json5                       0.9.24\n",
-       "jsonpointer                 2.4\n",
-       "jsonschema                  4.21.1\n",
-       "jsonschema_specifications   NA\n",
-       "jupyter_events              0.10.0\n",
-       "jupyter_server              2.13.0\n",
-       "jupyterlab_server           2.25.4\n",
-       "markupsafe                  2.1.5\n",
-       "nbformat                    5.10.3\n",
-       "objc                        10.2\n",
-       "overrides                   NA\n",
-       "packaging                   24.0\n",
-       "parso                       0.8.3\n",
-       "pexpect                     4.9.0\n",
-       "pickleshare                 0.7.5\n",
-       "platformdirs                4.2.0\n",
-       "prometheus_client           NA\n",
-       "prompt_toolkit              3.0.38\n",
-       "psutil                      5.9.8\n",
-       "ptyprocess                  0.7.0\n",
-       "pure_eval                   0.2.2\n",
-       "pydev_ipython               NA\n",
-       "pydevconsole                NA\n",
-       "pydevd                      2.9.5\n",
-       "pydevd_file_utils           NA\n",
-       "pydevd_plugins              NA\n",
-       "pydevd_tracing              NA\n",
-       "pygments                    2.17.2\n",
-       "pythonjsonlogger            NA\n",
-       "referencing                 NA\n",
-       "requests                    2.31.0\n",
-       "rfc3339_validator           0.1.4\n",
-       "rfc3986_validator           0.1.1\n",
-       "rpds                        NA\n",
-       "ruamel                      NA\n",
-       "send2trash                  NA\n",
-       "six                         1.16.0\n",
-       "sniffio                     1.3.1\n",
-       "socks                       1.7.1\n",
-       "stack_data                  0.6.2\n",
-       "tornado                     6.4\n",
-       "traitlets                   5.14.2\n",
-       "uri_template                NA\n",
-       "urllib3                     1.26.18\n",
-       "wcwidth                     0.2.13\n",
-       "webcolors                   1.13\n",
-       "websocket                   1.7.0\n",
-       "yaml                        6.0.1\n",
-       "zmq                         25.1.2\n",
-       "
\n", - "
\n", - "
\n",
-       "-----\n",
-       "IPython             8.17.2\n",
-       "jupyter_client      8.6.1\n",
-       "jupyter_core        5.7.2\n",
-       "jupyterlab          4.1.5\n",
-       "notebook            7.1.2\n",
-       "-----\n",
-       "Python 3.11.8 | packaged by conda-forge | (main, Feb 16 2024, 20:49:36) [Clang 16.0.6 ]\n",
-       "macOS-14.3.1-arm64-arm-64bit\n",
-       "-----\n",
-       "Session information updated at 2024-03-20 18:04\n",
-       "
\n", - "
" - ], - "text/plain": [ - "" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# record the versions of the modules used in this analysis and other environment information\n", - "session_info.show()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} From 3707ee4febab4c61d48af115c2db8604e5c67d57 Mon Sep 17 00:00:00 2001 From: Joshua Shapiro Date: Fri, 20 Dec 2024 12:38:06 -0500 Subject: [PATCH 05/12] Apply suggestions from code review Co-authored-by: Stephanie Spielman --- CHANGELOG.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ca2fdb4e8..03d6a285c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,14 +13,14 @@ You may want to add temporary notes here for tracking as features are added, bef This release adds the first set of community-contributed analyses to the repository. These modules are focused on cell type identification and annotation for specific ScPCA datasets. -Note that many of these modules are still in development at this stage, and may not yet be fully functional. +Note that many of these modules are still in development at this stage, and may not be fully functional. 
- `cell-type-dsrct` -- `cell-type-ETO-ALL-03` +- `cell-type-ETP-ALL-03` - `cell-type-glioblastoma` - `cell-type-nonETP-ALL-03` - `cell-type-wilms-tumor-06` -- `cell-type-wilms0tumor-14` +- `cell-type-wilms-tumor-14` This release also adds the following new modules developed by the Data Lab: @@ -35,7 +35,7 @@ Other updates in this release include: - changes from miniconda to miniforge for conda usage throughout the project While not part of this repository, we do want to also note that we have created the [`rOpenScPCA` package](https://github.com/AlexsLemonade/rOpenScPCA), which will house utility functions commonly used by analysis modules here. -The goal is to centralize common functions used across analyses to make it easier to share code across modules and to maintain consistency in the analyses. +The goal is to centralize common functions used across analysis modules to make it easier to share code and maintain consistency across modules. ## v0.1.0 From 25e8c9ca2eb28ea83ef50b185bdd55743a872c52 Mon Sep 17 00:00:00 2001 From: Joshua Shapiro Date: Fri, 20 Dec 2024 12:38:25 -0500 Subject: [PATCH 06/12] remove metacells mention --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 03d6a285c..e6f04a382 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,7 +27,6 @@ This release also adds the following new modules developed by the Data Lab: - `hello-clusters`: a demonstration module for clustering analysis using the [`rOpenScPCA` package](https://github.com/AlexsLemonade/rOpenScPCA) - `seurat-conversion`: a module for converting `SingleCellExperiment` objects to Seurat objects, also using the `rOpenScPCA` package -- `metacells`: a module that begins to explore the utility of metacell analysis within the ScPCA project Other updates in this release include: From 737ad8c0835640495047f65ca759c6e7a1c32405 Mon Sep 17 00:00:00 2001 From: Joshua Shapiro Date: Fri, 20 Dec 2024 12:41:39 -0500 Subject: [PATCH 07/12] add 
cell-type-consensus --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e6f04a382..930988f6e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ Note that many of these modules are still in development at this stage, and may This release also adds the following new modules developed by the Data Lab: +- `cell-type-consensus`: a module for exploring consensus cell types across multiple annotation methods - `hello-clusters`: a demonstration module for clustering analysis using the [`rOpenScPCA` package](https://github.com/AlexsLemonade/rOpenScPCA) - `seurat-conversion`: a module for converting `SingleCellExperiment` objects to Seurat objects, also using the `rOpenScPCA` package From ac4e9bc96271de7f6396c9e496d3f637286adbf7 Mon Sep 17 00:00:00 2001 From: Joshua Shapiro Date: Fri, 20 Dec 2024 12:43:19 -0500 Subject: [PATCH 08/12] spelling fix --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 930988f6e..107f343b4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -32,7 +32,7 @@ This release also adds the following new modules developed by the Data Lab: Other updates in this release include: - a new `sync-results.py` script to simplify uploading (and downloading) analysis results from an analysis module to a user's S3 bucket -- changes from miniconda to miniforge for conda usage throughout the project +- changes from `miniconda` to `miniforge` for conda usage throughout the project While not part of this repository, we do want to also note that we have created the [`rOpenScPCA` package](https://github.com/AlexsLemonade/rOpenScPCA), which will house utility functions commonly used by analysis modules here. The goal is to centralize common functions used across analysis modules to make it easier to share code and maintain consistency across modules. 
From 0b5395f8c4264fa11112f3573ed28bbd24d9c060 Mon Sep 17 00:00:00 2001 From: Joshua Shapiro Date: Fri, 20 Dec 2024 14:25:11 -0500 Subject: [PATCH 09/12] Update CHANGELOG.md Co-authored-by: Jaclyn Taroni <19534205+jaclyn-taroni@users.noreply.github.com> --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 107f343b4..af6e3e121 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,7 @@ You may want to add temporary notes here for tracking as features are added, bef This release adds the first set of community-contributed analyses to the repository. These modules are focused on cell type identification and annotation for specific ScPCA datasets. -Note that many of these modules are still in development at this stage, and may not be fully functional. +Note that many of these modules are still in development at this stage and may not be fully functional. - `cell-type-dsrct` - `cell-type-ETP-ALL-03` From fb6a785b5acbf063f5f65744996975e9e0d4f797 Mon Sep 17 00:00:00 2001 From: Joshua Shapiro Date: Fri, 20 Dec 2024 14:26:51 -0500 Subject: [PATCH 10/12] use ${{ for push statement in docker action --- .github/workflows/build-push-docker-module.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-push-docker-module.yml b/.github/workflows/build-push-docker-module.yml index 0d3e7f886..07c3528ed 100644 --- a/.github/workflows/build-push-docker-module.yml +++ b/.github/workflows/build-push-docker-module.yml @@ -80,7 +80,7 @@ jobs: env: DOCKER_BUILD_SUMMARY: false with: - push: inputs.push-ecr + push: ${{ inputs.push-ecr }} context: "{{defaultContext}}:analyses/${{ inputs.module }}" tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} From 26c2e81fdc641c204e1f8fd5872e516df203574d Mon Sep 17 00:00:00 2001 From: Joshua Shapiro Date: Fri, 20 Dec 2024 15:08:09 -0500 Subject: [PATCH 11/12] Issue title update for Docker failures Just noted that the title 
for docker images failing was a bit ambiguous (the same as for analysis steps failing). So a very small fix. --- .github/workflows/docker_all-modules.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker_all-modules.yml b/.github/workflows/docker_all-modules.yml index 1e82c4a4a..35fc803c2 100644 --- a/.github/workflows/docker_all-modules.yml +++ b/.github/workflows/docker_all-modules.yml @@ -69,7 +69,7 @@ jobs: if: contains(needs.*.result, 'failure') uses: peter-evans/create-issue-from-file@v5 with: - title: Analysis module failing in CI + title: Docker image failing in CI content-filepath: | .github/cron-issue-templates/all-docker-issue-template.md labels: | From ecd604c25e084de4d3e63638080b53defd1fc1b5 Mon Sep 17 00:00:00 2001 From: Joshua Shapiro Date: Fri, 20 Dec 2024 15:14:20 -0500 Subject: [PATCH 12/12] Update .github/workflows/docker_all-modules.yml Co-authored-by: Jaclyn Taroni <19534205+jaclyn-taroni@users.noreply.github.com> --- .github/workflows/docker_all-modules.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docker_all-modules.yml b/.github/workflows/docker_all-modules.yml index 35fc803c2..ff7f53fab 100644 --- a/.github/workflows/docker_all-modules.yml +++ b/.github/workflows/docker_all-modules.yml @@ -69,7 +69,7 @@ jobs: if: contains(needs.*.result, 'failure') uses: peter-evans/create-issue-from-file@v5 with: - title: Docker image failing in CI + title: Docker image build failing in CI content-filepath: | .github/cron-issue-templates/all-docker-issue-template.md labels: |