From 37c7e585d16f55276bc73ee75f5dc4ff12458a7c Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Wed, 6 Nov 2024 12:56:54 -0500 Subject: [PATCH 1/5] remove packages directory --- packages/README.md | 9 - packages/rOpenScPCA/.Rbuildignore | 4 - packages/rOpenScPCA/DESCRIPTION | 42 --- packages/rOpenScPCA/LICENSE | 195 -------------- packages/rOpenScPCA/NAMESPACE | 10 - packages/rOpenScPCA/R/calculate-clusters.R | 249 ------------------ packages/rOpenScPCA/R/evaluate-clusters.R | 246 ----------------- packages/rOpenScPCA/R/sweep-clusters.R | 124 --------- packages/rOpenScPCA/README.md | 26 -- packages/rOpenScPCA/man/calculate_clusters.Rd | 91 ------- packages/rOpenScPCA/man/calculate_purity.Rd | 40 --- .../rOpenScPCA/man/calculate_silhouette.Rd | 39 --- .../rOpenScPCA/man/calculate_stability.Rd | 100 ------- packages/rOpenScPCA/man/extract_pc_matrix.Rd | 37 --- packages/rOpenScPCA/man/prepare_pc_matrix.Rd | 23 -- packages/rOpenScPCA/man/sweep_clusters.Rd | 105 -------- packages/rOpenScPCA/rOpenScPCA.Rproj | 20 -- packages/rOpenScPCA/tests/testthat.R | 12 - .../tests/testthat/test-calculate-clusters.R | 147 ----------- .../tests/testthat/test-evaluate-clusters.R | 100 ------- .../tests/testthat/test-sweep-clusters.R | 127 --------- 21 files changed, 1746 deletions(-) delete mode 100644 packages/README.md delete mode 100644 packages/rOpenScPCA/.Rbuildignore delete mode 100644 packages/rOpenScPCA/DESCRIPTION delete mode 100644 packages/rOpenScPCA/LICENSE delete mode 100644 packages/rOpenScPCA/NAMESPACE delete mode 100644 packages/rOpenScPCA/R/calculate-clusters.R delete mode 100644 packages/rOpenScPCA/R/evaluate-clusters.R delete mode 100644 packages/rOpenScPCA/R/sweep-clusters.R delete mode 100644 packages/rOpenScPCA/README.md delete mode 100644 packages/rOpenScPCA/man/calculate_clusters.Rd delete mode 100644 packages/rOpenScPCA/man/calculate_purity.Rd delete mode 100644 packages/rOpenScPCA/man/calculate_silhouette.Rd delete mode 100644 packages/rOpenScPCA/man/calculate_stability.Rd delete mode 100644 packages/rOpenScPCA/man/extract_pc_matrix.Rd delete mode 100644 packages/rOpenScPCA/man/prepare_pc_matrix.Rd delete mode 100644 packages/rOpenScPCA/man/sweep_clusters.Rd delete mode 100644 packages/rOpenScPCA/rOpenScPCA.Rproj delete mode 100644 packages/rOpenScPCA/tests/testthat.R delete mode 100644 packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R delete mode 100644 packages/rOpenScPCA/tests/testthat/test-evaluate-clusters.R delete mode 100644 packages/rOpenScPCA/tests/testthat/test-sweep-clusters.R diff --git a/packages/README.md b/packages/README.md deleted file mode 100644 index 3962960b2..000000000 --- a/packages/README.md +++ /dev/null @@ -1,9 +0,0 @@ -This directory contains packages written for use with [OpenScPCA analysis modules](https://openscpca.readthedocs.io/en/latest/contributing-to-analyses/analysis-modules/). - -The recommended version for use is given in the table below. -Please see the individual package `README` files for installation instructions. - -| Package | Description | Version | -| ------------ | ---------------------------------------- | ------- | -| `rOpenScPCA` | R functions for use in OpenScPCA modules | `0.1.0` | - diff --git a/packages/rOpenScPCA/.Rbuildignore b/packages/rOpenScPCA/.Rbuildignore deleted file mode 100644 index d82130280..000000000 --- a/packages/rOpenScPCA/.Rbuildignore +++ /dev/null @@ -1,4 +0,0 @@ -^renv$ -^renv\.lock$ -^.*\.Rproj$ -^\.Rproj\.user$ diff --git a/packages/rOpenScPCA/DESCRIPTION b/packages/rOpenScPCA/DESCRIPTION deleted file mode 100644 index 30cb1475a..000000000 --- a/packages/rOpenScPCA/DESCRIPTION +++ /dev/null @@ -1,42 +0,0 @@ -Package: rOpenScPCA -Type: Package -Title: Utility Functions for OpenScPCA Project Analysis Modules -Version: 0.1.0 -Authors@R: c( - person(c("Stephanie", "J."), "Spielman", - email = "stephanie.spielman@ccdatalab.org", - comment = list(ORCID = "0000-0002-9090-4788"), - role = c("aut", "cre")), - person(c("Joshua", "A."), "Shapiro", - email = "josh.shapiro@ccdatalab.org", - comment = list(ORCID = "0000-0002-6224-0347"), - role = c("aut")) - ) -Maintainer: Stephanie J. Spielman -Description: This package contains utility functions that support single-cell RNA-seq - analysis in R-based OpenScPCA analysis module code. -License: BSD_3_clause + file LICENSE -Encoding: UTF-8 -LazyData: true -Suggests: - testthat (>= 3.0.0), - scater, - Seurat, - splatter -Config/testthat/edition: 3 -RoxygenNote: 7.3.2 -Imports: - BiocParallel, - bluster (>= 1.14), - dplyr, - methods, - pdfCluster, - purrr, - SingleCellExperiment, - tibble, - tidyr -biocViews: - GeneExpression, - Transcriptomics, - SingleCell, - Clustering diff --git a/packages/rOpenScPCA/LICENSE b/packages/rOpenScPCA/LICENSE deleted file mode 100644 index fd378cd44..000000000 --- a/packages/rOpenScPCA/LICENSE +++ /dev/null @@ -1,195 +0,0 @@ -Copyright (c) 2024 OpenScPCA Project Maintainers & Contributors - -* All content is available for re-use under the CC-BY 4.0 license ([see section](#creative-commons-attribution-40-international) below). - -* Code blocks contained within any computational notebooks or source code files (e.g., `*.R`, `*.sh` or `*.py`) are also available for re-use under the BSD 3-Clause License ([see section](#bsd-3-clause-license) below). - -# Creative Commons Attribution 4.0 International - -Creative Commons Corporation (“Creative Commons”) is not a law firm and does not provide legal services or legal advice. Distribution of Creative Commons public licenses does not create a lawyer-client or other relationship. Creative Commons makes its licenses and related information available on an “as-is” basis. Creative Commons gives no warranties regarding its licenses, any material licensed under their terms and conditions, or any related information. Creative Commons disclaims all liability for damages resulting from their use to the fullest extent possible. - -### Using Creative Commons Public Licenses - -Creative Commons public licenses provide a standard set of terms and conditions that creators and other rights holders may use to share original works of authorship and other material subject to copyright and certain other rights specified in the public license below. The following considerations are for informational purposes only, are not exhaustive, and do not form part of our licenses. - -* __Considerations for licensors:__ Our public licenses are intended for use by those authorized to give the public permission to use material in ways otherwise restricted by copyright and certain other rights. Our licenses are irrevocable. Licensors should read and understand the terms and conditions of the license they choose before applying it. Licensors should also secure all rights necessary before applying our licenses so that the public can reuse the material as expected. Licensors should clearly mark any material not subject to the license. This includes other CC-licensed material, or material used under an exception or limitation to copyright. [More considerations for licensors](http://wiki.creativecommons.org/Considerations_for_licensors_and_licensees#Considerations_for_licensors). - -* __Considerations for the public:__ By using one of our public licenses, a licensor grants the public permission to use the licensed material under specified terms and conditions. If the licensor’s permission is not necessary for any reason–for example, because of any applicable exception or limitation to copyright–then that use is not regulated by the license. Our licenses grant only permissions under copyright and certain other rights that a licensor has authority to grant. Use of the licensed material may still be restricted for other reasons, including because others have copyright or other rights in the material. A licensor may make special requests, such as asking that all changes be marked or described. Although not required by our licenses, you are encouraged to respect those requests where reasonable. [More considerations for the public](http://wiki.creativecommons.org/Considerations_for_licensors_and_licensees#Considerations_for_licensees). - -## Creative Commons Attribution 4.0 International Public License - -By exercising the Licensed Rights (defined below), You accept and agree to be bound by the terms and conditions of this Creative Commons Attribution 4.0 International Public License ("Public License"). To the extent this Public License may be interpreted as a contract, You are granted the Licensed Rights in consideration of Your acceptance of these terms and conditions, and the Licensor grants You such rights in consideration of benefits the Licensor receives from making the Licensed Material available under these terms and conditions. - -### Section 1 – Definitions. - -a. __Adapted Material__ means material subject to Copyright and Similar Rights that is derived from or based upon the Licensed Material and in which the Licensed Material is translated, altered, arranged, transformed, or otherwise modified in a manner requiring permission under the Copyright and Similar Rights held by the Licensor. For purposes of this Public License, where the Licensed Material is a musical work, performance, or sound recording, Adapted Material is always produced where the Licensed Material is synched in timed relation with a moving image. - -b. __Adapter's License__ means the license You apply to Your Copyright and Similar Rights in Your contributions to Adapted Material in accordance with the terms and conditions of this Public License. - -c. __Copyright and Similar Rights__ means copyright and/or similar rights closely related to copyright including, without limitation, performance, broadcast, sound recording, and Sui Generis Database Rights, without regard to how the rights are labeled or categorized. For purposes of this Public License, the rights specified in Section 2(b)(1)-(2) are not Copyright and Similar Rights. - -d. __Effective Technological Measures__ means those measures that, in the absence of proper authority, may not be circumvented under laws fulfilling obligations under Article 11 of the WIPO Copyright Treaty adopted on December 20, 1996, and/or similar international agreements. - -e. __Exceptions and Limitations__ means fair use, fair dealing, and/or any other exception or limitation to Copyright and Similar Rights that applies to Your use of the Licensed Material. - -f. __Licensed Material__ means the artistic or literary work, database, or other material to which the Licensor applied this Public License. - -g. __Licensed Rights__ means the rights granted to You subject to the terms and conditions of this Public License, which are limited to all Copyright and Similar Rights that apply to Your use of the Licensed Material and that the Licensor has authority to license. - -h. __Licensor__ means the individual(s) or entity(ies) granting rights under this Public License. - -i. __Share__ means to provide material to the public by any means or process that requires permission under the Licensed Rights, such as reproduction, public display, public performance, distribution, dissemination, communication, or importation, and to make material available to the public including in ways that members of the public may access the material from a place and at a time individually chosen by them. - -j. __Sui Generis Database Rights__ means rights other than copyright resulting from Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, as amended and/or succeeded, as well as other essentially equivalent rights anywhere in the world. - -k. __You__ means the individual or entity exercising the Licensed Rights under this Public License. Your has a corresponding meaning. - -### Section 2 – Scope. - -a. ___License grant.___ - - 1. Subject to the terms and conditions of this Public License, the Licensor hereby grants You a worldwide, royalty-free, non-sublicensable, non-exclusive, irrevocable license to exercise the Licensed Rights in the Licensed Material to: - - A. reproduce and Share the Licensed Material, in whole or in part; and - - B. produce, reproduce, and Share Adapted Material. - - 2. __Exceptions and Limitations.__ For the avoidance of doubt, where Exceptions and Limitations apply to Your use, this Public License does not apply, and You do not need to comply with its terms and conditions. - - 3. __Term.__ The term of this Public License is specified in Section 6(a). - - 4. __Media and formats; technical modifications allowed.__ The Licensor authorizes You to exercise the Licensed Rights in all media and formats whether now known or hereafter created, and to make technical modifications necessary to do so. The Licensor waives and/or agrees not to assert any right or authority to forbid You from making technical modifications necessary to exercise the Licensed Rights, including technical modifications necessary to circumvent Effective Technological Measures. For purposes of this Public License, simply making modifications authorized by this Section 2(a)(4) never produces Adapted Material. - - 5. __Downstream recipients.__ - - A. __Offer from the Licensor – Licensed Material.__ Every recipient of the Licensed Material automatically receives an offer from the Licensor to exercise the Licensed Rights under the terms and conditions of this Public License. - - B. __No downstream restrictions.__ You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, the Licensed Material if doing so restricts exercise of the Licensed Rights by any recipient of the Licensed Material. - - 6. __No endorsement.__ Nothing in this Public License constitutes or may be construed as permission to assert or imply that You are, or that Your use of the Licensed Material is, connected with, or sponsored, endorsed, or granted official status by, the Licensor or others designated to receive attribution as provided in Section 3(a)(1)(A)(i). - -b. ___Other rights.___ - - 1. Moral rights, such as the right of integrity, are not licensed under this Public License, nor are publicity, privacy, and/or other similar personality rights; however, to the extent possible, the Licensor waives and/or agrees not to assert any such rights held by the Licensor to the limited extent necessary to allow You to exercise the Licensed Rights, but not otherwise. - - 2. Patent and trademark rights are not licensed under this Public License. - - 3. To the extent possible, the Licensor waives any right to collect royalties from You for the exercise of the Licensed Rights, whether directly or through a collecting society under any voluntary or waivable statutory or compulsory licensing scheme. In all other cases the Licensor expressly reserves any right to collect such royalties. - -### Section 3 – License Conditions. - -Your exercise of the Licensed Rights is expressly made subject to the following conditions. - -a. ___Attribution.___ - - 1. If You Share the Licensed Material (including in modified form), You must: - - A. retain the following if it is supplied by the Licensor with the Licensed Material: - - i. identification of the creator(s) of the Licensed Material and any others designated to receive attribution, in any reasonable manner requested by the Licensor (including by pseudonym if designated); - - ii. a copyright notice; - - iii. a notice that refers to this Public License; - - iv. a notice that refers to the disclaimer of warranties; - - v. a URI or hyperlink to the Licensed Material to the extent reasonably practicable; - - B. indicate if You modified the Licensed Material and retain an indication of any previous modifications; and - - C. indicate the Licensed Material is licensed under this Public License, and include the text of, or the URI or hyperlink to, this Public License. - - 2. You may satisfy the conditions in Section 3(a)(1) in any reasonable manner based on the medium, means, and context in which You Share the Licensed Material. For example, it may be reasonable to satisfy the conditions by providing a URI or hyperlink to a resource that includes the required information. - - 3. If requested by the Licensor, You must remove any of the information required by Section 3(a)(1)(A) to the extent reasonably practicable. - - 4. If You Share Adapted Material You produce, the Adapter's License You apply must not prevent recipients of the Adapted Material from complying with this Public License. - -### Section 4 – Sui Generis Database Rights. - -Where the Licensed Rights include Sui Generis Database Rights that apply to Your use of the Licensed Material: - -a. for the avoidance of doubt, Section 2(a)(1) grants You the right to extract, reuse, reproduce, and Share all or a substantial portion of the contents of the database; - -b. if You include all or a substantial portion of the database contents in a database in which You have Sui Generis Database Rights, then the database in which You have Sui Generis Database Rights (but not its individual contents) is Adapted Material; and - -c. You must comply with the conditions in Section 3(a) if You Share all or a substantial portion of the contents of the database. - -For the avoidance of doubt, this Section 4 supplements and does not replace Your obligations under this Public License where the Licensed Rights include other Copyright and Similar Rights. - -### Section 5 – Disclaimer of Warranties and Limitation of Liability. - -a. __Unless otherwise separately undertaken by the Licensor, to the extent possible, the Licensor offers the Licensed Material as-is and as-available, and makes no representations or warranties of any kind concerning the Licensed Material, whether express, implied, statutory, or other. This includes, without limitation, warranties of title, merchantability, fitness for a particular purpose, non-infringement, absence of latent or other defects, accuracy, or the presence or absence of errors, whether or not known or discoverable. Where disclaimers of warranties are not allowed in full or in part, this disclaimer may not apply to You.__ - -b. __To the extent possible, in no event will the Licensor be liable to You on any legal theory (including, without limitation, negligence) or otherwise for any direct, special, indirect, incidental, consequential, punitive, exemplary, or other losses, costs, expenses, or damages arising out of this Public License or use of the Licensed Material, even if the Licensor has been advised of the possibility of such losses, costs, expenses, or damages. Where a limitation of liability is not allowed in full or in part, this limitation may not apply to You.__ - -c. The disclaimer of warranties and limitation of liability provided above shall be interpreted in a manner that, to the extent possible, most closely approximates an absolute disclaimer and waiver of all liability. - -### Section 6 – Term and Termination. - -a. This Public License applies for the term of the Copyright and Similar Rights licensed here. However, if You fail to comply with this Public License, then Your rights under this Public License terminate automatically. - -b. Where Your right to use the Licensed Material has terminated under Section 6(a), it reinstates: - - 1. automatically as of the date the violation is cured, provided it is cured within 30 days of Your discovery of the violation; or - - 2. upon express reinstatement by the Licensor. - - For the avoidance of doubt, this Section 6(b) does not affect any right the Licensor may have to seek remedies for Your violations of this Public License. - -c. For the avoidance of doubt, the Licensor may also offer the Licensed Material under separate terms or conditions or stop distributing the Licensed Material at any time; however, doing so will not terminate this Public License. - -d. Sections 1, 5, 6, 7, and 8 survive termination of this Public License. - -### Section 7 – Other Terms and Conditions. - -a. The Licensor shall not be bound by any additional or different terms or conditions communicated by You unless expressly agreed. - -b. Any arrangements, understandings, or agreements regarding the Licensed Material not stated herein are separate from and independent of the terms and conditions of this Public License. - -### Section 8 – Interpretation. - -a. For the avoidance of doubt, this Public License does not, and shall not be interpreted to, reduce, limit, restrict, or impose conditions on any use of the Licensed Material that could lawfully be made without permission under this Public License. - -b. To the extent possible, if any provision of this Public License is deemed unenforceable, it shall be automatically reformed to the minimum extent necessary to make it enforceable. If the provision cannot be reformed, it shall be severed from this Public License without affecting the enforceability of the remaining terms and conditions. - -c. No term or condition of this Public License will be waived and no failure to comply consented to unless expressly agreed to by the Licensor. - -d. Nothing in this Public License constitutes or may be interpreted as a limitation upon, or waiver of, any privileges and immunities that apply to the Licensor or You, including from the legal processes of any jurisdiction or authority. - -``` -Creative Commons is not a party to its public licenses. Notwithstanding, Creative Commons may elect to apply one of its public licenses to material it publishes and in those instances will be considered the “Licensor.” Except for the limited purpose of indicating that material is shared under a Creative Commons public license or as otherwise permitted by the Creative Commons policies published at [creativecommons.org/policies](http://creativecommons.org/policies), Creative Commons does not authorize the use of the trademark “Creative Commons” or any other trademark or logo of Creative Commons without its prior written consent including, without limitation, in connection with any unauthorized modifications to any of its public licenses or any other arrangements, understandings, or agreements concerning use of licensed material. For the avoidance of doubt, this paragraph does not form part of the public licenses. - -Creative Commons may be contacted at creativecommons.org -``` - -# BSD 3-Clause License - -_Copyright (c) 2024, OpenScPCA Project Maintainers & Contributors_ -_All rights reserved._ - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -* Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/packages/rOpenScPCA/NAMESPACE b/packages/rOpenScPCA/NAMESPACE deleted file mode 100644 index 780875267..000000000 --- a/packages/rOpenScPCA/NAMESPACE +++ /dev/null @@ -1,10 +0,0 @@ -# Generated by roxygen2: do not edit by hand - -export(calculate_clusters) -export(calculate_purity) -export(calculate_silhouette) -export(calculate_stability) -export(extract_pc_matrix) -export(sweep_clusters) -import(SingleCellExperiment) -import(methods) diff --git a/packages/rOpenScPCA/R/calculate-clusters.R b/packages/rOpenScPCA/R/calculate-clusters.R deleted file mode 100644 index 1b50b0e23..000000000 --- a/packages/rOpenScPCA/R/calculate-clusters.R +++ /dev/null @@ -1,249 +0,0 @@ -#' Calculate graph-based clusters from a provided matrix -#' -#' This function is provided to simplify application of bluster package clustering functions on OpenScPCA data. -#' In particular, this function runs bluster::clusterRows() with the bluster::NNGraphParam() function on a -#' principal components matrix, provided either directly or via single-cell object. -#' Note that defaults for some arguments may differ from the bluster::NNGraphParam() defaults. -#' Specifically, the clustering algorithm defaults to "louvain" and the weighting scheme to "jaccard" -#' to align with common practice in scRNA-seq analysis. -#' -#' @import methods -#' -#' @param x An object containing PCs that clustering can be performed in. This can be either a SingleCellExperiment -#' object, a Seurat object, or a matrix where columns are PCs and rows are cells. If a matrix is provided, it must -#' have row names of cell ids (e.g., barcodes). -#' @param algorithm Clustering algorithm to use. Must be one of "louvain" (default), "walktrap", or "leiden". -#' @param weighting Weighting scheme to use. Must be one of "jaccard" (default), "rank", or "number" -#' @param nn Number of nearest neighbors. Default is 10. -#' @param resolution Resolution parameter used by louvain and leiden clustering only. Default is 1. -#' @param objective_function Leiden-specific parameter for whether to use the Constant Potts Model ("CPM"; default) or "modularity" -#' @param cluster_args List of additional arguments to pass to the chosen clustering function. -#' Only single values for each argument are supported (no vectors or lists). -#' See igraph documentation for details on each clustering function: https://igraph.org/r/html/latest -#' @param threads Number of threads to use. Default is 1. -#' @param seed Random seed to set for clustering. -#' @param pc_name Name of principal components slot in provided object. This argument is only used if a SingleCellExperiment -#' or Seurat object is provided. If not provided, the SingleCellExperiment object name will default to "PCA" and the -#' Seurat object name will default to "pca". -#' -#' @return A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns represent algorithm parameters -#' and include at least: `algorithm`, `weighting`, and `nn`. Louvain and leiden clustering will also include `resolution`, and -#' leiden clustering will further include `objective_function`. -#' -#' @export -#' -#' @examples -#' \dontrun{ -#' # cluster PCs from a SingleCellExperiment object using default parameters and -#' # a random seed for reproducibility -#' cluster_df <- calculate_clusters(sce_object, seed = 11) -#' -#' # cluster PCs from a SingleCellExperiment object using default parameters and 4 threads -#' cluster_df <- calculate_clusters(sce_object, threads = 4, seed = 11) -#' -#' # cluster PCs from a Seurat object using default parameters -#' cluster_df <- calculate_clusters(seurat_object, seed = 11) -#' -#' # cluster directly from a matrix using default parameters -#' cluster_df <- calculate_clusters(pca_matrix, seed = 11) -#' -#' # cluster directly from a matrix using the leiden algorithm with a resolution of 0.1 -#' cluster_df <- calculate_clusters( -#' pca_matrix, -#' algorithm = "leiden", -#' resolution = 0.1, -#' seed = 11 -#' ) -#' -#' # cluster directly from a matrix using the leiden algorithm with 3 iterations -#' cluster_df <- calculate_clusters( -#' pca_matrix, -#' algorithm = "leiden", -#' cluster_args = list(n_iterations = 3), -#' seed = 11 -#' ) -#' } -calculate_clusters <- function( - x, - algorithm = c("louvain", "walktrap", "leiden"), - weighting = c("jaccard", "rank", "number"), - nn = 10, - resolution = 1, # louvain or leiden - objective_function = c("CPM", "modularity"), # leiden only - cluster_args = list(), - threads = 1, - seed = NULL, - pc_name = NULL) { - if (!is.null(seed)) { - set.seed(seed) - } - - # check and prepare matrix - pca_matrix <- prepare_pc_matrix(x, pc_name = pc_name) - - # Check input arguments - stopifnot( - "`resolution` must be numeric" = is.numeric(resolution), - "`nn` must be numeric" = is.numeric(nn), - "`threads` must be numeric" = is.numeric(threads) - ) - - algorithm <- match.arg(algorithm) - weighting <- match.arg(weighting) - objective_function <- match.arg(objective_function) - - if (length(cluster_args)) { - stopifnot( - "`cluster_args` must be a named list." = is.list(cluster_args) && !("" %in% allNames(cluster_args)), - "`cluster_args` elements must all have only a single value" = all(sapply(cluster_args, length) == 1) - ) - } - - # Update cluster_args list with parameters that users can directly provide - # note that clusterRows throws an error if this list has a param not used by the chosen algorithm - if (algorithm %in% c("louvain", "leiden")) { - cluster_args$resolution <- resolution - } - if (algorithm == "leiden") { - cluster_args$objective_function <- objective_function - } - - if (threads > 1) { - bp_param <- BiocParallel::MulticoreParam(threads) - } else { - bp_param <- BiocParallel::SerialParam() - } - - - # Perform clustering - clusters <- bluster::clusterRows( - pca_matrix, - bluster::NNGraphParam( - k = nn, - type = weighting, - cluster.fun = algorithm, - cluster.args = cluster_args, - BPPARAM = bp_param - ) - ) - - # Transform results into a table and return - cluster_df <- data.frame( - cell_id = rownames(pca_matrix), - cluster = clusters, - algorithm = algorithm, - weighting = weighting, - nn = nn - ) - - # Add in cluster_args if it has parameters to include - if (length(cluster_args) != 0) { - cluster_df <- cluster_df |> - dplyr::bind_cols( - data.frame(cluster_args) - ) - } - - return(cluster_df) -} - - - -#' Extract a principal components (PC) matrix from either a SingleCellExperiment -#' or a Seurat object. -#' -#' This function first determines if the provided object is a SingleCellExperiment or -#' Seurat object, and then extract the PC matrix. If no name for the PC matrix is provided, -#' this function will assume the name of "PCA" for SingleCellExperiment objects, and -#' "pca" for Seurat objects. -#' -#' @import SingleCellExperiment -#' @import methods -#' -#' @param sc_object Either a SingleCellExperiment or Seurat object -#' @param pc_name Optionally, the name of the PC matrix in the object. If this is -#' not provided, the name "PCA" is assumed for SingleCellExperiment objects, and -#' "pca" for Seurat objects. -#' -#' @return PC matrix with row names -#' -#' @export -#' -#' @examples -#' \dontrun{ -#' # extract PC matrix from SCE object, assuming default name "PCA" -#' pca_matrix <- extract_pc_matrix(sce_object) -#' -#' # extract PC matrix from SCE object with non-default name "PCA_MAT" -#' pca_matrix <- extract_pc_matrix(sce_object, pc_name = "PCA_MAT") -#' -#' # extract PC matrix from Seurat object, assuming default name "pca" -#' pca_matrix <- extract_pc_matrix(seurat_object) -#' } -extract_pc_matrix <- function(sc_object, pc_name = NULL) { - # default PC names for each type of object to use if - # pc_name is NULL - default_sce <- "PCA" - default_seurat <- "pca" - - if (is(sc_object, "SingleCellExperiment")) { - pc_name <- ifelse(is.null(pc_name), default_sce, pc_name) - stopifnot( - "Could not find a PC matrix in the SingleCellExperiment object." = - pc_name %in% reducedDimNames(sc_object) - ) - - pca_matrix <- reducedDim(sc_object, pc_name) - } else if (is(sc_object, "Seurat")) { - pc_name <- ifelse(is.null(pc_name), default_seurat, pc_name) - stopifnot( - "Seurat package must be installed to process a Seurat object" = - requireNamespace("Seurat", quietly = TRUE), - "Could not find a PC matrix in the Seurat object." = - pc_name %in% names(sc_object@reductions) - ) - - pca_matrix <- Seurat::Embeddings( - sc_object, - reduction = pc_name - ) - } else { - stop("You must provide a SingleCellExperiment or Seurat object.") - } - - # Ensure row names are present - stopifnot( - "The extracted PCA matrix does not have row names." = is.character(rownames(pca_matrix)) - ) - - return(pca_matrix) -} - - - - - - -#' Helper function to check and/or extract a matrix of PCs from a given object -#' -#' @param x Either a matrix of principal components (PCs), or a SingleCellExperiment -#' or Seurat object containing PCs. If a matrix is provided, rows should be cells -#' and columns should be PCs, and row names should be cell ids (e.g., barcodes). -#' @param pc_name Optionally, the name of the PC matrix in the object. Not used for -#' matrices. If this is not provided, the name "PCA" is assumed for -#' SingleCellExperiment objects, and "pca" for Seurat objects. -#' -#' @return A matrix of PCs with row names representing cell ids -prepare_pc_matrix <- function(x, pc_name = NULL) { - if (any(class(x) %in% c("matrix", "Matrix"))) { - stopifnot( - "The matrix must have row names representing cell ids, e.g. barcodes." = is.character(rownames(x)) - ) - } else if (is(x, "SingleCellExperiment") || is(x, "Seurat")) { - x <- extract_pc_matrix(x, pc_name = pc_name) - } else { - stop("The first argument should be one of: a SingleCellExperiment object, a Seurat object, or a matrix with row names.") - } - - return(x) -} diff --git a/packages/rOpenScPCA/R/evaluate-clusters.R b/packages/rOpenScPCA/R/evaluate-clusters.R deleted file mode 100644 index 6ea526584..000000000 --- a/packages/rOpenScPCA/R/evaluate-clusters.R +++ /dev/null @@ -1,246 +0,0 @@ -#' Calculate the silhouette width of clusters -#' -#' This function uses the `bluster::approxSilhouette()` function to calculate the -#' silhouette width for a clustering result. These results can be used downstream to -#' calculate the average silhouette width, a popular metric for cluster evaluation. -#' -#' @param x Either a matrix of principal components (PCs), or a SingleCellExperiment -#' or Seurat object containing PCs. If a matrix is provided, rows should be cells -#' and columns should be PCs, and row names should be cell ids (e.g., barcodes). -#' @param cluster_df A data frame that contains at least the columns `cell_id` and -#' `cluster`. The `cell_id` values should match either the PC matrix row names, -#' or the SingleCellExperiment/Seurat object cell ids. Typically this will be output from -#' the `rOpenScPCA::calculate_clusters()` function. -#' @param pc_name Optionally, the name of the PC matrix in the object. Not used if a -#' matrix is provided. If the name is not provided, the name "PCA" is assumed for -#' SingleCellExperiment objects, and "pca" for Seurat objects. -#' -#' @return Expanded `cluster_df` data frame with these additional columns: -#' - `silhouette_width`, the cell's silhouette width -#' - `other`, the closest cluster other than the one to which the given cell was assigned -#' For more information, see documentation for `bluster::approxSilhouette()` -#' -#' @export -#' @examples -#' \dontrun{ -#' # calculate silhouette width for clusters stored in a data frame -#' cluster_df <- calculate_silhouette(sce_object, cluster_df) -#' } -calculate_silhouette <- function( - x, - cluster_df, - pc_name = NULL) { - x <- prepare_pc_matrix(x, pc_name) - - expected_df_names <- c("cell_id", "cluster") - stopifnot( - "Expected columns 'cell_id' and 'cluster' in the cluster_df." = - all(expected_df_names %in% colnames(cluster_df)) - ) - - silhouette_df <- x |> - bluster::approxSilhouette(cluster_df$cluster) |> - as.data.frame() |> - tibble::rownames_to_column("cell_id") |> - dplyr::rename("silhouette_width" = "width") - - # join with cluster_df in this direction, so that columns in - # cluster_df come first - silhouette_df <- cluster_df |> - dplyr::inner_join(silhouette_df, by = c("cell_id", "cluster")) - - return(silhouette_df) -} - - - - -#' Calculate the neighborhood purity of clusters -#' -#' This function uses the `bluster::neighborPurity()` function to calculate the -#' neighborhood purity values for a clustering result. -#' -#' @param x Either a matrix of principal components (PCs), or a SingleCellExperiment -#' or Seurat object containing PCs. If a matrix is provided, rows should be cells -#' and columns should be PCs, and row names should be cell ids (e.g., barcodes). -#' @param cluster_df A data frame that contains at least the columns `cell_id` and -#' `cluster`. The `cell_id` values should match either the PC matrix row names, -#' or the SingleCellExperiment/Seurat object cell ids. Typically this will be output from -#' the `rOpenScPCA::calculate_clusters()` function. -#' @param pc_name Optionally, the name of the PC matrix in the object. Not used if a -#' matrix is provided. If the name is not provided, the name "PCA" is assumed for -#' SingleCellExperiment objects, and "pca" for Seurat objects. -#' @param ... Additional arguments to pass to `bluster::neighborPurity()` -#' -#' @return Expanded `cluster_df` data frame with these additional columns: -#' - `purity`, the cell's neighborhood purity -#' - `maximum`, the cluster with the highest proportion of observations neighboring the given cell. -#' For more information, see documentation for `bluster::neighborPurity()` -#' -#' @export -#' @examples -#' \dontrun{ -#' # calculate neighborhood purity for clusters stored in a data frame -#' cluster_df <- calculate_purity(sce_object, cluster_df) -#' } -calculate_purity <- function( - x, - cluster_df, - pc_name = NULL, - ...) { - x <- prepare_pc_matrix(x, pc_name) - - expected_df_names <- c("cell_id", "cluster") - stopifnot( - "Expected columns 'cell_id' and 'cluster' in cluster_df." = - all(expected_df_names %in% colnames(cluster_df)) - ) - - purity_df <- x |> - bluster::neighborPurity(cluster_df$cluster) |> - as.data.frame() |> - tibble::rownames_to_column("cell_id") - - # join with cluster_df in this direction, so that columns in - # cluster_df come first - purity_df <- cluster_df |> - dplyr::inner_join(purity_df, by = c("cell_id")) - - return(purity_df) -} - - - -#' Calculate cluster stability using the Adjusted Rand Index (ARI) -#' -#' This function generates and clusters, using provided parameters, bootstrap -#' replicates calculates the Adjusted Rand Index (ARI) between each set of bootstrapped -#' clusters and the original provided clusters. ARI measures similarity between different -#' cluster results, where a value of 0 indicates an entirely random relationship between -#' results, and a value of 1 indicates perfect agreement. -#' -#' When assessing stability, you should specify the same clustering parameters here as -#' were used to calculate the original clusters. -#' -#' Note that this function will also make use of bluster::clusterRows() with the -#' bluster::NNGraphParam() function on a principal components matrix. Note that defaults -#' for some arguments may differ from the bluster::NNGraphParam() defaults. -#' Specifically, the clustering algorithm defaults to "louvain" and the weighting scheme -#' to "jaccard" to align with common practice in scRNA-seq analysis. -#' -#' -#' @param x An object containing PCs that clusters were calculated from. This can be -#' either a SingleCellExperiment object, a Seurat object, or a matrix where columns -#' are PCs and rows are cells. If a matrix is provided, it must have row names of cell -#' ids (e.g., barcodes). -#' @param clusters A vector of cluster ids, typically a numeric factor variable, obtained -#' by previously clustering the PCs. -#' @param replicates Number of bootstrap replicates to perform. Default is 20. -#' @param seed Random seed -#' @param pc_name Optionally, the name of the PC matrix in the object. Not used if a -#' matrix is provided. If the name is not provided, the name "PCA" is assumed for -#' SingleCellExperiment objects, and "pca" for Seurat objects. -#' @param ... Additional arguments to pass to `calculate_clusters()` which calculates -#' bootstrapped clusters. Usually, these will be the same arguments used to generate -#' the original clusters. -#' -#' @return Data frame with columns `replicate` and `ari`, representing the given bootstrap replicate -#' and its ARI value, respectively, and columns representing clustering algorithm parameters which -#' include at least `algorithm`, `weighting`, and `nn`. Louvain and leiden clustering will also -#' include `resolution`, and leiden clustering will further include `objective_function`. -#' -#' -#' @export -#' -#' @examples -#' \dontrun{ -#' -#' # First, cluster PCs from a SingleCellExperiment object using default parameters -#' # and setting a seed for reproducibility -#' cluster_df <- calculate_clusters(sce_object, seed = 11) -#' # Second, calculate cluster stability using default parameters -#' stability_df <- calculate_stability(sce_object, cluster_df$clusters, seed = 11) -#' -#' -#' # First, cluster PCs from a SingleCellExperiment object using default parameters -#' # and setting a seed for reproducibility -#' cluster_df <- calculate_clusters(sce_object, seed = 11) -#' # Second, calculate cluster stability using default parameters and 50 replicates -#' stability_df <- calculate_stability( -#' sce_object, -#' cluster_df$clusters, -#' replicates = 50, -#' seed = 11 -#' ) -#' -#' -#' # First, cluster PCs from a SingleCellExperiment object using the leiden -#' # algorithm and a resolution of 0.1 -#' cluster_df <- calculate_clusters( -#' sce_object, -#' algorithm = "leiden", -#' resolution = 0.1, -#' seed = 11 -#' ) -#' # Second, calculate cluster stability using the same parameters as were used -#' # for the initial clustering -#' stability_df <- calculate_stability( -#' sce_object, -#' cluster_df$clusters, -#' algorithm = "leiden", -#' resolution = 0.1, -#' seed = 11 -#' ) -#' } -calculate_stability <- function( - x, - clusters, - replicates = 20, - seed = NULL, - pc_name = NULL, - ...) { - if (!is.null(seed)) { - set.seed(seed) - } - - # ensure we have a matrix - pca_matrix <- prepare_pc_matrix(x, pc_name = pc_name) - - # check clusters and matrix compatibility - stopifnot( - "The number of rows in the matrix must equal the length of the clusters vector." = - nrow(pca_matrix) == length(clusters) - ) - - # calculate ARI for each cluster result bootstrap replicate - all_ari_df <- 1:replicates |> - purrr::map( - \(i) { - sample_cells <- sample(nrow(pca_matrix), replace = TRUE) - resampled_pca <- pca_matrix[sample_cells, ] - original_clusters <- clusters[sample_cells] - - resampled_df <- calculate_clusters(resampled_pca, ...) - - ari <- pdfCluster::adj.rand.index(resampled_df$cluster, original_clusters) - - # return df with ari and clustering parameters - ari_df <- resampled_df |> - dplyr::slice(1) |> - dplyr::select(!c("cell_id", "cluster")) |> - dplyr::mutate( - # define this variable here to ensure it's numeric - replicate = i, - ari = ari, - # ensure these columns come first - .before = "algorithm" - ) - - return(ari_df) - } - ) |> - dplyr::bind_rows() - - - return(all_ari_df) -} diff --git a/packages/rOpenScPCA/R/sweep-clusters.R b/packages/rOpenScPCA/R/sweep-clusters.R deleted file mode 100644 index c4fbdb83b..000000000 --- a/packages/rOpenScPCA/R/sweep-clusters.R +++ /dev/null @@ -1,124 +0,0 @@ -#' Calculate clusters across a set of parameters -#' -#' This function can be used to perform reproducible clustering while varying a set of parameters. -#' Multiple values can be provided for any of: -#' - The algorithm (`algorithm`) -#' - The weighting scheme (`weighting`) -#' - Number of nearest neighbors (`nn`) -#' - The resolution parameter (`resolution`) -#' - The objective function parameter (`objective_function`) -#' -#' For each algorithm specified, all parameters possible to use with that -#' algorithm will be systematically varied. This function does not accept additional -#' parameters besides those listed above. -#' Note that defaults for some arguments may differ from the bluster::NNGraphParam() defaults. -#' Specifically, the clustering algorithm defaults to "louvain" and the weighting scheme to "jaccard" -#' to align with common practice in scRNA-seq analysis. -#' -#' @param x An object containing PCs that clustering can be performed in. This can be either -#' a SingleCellExperiment object, a Seurat object, or a matrix where columns are PCs and -#' rows are cells. If a matrix is provided, it must have row names of cell ids (e.g., barcodes). -#' @param algorithm Clustering algorithm to use. Must be one of "louvain" (default), "walktrap", -#' or "leiden". -#' @param weighting Weighting scheme(s) to consider when sweeping parameters. -#' Provide a vector of unique values to vary this parameter. Options include "jaccard" (default), -#' "rank", or "number" -#' @param nn Number of nearest neighbors to consider when sweeping parameters. -#' Provide a vector of unique values to vary this parameter. Default is 10. -#' @param resolution Resolution parameter used by louvain and leiden clustering only. -#' Provide a vector of unique values to vary this parameter. Default is 1. -#' @param objective_function Leiden-specific parameter for whether to use the -#' Constant Potts Model ("CPM"; default) or "modularity". Provide a vector of unique values -#' to vary this parameter. -#' @param seed Random seed to set for clustering. -#' @param threads Number of threads to use. Default is 1. -#' @param pc_name Name of principal components slot in provided object. This argument is only used -#' if a SingleCellExperiment or Seurat object is provided. If not provided, the SingleCellExperiment -#' object name will default to "PCA" and the Seurat object name will default to "pca". -#' -#' @return A list of data frames from performing clustering across all parameter combinations. -#' Columns include `cluster_set` (identifier column for results from a single clustering run), -#' `cell_id`, and `cluster`. Additional columns represent algorithm parameters and include at least: -#' `algorithm`, `weighting`, and `nn`. Louvain and leiden clustering will also include `resolution`, -#' and leiden clustering will further include `objective_function`. -#' -#' @export -#' -#' @examples -#' \dontrun{ -#' # perform louvain clustering with jaccard weighting (defaults), -#' # varying the nearest neighobor parameter, and set a seed for reproducibility -#' cluster_df <- sweep_clusters( -#' sce_object, -#' nn = c(10, 15, 20, 25), -#' seed = 11 -#' ) -#' -#' # perform louvain clustering, with jaccard and rank weighting, and -#' # varying the nearest neighbor and resolution parameters. -#' cluster_df <- sweep_clusters( -#' sce_object, -#' algorithm = "louvain", -#' weighting = c("jaccard", "rank"), -#' nn = c(10, 15, 20, 25), -#' resolution = c(0.5, 1), -#' seed = 11 -#' ) -#' -#' # perform walktrap and louvain clustering with jaccard weighting, and -#' # varying the nearest neighbors for both algorithms, and resolution for louvain. -#' cluster_df <- sweep_clusters( -#' sce_object, -#' algorithm = c("walktrap", "louvain"), -#' weighting = "jaccard", -#' nn = c(10, 15, 20, 25), -#' resolution = c(0.5, 1), -#' seed = 11 -#' ) -#' } -sweep_clusters <- function( - x, - algorithm = "louvain", - weighting = "jaccard", - nn = 10, - resolution = 1, # louvain or leiden - objective_function = "CPM", # leiden only - threads = 1, - seed = NULL, - pc_name = NULL) { - # check and prepare matrix - pca_matrix <- prepare_pc_matrix(x, pc_name = pc_name) - - # Collect all specific inputs into a single list - sweep_params <- tidyr::expand_grid( - algorithm = unique(algorithm), - weighting = unique(weighting), - nn = unique(nn), - resolution = unique(resolution), - objective_function = unique(objective_function) - ) |> - # set unused parameters for each algorithm to default; this will allow duplicates to be removed by distinct() - dplyr::mutate( - resolution = ifelse(algorithm %in% c("louvain", "leiden"), resolution, 1), - objective_function = ifelse(algorithm == "leiden", objective_function, "CPM") - ) |> - dplyr::distinct() - - sweep_results <- sweep_params |> - purrr::pmap( - \(algorithm, weighting, nn, resolution, objective_function) { - calculate_clusters( - pca_matrix, - algorithm = algorithm, - weighting = weighting, - nn = nn, - resolution = resolution, - objective_function = objective_function, - threads = threads, - seed = seed - ) - } - ) - - return(sweep_results) -} diff --git a/packages/rOpenScPCA/README.md b/packages/rOpenScPCA/README.md deleted file mode 100644 index db420dda3..000000000 --- a/packages/rOpenScPCA/README.md +++ /dev/null @@ -1,26 +0,0 @@ -# rOpenScPCA - -This package contains utility functions to support single-cell RNAseq analysis in the OpenScPCA project. - -## Installation - -`rOpenScPCA` can either be installed with `renv` or the `remotes` package: - -```r -# Install the package with renv -renv::install("AlexsLemonade/OpenScPCA-analysis:packages/rOpenScPCA") -# You can then add to a renv.lock file with renv::snapshot() - -# Install the package with remotes -remotes::install_github("AlexsLemonade/OpenScPCA-analysis/packages/rOpenScPCA") -``` - - diff --git a/packages/rOpenScPCA/man/calculate_clusters.Rd b/packages/rOpenScPCA/man/calculate_clusters.Rd deleted file mode 100644 index 655a36cd1..000000000 --- a/packages/rOpenScPCA/man/calculate_clusters.Rd +++ /dev/null @@ -1,91 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/calculate-clusters.R -\name{calculate_clusters} -\alias{calculate_clusters} -\title{Calculate graph-based clusters from a provided matrix} -\usage{ -calculate_clusters( - x, - algorithm = c("louvain", "walktrap", "leiden"), - weighting = c("jaccard", "rank", "number"), - nn = 10, - resolution = 1, - objective_function = c("CPM", "modularity"), - cluster_args = list(), - threads = 1, - seed = NULL, - pc_name = NULL -) -} -\arguments{ -\item{x}{An object containing PCs that clustering can be performed in. This can be either a SingleCellExperiment -object, a Seurat object, or a matrix where columns are PCs and rows are cells. If a matrix is provided, it must -have row names of cell ids (e.g., barcodes).} - -\item{algorithm}{Clustering algorithm to use. Must be one of "louvain" (default), "walktrap", or "leiden".} - -\item{weighting}{Weighting scheme to use. Must be one of "jaccard" (default), "rank", or "number"} - -\item{nn}{Number of nearest neighbors. Default is 10.} - -\item{resolution}{Resolution parameter used by louvain and leiden clustering only. Default is 1.} - -\item{objective_function}{Leiden-specific parameter for whether to use the Constant Potts Model ("CPM"; default) or "modularity"} - -\item{cluster_args}{List of additional arguments to pass to the chosen clustering function. -Only single values for each argument are supported (no vectors or lists). -See igraph documentation for details on each clustering function: https://igraph.org/r/html/latest} - -\item{threads}{Number of threads to use. Default is 1.} - -\item{seed}{Random seed to set for clustering.} - -\item{pc_name}{Name of principal components slot in provided object. This argument is only used if a SingleCellExperiment -or Seurat object is provided. If not provided, the SingleCellExperiment object name will default to "PCA" and the -Seurat object name will default to "pca".} -} -\value{ -A data frame of cluster results with columns `cell_id` and `cluster`. Additional columns represent algorithm parameters - and include at least: `algorithm`, `weighting`, and `nn`. Louvain and leiden clustering will also include `resolution`, and - leiden clustering will further include `objective_function`. -} -\description{ -This function is provided to simplify application of bluster package clustering functions on OpenScPCA data. -In particular, this function runs bluster::clusterRows() with the bluster::NNGraphParam() function on a -principal components matrix, provided either directly or via single-cell object. -Note that defaults for some arguments may differ from the bluster::NNGraphParam() defaults. -Specifically, the clustering algorithm defaults to "louvain" and the weighting scheme to "jaccard" -to align with common practice in scRNA-seq analysis. -} -\examples{ -\dontrun{ -# cluster PCs from a SingleCellExperiment object using default parameters and -# a random seed for reproducibility -cluster_df <- calculate_clusters(sce_object, seed = 11) - -# cluster PCs from a SingleCellExperiment object using default parameters and 4 threads -cluster_df <- calculate_clusters(sce_object, threads = 4, seed = 11) - -# cluster PCs from a Seurat object using default parameters -cluster_df <- calculate_clusters(seurat_object, seed = 11) - -# cluster directly from a matrix using default parameters -cluster_df <- calculate_clusters(pca_matrix, seed = 11) - -# cluster directly from a matrix using the leiden algorithm with a resolution of 0.1 -cluster_df <- calculate_clusters( - pca_matrix, - algorithm = "leiden", - resolution = 0.1, - seed = 11 -) - -# cluster directly from a matrix using the leiden algorithm with 3 iterations -cluster_df <- calculate_clusters( - pca_matrix, - algorithm = "leiden", - cluster_args = list(n_iterations = 3), - seed = 11 -) -} -} diff --git a/packages/rOpenScPCA/man/calculate_purity.Rd b/packages/rOpenScPCA/man/calculate_purity.Rd deleted file mode 100644 index b9173dcc1..000000000 --- a/packages/rOpenScPCA/man/calculate_purity.Rd +++ /dev/null @@ -1,40 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/evaluate-clusters.R -\name{calculate_purity} -\alias{calculate_purity} -\title{Calculate the neighborhood purity of clusters} -\usage{ -calculate_purity(x, cluster_df, pc_name = NULL, ...) -} -\arguments{ -\item{x}{Either a matrix of principal components (PCs), or a SingleCellExperiment -or Seurat object containing PCs. If a matrix is provided, rows should be cells -and columns should be PCs, and row names should be cell ids (e.g., barcodes).} - -\item{cluster_df}{A data frame that contains at least the columns `cell_id` and -`cluster`. The `cell_id` values should match either the PC matrix row names, -or the SingleCellExperiment/Seurat object cell ids. Typically this will be output from -the `rOpenScPCA::calculate_clusters()` function.} - -\item{pc_name}{Optionally, the name of the PC matrix in the object. Not used if a -matrix is provided. If the name is not provided, the name "PCA" is assumed for -SingleCellExperiment objects, and "pca" for Seurat objects.} - -\item{...}{Additional arguments to pass to `bluster::neighborPurity()`} -} -\value{ -Expanded `cluster_df` data frame with these additional columns: -- `purity`, the cell's neighborhood purity -- `maximum`, the cluster with the highest proportion of observations neighboring the given cell. -For more information, see documentation for `bluster::neighborPurity()` -} -\description{ -This function uses the `bluster::neighborPurity()` function to calculate the -neighborhood purity values for a clustering result. -} -\examples{ -\dontrun{ -# calculate neighborhood purity for clusters stored in a data frame -cluster_df <- calculate_purity(sce_object, cluster_df) -} -} diff --git a/packages/rOpenScPCA/man/calculate_silhouette.Rd b/packages/rOpenScPCA/man/calculate_silhouette.Rd deleted file mode 100644 index f3df8e428..000000000 --- a/packages/rOpenScPCA/man/calculate_silhouette.Rd +++ /dev/null @@ -1,39 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/evaluate-clusters.R -\name{calculate_silhouette} -\alias{calculate_silhouette} -\title{Calculate the silhouette width of clusters} -\usage{ -calculate_silhouette(x, cluster_df, pc_name = NULL) -} -\arguments{ -\item{x}{Either a matrix of principal components (PCs), or a SingleCellExperiment -or Seurat object containing PCs. If a matrix is provided, rows should be cells -and columns should be PCs, and row names should be cell ids (e.g., barcodes).} - -\item{cluster_df}{A data frame that contains at least the columns `cell_id` and -`cluster`. The `cell_id` values should match either the PC matrix row names, -or the SingleCellExperiment/Seurat object cell ids. Typically this will be output from -the `rOpenScPCA::calculate_clusters()` function.} - -\item{pc_name}{Optionally, the name of the PC matrix in the object. Not used if a -matrix is provided. If the name is not provided, the name "PCA" is assumed for -SingleCellExperiment objects, and "pca" for Seurat objects.} -} -\value{ -Expanded `cluster_df` data frame with these additional columns: -- `silhouette_width`, the cell's silhouette width -- `other`, the closest cluster other than the one to which the given cell was assigned -For more information, see documentation for `bluster::approxSilhouette()` -} -\description{ -This function uses the `bluster::approxSilhouette()` function to calculate the -silhouette width for a clustering result. These results can be used downstream to -calculate the average silhouette width, a popular metric for cluster evaluation. -} -\examples{ -\dontrun{ -# calculate silhouette width for clusters stored in a data frame -cluster_df <- calculate_silhouette(sce_object, cluster_df) -} -} diff --git a/packages/rOpenScPCA/man/calculate_stability.Rd b/packages/rOpenScPCA/man/calculate_stability.Rd deleted file mode 100644 index 24ddde983..000000000 --- a/packages/rOpenScPCA/man/calculate_stability.Rd +++ /dev/null @@ -1,100 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/evaluate-clusters.R -\name{calculate_stability} -\alias{calculate_stability} -\title{Calculate cluster stability using the Adjusted Rand Index (ARI)} -\usage{ -calculate_stability( - x, - clusters, - replicates = 20, - seed = NULL, - pc_name = NULL, - ... -) -} -\arguments{ -\item{x}{An object containing PCs that clusters were calculated from. This can be -either a SingleCellExperiment object, a Seurat object, or a matrix where columns -are PCs and rows are cells. If a matrix is provided, it must have row names of cell -ids (e.g., barcodes).} - -\item{clusters}{A vector of cluster ids, typically a numeric factor variable, obtained -by previously clustering the PCs.} - -\item{replicates}{Number of bootstrap replicates to perform. Default is 20.} - -\item{seed}{Random seed} - -\item{pc_name}{Optionally, the name of the PC matrix in the object. Not used if a -matrix is provided. If the name is not provided, the name "PCA" is assumed for -SingleCellExperiment objects, and "pca" for Seurat objects.} - -\item{...}{Additional arguments to pass to `calculate_clusters()` which calculates -bootstrapped clusters. Usually, these will be the same arguments used to generate -the original clusters.} -} -\value{ -Data frame with columns `replicate` and `ari`, representing the given bootstrap replicate - and its ARI value, respectively, and columns representing clustering algorithm parameters which - include at least `algorithm`, `weighting`, and `nn`. Louvain and leiden clustering will also - include `resolution`, and leiden clustering will further include `objective_function`. -} -\description{ -This function generates and clusters, using provided parameters, bootstrap -replicates calculates the Adjusted Rand Index (ARI) between each set of bootstrapped -clusters and the original provided clusters. ARI measures similarity between different -cluster results, where a value of 0 indicates an entirely random relationship between -results, and a value of 1 indicates perfect agreement. -} -\details{ -When assessing stability, you should specify the same clustering parameters here as -were used to calculate the original clusters. - -Note that this function will also make use of bluster::clusterRows() with the -bluster::NNGraphParam() function on a principal components matrix. Note that defaults -for some arguments may differ from the bluster::NNGraphParam() defaults. -Specifically, the clustering algorithm defaults to "louvain" and the weighting scheme -to "jaccard" to align with common practice in scRNA-seq analysis. -} -\examples{ -\dontrun{ - -# First, cluster PCs from a SingleCellExperiment object using default parameters -# and setting a seed for reproducibility -cluster_df <- calculate_clusters(sce_object, seed = 11) -# Second, calculate cluster stability using default parameters -stability_df <- calculate_stability(sce_object, cluster_df$clusters, seed = 11) - - -# First, cluster PCs from a SingleCellExperiment object using default parameters -# and setting a seed for reproducibility -cluster_df <- calculate_clusters(sce_object, seed = 11) -# Second, calculate cluster stability using default parameters and 50 replicates -stability_df <- calculate_stability( - sce_object, - cluster_df$clusters, - replicates = 50, - seed = 11 -) - - -# First, cluster PCs from a SingleCellExperiment object using the leiden -# algorithm and a resolution of 0.1 -cluster_df <- calculate_clusters( - sce_object, - algorithm = "leiden", - resolution = 0.1, - seed = 11 -) -# Second, calculate cluster stability using the same parameters as were used -# for the initial clustering -stability_df <- calculate_stability( - sce_object, - cluster_df$clusters, - algorithm = "leiden", - resolution = 0.1, - seed = 11 -) -} -} diff --git a/packages/rOpenScPCA/man/extract_pc_matrix.Rd b/packages/rOpenScPCA/man/extract_pc_matrix.Rd deleted file mode 100644 index 8f21bc930..000000000 --- a/packages/rOpenScPCA/man/extract_pc_matrix.Rd +++ /dev/null @@ -1,37 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/calculate-clusters.R -\name{extract_pc_matrix} -\alias{extract_pc_matrix} -\title{Extract a principal components (PC) matrix from either a SingleCellExperiment -or a Seurat object.} -\usage{ -extract_pc_matrix(sc_object, pc_name = NULL) -} -\arguments{ -\item{sc_object}{Either a SingleCellExperiment or Seurat object} - -\item{pc_name}{Optionally, the name of the PC matrix in the object. If this is -not provided, the name "PCA" is assumed for SingleCellExperiment objects, and -"pca" for Seurat objects.} -} -\value{ -PC matrix with row names -} -\description{ -This function first determines if the provided object is a SingleCellExperiment or -Seurat object, and then extract the PC matrix. If no name for the PC matrix is provided, -this function will assume the name of "PCA" for SingleCellExperiment objects, and -"pca" for Seurat objects. -} -\examples{ -\dontrun{ -# extract PC matrix from SCE object, assuming default name "PCA" -pca_matrix <- extract_pc_matrix(sce_object) - -# extract PC matrix from SCE object with non-default name "PCA_MAT" -pca_matrix <- extract_pc_matrix(sce_object, pc_name = "PCA_MAT") - -# extract PC matrix from Seurat object, assuming default name "pca" -pca_matrix <- extract_pc_matrix(seurat_object) -} -} diff --git a/packages/rOpenScPCA/man/prepare_pc_matrix.Rd b/packages/rOpenScPCA/man/prepare_pc_matrix.Rd deleted file mode 100644 index 9c4eadc07..000000000 --- a/packages/rOpenScPCA/man/prepare_pc_matrix.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/calculate-clusters.R -\name{prepare_pc_matrix} -\alias{prepare_pc_matrix} -\title{Helper function to check and/or extract a matrix of PCs from a given object} -\usage{ -prepare_pc_matrix(x, pc_name = NULL) -} -\arguments{ -\item{x}{Either a matrix of principal components (PCs), or a SingleCellExperiment -or Seurat object containing PCs. If a matrix is provided, rows should be cells -and columns should be PCs, and row names should be cell ids (e.g., barcodes).} - -\item{pc_name}{Optionally, the name of the PC matrix in the object. Not used for -matrices. If this is not provided, the name "PCA" is assumed for -SingleCellExperiment objects, and "pca" for Seurat objects.} -} -\value{ -A matrix of PCs with row names representing cell ids -} -\description{ -Helper function to check and/or extract a matrix of PCs from a given object -} diff --git a/packages/rOpenScPCA/man/sweep_clusters.Rd b/packages/rOpenScPCA/man/sweep_clusters.Rd deleted file mode 100644 index 3286d3ed3..000000000 --- a/packages/rOpenScPCA/man/sweep_clusters.Rd +++ /dev/null @@ -1,105 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/sweep-clusters.R -\name{sweep_clusters} -\alias{sweep_clusters} -\title{Calculate clusters across a set of parameters} -\usage{ -sweep_clusters( - x, - algorithm = "louvain", - weighting = "jaccard", - nn = 10, - resolution = 1, - objective_function = "CPM", - threads = 1, - seed = NULL, - pc_name = NULL -) -} -\arguments{ -\item{x}{An object containing PCs that clustering can be performed in. This can be either -a SingleCellExperiment object, a Seurat object, or a matrix where columns are PCs and -rows are cells. If a matrix is provided, it must have row names of cell ids (e.g., barcodes).} - -\item{algorithm}{Clustering algorithm to use. Must be one of "louvain" (default), "walktrap", -or "leiden".} - -\item{weighting}{Weighting scheme(s) to consider when sweeping parameters. -Provide a vector of unique values to vary this parameter. Options include "jaccard" (default), - "rank", or "number"} - -\item{nn}{Number of nearest neighbors to consider when sweeping parameters. -Provide a vector of unique values to vary this parameter. Default is 10.} - -\item{resolution}{Resolution parameter used by louvain and leiden clustering only. -Provide a vector of unique values to vary this parameter. Default is 1.} - -\item{objective_function}{Leiden-specific parameter for whether to use the -Constant Potts Model ("CPM"; default) or "modularity". Provide a vector of unique values -to vary this parameter.} - -\item{threads}{Number of threads to use. Default is 1.} - -\item{seed}{Random seed to set for clustering.} - -\item{pc_name}{Name of principal components slot in provided object. This argument is only used -if a SingleCellExperiment or Seurat object is provided. If not provided, the SingleCellExperiment -object name will default to "PCA" and the Seurat object name will default to "pca".} -} -\value{ -A list of data frames from performing clustering across all parameter combinations. - Columns include `cluster_set` (identifier column for results from a single clustering run), - `cell_id`, and `cluster`. Additional columns represent algorithm parameters and include at least: - `algorithm`, `weighting`, and `nn`. Louvain and leiden clustering will also include `resolution`, - and leiden clustering will further include `objective_function`. -} -\description{ -This function can be used to perform reproducible clustering while varying a set of parameters. -Multiple values can be provided for any of: - - The algorithm (`algorithm`) - - The weighting scheme (`weighting`) - - Number of nearest neighbors (`nn`) - - The resolution parameter (`resolution`) - - The objective function parameter (`objective_function`) -} -\details{ -For each algorithm specified, all parameters possible to use with that -algorithm will be systematically varied. This function does not accept additional -parameters besides those listed above. -Note that defaults for some arguments may differ from the bluster::NNGraphParam() defaults. -Specifically, the clustering algorithm defaults to "louvain" and the weighting scheme to "jaccard" -to align with common practice in scRNA-seq analysis. -} -\examples{ -\dontrun{ -# perform louvain clustering with jaccard weighting (defaults), -# varying the nearest neighobor parameter, and set a seed for reproducibility -cluster_df <- sweep_clusters( - sce_object, - nn = c(10, 15, 20, 25), - seed = 11 -) - -# perform louvain clustering, with jaccard and rank weighting, and -# varying the nearest neighbor and resolution parameters. -cluster_df <- sweep_clusters( - sce_object, - algorithm = "louvain", - weighting = c("jaccard", "rank"), - nn = c(10, 15, 20, 25), - resolution = c(0.5, 1), - seed = 11 -) - -# perform walktrap and louvain clustering with jaccard weighting, and -# varying the nearest neighbors for both algorithms, and resolution for louvain. -cluster_df <- sweep_clusters( - sce_object, - algorithm = c("walktrap", "louvain"), - weighting = "jaccard", - nn = c(10, 15, 20, 25), - resolution = c(0.5, 1), - seed = 11 -) -} -} diff --git a/packages/rOpenScPCA/rOpenScPCA.Rproj b/packages/rOpenScPCA/rOpenScPCA.Rproj deleted file mode 100644 index ba381fb1b..000000000 --- a/packages/rOpenScPCA/rOpenScPCA.Rproj +++ /dev/null @@ -1,20 +0,0 @@ -Version: 1.0 - -RestoreWorkspace: No -SaveWorkspace: No -AlwaysSaveHistory: No - -EnableCodeIndexing: Yes -UseSpacesForTab: Yes -NumSpacesForTab: 2 -Encoding: UTF-8 - -RnwWeave: Sweave -LaTeX: pdfLaTeX - -AutoAppendNewline: Yes -StripTrailingWhitespace: Yes - -BuildType: Package -PackageUseDevtools: Yes -PackageInstallArgs: --no-multiarch --with-keep.source diff --git a/packages/rOpenScPCA/tests/testthat.R b/packages/rOpenScPCA/tests/testthat.R deleted file mode 100644 index 5dc4da8e3..000000000 --- a/packages/rOpenScPCA/tests/testthat.R +++ /dev/null @@ -1,12 +0,0 @@ -# This file is part of the standard setup for testthat. -# It is recommended that you do not modify it. -# -# Where should you do additional test configuration? -# Learn more about the roles of various files in: -# * https://r-pkgs.org/testing-design.html#sec-tests-files-overview -# * https://testthat.r-lib.org/articles/special-files.html - -library(testthat) -library(rOpenScPCA) - -test_check("rOpenScPCA") diff --git a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R b/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R deleted file mode 100644 index ef977f8ff..000000000 --- a/packages/rOpenScPCA/tests/testthat/test-calculate-clusters.R +++ /dev/null @@ -1,147 +0,0 @@ -suppressPackageStartupMessages(library(SingleCellExperiment)) - -set.seed(2024) -sce <- splatter::simpleSimulate(nGenes = 1000, verbose = FALSE) |> - scater::logNormCounts() |> - scater::runPCA(ncomponents = 10) - -test_mat <- reducedDim(sce, "PCA") - -srat <- Seurat::CreateSeuratObject(counts = counts(sce), assay = "RNA") -srat[["pca"]] <- Seurat::CreateDimReducObject( - embeddings = test_mat, - key = "PC_", # underscore avoids Seurat warning that it's adding an underscore - assay = "RNA" -) - -test_that("calculate_clusters runs with a matrix, defaults", { - cluster_df <- calculate_clusters(test_mat) - - expect_setequal( - colnames(cluster_df), - c("cell_id", "cluster", "algorithm", "weighting", "nn", "resolution") - ) - expect_equal(cluster_df$cell_id, rownames(test_mat)) - expect_s3_class(cluster_df$cluster, "factor") - expect_equal(unique(cluster_df$algorithm), "louvain") - expect_equal(unique(cluster_df$weighting), "jaccard") - expect_equal(unique(cluster_df$nn), 10) - expect_equal(unique(cluster_df$resolution), 1) -}) - - -test_that("calculate_clusters runs with additional cluster_args", { - cluster_df <- calculate_clusters( - test_mat, - algorithm = "leiden", - cluster_args = list(n_iterations = 3) - ) - - expect_setequal( - colnames(cluster_df), - c("cell_id", "cluster", "algorithm", "weighting", "nn", "resolution", "objective_function", "n_iterations") - ) - expect_equal(unique(cluster_df$n_iterations), 3) -}) - - - -test_that("calculate_clusters runs when cluster_args is empty", { - cluster_df <- calculate_clusters( - test_mat, - algorithm = "walktrap" - ) - - expect_setequal( - colnames(cluster_df), - c("cell_id", "cluster", "algorithm", "weighting", "nn") - ) - expect_equal(unique(cluster_df$algorithm), "walktrap") -}) - - -test_that("calculate_clusters runs with an object, defaults", { - cluster_df_sce <- calculate_clusters(sce) - expect_setequal( - colnames(cluster_df_sce), - c("cell_id", "cluster", "algorithm", "weighting", "nn", "resolution") - ) - expect_equal(cluster_df_sce$cell_id, rownames(test_mat)) - - cluster_df_srat <- calculate_clusters(srat) - expect_setequal( - colnames(cluster_df_srat), - c("cell_id", "cluster", "algorithm", "weighting", "nn", "resolution") - ) - expect_equal(cluster_df_srat$cell_id, rownames(test_mat)) -}) - - - -test_that("calculate_clusters errors as expected", { - expect_error(calculate_clusters(test_mat, resolution = "string")) - expect_error(calculate_clusters(test_mat, nn = "string")) - expect_error( - calculate_clusters( - test_mat, - cluster_args = list(too_long = 1:10) - ) - ) -}) - - - -test_that("extract_pc_matrix works as expected", { - pc_mat_sce <- extract_pc_matrix(sce) - expect_identical( - pc_mat_sce, - test_mat - ) - - pc_mat_srt <- extract_pc_matrix(srat) - # update test_mat column names to match what will have Seurat changed them to - colnames(test_mat) <- gsub("^PC", "PC_", colnames(test_mat)) - expect_identical(pc_mat_srt, test_mat) -}) - -test_that("extract_pc_matrix errors as expected", { - expect_error( - extract_pc_matrix(sce, pc_name = "bad_name") - ) - expect_error( - extract_pc_matrix(srat, pc_name = "bad_name") - ) - expect_error( - extract_pc_matrix(test_mat) - ) -}) - - - - -test_that("prepare_pc_matrix works as expected with matrix input", { - mat <- prepare_pc_matrix(test_mat) - expect_identical(mat, test_mat) -}) - - -test_that("prepare_pc_matrix works as expected with SCE input", { - mat <- prepare_pc_matrix(sce) - expect_identical(mat, test_mat) -}) - -test_that("prepare_pc_matrix works as expected with Seurat input", { - mat <- prepare_pc_matrix(srat) - # update test_mat column names to match what Seurat will have changed them to - colnames(test_mat) <- gsub("^PC", "PC_", colnames(test_mat)) - expect_identical(mat, test_mat) -}) - - -test_that("prepare_pc_matrix fails as expected ", { - test_mat_nonames <- test_mat - rownames(test_mat_nonames) <- NULL - - expect_error(calculate_clusters(test_mat_nonames)) - expect_error(calculate_clusters("not a matrix")) -}) diff --git a/packages/rOpenScPCA/tests/testthat/test-evaluate-clusters.R b/packages/rOpenScPCA/tests/testthat/test-evaluate-clusters.R deleted file mode 100644 index 83da7ae56..000000000 --- a/packages/rOpenScPCA/tests/testthat/test-evaluate-clusters.R +++ /dev/null @@ -1,100 +0,0 @@ -suppressPackageStartupMessages(library(SingleCellExperiment)) - -set.seed(2024) -sce <- splatter::simpleSimulate(nGenes = 1000, verbose = FALSE) |> - scater::logNormCounts() |> - scater::runPCA(ncomponents = 10) -test_mat <- reducedDim(sce, "PCA") - - -cluster_df <- calculate_clusters(test_mat) - -test_that("calculate_silhouette works as expected", { - df <- calculate_silhouette(test_mat, cluster_df) - - expect_setequal( - colnames(df), - c(colnames(cluster_df), "silhouette_width", "other") - ) - expect_equal(df$cell_id, rownames(test_mat)) - expect_equal(df$cluster, cluster_df$cluster) - expect_vector(df$silhouette_width, ptype = numeric()) - expect_s3_class(df$other, "factor") -}) - - - -test_that("calculate_purity works as expected", { - df <- calculate_purity(test_mat, cluster_df) - - expect_setequal( - colnames(df), - c(colnames(cluster_df), "purity", "maximum") - ) - expect_equal(df$cell_id, rownames(test_mat)) - expect_equal(df$cluster, cluster_df$cluster) - expect_vector(df$purity, ptype = numeric()) - expect_s3_class(df$maximum, "factor") -}) - - - - - -test_that("calculate_stability works as expected with defaults", { - # note that we suppress warnings since this calculation done on fake - # test data gives expected warnings about ties during the ARI calculation. - suppressWarnings({ - df <- calculate_stability(test_mat, cluster_df$cluster) - }) - - expected_names <- colnames(cluster_df)[!(colnames(cluster_df) %in% c("cell_id", "cluster"))] - expect_setequal( - colnames(df), - c("replicate", "ari", expected_names) - ) - expect_equal(df$replicate, 1:20) # checks rows too - expect_vector(df$ari, ptype = numeric()) -}) - - -test_that("calculate_stability works as expected with different replicates", { - # note that we suppress warnings since this calculation done on fake - # test data gives expected warnings about ties during the ARI calculation. - suppressWarnings({ - df <- calculate_stability(test_mat, cluster_df$cluster, replicates = 2) - }) - expect_equal(nrow(df), 2) -}) - - - -test_that("calculate_stability works as expected with object and pc_name", { - reducedDimNames(sce) <- "my_pca" - - # note that we suppress warnings since this calculation done on fake - # test data gives expected warnings about ties during the ARI calculation. - suppressWarnings({ - df <- calculate_stability( - sce, - cluster_df$cluster, - replicates = 2, - pc_name = "my_pca" - ) - }) - expect_equal(nrow(df), 2) -}) - - - -test_that("calculate_stability errors as expected", { - expect_error({ - # mismatched cluster vector length - calculate_stability(test_mat, cluster_df$cluster[1:5]) - }) - - expect_error({ - # cluster_df not a vector - calculate_stability(test_mat, cluster_df) - }) -}) diff --git a/packages/rOpenScPCA/tests/testthat/test-sweep-clusters.R b/packages/rOpenScPCA/tests/testthat/test-sweep-clusters.R deleted file mode 100644 index 4011b88b3..000000000 --- a/packages/rOpenScPCA/tests/testthat/test-sweep-clusters.R +++ /dev/null @@ -1,127 +0,0 @@ -suppressPackageStartupMessages(library(SingleCellExperiment)) - -set.seed(2024) -sce <- splatter::simpleSimulate(nGenes = 1000, verbose = FALSE) |> - scater::logNormCounts() |> - scater::runPCA(ncomponents = 10) - -test_mat <- reducedDim(sce, "PCA") - -srat <- Seurat::CreateSeuratObject(counts = counts(sce), assay = "RNA") -srat[["pca"]] <- Seurat::CreateDimReducObject( - embeddings = test_mat, - key = "PC_", # underscore avoids Seurat warning that it's adding an underscore - assay = "RNA" -) - -test_that("sweep_clusters works as expected with default algorithm & weighting", { - sweep_list <- sweep_clusters( - test_mat, - nn = c(10, 15), - resolution = c(0.5, 1) - ) - - expect_length(sweep_list, 4) - - sweep_list |> - purrr::walk( - \(df) { - expect_setequal( - colnames(df), - c("cell_id", "cluster", "algorithm", "weighting", "nn", "resolution") - ) - - # these tests confirm the defaults went through - expect_equal(unique(df$algorithm), "louvain") - expect_equal(unique(df$weighting), "jaccard") - - expect_true( - all(df$nn == 10) || all(df$nn == 15) - ) - expect_true( - all(df$resolution == 0.5) || all(df$resolution == 1) - ) - } - ) -}) - - -test_that("sweep_clusters works as expected with matrix input", { - sweep_list <- sweep_clusters(test_mat) - expect_length(sweep_list, 1) -}) - - -test_that("sweep_clusters works as expected with Seurat input", { - sweep_list <- sweep_clusters(srat) - expect_length(sweep_list, 1) -}) - - - -test_that("sweep_clusters works as expected with non-default algorithm", { - sweep_list <- sweep_clusters( - test_mat, - algorithm = "leiden", - objective_function = "modularity", - resolution = c(0.5, 1) - ) - - sweep_list |> - purrr::walk( - \(df) { - expect_setequal( - colnames(df), - c("cell_id", "cluster", "algorithm", "weighting", "nn", "resolution", "objective_function") - ) - - expect_equal(unique(df$algorithm), "leiden") - expect_equal(unique(df$objective_function), "modularity") - - expect_true( - all(df$resolution == 0.5) || all(df$resolution == 1) - ) - } - ) -}) - - - - -test_that("sweep_clusters works as expected with multiple algorithms", { - sweep_list <- sweep_clusters( - test_mat, - algorithm = c("walktrap", "louvain"), - # used by both - nn = c(10, 15), - # only used by louvain - resolution = c(0.5, 1) - ) - - # count algorithms - alg_counts <- sweep_list |> - purrr::map(\(df) unique(df$algorithm)) |> - purrr::reduce(c) - expect_length(alg_counts, 6) - expect_equal(sum(alg_counts == "louvain"), 4) - expect_equal(sum(alg_counts == "walktrap"), 2) - - - - sweep_list |> - purrr::walk( - \(df) { - if (unique(df$algorithm) == "walktrap") { - expect_setequal( - colnames(df), - c("cell_id", "cluster", "algorithm", "weighting", "nn") - ) - } else if (unique(df$algorithm) == "louvain") { - expect_setequal( - colnames(df), - c("cell_id", "cluster", "algorithm", "weighting", "nn", "resolution") - ) - } - } - ) -}) From 40edd89d0523a17b96c62ee205ae1303a1e157c5 Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Wed, 6 Nov 2024 13:36:14 -0500 Subject: [PATCH 2/5] update hello-clusters to use relocated package --- analyses/hello-clusters/README.md | 10 +++++----- analyses/hello-clusters/renv.lock | 7 +++---- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/analyses/hello-clusters/README.md b/analyses/hello-clusters/README.md index 1b31ad574..8e8aab7e5 100644 --- a/analyses/hello-clusters/README.md +++ b/analyses/hello-clusters/README.md @@ -2,7 +2,7 @@ ## Description -This module provides examples of how to use clustering functionality in the `rOpenScPCA` package. +This module provides examples of how to use clustering functionality in the [`rOpenScPCA` package](https://github.com/AlexsLemonade/rOpenScPCA/). When clustering scRNA-seq data, in particular when those clusters are used in downstream analyses, it is important to evaluate the quality of the clusters. The `rOpenScPCA` package provides several functions that leverage the [`bluster` package](https://bioconductor.org/packages/release/bioc/html/bluster.html) to facilitate performing and evaluating graph-based clustering in a reproducible manner. @@ -11,6 +11,7 @@ The `rOpenScPCA` package provides several functions that leverage the [`bluster` The function `calculate_clusters()` can be used to perform graph-based clustering. By default, this function uses the Louvain algorithm with Jaccard weighting. + ### Evaluate clustering `rOpenScPCA` contains several functions to calculate quality metrics for a particular clustering result: @@ -34,14 +35,13 @@ The function `sweep_clusters()` allows you to generate clustering results from a ## Installing rOpenScPCA -The `rOpenScPCA` package is disseminated in the `OpenScPCA-analysis` repository in the `packages` directory. +The `rOpenScPCA` package is available in the [`AlexsLemonade/rOpenScPCA` repository](https://github.com/AlexsLemonade/rOpenScPCA/). + If you use this package in your analysis module, you should install and track it with `renv` as follows: ``` # First, install rOpenScPCA -renv::install("AlexsLemonade/OpenScPCA-analysis:packages/rOpenScPCA") - - +renv::install("AlexsLemonade/rOpenScPCA") # Second, run snapshot to add the package to renv.lock renv::snapshot() diff --git a/analyses/hello-clusters/renv.lock b/analyses/hello-clusters/renv.lock index 1cacfac46..5636b53ee 100644 --- a/analyses/hello-clusters/renv.lock +++ b/analyses/hello-clusters/renv.lock @@ -1859,10 +1859,9 @@ "RemoteType": "github", "RemoteHost": "api.github.com", "RemoteUsername": "AlexsLemonade", - "RemoteRepo": "OpenScPCA-analysis", - "RemoteSubdir": "packages/rOpenScPCA", + "RemoteRepo": "rOpenScPCA", "RemoteRef": "main", - "RemoteSha": "c67fc87806fc8a497d18624d759342ef041e1030", + "RemoteSha": "fc784446f8d86b072e6f7f67287adfff598f4911", "Requirements": [ "BiocParallel", "SingleCellExperiment", @@ -1874,7 +1873,7 @@ "tibble", "tidyr" ], - "Hash": "74da2034ae461cf45a5cf115667ff4e4" + "Hash": "80a604c3be8af1ae30bcda4f96c4e290" }, "rappdirs": { "Package": "rappdirs", From d08839a927a5330d33b3cf6e1ddccb9840637afb Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Wed, 6 Nov 2024 13:39:55 -0500 Subject: [PATCH 3/5] update cell-type-nonETP-ALL-03 to use relocated package --- analyses/cell-type-nonETP-ALL-03/renv.lock | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/analyses/cell-type-nonETP-ALL-03/renv.lock b/analyses/cell-type-nonETP-ALL-03/renv.lock index 84ed5a126..7ddc8099c 100644 --- a/analyses/cell-type-nonETP-ALL-03/renv.lock +++ b/analyses/cell-type-nonETP-ALL-03/renv.lock @@ -2621,10 +2621,9 @@ "RemoteType": "github", "RemoteHost": "api.github.com", "RemoteUsername": "AlexsLemonade", - "RemoteRepo": "OpenScPCA-analysis", - "RemoteSubdir": "packages/rOpenScPCA", + "RemoteRepo": "rOpenScPCA", "RemoteRef": "main", - "RemoteSha": "a5c36235eff73530cf496c4d82715e33e79fc90e", + "RemoteSha": "fc784446f8d86b072e6f7f67287adfff598f4911", "Requirements": [ "BiocParallel", "SingleCellExperiment", @@ -2636,7 +2635,7 @@ "tibble", "tidyr" ], - "Hash": "f320ea9842ac8f8eb77685aa3155751d" + "Hash": "80a604c3be8af1ae30bcda4f96c4e290" }, "rappdirs": { "Package": "rappdirs", From e57c7d718df9b5469a0538c17031f6416cfd0767 Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Wed, 6 Nov 2024 13:43:49 -0500 Subject: [PATCH 4/5] update cell-type-ETP-ALL-03 to use relocated package --- analyses/cell-type-ETP-ALL-03/renv.lock | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/analyses/cell-type-ETP-ALL-03/renv.lock b/analyses/cell-type-ETP-ALL-03/renv.lock index 19fba645d..7d9ff86bb 100644 --- a/analyses/cell-type-ETP-ALL-03/renv.lock +++ b/analyses/cell-type-ETP-ALL-03/renv.lock @@ -2075,10 +2075,9 @@ "RemoteType": "github", "RemoteHost": "api.github.com", "RemoteUsername": "AlexsLemonade", - "RemoteRepo": "OpenScPCA-analysis", - "RemoteSubdir": "packages/rOpenScPCA", + "RemoteRepo": "rOpenScPCA", "RemoteRef": "main", - "RemoteSha": "d446cf35158d53e500e8bcacb08d9f2de4688b5a", + "RemoteSha": "fc784446f8d86b072e6f7f67287adfff598f4911", "Requirements": [ "BiocParallel", "SingleCellExperiment", @@ -2090,7 +2089,7 @@ "tibble", "tidyr" ], - "Hash": "5c214b8e7ab3d7fd01fa32daeb51c5f8" + "Hash": "80a604c3be8af1ae30bcda4f96c4e290" }, "rappdirs": { "Package": "rappdirs", From bc37e92ae7887313400b9711f9f8453747dbf411 Mon Sep 17 00:00:00 2001 From: Stephanie Spielman Date: Wed, 6 Nov 2024 13:44:15 -0500 Subject: [PATCH 5/5] remove ropenscpca test GHA --- .github/workflows/test_ropenscpca.yml | 38 --------------------------- 1 file changed, 38 deletions(-) delete mode 100644 .github/workflows/test_ropenscpca.yml diff --git a/.github/workflows/test_ropenscpca.yml b/.github/workflows/test_ropenscpca.yml deleted file mode 100644 index 0ae3f7176..000000000 --- a/.github/workflows/test_ropenscpca.yml +++ /dev/null @@ -1,38 +0,0 @@ -on: - pull_request: - branches: - - main - - feature/* - paths: - - packages/rOpenScPCA/** - -name: Check the rOpenScPCA package - -jobs: - R-CMD-check-renv: - runs-on: ubuntu-22.04 - env: - GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} - steps: - - name: Checkout repo - uses: actions/checkout@v4 - - - name: Set up R - uses: r-lib/actions/setup-r@v2 - with: - r-version: 4.4.0 - use-public-rspm: true - - - name: Set up dependencies - uses: r-lib/actions/setup-r-dependencies@v2 - with: - working-directory: "packages/rOpenScPCA/" - extra-packages: | - any::rcmdcheck - needs: check - - - name: Check package - uses: r-lib/actions/check-r-package@v2 - with: - working-directory: "packages/rOpenScPCA/" - args: 'c("--no-manual")'