Skip to content

Commit

Permalink
Merge pull request #250 from saeyslab/get-expressed-genes-2
Browse files Browse the repository at this point in the history
`get_expressed_genes` - fix for Seurat v5 and add method for matrices
  • Loading branch information
csangara authored Jan 29, 2024
2 parents e9ddb3a + 17d0efd commit 88ed083
Show file tree
Hide file tree
Showing 7 changed files with 136 additions and 133 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: nichenetr
Type: Package
Title: NicheNet: Modeling Intercellular Communication by Linking Ligands to Target Genes
Version: 2.0.4
Version: 2.0.5
Authors@R: c(person("Robin", "Browaeys", role = c("aut")),
person("Chananchida", "Sang-aram", role = c("aut", "cre"), email = "[email protected]"))
Description: This package allows you the investigate intercellular communication from a computational perspective. More specifically, it allows to investigate how interacting cells influence each other's gene expression. Functionalities of this package (e.g. including predicting extracellular upstream regulators and their affected target genes) build upon a probabilistic model of ligand-target links that was inferred by data-integration.
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# Generated by roxygen2: do not edit by hand

S3method(get_expressed_genes,Seurat)
S3method(get_expressed_genes,default)
export(add_hyperparameters_parameter_settings)
export(add_ligand_popularity_measures_to_perfs)
export(add_new_datasource)
Expand Down
223 changes: 103 additions & 120 deletions R/application_prediction.R
Original file line number Diff line number Diff line change
Expand Up @@ -1078,159 +1078,142 @@ nichenet_seuratobj_aggregate = function(receiver, seurat_obj, condition_colname,
background_expressed_genes = background_expressed_genes
))
}
#' @title Determine expressed genes of a cell type from a Seurat object single-cell RNA seq dataset or Seurat spatial transcriptomics dataset
#'
#' @description \code{get_expressed_genes} Return the genes that are expressed in a given cell cluster based on the fraction of cells in that cluster that should express the cell.
#' @usage
#' get_expressed_genes(ident, seurat_obj, pct = 0.10, assay_oi = NULL)
#'
#' @param ident Name of cluster identity/identities of cells
#' @param seurat_obj Single-cell expression dataset as Seurat object https://satijalab.org/seurat/.
#' @param pct We consider genes expressed if they are expressed in at least a specific fraction of cells of a cluster. This number indicates this fraction. Default: 0.10. Choice of this parameter is important and depends largely on the used sequencing platform. We recommend to require a lower fraction (like the default 0.10) for 10X data than for e.g. Smart-seq2 data.
#' @param assay_oi If wanted: specify yourself which assay to look for. Default this value is NULL and as a consequence the 'most advanced' assay will be used to define expressed genes.
#' @title Determine expressed genes of a cell type from an input object
#' @description Return the genes that are expressed in given cell cluster(s) based on the fraction of cells in the cluster(s) that should express the cell.
#' @param celltype_oi Character vector of cell type(s) to be considered. If input is a Seurat object, these must correspond to the cell identities from \code{Idents}.
#' @param object Input matrix with rows as genes and columns as cells
#' @param pct We consider genes expressed if they are expressed in at least a specific fraction of cells of the given cluster(s). This number indicates this fraction. Default: 0.10. Choice of this parameter is important and depends largely on the used sequencing platform. We recommend to require a lower fraction (like the default 0.10) for 10X data than for e.g. Smart-seq2 data.
#' @return A vector containing gene symbols of the expressed genes
#'
#' @rdname get_expressed_genes
#' @export
get_expressed_genes <- function(celltype_oi, object, ...) {
UseMethod("get_expressed_genes", object)
}

#' @param celltype_annot Vector of cell type annotations
#' @import dplyr
#' @rdname get_expressed_genes
#' @examples
#' \dontrun{
#' # For sparse matrix
#' get_expressed_genes("CD8 T", GetAssayData(seuratObj), seuratObj$celltype, pct = 0.10)
#' }
#'
#' @return A character vector with the gene symbols of the expressed genes
#' @export
get_expressed_genes.default <- function(celltype_oi, object, celltype_annot, pct = 0.1) {
requireNamespace("dplyr")

# Check that length of metadata is equal to number of cells
if (length(celltype_annot) != ncol(object)) {
stop("Length of metadata is not equal to number of cells")
}

# Check that celltype_oi is in metadata
if (!celltype_oi %in% celltype_annot) {
stop("Cell type of interest is not in celltype_annot")
}

exprs_mat <- object %>% .[, celltype_annot == celltype_oi]

# Get cells of interest in matrix
n_cells_oi_in_matrix <- ncol(exprs_mat)

if (n_cells_oi_in_matrix < 5000) {
genes <- exprs_mat %>% apply(1, function(x) {
sum(x > 0)/n_cells_oi_in_matrix
}) %>% .[. >= pct] %>% names()
}
else {
splits <- split(1:nrow(exprs_mat), ceiling(seq_along(1:nrow(exprs_mat))/100))
genes <- splits %>% lapply(function(genes_indices) {
begin_i <- genes_indices[1]
end_i <- genes_indices[length(genes_indices)]
exprs <- exprs_mat[begin_i:end_i, , drop = FALSE]
genes <- exprs %>% apply(1, function(x) {
sum(x > 0)/n_cells_oi_in_matrix
}) %>% .[. >= pct] %>% names()
}) %>% unlist() %>%
unname()
}
return(genes)

}

#' @param seurat_obj Single-cell expression or spatial dataset as Seurat object
#' @param assay_oi If wanted: specify yourself which assay to look for. If not NULL, the \code{DefaultAssay} of the Seurat object is used.
#' @param ... additional parameters passed to \code{GetAssayData} (in case the slot/layer needs to be specified)
#'
#' @import Seurat
#' @import dplyr
#'
#' @examples
#' \dontrun{
#' get_expressed_genes(ident = "CD8 T", seurat_obj = seuratObj, pct = 0.10)
#' # For Seurat object
#' get_expressed_genes(celltype_oi = "CD8 T", seurat_obj = seuratObj, pct = 0.10)
#' }
#'
#' @rdname get_expressed_genes
#' @export
#'
get_expressed_genes = function(ident, seurat_obj, pct = 0.1, assay_oi = NULL){
get_expressed_genes.Seurat = function(celltype_oi, seurat_obj, pct = 0.1, assay_oi = NULL, ...){
requireNamespace("Seurat")
requireNamespace("dplyr")

# input check


if (!"RNA" %in% names(seurat_obj@assays)) {
if ("Spatial" %in% names(seurat_obj@assays)) {
if (class(seurat_obj@assays$Spatial@data) != "matrix" &
class(seurat_obj@assays$Spatial@data) != "dgCMatrix") {
warning("Spatial Seurat object should contain a matrix of normalized expression data. Check 'seurat_obj@assays$Spatial@data' for default or 'seurat_obj@assays$SCT@data' for when the single-cell transform pipeline was applied")
}
if (sum(dim(seurat_obj@assays$Spatial@data)) == 0) {
stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$Spatial@data'")
}
}
}
else {
if (class(seurat_obj@assays$RNA@data) != "matrix" &
class(seurat_obj@assays$RNA@data) != "dgCMatrix") {
warning("Seurat object should contain a matrix of normalized expression data. Check 'seurat_obj@assays$RNA@data' for default or 'seurat_obj@assays$integrated@data' for integrated data or seurat_obj@assays$SCT@data for when the single-cell transform pipeline was applied")
}
if ("integrated" %in% names(seurat_obj@assays)) {
if (sum(dim(seurat_obj@assays$RNA@data)) == 0 & sum(dim(seurat_obj@assays$integrated@data)) ==
0)
stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$RNA@data' for default or 'seurat_obj@assays$integrated@data' for integrated data")
}
else if ("SCT" %in% names(seurat_obj@assays)) {
if (sum(dim(seurat_obj@assays$RNA@data)) == 0 & sum(dim(seurat_obj@assays$SCT@data)) ==
0) {
stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$RNA@data' for default or 'seurat_obj@assays$SCT@data' for data corrected via SCT")
}
}
else {
if (sum(dim(seurat_obj@assays$RNA@data)) == 0) {
stop("Seurat object should contain normalized expression data (numeric matrix). Check 'seurat_obj@assays$RNA@data'")
}
# If assay_oi is not specified, use DefaultAssay
if (is.null(assay_oi)){
assay_oi <- DefaultAssay(seurat_obj)
} else {
# Check that assay exists in Seurat object
if (!assay_oi %in% Seurat::Assays(seurat_obj)){
stop("assay_oi not found in Seurat object")
}
# Set assay_oi to DefaultAssay
DefaultAssay(seurat_obj) <- assay_oi

}
if (sum(ident %in% unique(Idents(seurat_obj))) != length(ident)) {
stop("One or more provided cell clusters is not part of the 'Idents' of your Seurat object")

# If assay_oi is Spatial, give warning
if (assay_oi == "Spatial"){
warning("The Spatial assay will be used to define expressed gene. If the spatial data is spot-based (mixture of cells) and not single-cell resolution, we recommend against directly using nichenetr on because you want to look at cell-cell interactions, and not at spot-spot interactions! ;-) ")
}

if(!is.null(assay_oi)){
if(! assay_oi %in% Seurat::Assays(seurat_obj)){
stop("assay_oi should be an assay of your Seurat object")
}
# Check that celltype_oi is in Seurat object
if (!all(celltype_oi %in% unique(Idents(seurat_obj)))) {
stop("One or more provided cell clusters is not part of the 'Idents' of your Seurat object")
}

# Get cell identities of cluster of interest
cells_oi <- Idents(seurat_obj) %>% .[. %in% celltype_oi] %>% names()

# Get expression matrix from assay_oi
#cells_oi_in_matrix <- intersect(colnames(seurat_obj[[assay_oi]]@data), cells_oi)
#exprs_mat = seurat_obj[[assay_oi]]@data %>% .[, cells_oi_in_matrix]

cells_oi = Idents(seurat_obj) %>% .[Idents(seurat_obj) %in%
ident] %>% names()

# Get exprs matrix: from assay oi or from most advanced assay if assay oi not specifcied

if(!is.null(assay_oi)){
cells_oi_in_matrix = intersect(colnames(seurat_obj[[assay_oi]]@data), cells_oi)
exprs_mat = seurat_obj[[assay_oi]]@data %>% .[, cells_oi_in_matrix]
} else {
if ("integrated" %in% names(seurat_obj@assays)) {
warning("Seurat object is result from the Seurat integration workflow. The expressed genes are now defined based on the integrated slot. You can change this via the assay_oi parameter of the get_expressed_genes() functions. Recommended assays: RNA or SCT")
cells_oi_in_matrix = intersect(colnames(seurat_obj@assays$integrated@data),
cells_oi)
if (length(cells_oi_in_matrix) != length(cells_oi))
stop("Not all cells of interest are in your expression matrix (seurat_obj@assays$integrated@data). Please check that the expression matrix contains cells in columns and genes in rows.")
exprs_mat = seurat_obj@assays$integrated@data %>% .[,
cells_oi_in_matrix]
}
else if ("SCT" %in% names(seurat_obj@assays) & !"Spatial" %in%
names(seurat_obj@assays)) {
warning("Seurat object is result from the Seurat single-cell transform workflow. The expressed genes are defined based on the SCT slot. You can change this via the assay_oi parameter of the get_expressed_genes() functions. Recommended assays: RNA or SCT")
cells_oi_in_matrix = intersect(colnames(seurat_obj@assays$SCT@data),
cells_oi)
if (length(cells_oi_in_matrix) != length(cells_oi))
stop("Not all cells of interest are in your expression matrix (seurat_obj@assays$SCT@data). Please check that the expression matrix contains cells in columns and genes in rows.")
exprs_mat = seurat_obj@assays$SCT@data %>% .[, cells_oi_in_matrix]
}
else if ("Spatial" %in% names(seurat_obj@assays) &
!"SCT" %in% names(seurat_obj@assays)) {
warning("Seurat object is result from the Seurat spatial object. The expressed genes are defined based on the Spatial slot. If the spatial data is spot-based (mixture of cells) and not single-cell resolution, we recommend against directly using nichenetr on spot-based data (because you want to look at cell-cell interactions, and not at spot-spot interactions! ;-) )")
cells_oi_in_matrix = intersect(colnames(seurat_obj@assays$Spatial@data),
cells_oi)
if (length(cells_oi_in_matrix) != length(cells_oi))
stop("Not all cells of interest are in your expression matrix (seurat_obj@assays$Spatial@data). Please check that the expression matrix contains cells in columns and genes in rows.")
exprs_mat = seurat_obj@assays$Spatial@data %>% .[, cells_oi_in_matrix]
}
else if ("Spatial" %in% names(seurat_obj@assays) &
"SCT" %in% names(seurat_obj@assays)) {
warning("Seurat object is result from the Seurat spatial object, followed by the SCT workflow. If the spatial data is spot-based (mixture of cells) and not single-cell resolution, we recommend against directly using nichenetr on spot-based data (because you want to look at cell-cell interactions, and not at spot-spot interactions! The expressed genes are defined based on the SCT slot, but this can be changed via the assay_oi parameter.")
cells_oi_in_matrix = intersect(colnames(seurat_obj@assays$SCT@data),
cells_oi)
if (length(cells_oi_in_matrix) != length(cells_oi))
stop("Not all cells of interest are in your expression matrix (seurat_obj@assays$Spatial@data). Please check that the expression matrix contains cells in columns and genes in rows.")
exprs_mat = seurat_obj@assays$SCT@data %>% .[, cells_oi_in_matrix]
}
else {
if (sum(cells_oi %in% colnames(seurat_obj@assays$RNA@data)) ==
0)
stop("None of the cells are in colnames of 'seurat_obj@assays$RNA@data'. The expression matrix should contain cells in columns and genes in rows.")
cells_oi_in_matrix = intersect(colnames(seurat_obj@assays$RNA@data),
cells_oi)
if (length(cells_oi_in_matrix) != length(cells_oi))
stop("Not all cells of interest are in your expression matrix (seurat_obj@assays$RNA@data). Please check that the expression matrix contains cells in columns and genes in rows.")
exprs_mat = seurat_obj@assays$RNA@data %>% .[, cells_oi_in_matrix]
}
exprs_mat <- subset(seurat_obj, idents = celltype_oi) %>%
GetAssayData(assay = assay_oi, ...)

if (length(cells_oi) != ncol(exprs_mat)){
warning("Not all cells of interest are in your expression matrix.")
}

# use defined cells and exprs matrix to get expressed genes

n_cells_oi_in_matrix = length(cells_oi_in_matrix)
# Use defined cells and exprs matrix to get expressed genes
n_cells_oi_in_matrix <- ncol(exprs_mat)
if (n_cells_oi_in_matrix < 5000) {
genes = exprs_mat %>% apply(1, function(x) {
genes <- exprs_mat %>% apply(1, function(x) {
sum(x > 0)/n_cells_oi_in_matrix
}) %>% .[. >= pct] %>% names()
}
else {
splits = split(1:nrow(exprs_mat), ceiling(seq_along(1:nrow(exprs_mat))/100))
genes = splits %>% lapply(function(genes_indices, exprs,
pct, n_cells_oi_in_matrix) {
begin_i = genes_indices[1]
end_i = genes_indices[length(genes_indices)]
exprs = exprs[begin_i:end_i, , drop = FALSE]
genes = exprs %>% apply(1, function(x) {
splits <- split(1:nrow(exprs_mat), ceiling(seq_along(1:nrow(exprs_mat))/100))
genes <- splits %>% lapply(function(genes_indices) {
begin_i <- genes_indices[1]
end_i <- genes_indices[length(genes_indices)]
exprs <- exprs_mat[begin_i:end_i, , drop = FALSE]
genes <- exprs %>% apply(1, function(x) {
sum(x > 0)/n_cells_oi_in_matrix
}) %>% .[. >= pct] %>% names()
}, exprs_mat, pct, n_cells_oi_in_matrix) %>% unlist() %>%
}) %>% unlist() %>%
unname()
}
return(genes)
Expand Down
36 changes: 27 additions & 9 deletions man/get_expressed_genes.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/nichenet_seuratobj_aggregate.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/nichenet_seuratobj_aggregate_cluster_de.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 88ed083

Please sign in to comment.