|
| 1 | +--- |
| 2 | +title: "FuseNet Tutorial" |
| 3 | +date: 'Last Run: `r format(Sys.Date(), "%B %d, %Y")`' |
| 4 | +output: |
| 5 | + html_document: |
| 6 | + theme: united |
| 7 | + df_print: kable |
| 8 | +--- |
| 9 | + |
| 10 | +```{r, include = FALSE} |
| 11 | +options(width = 150) # widen printed console output to 150 characters so wide results wrap less |
| 12 | +knitr::opts_chunk$set( # defaults applied to every later chunk in this document |
| 13 | + collapse = TRUE, # show each chunk's source and its text output in a single block |
| 14 | + tidy = FALSE, # keep the code formatting exactly as written |
| 15 | + message = FALSE, # do not render messages emitted by the code |
| 16 | + warning = FALSE # do not render warnings emitted by the code |
| 17 | +) |
| 18 | +``` |
| 19 | + |
| 20 | +### Load data |
| 21 | + |
| 22 | +We will be analyzing the [CITE-seq](https://www.nature.com/articles/nmeth.4380) human PBMC data. This data can be installed with [SeuratData](https://github.com/satijalab/seurat-data). |
| 23 | + |
| 24 | +```{r setup} |
| 25 | +# Install Seurat (>= 4.0.0) and SeuratData when they are missing or too old |
| 26 | +if (!requireNamespace("Seurat", quietly = TRUE) || utils::packageVersion("Seurat") < "4.0.0") # `||` short-circuits, so packageVersion() is never called on a missing package |
| 27 | + remotes::install_version("Seurat", version = "4.0.0") |
| 28 | +if (!requireNamespace("SeuratData", quietly = TRUE)) |
| 29 | + remotes::install_github("satijalab/seurat-data") # same installer package as the Seurat step, so devtools is not needed as well |
| 30 | +
|
| 31 | +library(FuseNet) |
| 32 | +library(Seurat) |
| 33 | +library(SeuratData) |
| 34 | +InstallData("cbmc") # install the cbmc dataset package through SeuratData |
| 35 | +library(cbmc.SeuratData) # attach the dataset package; presumably this is what provides the `cbmc` object used below — confirm |
| 36 | +
|
| 37 | +# Find highly variable features/genes with the "vst" selection method; the RNA chunk subsets the counts to these features |
| 38 | +cbmc <- FindVariableFeatures(cbmc, selection.method = "vst", verbose = FALSE) |
| 39 | +``` |
| 40 | + |
| 41 | +### Run FuseNet on RNA data |
| 42 | + |
| 43 | +```{r RNA, fig.height=5, fig.width=10} |
| 44 | +data_rna <- cbmc@assays$RNA@counts[VariableFeatures(cbmc),] # RNA count matrix restricted to cbmc's variable features |
| 45 | +cbmc_rna <- InitiateFuseNet(raw_data = data_rna, project_name = "RNA", normalization = "cosine", pca_dims = 20, kernel = "gaussian", k = 15) # create the FuseNet object for the RNA modality |
| 46 | +cbmc_rna <- GeomSketch(object = cbmc_rna, geom_pca_dims = 20, geom_size = 3000, sketch_n_pca = 20, sketch_k = 15) # NOTE(review): presumably sketches down to geom_size = 3000 cells — confirm against the FuseNet docs |
| 47 | +ptm <- proc.time() # start timestamp for timing RunFuseNet |
| 48 | +cbmc_rna <- RunFuseNet(object = cbmc_rna, n_iters = 100, pca_dims = 20, k = 15, ratio = 0.5, norm_type = "l1") |
| 49 | +ptm1 <- proc.time() # end timestamp |
| 50 | +print(x = ptm1 - ptm) # user/system/elapsed time spent in RunFuseNet |
| 51 | +``` |
| 52 | + |
| 53 | +### Run FuseNet on ADT data |
| 54 | + |
| 55 | +```{r ADT} |
| 56 | +data_prt <- cbmc@assays$ADT@counts # full ADT (protein) count matrix; no feature subsetting here |
| 57 | +cbmc_prt <- InitiateFuseNet(raw_data = data_prt, project_name = "Protein", normalization = "cosine", kernel = "gaussian", k = 15, verbose = FALSE) # create the FuseNet object for the protein modality |
| 58 | +cbmc_prt <- GeomSketch(object = cbmc_prt, geom_size = 3000, geom_pca_dims = 3, sketch_n_pca = 0, sketch_k = 15) # NOTE(review): sketch_n_pca = 0 presumably skips PCA on the sketch — confirm against the FuseNet docs |
| 59 | +ptm <- proc.time() # start timestamp for timing RunFuseNet |
| 60 | +cbmc_prt <- RunFuseNet(object = cbmc_prt, n_iters = 100, ratio = 0.5, k = 15, norm_type = "l1") |
| 61 | +ptm1 <- proc.time() # end timestamp |
| 62 | +print(x = ptm1 - ptm) # user/system/elapsed time spent in RunFuseNet |
| 63 | +``` |
| 64 | + |
| 65 | +### Fuse the RNA and ADT data. |
| 66 | + |
| 67 | +We see that the mouse cells, which were used as spike-in controls, were defined almost exclusively by the RNA modality (RNA weight > 0.8). This is because no anti-mouse antibodies were used in the study. |
| 68 | + |
| 69 | +In general, lymphocytes were well defined in the ADT modality (protein weights > 0.6). In particular, CD8+ and CD4+ naive and memory T cells were more defined by the ADT data. On the other hand, dendritic cells (DCs and pDCs) were more defined by the RNA data due to the absence of their surface markers in the ADT data. |
| 70 | + |
| 71 | +```{r fuse, fig.height=6, fig.width=10} |
| 72 | +fused <- FuseData(cbmc_rna, cbmc_prt, project_k = 15) # combine the RNA and protein FuseNet objects |
| 73 | +cbmc@meta.data["RNA_Weight"] <- fused$fused_weight["RNA",] # store the "RNA" row of the fused weights as a metadata column |
| 74 | +cbmc@meta.data["Protein_Weight"] <- fused$fused_weight["Protein",] # store the "Protein" row of the fused weights as a metadata column |
| 75 | +VlnPlot(object = cbmc, features = c("RNA_Weight", "Protein_Weight"), sort = TRUE, group.by = "rna_annotations") # weight distributions grouped by the rna_annotations column |
| 76 | +``` |
| 77 | + |
| 78 | +### Downstream analysis |
| 79 | + |
| 80 | +We can perform UMAP dimensional reduction or clustering on the fused data for downstream analysis. |
| 81 | + |
| 82 | +```{r umap, fig.height=8, fig.width=10} |
| 83 | +umap.dims <- uwot::umap(X = fused$fused_dist, n_components = 2) # 2-D UMAP computed from the fused distances |
| 84 | +dimnames(x = umap.dims) <- list(Cells(cbmc), c("UMAP_1", "UMAP_2")) # rows named by cell, columns by UMAP axis |
| 85 | +cbmc@reductions$umap <- CreateDimReducObject(embeddings = umap.dims, key = "UMAP_", assay = c("RNA", "ADT")) |
| 86 | +cbmc@reductions$umap@cell.embeddings <- umap.dims # set the embeddings slot directly to the same coordinates |
| 87 | +DimPlot(object = cbmc, group.by = "rna_annotations", reduction = "umap", label = TRUE, repel = TRUE, label.box = TRUE) |
| 88 | +``` |
| 89 | + |
| 90 | +### Session Information |
| 91 | +```{r session} |
| 92 | +print(sessionInfo()) # record the R version and attached/loaded package versions used for this run |
| 93 | +``` |
0 commit comments