Merge pull request #129 from stemangiola/metadata-docs

Improve get_metadata() docs
stemangiola · May 8, 2024 · 4c4e025 · 4c4e025
2 parents 7000b1f + 3366190
commit 4c4e025
Show file tree

Hide file tree

Showing 4 changed files with 26 additions and 27 deletions.
diff --git a/R/metadata.R b/R/metadata.R
@@ -73,8 +73,8 @@ SAMPLE_DATABASE_URL <- single_line_str(
 #'
 #' @details
 #'
-#' The metadata was collected from the Bioconductor package `cellxgenedp`. it's
-#' vignette `using_cellxgenedp` provides an overview of the columns in the
+#' The metadata was collected from the Bioconductor package `cellxgenedp`.
+#' `vignette("using_cellxgenedp", package="cellxgenedp")` provides an overview of the columns in the
 #' metadata. The data for which the column `organism_name` included "Homo
 #' sapiens" was collected from `cellxgenedp`.
 #'
@@ -84,16 +84,15 @@ SAMPLE_DATABASE_URL <- single_line_str(
 #' Our representation harmonises the metadata at dataset, sample and cell
 #' levels, in a single coherent database table.
 #'
-#' Dataset-specific columns (definitions available at cellxgene.cziscience.com)
+#' Dataset-specific columns (definitions available at cellxgene.cziscience.com):
 #' `cell_count`, `collection_id`, `created_at.x`, `created_at.y`,
 #' `dataset_deployments`, `dataset_id`, `file_id`, `filename`, `filetype`,
 #' `is_primary_data.y`, `is_valid`, `linked_genesets`, `mean_genes_per_cell`,
 #' `name`, `published`, `published_at`, `revised_at`, `revision`, `s3_uri`,
 #' `schema_version`, `tombstone`, `updated_at.x`, `updated_at.y`,
 #' `user_submitted`, `x_normalization`
 #'
-#' Sample-specific columns (definitions available at cellxgene.cziscience.com)
-#'
+#' Sample-specific columns (definitions available at cellxgene.cziscience.com):
 #' `sample_`, `.sample_name`, `age_days`, `assay`, `assay_ontology_term_id`,
 #' `development_stage`, `development_stage_ontology_term_id`, `ethnicity`,
 #' `ethnicity_ontology_term_id`, `experiment___`, `organism`,
@@ -102,14 +101,13 @@ SAMPLE_DATABASE_URL <- single_line_str(
 #' `tissue_ontology_term_id`, `disease`, `disease_ontology_term_id`,
 #' `is_primary_data.x`
 #'
-#' Cell-specific columns (definitions available at cellxgene.cziscience.com)
-#'
+#' Cell-specific columns (definitions available at cellxgene.cziscience.com):
 #' `cell_`, `cell_type`, `cell_type_ontology_term_id`, `cell_type_harmonised`,
 #' `confidence_class`, `cell_annotation_azimuth_l2`,
 #' `cell_annotation_blueprint_singler`
 #'
-#' Through harmonisation and curation we introduced custom column, not present
-#' in the original CELLxGENE metadata
+#' Through harmonisation and curation we introduced custom columns not present
+#' in the original CELLxGENE metadata:
 #'
 #' - `tissue_harmonised`: a coarser tissue name for better filtering
 #' - `age_days`: the number of days corresponding to the age
@@ -129,20 +127,21 @@ SAMPLE_DATABASE_URL <- single_line_str(
 #' - `sample_`: Sample ID
 #' - `.sample_name`: How samples were defined
 #'
-#'
 #' **Possible cache path issues**
 #'
 #' If your default R cache path includes non-standard characters (e.g. dash
-#' because of your user or organisation name), the following error can manifest
+#' because of your user or organisation name), the following error can occur:
 #'
+#' ```
 #' Error in `db_query_fields.DBIConnection()`: ! Can't query fields. Caused by
 #' error: ! Parser Error: syntax error at or near "/" LINE 2: FROM
 #' /Users/bob/Library/Caches...
+#' ```
 #'
 #' The solution is to choose a different cache directory, for example:
-#'
+#' ```R
 #' get_metadata(cache_directory = path.expand('~'))
-#' 
+#' ```
 get_metadata <- function(
     remote_url = get_database_url(),
     cache_directory = get_default_cache_dir(),

diff --git a/R/utils.R b/R/utils.R
@@ -96,7 +96,7 @@ sync_remote_file <- function(full_url, output_file, ...) {
 #' Returns a tibble from a parquet file path
 #' 
 #' Since dbplyr 2.4.0, raw file paths aren't handled very well
-#' See: https://github.com/duckdb/duckdb-r/issues/38
+#' See: <https://github.com/duckdb/duckdb-r/issues/38>
 #' Hence the need for this method
 #' @importFrom glue glue
 #' @importFrom dplyr tbl

diff --git a/man/get_metadata.Rd b/man/get_metadata.Rd
diff --git a/man/read_parquet.Rd b/man/read_parquet.Rd