id_to_column()

dataobservatory-eu · Dec 31, 2023 · 1c7a45e · 1c7a45e
1 parent c65d254
commit 1c7a45e
Show file tree

Hide file tree

Showing 15 changed files with 191 additions and 39 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -13,8 +13,13 @@ S3method(set_var_labels,dataset)
 S3method(summary,dataset)
 S3method(tail,dataset)
 S3method(var_labels,dataset)
+S3method(xsd_convert,boolean)
+S3method(xsd_convert,character)
 S3method(xsd_convert,data.frame)
 S3method(xsd_convert,dataset)
+S3method(xsd_convert,factor)
+S3method(xsd_convert,integer)
+S3method(xsd_convert,numeric)
 S3method(xsd_convert,tibble)
 export("creator<-")
 export("dataset_title<-")
@@ -40,6 +45,7 @@ export(dataset_bibentry)
 export(dataset_download)
 export(dataset_title)
 export(dataset_to_triples)
+export(dataset_ttl_write)
 export(datasource_get)
 export(datasource_set)
 export(describe)
@@ -48,6 +54,7 @@ export(dublincore)
 export(geolocation)
 export(get_prefix)
 export(get_resource_identifier)
+export(id_to_column)
 export(identifier)
 export(is.dataset)
 export(language)
@@ -59,7 +66,6 @@ export(set_var_labels)
 export(size)
 export(subject)
 export(subject_create)
-export(ttl_dataset_write)
 export(var_labels)
 export(version)
 export(xsd_convert)

diff --git a/R/dataset.R b/R/dataset.R
@@ -1,7 +1,7 @@
 #' @title Create a dataset
 #'
 #' @param x An R object that contains the data of the dataset (a data.frame or
-#' inherited from [`data.frame`][base::data.frame()], for example,
+#' inherited from [`data.frame`][base::data.frame()]), for example,
 #' [tibble::tibble()], [tsibble::tsibble()], [data.table::data.table()].
 #' @param author A single person or a vector of persons as authors, declared with
 #' \code{\link[utils:person]{person}}.

diff --git a/R/dataset_to_triples.R b/R/dataset_to_triples.R
@@ -2,43 +2,45 @@
 #' @description The dataset is converted into a three-column long format with
 #' columns \code{s} for subject, \code{p} for predicate and \code{o} for
 #' object.
-#' @param df A [data.frame] or similar object, or a [dataset].
+#' @param x An R object that contains the data of the dataset (a data.frame or
+#' inherited from [`data.frame`][base::data.frame()]), for example, [dataset()],
+#' [tibble::tibble()], [tsibble::tsibble()], [data.table::data.table()].
 #' @param idcol The identifier column. If \code{idcol} is \code{NULL} it attempts to
 #' use the \code{row.names(x)} as an \code{idcol}.
 #' @return The long form version of the original dataset, retaining the attributes
 #' and class.
 #' @export
 
-dataset_to_triples <- function(df, idcol=NULL) {
+dataset_to_triples <- function(x, idcol=NULL) {
 
-  is_dataset <- inherits(df, "dataset")
+  is_dataset <- inherits(x, "dataset")
 
   if (is_dataset) {
-    new_title = paste0(dataset_title(df), " [triple form]")
-    DataBibentry <- dataset_bibentry(df)
-    new_Subject <- subject(df)
+    new_title = paste0(dataset_title(x), " [triple form]")
+    DataBibentry <- dataset_bibentry(x)
+    new_Subject <- subject(x)
   }
 
   if (is.null(idcol)) {
-    df$new_id_col <- row.names(df)
-    idcol <- which(names(df)=="new_id_col" )
-    idcol_pos <- idcol_find(df, idcol)
-    seq_along_cols <- seq_along(df)[-idcol_pos]
+    x$new_id_col <- row.names(x)
+    idcol <- which(names(x)=="new_id_col" )
+    idcol_pos <- idcol_find(x, idcol)
+    seq_along_cols <- seq_along(x)[-idcol_pos]
   } else {
     ## See utils-idcol_find.R for the internal function
-    seq_along_cols <- seq_along(df)[-idcol_find(df, idcol)]
+    seq_along_cols <- seq_along(x)[-idcol_find(x, idcol)]
   }
 
-  triple_list <- lapply (seq_along_cols, function(x) {
-    data.frame(s = df[[idcol]],
-               p = names(df)[x],
-               o = df[[x]]
-    ) })
+  triple_list <- lapply (seq_along_cols, function(i) {
+    data.frame(s = x[[idcol]],
+               p = names(x)[i],
+               o = x[[i]])
+    })
 
   tmp <- do.call(rbind, triple_list)
 
   if (is_dataset) {
-    tmp2 <- dataset(x=tmp, author=creator(df), title = new_title)
+    tmp2 <- dataset(x=tmp, author=creator(x), title = new_title)
     tmp_DSD <- DataStructure(tmp2)
     tmp_DSD$s$label <- "Subject"
     tmp_DSD$s$label <- "Object"

diff --git a/R/id_to_column.R b/R/id_to_column.R
@@ -0,0 +1,57 @@
+#' @title Add identifier to columns
+#'
+#' @description Add a prefixed identifier as the first column of the dataset.
+#' @inheritParams dataset
+#' @param prefix Defaults to \code{eg:} (example.com); \code{NULL} adds no prefix.
+#' @param ids Identifiers; if \code{NULL}, derived from \code{row.names(x)}.
+#' @return A dataset conforming to the original sub-class of \code{x}.
+#' @examples
+#'
+#' # Example with a dataset object:
+#' id_to_column(iris_dataset)
+#'
+#' # Example with a data.frame object:
+#'
+#' id_to_column(iris, prefix="eg:iris-o")
+#' @export
+id_to_column <- function(x, prefix = "eg:", ids = NULL) {
+
+  is_dataset <- is.dataset(x)
+
+  if (is.null(ids)) {
+    ids <- gsub("[^[:alnum:]]", "-", row.names(x))
+  } else if (nrow(x)!=length(ids)) {
+    stop("id_to_column(x, ..., ids) : ids must be of same lengths as nrow(x).")
+  }
+
+  if (is.null(prefix)) { prefix <- "" }
+
+  rhs <- x
+  # paste0() turns zero-length ids into one "" element, which cannot be
+  # assigned to a zero-row x; keep the identifier column empty in that case.
+  x$rowid <- if (length(ids) > 0) paste0(prefix, ids) else character(0)
+  lhs <- x[, "rowid", drop=FALSE]
+
+  if (is_dataset) {
+
+    DataBibentry <- dataset_bibentry(rhs)
+    tmp <- dataset(cbind(lhs, rhs),
+                   author=DataBibentry$author,
+                   title = DataBibentry$title)
+
+    if (nrow(tmp)>0) {
+      row.names(tmp) <- seq_len(nrow(tmp))
+    } else {
+      row.names(tmp) <- NULL
+    }
+
+    attr(tmp, "DataBibentry") <-  DataBibentry
+
+  } else {
+    tmp <- cbind(lhs, rhs)
+  }
+  tmp
+}
+
+
+
diff --git a/R/ttl_dataset_write.R b/R/ttl_dataset_write.R
@@ -16,12 +16,12 @@
 #'
 #' examplefile <- file.path(tempdir(), "ttl_dataset_write.ttl")
 #'
-#' ttl_dataset_write(tdf=testtdf, file_path = examplefile)
+#' dataset_ttl_write(tdf=testtdf, file_path = examplefile)
 #'
 #' readLines(examplefile)
 #' @export
 
-ttl_dataset_write <- function(tdf,
+dataset_ttl_write <- function(tdf,
                               ttl_namespace = NULL,
                               file_path = NULL,
                               overwrite = TRUE) {
@@ -30,7 +30,7 @@ ttl_dataset_write <- function(tdf,
   default_namespace <- getdata("dataset_namespace")
   default_namespace <- default_namespace[
     which(default_namespace$prefix %in% c("rdf:", "rdfs:", "owl:",
-                                          "qb:", "dcat:")),]
+                                          "qb:", "dcat:", "xsd:")),]
 
   ## validate dataset
   validate_tdf(tdf)

diff --git a/R/xsd_convert.R b/R/xsd_convert.R
@@ -24,7 +24,7 @@ xsd_convert.data.frame <- function(x, idcol=NULL, ...) {
 
     type <- switch(class(t)[[1]],
                    "numeric"   = "xs:decimal",
-                   "factor"    = "codelist",
+                   "factor"    = "xs:string",
                    "logical"   = "xs:boolean",
                    "integer"   = "xs:integer",
                    "Date"      = "xs:date",
@@ -90,28 +90,42 @@ xsd_convert.tibble <- function(x, idcol=NULL,...) {
   NextMethod()
 }
 
+#' @rdname xsd_convert
+#' @export
+#' @exportS3Method
 xsd_convert.character <- function(x, idcol=NULL, ...) {
   # Wrap each value in double quotes, then append the XSD datatype suffix.
   quoted <- paste0('"', x, '"')
   paste0(quoted, "^^<", "xs:string", ">")
 }
 
+#' @rdname xsd_convert
+#' @export
+#' @exportS3Method
 xsd_convert.numeric <- function(x, idcol=NULL, ...) {
   # Lexical form first, then the quoted literal tagged as xs:decimal.
   lexical <- as.character(x)
   paste0('"', lexical, '"', "^^<xs:decimal>")
 }
 
+#' @rdname xsd_convert
+#' @export
+#' @exportS3Method
 xsd_convert.integer <- function(x, idcol=NULL, ...) {
   # Build the datatype suffix once and attach it to every quoted value.
   datatype_suffix <- paste0("^^<", "xs:integer", ">")
   paste0('"', as.character(x), '"', datatype_suffix)
 }
 
+#' @rdname xsd_convert
+#' @export
+#' @exportS3Method
 xsd_convert.boolean <- function(x, idcol=NULL, ...) {
   # NOTE(review): base R logical vectors have class "logical", so S3 dispatch
   # only reaches this method for objects explicitly classed "boolean" -- confirm.
   var_type <-  "xs:boolean"
   # xs:boolean literals are lower-case ("true"/"false"); R yields TRUE/FALSE.
   paste0('\"', tolower(as.character(x)),  '\"', "^^<", var_type, ">")
 }
 
+#' @rdname xsd_convert
+#' @export
+#' @exportS3Method
 xsd_convert.factor<- function(x, idcol=NULL, codelist=NULL ) {
-
   if (is.null(codelist)) {
     var_type <-  "xs:string"
     paste0('\"', x,  '\"', "^^<", var_type, ">")

diff --git a/_pkgdown.yml b/_pkgdown.yml
@@ -68,6 +68,7 @@ reference:
     contents:
       - dataset
       - subsetting
+      - id_to_column
       - head.dataset
       - dataset_bibentry
       - describe
@@ -113,7 +114,7 @@ reference:
       - xsd_convert
       - get_prefix
       - dataset_to_triples
-      - ttl_dataset_write
+      - dataset_ttl_write
   - title: "Reproducible research workflows"
     desc: >
       Improve reproducibility with recording data lifecycle, processing history as

diff --git a/man/dataset.Rd b/man/dataset.Rd
diff --git a/man/dataset_to_triples.Rd b/man/dataset_to_triples.Rd
diff --git a/man/ttl_dataset_write.Rd → man/dataset_ttl_write.Rd b/man/ttl_dataset_write.Rd → man/dataset_ttl_write.Rd
diff --git a/man/id_to_column.Rd b/man/id_to_column.Rd
diff --git a/man/xsd_convert.Rd b/man/xsd_convert.Rd
diff --git a/tests/testthat/test-dataset_to_triples.R b/tests/testthat/test-dataset_to_triples.R
@@ -0,0 +1,13 @@
+
+
+test_that("dataset_to_triples() works", {
+  small <- head(iris_dataset, 3)
+  triples <- dataset_to_triples(iris_dataset)
+  expect_equal(class(triples), "data.frame")
+  expect_equal(ncol(triples), 3)
+  # One triple per cell: the generated id column is not turned into triples.
+  expect_equal(nrow(dataset_to_triples(small)), nrow(small) * ncol(small))
+})
+
+
+
+
+
+