diff --git a/NAMESPACE b/NAMESPACE index 62da194..0a6eb6e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,8 +13,13 @@ S3method(set_var_labels,dataset) S3method(summary,dataset) S3method(tail,dataset) S3method(var_labels,dataset) +S3method(xsd_convert,boolean) +S3method(xsd_convert,character) S3method(xsd_convert,data.frame) S3method(xsd_convert,dataset) +S3method(xsd_convert,factor) +S3method(xsd_convert,integer) +S3method(xsd_convert,numeric) S3method(xsd_convert,tibble) export("creator<-") export("dataset_title<-") @@ -40,6 +45,7 @@ export(dataset_bibentry) export(dataset_download) export(dataset_title) export(dataset_to_triples) +export(dataset_ttl_write) export(datasource_get) export(datasource_set) export(describe) @@ -48,6 +54,7 @@ export(dublincore) export(geolocation) export(get_prefix) export(get_resource_identifier) +export(id_to_column) export(identifier) export(is.dataset) export(language) @@ -59,7 +66,6 @@ export(set_var_labels) export(size) export(subject) export(subject_create) -export(ttl_dataset_write) export(var_labels) export(version) export(xsd_convert) diff --git a/R/dataset.R b/R/dataset.R index 4924295..f6ceda0 100644 --- a/R/dataset.R +++ b/R/dataset.R @@ -1,7 +1,7 @@ #' @title Create a dataset #' #' @param x An R object that contains the data of the dataset (a data.frame or -#' inherited from [`data.frame`][base::data.frame()], for example, +#' inherited from [`data.frame`][base::data.frame()]), for example, #' [tibble::tibble()], [tsibble::tsibble()], [data.table::data.table()]. #' @param author A single person or a vector of persons as authors, declared with #' \code{\link[utils:person]{person}}. diff --git a/R/dataset_to_triples.R b/R/dataset_to_triples.R index d17cf9f..7cf0bb8 100644 --- a/R/dataset_to_triples.R +++ b/R/dataset_to_triples.R @@ -2,43 +2,45 @@ #' @description The dataset is converted into a three-column long format with #' columns \code{s} for subject, \code{p} for predicate and \code{o} for #' object. -#' @param df A [data.frame] or similar object, or a [dataset]. +#' @param x An R object that contains the data of the dataset (a data.frame or +#' inherited from [`data.frame`][base::data.frame()]), for example, [dataset()] +#' [tibble::tibble()], [tsibble::tsibble()], [data.table::data.table()]. #' @param idcol The identifier column. If \code{idcol} is \code{NULL} it attempts to #' use the \code{row.names(df)} as an \code{idcol}. #' @return The long form version of the original dataset, retaining the attributes #' and class. #' @export -dataset_to_triples <- function(df, idcol=NULL) { +dataset_to_triples <- function(x, idcol=NULL) { - is_dataset <- inherits(df, "dataset") + is_dataset <- inherits(x, "dataset") if (is_dataset) { - new_title = paste0(dataset_title(df), " [triple form]") - DataBibentry <- dataset_bibentry(df) - new_Subject <- subject(df) + new_title = paste0(dataset_title(x), " [triple form]") + DataBibentry <- dataset_bibentry(x) + new_Subject <- subject(x) } if (is.null(idcol)) { - df$new_id_col <- row.names(df) - idcol <- which(names(df)=="new_id_col" ) - idcol_pos <- idcol_find(df, idcol) - seq_along_cols <- seq_along(df)[-idcol_pos] + x$new_id_col <- row.names(x) + idcol <- which(names(x)=="new_id_col" ) + idcol_pos <- idcol_find(x, idcol) + seq_along_cols <- seq_along(x)[-idcol_pos] } else { ## See utils-idcol_find.R for the internal function - seq_along_cols <- seq_along(df)[-idcol_find(df, idcol)] + seq_along_cols <- seq_along(x)[-idcol_find(x, idcol)] } - triple_list <- lapply (seq_along_cols, function(x) { - data.frame(s = df[[idcol]], - p = names(df)[x], - o = df[[x]] - ) }) + triple_list <- lapply (seq_along_cols, function(i) { + data.frame(s = x[[idcol]], + p = names(x)[i], + o = x[[i]]) + }) tmp <- do.call(rbind, triple_list) if (is_dataset) { - tmp2 <- dataset(x=tmp, author=creator(df), title = new_title) + tmp2 <- dataset(x=tmp, author=creator(x), title = new_title) tmp_DSD <- DataStructure(tmp2) tmp_DSD$s$label <- "Subject" tmp_DSD$s$label <- "Object" diff --git a/R/id_to_column.R b/R/id_to_column.R new file mode 100644 index 0000000..0e96a17 --- /dev/null +++ b/R/id_to_column.R @@ -0,0 +1,57 @@ +#' @title Add identifier to columns +#' +#' @description Add a prefixed identifier to the first column of the dataset. +#' @inheritParams dataset +#' @param prefix Defaults to \code{eg:} (example.com). +#' @param ids Defaults to \code{NULL}. +#' @return A dataset conforming the original sub-class of \code{x}. +#' @examples +#' +#' # Example with a dataaset object: +#' id_to_column(iris_dataset) +#' +#' # Example with a data.frame object: +#' +#' id_to_column(iris, prefix="eg:iris-o") +#' @export +id_to_column <- function(x, prefix = "eg:", ids = NULL) { + + is_dataset <- is.dataset(x) + + lastcol <- ncol(x) + + if (is.null(ids)) { + ids <- gsub("[^[:alnum:]]", "-", row.names(x)) + } else if (nrow(x)!=length(ids)) { + stop("id_to_column(x, ..., ids) : ids must be of same lengths as nrow(x).") + } + + if (is.null(prefix)) { prefix <- "" } + + rhs <- x + x$rowid <- paste0(prefix, ids) + lhs <- x[, "rowid", drop=FALSE] + + if (is_dataset) { + + DataBibentry <- dataset_bibentry(rhs) + tmp <- dataset(cbind(lhs, rhs), + author=DataBibentry$author, + title = DataBibentry$title) + + if (nrow(tmp)>0) { + row.names(tmp) <- 1:nrow(tmp) + } else { + row.names(tmp) <- NULL + } + + attr(tmp, "DataBibentry") <- DataBibentry + + } else { + tmp <- cbind(lhs, rhs) + } + tmp +} + + + diff --git a/R/ttl_dataset_write.R b/R/ttl_dataset_write.R index 6c60260..af8ae0f 100644 --- a/R/ttl_dataset_write.R +++ b/R/ttl_dataset_write.R @@ -16,12 +16,12 @@ #' #' examplefile <- file.path(tempdir(), "ttl_dataset_write.ttl") #' -#' ttl_dataset_write(tdf=testtdf, file_path = examplefile) +#' dataset_ttl_write(tdf=testtdf, file_path = examplefile) #' #' readLines(examplefile) #' @export -ttl_dataset_write <- function(tdf, +dataset_ttl_write <- function(tdf, ttl_namespace = NULL, file_path = NULL, overwrite = TRUE) { @@ -30,7 +30,7 @@ ttl_dataset_write <- function(tdf, default_namespace <- getdata("dataset_namespace") default_namespace <- default_namespace[ which(default_namespace$prefix %in% c("rdf:", "rdfs:", "owl:", - "qb:", "dcat:")),] + "qb:", "dcat:", "xsd:")),] ## validate dataset validate_tdf(tdf) diff --git a/R/xsd_convert.R b/R/xsd_convert.R index 382d5a4..17a07dd 100644 --- a/R/xsd_convert.R +++ b/R/xsd_convert.R @@ -24,7 +24,7 @@ xsd_convert.data.frame <- function(x, idcol=NULL, ...) { type <- switch(class(t)[[1]], "numeric" = "xs:decimal", - "factor" = "codelist", + "factor" = "xs:string", "logical" = "xs:boolean", "integer" = "xs:integer", "Date" = "xs:date", @@ -90,28 +90,42 @@ xsd_convert.tibble <- function(x, idcol=NULL,...) { NextMethod() } +#' @rdname xsd_convert +#' @export +#' @exportS3Method xsd_convert.character <- function(x, idcol=NULL, ...) { var_type <- "xs:string" paste0('\"', x, '\"', "^^<", var_type, ">") } +#' @rdname xsd_convert +#' @export +#' @exportS3Method xsd_convert.numeric <- function(x, idcol=NULL, ...) { var_type <- "xs:decimal" paste0('\"', as.character(x), '\"', "^^<", var_type, ">") } +#' @rdname xsd_convert +#' @export +#' @exportS3Method xsd_convert.integer <- function(x, idcol=NULL, ...) { var_type <- "xs:integer" paste0('\"', as.character(x), '\"', "^^<", var_type, ">") } +#' @rdname xsd_convert +#' @export +#' @exportS3Method xsd_convert.boolean <- function(x, idcol=NULL, ...) { var_type <- "xs:boolean" paste0('\"', as.character(x), '\"', "^^<", var_type, ">") } +#' @rdname xsd_convert +#' @export +#' @exportS3Method xsd_convert.factor<- function(x, idcol=NULL, codelist=NULL ) { - if (is.null(codelist)) { var_type <- "xs:string" paste0('\"', x, '\"', "^^<", var_type, ">") diff --git a/_pkgdown.yml b/_pkgdown.yml index 8e010ed..0349c7c 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -68,6 +68,7 @@ reference: contents: - dataset - subsetting + - id_to_column - head.dataset - dataset_bibentry - describe @@ -113,7 +114,7 @@ reference: - xsd_convert - get_prefix - dataset_to_triples - - ttl_dataset_write + - dataset_ttl_write - title: "Reproducible research workflows" desc: > Improve reproducability with recording data lifecycle, processing history as diff --git a/man/dataset.Rd b/man/dataset.Rd index 3348524..4f7a98b 100644 --- a/man/dataset.Rd +++ b/man/dataset.Rd @@ -63,7 +63,7 @@ is.dataset(x) } \arguments{ \item{x}{An R object that contains the data of the dataset (a data.frame or -inherited from \code{\link[base:data.frame]{data.frame}}, for example, +inherited from \code{\link[base:data.frame]{data.frame}}), for example, \code{\link[tibble:tibble]{tibble::tibble()}}, \code{\link[tsibble:tsibble]{tsibble::tsibble()}}, \code{\link[data.table:data.table]{data.table::data.table()}}.} \item{author}{A single person or a vector of persons as authors, declared with diff --git a/man/dataset_to_triples.Rd b/man/dataset_to_triples.Rd index 284383b..d476a35 100644 --- a/man/dataset_to_triples.Rd +++ b/man/dataset_to_triples.Rd @@ -4,10 +4,12 @@ \alias{dataset_to_triples} \title{Dataset to triples (three columns)} \usage{ -dataset_to_triples(df, idcol = NULL) +dataset_to_triples(x, idcol = NULL) } \arguments{ -\item{df}{A \link{data.frame} or similar object, or a \link{dataset}.} +\item{x}{An R object that contains the data of the dataset (a data.frame or +inherited from \code{\link[base:data.frame]{data.frame}}), for example, \code{\link[=dataset]{dataset()}} +\code{\link[tibble:tibble]{tibble::tibble()}}, \code{\link[tsibble:tsibble]{tsibble::tsibble()}}, \code{\link[data.table:data.table]{data.table::data.table()}}.} \item{idcol}{The identifier column. If \code{idcol} is \code{NULL} it attempts to use the \code{row.names(df)} as an \code{idcol}.} diff --git a/man/ttl_dataset_write.Rd b/man/dataset_ttl_write.Rd similarity index 88% rename from man/ttl_dataset_write.Rd rename to man/dataset_ttl_write.Rd index a145615..31d6f4c 100644 --- a/man/ttl_dataset_write.Rd +++ b/man/dataset_ttl_write.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/ttl_dataset_write.R -\name{ttl_dataset_write} -\alias{ttl_dataset_write} +\name{dataset_ttl_write} +\alias{dataset_ttl_write} \title{Write a dataset into Turtle serialisation} \usage{ -ttl_dataset_write( +dataset_ttl_write( tdf, ttl_namespace = NULL, file_path = NULL, @@ -35,7 +35,7 @@ testtdf <- data.frame (s = c("eg:o1", "eg:01", "eg:02"), examplefile <- file.path(tempdir(), "ttl_dataset_write.ttl") -ttl_dataset_write(tdf=testtdf, file_path = examplefile) +dataset_ttl_write(tdf=testtdf, file_path = examplefile) readLines(examplefile) } diff --git a/man/id_to_column.Rd b/man/id_to_column.Rd new file mode 100644 index 0000000..9b278dc --- /dev/null +++ b/man/id_to_column.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/id_to_column.R +\name{id_to_column} +\alias{id_to_column} +\title{Add identifier to columns} +\usage{ +id_to_column(x, prefix = "eg:", ids = NULL) +} +\arguments{ +\item{x}{An R object that contains the data of the dataset (a data.frame or +inherited from \code{\link[base:data.frame]{data.frame}}), for example, +\code{\link[tibble:tibble]{tibble::tibble()}}, \code{\link[tsibble:tsibble]{tsibble::tsibble()}}, \code{\link[data.table:data.table]{data.table::data.table()}}.} + +\item{prefix}{Defaults to \code{eg:} (example.com).} + +\item{ids}{Defaults to \code{NULL}.} +} +\value{ +A dataset conforming the original sub-class of \code{x}. +} +\description{ +Add a prefixed identifier to the first column of the dataset. +} +\examples{ + +# Example with a dataaset object: +id_to_column(iris_dataset) + +# Example with a data.frame object: + +id_to_column(iris, prefix="eg:iris-o") +} diff --git a/man/xsd_convert.Rd b/man/xsd_convert.Rd index 521dd86..fb13db2 100644 --- a/man/xsd_convert.Rd +++ b/man/xsd_convert.Rd @@ -5,6 +5,11 @@ \alias{xsd_convert.data.frame} \alias{xsd_convert.dataset} \alias{xsd_convert.tibble} +\alias{xsd_convert.character} +\alias{xsd_convert.numeric} +\alias{xsd_convert.integer} +\alias{xsd_convert.boolean} +\alias{xsd_convert.factor} \title{Convert to XML Schema Definition (XSD) types} \usage{ xsd_convert(x, idcol, ...) @@ -14,10 +19,20 @@ xsd_convert(x, idcol, ...) \method{xsd_convert}{dataset}(x, idcol = NULL, ...) \method{xsd_convert}{tibble}(x, idcol = NULL, ...) + +\method{xsd_convert}{character}(x, idcol = NULL, ...) + +\method{xsd_convert}{numeric}(x, idcol = NULL, ...) + +\method{xsd_convert}{integer}(x, idcol = NULL, ...) + +\method{xsd_convert}{boolean}(x, idcol = NULL, ...) + +\method{xsd_convert}{factor}(x, idcol = NULL, codelist = NULL) } \arguments{ \item{x}{An R object that contains the data of the dataset (a data.frame or -inherited from \code{\link[base:data.frame]{data.frame}}, for example, +inherited from \code{\link[base:data.frame]{data.frame}}), for example, \code{\link[tibble:tibble]{tibble::tibble()}}, \code{\link[tsibble:tsibble]{tsibble::tsibble()}}, \code{\link[data.table:data.table]{data.table::data.table()}}.} \item{idcol}{The name or position of the column that contains the row diff --git a/tests/testthat/test-dataset_to_triples.R b/tests/testthat/test-dataset_to_triples.R new file mode 100644 index 0000000..412b219 --- /dev/null +++ b/tests/testthat/test-dataset_to_triples.R @@ -0,0 +1,13 @@ + + +test_that("dataset_to_triples works()", { + expect_equal(class(dataset_to_triples(iris_dataset)), "data.frame") + expect_equal(ncol(dataset_to_triples(iris_dataset)), 3) + expect_equal(nrow(dataset_to_triples(head(iris_dataset, 3))), dim(head(iris_dataset, 3))[1]*dim(head(iris_dataset, 3))[2]) +}) + + + + + + diff --git a/tests/testthat/test-ttl_dataset_write.R b/tests/testthat/test-dataset_ttl_write.R similarity index 64% rename from tests/testthat/test-ttl_dataset_write.R rename to tests/testthat/test-dataset_ttl_write.R index 250bc04..9a70b3d 100644 --- a/tests/testthat/test-ttl_dataset_write.R +++ b/tests/testthat/test-dataset_ttl_write.R @@ -1,14 +1,14 @@ -testfile <- file.path(tempdir(), "test-ttl_dataset_write.ttl") +testfile <- file.path(tempdir(), "test-dataset_ttl_write.ttl") testtdf <- data.frame ( s = c("eg:o1", "eg:01", "eg:02"), p = c("a", "eg-var:", "eg-var"), o = c("qb:Observation", "\"1\"^^", "\"2\"^^") ) -ttl_dataset_write(tdf=testtdf, +dataset_ttl_write(tdf=testtdf, ttl_namespace = NULL, file_path=testfile, overwrite=TRUE) -test_that("ttl_dataset_write() works:", { +test_that("dataset_ttl_write() works:", { expect_true(file.exists(testfile)) expect_true(grepl("@prefix owl:", readLines(testfile)[1])) expect_equal(sum( @@ -19,7 +19,7 @@ test_that("ttl_dataset_write() works:", { , 1) }) -test_that("ttl_dataset_write() validation works:", { - expect_error(ttl_dataset_write(tdf=iris, file = tempfile())) - expect_error(ttl_dataset_write(tdf=list(), file = tempfile())) +test_that("dataset_ttl_write() validation works:", { + expect_error(dataset_ttl_write(tdf=iris, file = tempfile())) + expect_error(dataset_ttl_write(tdf=list(), file = tempfile())) }) diff --git a/tests/testthat/test-id_to_column.R b/tests/testthat/test-id_to_column.R new file mode 100644 index 0000000..3d85664 --- /dev/null +++ b/tests/testthat/test-id_to_column.R @@ -0,0 +1,10 @@ + + +test_that("id_to_column works ()", { + expect_true(is.dataset(head(id_to_column(iris_dataset)))) + expect_true(is.data.frame(head(id_to_column(iris)))) + expect_equal(head(id_to_column(iris_dataset), 3)$rowid, paste0("eg:iris-o", 1:3)) + expect_equal(head(id_to_column(x=iris, prefix="eg:iris-o" ), 3)$rowid, paste0("eg:iris-o", 1:3)) +}) + +