Skip to content

Commit

Permalink
id_to_column()
Browse files Browse the repository at this point in the history
  • Loading branch information
antaldaniel committed Dec 31, 2023
1 parent c65d254 commit 1c7a45e
Show file tree
Hide file tree
Showing 15 changed files with 191 additions and 39 deletions.
8 changes: 7 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,13 @@ S3method(set_var_labels,dataset)
S3method(summary,dataset)
S3method(tail,dataset)
S3method(var_labels,dataset)
S3method(xsd_convert,boolean)
S3method(xsd_convert,character)
S3method(xsd_convert,data.frame)
S3method(xsd_convert,dataset)
S3method(xsd_convert,factor)
S3method(xsd_convert,integer)
S3method(xsd_convert,numeric)
S3method(xsd_convert,tibble)
export("creator<-")
export("dataset_title<-")
Expand All @@ -40,6 +45,7 @@ export(dataset_bibentry)
export(dataset_download)
export(dataset_title)
export(dataset_to_triples)
export(dataset_ttl_write)
export(datasource_get)
export(datasource_set)
export(describe)
Expand All @@ -48,6 +54,7 @@ export(dublincore)
export(geolocation)
export(get_prefix)
export(get_resource_identifier)
export(id_to_column)
export(identifier)
export(is.dataset)
export(language)
Expand All @@ -59,7 +66,6 @@ export(set_var_labels)
export(size)
export(subject)
export(subject_create)
export(ttl_dataset_write)
export(var_labels)
export(version)
export(xsd_convert)
Expand Down
2 changes: 1 addition & 1 deletion R/dataset.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#' @title Create a dataset
#'
#' @param x An R object that contains the data of the dataset (a data.frame or
#' inherited from [`data.frame`][base::data.frame()], for example,
#' inherited from [`data.frame`][base::data.frame()]), for example,
#' [tibble::tibble()], [tsibble::tsibble()], [data.table::data.table()].
#' @param author A single person or a vector of persons as authors, declared with
#' \code{\link[utils:person]{person}}.
Expand Down
36 changes: 19 additions & 17 deletions R/dataset_to_triples.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,43 +2,45 @@
#' @description The dataset is converted into a three-column long format with
#' columns \code{s} for subject, \code{p} for predicate and \code{o} for
#' object.
#' @param df A [data.frame] or similar object, or a [dataset].
#' @param x An R object that contains the data of the dataset (a data.frame or
#' inherited from [`data.frame`][base::data.frame()]), for example, [dataset()],
#' [tibble::tibble()], [tsibble::tsibble()], [data.table::data.table()].
#' @param idcol The identifier column. If \code{idcol} is \code{NULL}, it attempts to
#' use the \code{row.names(x)} as an \code{idcol}.
#' @return The long form version of the original dataset, retaining the attributes
#' and class.
#' @export

dataset_to_triples <- function(df, idcol=NULL) {
dataset_to_triples <- function(x, idcol=NULL) {

is_dataset <- inherits(df, "dataset")
is_dataset <- inherits(x, "dataset")

if (is_dataset) {
new_title = paste0(dataset_title(df), " [triple form]")
DataBibentry <- dataset_bibentry(df)
new_Subject <- subject(df)
new_title = paste0(dataset_title(x), " [triple form]")
DataBibentry <- dataset_bibentry(x)
new_Subject <- subject(x)
}

if (is.null(idcol)) {
df$new_id_col <- row.names(df)
idcol <- which(names(df)=="new_id_col" )
idcol_pos <- idcol_find(df, idcol)
seq_along_cols <- seq_along(df)[-idcol_pos]
x$new_id_col <- row.names(x)
idcol <- which(names(x)=="new_id_col" )
idcol_pos <- idcol_find(x, idcol)
seq_along_cols <- seq_along(x)[-idcol_pos]
} else {
## See utils-idcol_find.R for the internal function
seq_along_cols <- seq_along(df)[-idcol_find(df, idcol)]
seq_along_cols <- seq_along(x)[-idcol_find(x, idcol)]
}

triple_list <- lapply (seq_along_cols, function(x) {
data.frame(s = df[[idcol]],
p = names(df)[x],
o = df[[x]]
) })
triple_list <- lapply (seq_along_cols, function(i) {
data.frame(s = x[[idcol]],
p = names(x)[i],
o = x[[i]])
})

tmp <- do.call(rbind, triple_list)

if (is_dataset) {
tmp2 <- dataset(x=tmp, author=creator(df), title = new_title)
tmp2 <- dataset(x=tmp, author=creator(x), title = new_title)
tmp_DSD <- DataStructure(tmp2)
tmp_DSD$s$label <- "Subject"
tmp_DSD$s$label <- "Object"
Expand Down
57 changes: 57 additions & 0 deletions R/id_to_column.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#' @title Add identifier to columns
#'
#' @description Add a prefixed identifier as a new first column, named
#' \code{rowid}, of the dataset.
#' @details When \code{x} has no rows, the returned object has an empty
#' \code{rowid} column instead of raising an error.
#' @inheritParams dataset
#' @param prefix Defaults to \code{eg:} (example.com). \code{NULL} is treated
#' as an empty prefix.
#' @param ids Defaults to \code{NULL}, in which case the identifiers are taken
#' from \code{row.names(x)} with every non-alphanumeric character replaced by
#' a hyphen. If given, it must have as many elements as \code{x} has rows.
#' @return The data of \code{x} with the \code{rowid} column bound in front of
#' the original columns. If \code{x} is a dataset, the result is a dataset
#' carrying the bibliographic entry of \code{x}; otherwise it is the result of
#' \code{cbind()}.
#' @examples
#'
#' # Example with a dataset object:
#' id_to_column(iris_dataset)
#'
#' # Example with a data.frame object:
#'
#' id_to_column(iris, prefix="eg:iris-o")
#' @export
id_to_column <- function(x, prefix = "eg:", ids = NULL) {

  is_dataset <- is.dataset(x)

  if (is.null(ids)) {
    # Derive the identifiers from the row names, replacing every
    # non-alphanumeric character with a hyphen.
    ids <- gsub("[^[:alnum:]]", "-", row.names(x))
  } else if (nrow(x) != length(ids)) {
    stop("id_to_column(x, ..., ids) : ids must be of same lengths as nrow(x).")
  }

  if (is.null(prefix)) {
    prefix <- ""
  }

  # paste0() treats a zero-length argument as "", so paste0(prefix, character(0))
  # has length one and cannot be assigned as a column of a zero-row x.
  # Build the empty identifier vector explicitly in that case.
  row_ids <- if (length(ids) > 0) paste0(prefix, ids) else character(0)

  # Keep an untouched copy of the input for the right-hand side of the bind.
  rhs <- x
  x$rowid <- row_ids
  lhs <- x[, "rowid", drop = FALSE]

  if (is_dataset) {

    DataBibentry <- dataset_bibentry(rhs)
    tmp <- dataset(cbind(lhs, rhs),
                   author = DataBibentry$author,
                   title = DataBibentry$title)

    # Reset the row names to a plain running index.
    if (nrow(tmp) > 0) {
      row.names(tmp) <- seq_len(nrow(tmp))
    } else {
      row.names(tmp) <- NULL
    }

    # Restore the complete bibliographic entry of the input.
    attr(tmp, "DataBibentry") <- DataBibentry

  } else {
    tmp <- cbind(lhs, rhs)
  }

  tmp
}



6 changes: 3 additions & 3 deletions R/ttl_dataset_write.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@
#'
#' examplefile <- file.path(tempdir(), "ttl_dataset_write.ttl")
#'
#' ttl_dataset_write(tdf=testtdf, file_path = examplefile)
#' dataset_ttl_write(tdf=testtdf, file_path = examplefile)
#'
#' readLines(examplefile)
#' @export

ttl_dataset_write <- function(tdf,
dataset_ttl_write <- function(tdf,
ttl_namespace = NULL,
file_path = NULL,
overwrite = TRUE) {
Expand All @@ -30,7 +30,7 @@ ttl_dataset_write <- function(tdf,
default_namespace <- getdata("dataset_namespace")
default_namespace <- default_namespace[
which(default_namespace$prefix %in% c("rdf:", "rdfs:", "owl:",
"qb:", "dcat:")),]
"qb:", "dcat:", "xsd:")),]

## validate dataset
validate_tdf(tdf)
Expand Down
18 changes: 16 additions & 2 deletions R/xsd_convert.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ xsd_convert.data.frame <- function(x, idcol=NULL, ...) {

type <- switch(class(t)[[1]],
"numeric" = "xs:decimal",
"factor" = "codelist",
"factor" = "xs:string",
"logical" = "xs:boolean",
"integer" = "xs:integer",
"Date" = "xs:date",
Expand Down Expand Up @@ -90,28 +90,42 @@ xsd_convert.tibble <- function(x, idcol=NULL,...) {
NextMethod()
}

#' @rdname xsd_convert
#' @export
#' @exportS3Method
xsd_convert.character <- function(x, idcol=NULL, ...) {
  # Wrap every element of x in double quotes, then append the xs:string
  # datatype suffix. idcol and ... are accepted but not used here.
  quoted <- paste0("\"", x, "\"")
  paste0(quoted, "^^<", "xs:string", ">")
}

#' @rdname xsd_convert
#' @export
#' @exportS3Method
xsd_convert.numeric <- function(x, idcol=NULL, ...) {
  # Convert the numbers to text, quote them, and append the xs:decimal
  # datatype suffix. idcol and ... are accepted but not used here.
  lexical <- as.character(x)
  quoted <- paste0("\"", lexical, "\"")
  paste0(quoted, "^^<", "xs:decimal", ">")
}

#' @rdname xsd_convert
#' @export
#' @exportS3Method
xsd_convert.integer <- function(x, idcol=NULL, ...) {
  # Convert the integers to text, quote them, and append the xs:integer
  # datatype suffix. idcol and ... are accepted but not used here.
  lexical <- as.character(x)
  quoted <- paste0("\"", lexical, "\"")
  paste0(quoted, "^^<", "xs:integer", ">")
}

#' @rdname xsd_convert
#' @export
#' @exportS3Method
xsd_convert.boolean <- function(x, idcol=NULL, ...) {
  # NOTE(review): a base R logical vector has class "logical", so S3 dispatch
  # reaches this method only for objects carrying the class "boolean" --
  # confirm that this is the intended class name.
  # Convert the values to text, quote them, and append the xs:boolean
  # datatype suffix. idcol and ... are accepted but not used here.
  lexical <- as.character(x)
  quoted <- paste0("\"", lexical, "\"")
  paste0(quoted, "^^<", "xs:boolean", ">")
}

#' @rdname xsd_convert
#' @export
#' @exportS3Method
xsd_convert.factor<- function(x, idcol=NULL, codelist=NULL ) {

if (is.null(codelist)) {
var_type <- "xs:string"
paste0('\"', x, '\"', "^^<", var_type, ">")
Expand Down
3 changes: 2 additions & 1 deletion _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ reference:
contents:
- dataset
- subsetting
- id_to_column
- head.dataset
- dataset_bibentry
- describe
Expand Down Expand Up @@ -113,7 +114,7 @@ reference:
- xsd_convert
- get_prefix
- dataset_to_triples
- ttl_dataset_write
- dataset_ttl_write
- title: "Reproducible research workflows"
desc: >
Improve reproducibility with recording data lifecycle, processing history as
Expand Down
2 changes: 1 addition & 1 deletion man/dataset.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions man/dataset_to_triples.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions man/ttl_dataset_write.Rd → man/dataset_ttl_write.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

32 changes: 32 additions & 0 deletions man/id_to_column.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

17 changes: 16 additions & 1 deletion man/xsd_convert.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions tests/testthat/test-dataset_to_triples.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@


test_that("dataset_to_triples works()", {
  # Convert the full example dataset once and check the shape of the result.
  triples <- dataset_to_triples(iris_dataset)
  expect_equal(class(triples), "data.frame")
  expect_equal(ncol(triples), 3)

  # The expected row count is rows times columns of the input.
  top_rows <- head(iris_dataset, 3)
  expect_equal(
    nrow(dataset_to_triples(top_rows)),
    nrow(top_rows) * ncol(top_rows)
  )
})






Loading

0 comments on commit 1c7a45e

Please sign in to comment.