diff --git a/R/type-vctr.R b/R/type-vctr.R index b2ab8dd8d..72583d055 100644 --- a/R/type-vctr.R +++ b/R/type-vctr.R @@ -68,51 +68,12 @@ new_vctr <- function(.data, ..., class = character(), inherit_base_type = NULL) { - if (!is_vector(.data)) { - abort("`.data` must be a vector type.") - } - - if (is_list(.data)) { - if (is.data.frame(.data)) { - abort("`.data` can't be a data frame.") - } - - if (is.null(inherit_base_type)) { - inherit_base_type <- TRUE - } else if (is_false(inherit_base_type)) { - abort("List `.data` must inherit from the base type.") - } - } - - # Default to `FALSE` in all cases except lists - if (is.null(inherit_base_type)) { - inherit_base_type <- FALSE - } - - names <- names(.data) - names <- names_repair_missing(names) - - class <- c(class, "vctrs_vctr", if (inherit_base_type) typeof(.data)) - attrib <- list(names = names, ..., class = class) - - vec_set_attributes(.data, attrib) + .External(vctrs_new_vctr, .data, class, inherit_base_type, ...) } +new_vctr <- fn_inline_formals(new_vctr, "class") names_repair_missing <- function(x) { - if (is.null(x)) { - return(x) - } - - missing <- vec_equal_na(x) - - if (any(missing)) { - # We never want to allow `NA_character_` names to slip through, but - # erroring on them has caused issues. Instead, we repair them to the - # empty string (#784). - x <- vec_assign(x, missing, "") - } - - x + .Call(vctrs_name_repair_missing, x) } #' @export diff --git a/man/new_vctr.Rd b/man/new_vctr.Rd index d3101a8f6..c3a918ef8 100644 --- a/man/new_vctr.Rd +++ b/man/new_vctr.Rd @@ -5,7 +5,7 @@ \alias{vctr} \title{vctr (vector) S3 class} \usage{ -new_vctr(.data, ..., class = character(), inherit_base_type = NULL) +new_vctr(.data, ..., class = character(0), inherit_base_type = NULL) } \arguments{ \item{.data}{Foundation of class. Must be a vector} diff --git a/src/decl/type-vctr-decl.h b/src/decl/type-vctr-decl.h new file mode 100644 index 000000000..7269c9b07 --- /dev/null +++ b/src/decl/type-vctr-decl.h @@ -0,0 +1,4 @@ +static r_obj* classes_vctrs_vctr = NULL; +static r_obj* vec_set_attributes_call = NULL; + +static r_obj* names_repair_missing(r_obj* x); diff --git a/src/init.c b/src/init.c index 617350fe1..89e4ca087 100644 --- a/src/init.c +++ b/src/init.c @@ -138,6 +138,7 @@ extern r_obj* vctrs_integer64_proxy(r_obj*); extern r_obj* vctrs_integer64_restore(r_obj*); extern r_obj* vctrs_list_drop_empty(r_obj*); extern r_obj* vctrs_is_altrep(r_obj* x); +extern r_obj* vctrs_name_repair_missing(r_obj* x); // Maturing @@ -296,6 +297,7 @@ static const R_CallMethodDef CallEntries[] = { {"vctrs_integer64_restore", (DL_FUNC) &vctrs_integer64_restore, 1}, {"vctrs_list_drop_empty", (DL_FUNC) &vctrs_list_drop_empty, 1}, {"vctrs_is_altrep", (DL_FUNC) &vctrs_is_altrep, 1}, + {"vctrs_name_repair_missing", (DL_FUNC) &vctrs_name_repair_missing, 1}, {NULL, NULL, 0} }; @@ -309,6 +311,7 @@ extern SEXP vctrs_rbind(SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_cbind(SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_c(SEXP, SEXP, SEXP, SEXP); extern SEXP vctrs_new_data_frame(SEXP); +extern SEXP vctrs_new_vctr(SEXP); static const R_ExternalMethodDef ExtEntries[] = { {"vctrs_type_common", (DL_FUNC) &vctrs_type_common, 1}, @@ -321,6 +324,7 @@ static const R_ExternalMethodDef ExtEntries[] = { {"vctrs_cbind", (DL_FUNC) &vctrs_cbind, 3}, {"vctrs_c", (DL_FUNC) &vctrs_c, 3}, {"vctrs_new_data_frame", (DL_FUNC) &vctrs_new_data_frame, -1}, + {"vctrs_new_vctr", (DL_FUNC) &vctrs_new_vctr, -1}, {NULL, NULL, 0} }; @@ -367,6 +371,7 @@ void vctrs_init_type(SEXP ns); void vctrs_init_type_data_frame(SEXP ns); void vctrs_init_type_date_time(SEXP ns); void vctrs_init_type_info(SEXP ns); +void vctrs_init_type_vctr(r_obj* ns); void vctrs_init_unspecified(SEXP ns); void vctrs_init_utils(SEXP ns); @@ -390,6 +395,7 @@ SEXP vctrs_init_library(SEXP ns) { vctrs_init_type_data_frame(ns); vctrs_init_type_date_time(ns); vctrs_init_type_info(ns); + vctrs_init_type_vctr(ns); vctrs_init_unspecified(ns); vctrs_init_utils(ns); return R_NilValue; diff --git a/src/type-vctr.c b/src/type-vctr.c new file mode 100644 index 000000000..668bb2f76 --- /dev/null +++ b/src/type-vctr.c @@ -0,0 +1,188 @@ +#include "type-vctr.h" +#include "vctrs.h" +#include "utils.h" + +#include "decl/type-vctr-decl.h" + +// [[ register(external = TRUE) ]] +r_obj* vctrs_new_vctr(r_obj* args) { + args = r_node_cdr(args); + + r_obj* data = r_node_car(args); args = r_node_cdr(args); + r_obj* cls = r_node_car(args); args = r_node_cdr(args); + r_obj* inherit_base_type = r_node_car(args); args = r_node_cdr(args); + r_obj* attributes = args; + + return new_vctr( + data, + cls, + inherit_base_type, + attributes + ); +} + +// [[ include("type-vctr.h") ]] +r_obj* new_vctr(r_obj* data, + r_obj* cls, + r_obj* inherit_base_type, + r_obj* attributes) { + if (!r_is_vector(data)) { + r_abort("`.data` must be a vector type."); + } + if (r_typeof(cls) != R_TYPE_character) { + r_abort("`class` must be a character vector."); + } + if ((inherit_base_type != r_null) && !r_is_bool(inherit_base_type)) { + r_abort("`inherit_base_type` must be `NULL` or a single `TRUE` or `FALSE`."); + } + + const enum r_type type_attributes = r_typeof(attributes); + + if (type_attributes != R_TYPE_pairlist && type_attributes != R_TYPE_null) { + r_stop_internal("new_vctr", "`attributes` must be a pairlist or `NULL`."); + } + + const enum r_type type = r_typeof(data); + + if (type == R_TYPE_list && r_inherits(data, "data.frame")) { + r_abort("`.data` can't be a data frame."); + } + + bool c_inherit_base_type = false; + + if (type == R_TYPE_list) { + if (inherit_base_type == r_null) { + // List types always inherit the base type + c_inherit_base_type = true; + } else { + c_inherit_base_type = r_lgl_get(inherit_base_type, 0); + + if (!c_inherit_base_type) { + r_abort("List `.data` must inherit from the base type."); + } + } + } else { + if (inherit_base_type == r_null) { + c_inherit_base_type = false; + } else { + c_inherit_base_type = r_lgl_get(inherit_base_type, 0); + } + } + + bool has_names_in_attributes = false; + + for (r_obj* node = attributes; node != R_NilValue; node = r_node_cdr(node)) { + r_obj* tag = r_node_tag(node); + + if (tag == R_ClassSymbol) { + // Check for this in case we ever allow dynamic dots + r_abort("Can't supply `class` in `...`."); + } + + if (tag == R_NamesSymbol) { + has_names_in_attributes = true; + } + } + + r_keep_t pi; + KEEP_HERE(attributes, &pi); + + if (!has_names_in_attributes) { + // Take names from `data` if `attributes` doesn't have any + r_obj* names = KEEP(r_names(data)); + names = KEEP(names_repair_missing(names)); + + if (names != r_null) { + attributes = r_new_node3(names, attributes, R_NamesSymbol); + KEEP_AT(attributes, pi); + } + + FREE(2); + } + + cls = KEEP(chr_c(cls, classes_vctrs_vctr)); + + if (c_inherit_base_type) { + r_obj* base_type = KEEP(r_type_as_character(type)); + cls = chr_c(cls, base_type); + FREE(1); + } + KEEP(cls); + + attributes = r_new_node3(cls, attributes, R_ClassSymbol); + KEEP_AT(attributes, pi); + + // Required conversion to VECSXP for `attributes<-` + attributes = KEEP(Rf_PairToVectorList(attributes)); + + // We don't have access to `Rf_shallow_duplicate_attr()`, which can create + // an ALTREP wrapper cheaply, but `vec_set_attributes()` does through + // `attributes<-` + r_obj* out = r_eval_with_xy( + vec_set_attributes_call, + data, + attributes, + vctrs_ns_env + ); + + FREE(4); + return out; +} + + +// [[ register() ]] +r_obj* vctrs_name_repair_missing(r_obj* x) { + return names_repair_missing(x); +} + +static +r_obj* names_repair_missing(r_obj* x) { + // We never want to allow `NA_character_` names to slip through, but + // erroring on them has caused issues. Instead, we repair them to the + // empty string (#784). + + if (x == r_null) { + return x; + } + + if (r_typeof(x) != R_TYPE_character) { + r_abort("`x` must be a character vector of names."); + } + + const r_ssize size = r_length(x); + r_obj* const* v_x = r_chr_cbegin(x); + + r_ssize i = 0; + bool any_missing = false; + + for (; i < size; ++i) { + if (v_x[i] == r_globals.na_str) { + any_missing = true; + break; + } + } + + if (!any_missing) { + return x; + } + + r_obj* out = KEEP(r_clone(x)); + + for (; i < size; ++i) { + if (v_x[i] == r_globals.na_str) { + r_chr_poke(out, i, strings_empty); + } + } + + FREE(1); + return out; +} + + +void vctrs_init_type_vctr(r_obj* ns) { + classes_vctrs_vctr = r_new_shared_vector(R_TYPE_character, 1); + r_chr_poke(classes_vctrs_vctr, 0, r_str("vctrs_vctr")); + + vec_set_attributes_call = r_parse("vec_set_attributes(x, y)"); + r_preserve(vec_set_attributes_call); +} diff --git a/src/type-vctr.h b/src/type-vctr.h new file mode 100644 index 000000000..ef99a6563 --- /dev/null +++ b/src/type-vctr.h @@ -0,0 +1,11 @@ +#ifndef VCTRS_TYPE_VCTR_H +#define VCTRS_TYPE_VCTR_H + +#include + +r_obj* new_vctr(r_obj* data, + r_obj* cls, + r_obj* inherit_base_type, + r_obj* attributes); + +#endif diff --git a/src/utils.h b/src/utils.h index fcfdcf323..8183b4bbf 100644 --- a/src/utils.h +++ b/src/utils.h @@ -270,6 +270,22 @@ SEXP r_new_environment(SEXP parent) { return env; } +static inline +bool r_is_vector(r_obj* x) { + switch(r_typeof(x)) { + case R_TYPE_logical: + case R_TYPE_integer: + case R_TYPE_double: + case R_TYPE_complex: + case R_TYPE_character: + case R_TYPE_raw: + case R_TYPE_list: + return true; + default: + return false; + } +} + SEXP r_protect(SEXP x); bool r_is_number(SEXP x); bool r_is_positive_number(SEXP x); @@ -398,6 +414,8 @@ void c_print_backtrace(); SEXP chr_c(SEXP x, SEXP y); +SEXP r_new_shared_vector(SEXPTYPE type, R_len_t n); + extern SEXP vctrs_ns_env; extern SEXP vctrs_shared_empty_str; diff --git a/tests/testthat/_snaps/type-vctr.md b/tests/testthat/_snaps/type-vctr.md index c5a8190f5..b66c5334b 100644 --- a/tests/testthat/_snaps/type-vctr.md +++ b/tests/testthat/_snaps/type-vctr.md @@ -1,3 +1,29 @@ +# `class` must be a character vector + + Code + (expect_error(new_vctr(1, class = 1))) + Output + + Error: `class` must be a character vector. + +# `inherit_base_type` is validated + + Code + (expect_error(new_vctr(1, inherit_base_type = 1))) + Output + + Error: `inherit_base_type` must be `NULL` or a single `TRUE` or `FALSE`. + Code + (expect_error(new_vctr(1, inherit_base_type = NA))) + Output + + Error: `inherit_base_type` must be `NULL` or a single `TRUE` or `FALSE`. + Code + (expect_error(new_vctr(1, inherit_base_type = c(TRUE, FALSE)))) + Output + + Error: `inherit_base_type` must be `NULL` or a single `TRUE` or `FALSE`. + # na.fail() works Code diff --git a/tests/testthat/test-type-vctr.R b/tests/testthat/test-type-vctr.R index 56a7e05da..32706b256 100644 --- a/tests/testthat/test-type-vctr.R +++ b/tests/testthat/test-type-vctr.R @@ -11,6 +11,26 @@ test_that(".data must be a vector", { expect_error(new_vctr(mean), "vector type") }) +test_that("`class` must be a character vector", { + expect_snapshot((expect_error(new_vctr(1, class = 1)))) +}) + +test_that("`inherit_base_type` is validated", { + expect_snapshot({ + (expect_error(new_vctr(1, inherit_base_type = 1))) + (expect_error(new_vctr(1, inherit_base_type = NA))) + (expect_error(new_vctr(1, inherit_base_type = c(TRUE, FALSE)))) + }) +}) + +test_that("names come from `.data`", { + expect_named(new_vctr(structure(1, names = "x")), "x") +}) + +test_that("names provided through `...` override those in `.data`", { + expect_named(new_vctr(structure(1, names = "x"), names = "y"), "y") +}) + test_that("attributes other than names are ignored", { out <- new_vctr(structure(1, a = 1)) expect_null(attributes(out)$a) @@ -32,6 +52,15 @@ test_that("Can opt out of base type", { expect_s3_class(x, c("x", "vctrs_vctr"), exact = TRUE) }) +test_that("base type is correct for atomic types", { + expect_s3_class(new_vctr(logical(), inherit_base_type = TRUE), "logical") + expect_s3_class(new_vctr(integer(), inherit_base_type = TRUE), "integer") + expect_s3_class(new_vctr(double(), inherit_base_type = TRUE), "double") + expect_s3_class(new_vctr(character(), inherit_base_type = TRUE), "character") + expect_s3_class(new_vctr(raw(), inherit_base_type = TRUE), "raw") + expect_s3_class(new_vctr(complex(), inherit_base_type = TRUE), "complex") +}) + test_that("base type is always set for lists", { expect_s3_class(new_vctr(list()), "list") })