Merge pull request #103 from NorwegianVeterinaryInstitute/dev

chore: NVIdb v0.13.1
NorwegianVeterinaryInstitute · Dec 19, 2024 · 674d721 · 674d721
2 parents d096019 + 6962f33
commit 674d721
Show file tree

Hide file tree

Showing 7 changed files with 251 additions and 44 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: NVIdb
 Title: Tools to facilitate the use of NVI's databases
-Version: 0.13.0
-Date: 2024-12-13
+Version: 0.13.1
+Date: 2024-12-19
 Authors@R: 
   c(person(given = "Petter",
            family = "Hopp",

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,10 @@
+# NVIdb 0.13.1 - (2024-12-19)
+
+## New features:
+
+- `standardize_columns` now accepts list or filename and path as input to `standards`.
+
+
 # NVIdb 0.13.0 - (2024-12-13)
 
 ## Bug fixes:

diff --git a/R/standardize_columns.R b/R/standardize_columns.R
@@ -2,7 +2,7 @@
 #' @description Standardizes column names, labels, column width
 #'     for variables in external databases.
 #'
-#' @details The standardization table is under development. This
+#' @details The standardisation table is under development. This
 #'     function only works when being connected to the NVI network.
 #'
 #' Variables in internal and external data sources use
@@ -16,6 +16,41 @@
 #'     \ifelse{html}{\code{\link[data.table:fread]{data.table::fread}}}{\code{data.table::fread}}
 #'     can be generated.
 #'
+#' \code{standards} gives the source file or the data.frame with the standards
+#'     for formatting the columns. Default is to give the general source csv
+#'     file. It can also be a data.frame as for example the
+#'     \ifelse{html}{\code{\link[OKplan:OK_column_standards]{OKplan::OK_column_standards}}}{\code{OKplan::OK_column_standards}}
+#'     giving the standards when generating
+#'     selection files for the Norwegian surveillance programmes. As this file
+#'     is embedded into OKplan-package, it may be convenient to update the
+#'     source file and load it as a csv file. On some occasions, it may be
+#'     easiest to input the column standards directly using a \code{list}.
+#'
+#'     The list input to column_standards must follow a specific format.
+#'     It is a \code{list} with at least three named vectors:
+#' \itemize{
+#' \item \code{colname}: a vector of all columns in the source file that
+#'     should be included in the Excel report with the selection list.
+#' \item \code{collabel}: A vector with the labels that should be used in the
+#'     Excel report.
+#' \item \code{colwidth}: A vector with the column width that should be used
+#'     in the Excel report.
+#' }
+#'
+#'     In addition one may input:
+#'
+#' \itemize{
+#' \item \code{colorder}: the order of the columns to be used in the Excel report.
+#'     The default is to use the same order as they are entered in the vectors.
+#' \item \code{colname_db}: input added as a possibility to keep on the same format
+#'     as \code{column_standards}. Not necessary to input.
+#' \item \code{table_db}: input added as a possibility to keep on the same format
+#'     as \code{column_standards}. Must be the same as \code{dbsource}.
+#'     Not necessary to input.
+#' }
+#'
+#' All vectors must have the same order and the same length.
+#'
 #' \code{property = "colnames"} will replace the column names
 #'     in a data frame with standardized column names. All
 #'     standard column names are snake_case. If no standard name
@@ -78,8 +113,11 @@
 #' The database that is the source of data. Should be the name of
 #'     the data source as registered in column_standards table. Defaults
 #'     to \code{deparse(substitute(data))}.
-#' @param standards [\code{character(1)}]\cr
-#' For giving alternative standard tables to column_standards.
+#' @param standards [\code{data.frame} | \code{list} | \code{character(1)}]\cr
+#' For giving alternatives to the standard table for column_standards using
+#'     different formats, see details. Defaults to
+#'     file.path(NVIdb::set_dir_NVI("ProgrammeringR", slash = FALSE),
+#'               "standardization", "colnames", "column_standards.csv").
 #' @param property [\code{character(1)}]\cr
 #' Property of the column that should be standardized. Must be one
 #'     of c("colnames", "colclasses", "collabels", "colwidths_Excel",
@@ -135,13 +173,20 @@
 standardize_columns <- function(data,
                                 dbsource = deparse(substitute(data)),
                                 #   csvfile = NULL,
-                                standards = NULL,
+                                standards = file.path(NVIdb::set_dir_NVI("ProgrammeringR", slash = FALSE),
+                                                      "standardization", "colnames", "column_standards.csv"),
                                 property,
                                 language = "no",
                                 exclude = FALSE,
                                 ...) {
   # TO DO: replace reading column standards with including column standards in sysdata for the package.
 
+  # PREPARE ARGUMENTS BEFORE CHECKING ----
+  if (is.null(standards)) {
+    standards <- file.path(NVIdb::set_dir_NVI("ProgrammeringR", slash = FALSE),
+                           "standardization", "colnames", "column_standards.csv")
+  }
+
   # ARGUMENT CHECKING ----
   # Object to store check-results
   checks <- checkmate::makeAssertCollection()
@@ -158,14 +203,34 @@ standardize_columns <- function(data,
   }
   checkmate::assert_character(dbsource, len = 1, min.chars = 1, add = checks)
 
-  checkmate::assert_data_frame(standards, null.ok = TRUE, add = checks)
-
+  # standards
+  # checkmate::assert_data_frame(standards, null.ok = TRUE, add = checks)
+  checkmate::assert(checkmate::check_class(standards, classes = c("data.frame")),
+                    checkmate::check_class(standards, classes = c("list")),
+                    checkmate::check_class(standards, classes = c("character")),
+                    add = checks)
+  if (inherits(standards, what = "character")) {
+    checkmate::assert_file_exists(standards, add = checks)
+  }
+  if (inherits(standards, what = "list")) {
+    lengths_standard <- lengths(standards)
+    NVIcheckmate::assert_integer(lengths_standard, lower = lengths_standard[1], upper = lengths_standard[1],
+                                 min.len = 3, max.len = 6,
+                                 comment = "When input as a list, all elements must have the same length",
+                                 add = checks)
+    checkmate::assert_subset(names(standards), choices = c("table_db", "colname_db", "colname", "collabel", "colwidth", "colorder"),
+                             add = checks)
+  }
+  if (inherits(standards, what = "data.frame")) {
+    checkmate::assert_data_frame(standards, min.rows = 1, min.cols = 6, add = checks)
+  }
+# property
   checkmate::assert_subset(tolower(property),
                            choices = c("colnames", "colclasses",
                                        "collabels", "colwidths_excel",
                                        "colwidths_DT", "colorder"),
                            add = checks)
-
+  # language
   checkmate::assert_subset(language, choices = c("no", "en"), add = checks)
 
   checkmate::assert_logical(exclude, add = checks)
@@ -177,12 +242,36 @@ standardize_columns <- function(data,
   dbsource <- tolower(dbsource)
 
   # Reading column standards from a csv-file based on an Excel file
-  if (is.null(standards)) {
-    column_standards <- utils::read.csv2(
-      file = paste0(NVIdb::set_dir_NVI("ProgrammeringR"), "standardization/column_standards.csv"),
-      fileEncoding = "UTF-8"
-    )
-  } else {
+  # if (is.null(standards)) {
+  #   column_standards <- utils::read.csv2(
+  #     file = file.path(NVIdb::set_dir_NVI("ProgrammeringR", slash = FALSE),
+  #                      "standardization", "column_standards.csv"),
+  #     fileEncoding = "UTF-8"
+  #   )
+  # } else {
+  #   column_standards <- standards
+  # }
+  if (inherits(standards, what = "character")) {
+    column_standards <- utils::read.csv2(file = standards, fileEncoding = "UTF-8")
+  }
+  if (inherits(standards, what = "list")) {
+    column_standards <- as.data.frame((standards))
+
+    if (!"table_db" %in% colnames(column_standards)) {
+      column_standards$table_db <- dbsource
+    }
+
+    if (!"colname_db" %in% colnames(column_standards)) {
+      column_standards$colname_db <- column_standards$colname
+    }
+
+    if (!"colorder" %in% colnames(column_standards)) {
+      column_standards$colorder <- c(1:dim(column_standards)[1])
+    }
+    colnames(column_standards)[which(colnames(column_standards) == "collabel")] <- "label_1_no"
+    colnames(column_standards)[which(colnames(column_standards) == "colwidth")] <- "colwidth_Excel"
+  }
+  if (inherits(standards, what = "data.frame")) {
     column_standards <- standards
   }
 

diff --git a/man/standardize_columns.Rd b/man/standardize_columns.Rd
diff --git a/tests/testthat/test_login.R b/tests/testthat/test_login.R
@@ -7,7 +7,7 @@ test_that("Log in to db services", {
 
   linewidth <- options("width")
   options(width = 80)
-  
+
   odbc_connected <- login("PJS")
   expect_true(as.vector(odbc_connected) >= 1)
   RODBC::odbcClose(odbc_connected)
@@ -21,7 +21,7 @@ test_that("Log in to db services", {
                                 regexp = "'login_by_credentials_PJS' is replaced by 'login_by_credentials")
   expect_true(as.vector(odbc_connected) >= 1)
   RODBC::odbcClose(odbc_connected)
-  
+
   options(width = unlist(linewidth))
 })
 
@@ -49,4 +49,3 @@ test_that("Errors or warnings for login", {
 
   options(width = unlist(linewidth))
 })
-