Merge pull request #7 from NorwegianVeterinaryInstitute/develop

PetterHopp · web-flow · commit 29c165e76c23 · 2021-05-07T16:24:07.000+02:00
NVIdb v0.5.3
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: NVIdb
 Title: Facilitating use of Norwegian Veterinary Institute's databases
-Version: 0.5.2
-Date: 2021-04-24
+Version: 0.5.3
+Date: 2021-05-07
 Authors@R: 
   c(person(given = "Petter",
            family = "Hopp",
@@ -29,27 +29,28 @@ License: BSD_3_clause + file LICENSE
 Encoding: UTF-8
 LazyData: true
 Imports:
-    utils,
     stats,
+    utils,
+    checkmate,
+    data.table,
+    dplyr,
     getPass,
-    svDialogs,
     keyring,
-    RODBC,
-    snakecase,
     magrittr,
-    checkmate,
     poorman (>= 0.2.3),
-    data.table
+    RODBC,
+    snakecase,
+    svDialogs
 RoxygenNote: 7.1.1
 Suggests:
-    spelling, 
+    devtools,
+    knitr,
     remotes,
-    testthat,
+    rmarkdown,
     roxygen2,
-    withr,
-    devtools,
+    spelling, 
     styler,
-    knitr,
-    rmarkdown
+    testthat,
+    withr
 VignetteBuilder: knitr
 Language: en-GB
diff --git a/NEWS b/NEWS
@@ -1,3 +1,18 @@
+NVIdb 0.5.3 - (2021-05-07)
+--------------------------
+
+New features:
+
+- add_PJS_code_description now translates registertype (type of location or address) and konkl_type (conclusion type) to descriptive text.
+
+- standardize_PJSdata now transforms sak_forst_avsluttet (from v_innsendelse) to date format.
+
+
+Bug fixes:
+
+- There was a problem with standardize_columns and the function was rewritten to use dplyr instead of poorman.
+
+
 NVIdb 0.5.2 - (2021-04-24)
 --------------------------
 
diff --git a/R/add_PJS_code_description.R b/R/add_PJS_code_description.R
@@ -11,20 +11,24 @@
 #'     The function uses a premade translation table (PJS_codes_2_text.csv) that normally is updated every night from PJS.
 #'
 #'     Currently, the translation table has PJS-codes and the corresponding description for the following PJS variable types:
-#'     hensikt,
-#'     utbrudd,
-#'     art,
-#'     artrase,
-#'     driftsform,
-#'     provetype,
-#'     provemateriale,
-#'     kjonn,
-#'     forbehandling,
-#'     fysiologisk_stadium,
-#'     metode,
-#'     kjennelse,
-#'     analytt,
-#'     seksjon.
+#' \itemize{
+#'   \item hensikt
+#'   \item utbrudd
+#'   \item registertype (categories for locations and addresses)
+#'   \item seksjon
+#'   \item art (species and breed codes to species name)
+#'   \item artrase (species and breed codes to species or breed name)
+#'   \item driftsform
+#'   \item provetype
+#'   \item provemateriale
+#'   \item kjonn
+#'   \item forbehandling
+#'   \item fysiologisk_stadium
+#'   \item metode
+#'   \item konkl_type
+#'   \item kjennelse
+#'   \item analytt.
+#' }
 #'
 #'     \code{position =} is used to give the place of the new columns in the data.frame. For \code{position = "right"} the new variables are
 #'     placed to the right of the code_variable. Likewise, for \code{position = "left"} the new variables are placed to the left of the
diff --git a/R/standardize_PJSdata.R b/R/standardize_PJSdata.R
@@ -57,7 +57,7 @@ standardize_PJSdata <- function(PJSdata, dbsource = "v2_sak_m_res") {
 
   # Change to date for date-variables
   # Done before trimming character variables to reduce the number of variables that need to be trimmed
-  cols_2_modify <- intersect(colnames(PJSdata), c("mottatt", "uttatt", "avsluttet",
+  cols_2_modify <- intersect(colnames(PJSdata), c("mottatt", "uttatt", "avsluttet", "sak_forst_avsluttet",
                                                   "uttatt_parprove", "mottatt_parprove",
                                                   "und_godkjent", "und_avsluttet"))
   PJSdata[,cols_2_modify] <- lapply(PJSdata[,cols_2_modify], as.Date, format = "%d.%m.%y")
diff --git a/R/standardize_columns.R b/R/standardize_columns.R
@@ -115,7 +115,7 @@ standardize_columns <- function(data,
 
   # Reading column standards from a csv-file based on an Excel file
   if (is.null(standards)) {
-    column_standards <- utils::read.csv2(file = paste0(set_dir_NVI("ProgrammeringR"),"standardization/column_standards.csv"),
+    column_standards <- utils::read.csv2(file = paste0(NVIdb::set_dir_NVI("ProgrammeringR"),"standardization/column_standards.csv"),
                                          fileEncoding = "UTF-8")
   } else {
     column_standards <- standards
@@ -131,10 +131,10 @@ standardize_columns <- function(data,
 
     standard <- column_standards %>%
       # Filter to include only information for relevant column names and with property information
-      poorman::filter(colname_db %in% columnnames$V1) %>%
-      poorman::filter(!is.na(colname)) %>%
-      poorman::select(table_db, colname_db, colname) %>%
-      poorman::distinct()
+      dplyr::filter(colname_db %in% columnnames$V1) %>%
+      dplyr::filter(!is.na(colname)) %>%
+      dplyr::select(table_db, colname_db, colname) %>%
+      dplyr::distinct()
 
     # Keep information on relevant table name and combine information for all other tables
     standard[which(standard$table_db != dbsource), "table_db"] <- NA
@@ -143,12 +143,12 @@ standardize_columns <- function(data,
     if (dim(standard)[1] > 0) {
       standard <- standard %>%
         # Identify column names with only one suggested column width
-        poorman::add_count(colname_db, name = "n") %>%
-        poorman::ungroup() %>%
+        dplyr::add_count(colname_db, name = "n") %>%
+        dplyr::ungroup() %>%
         # Select column width either if only one suggested or for the current table
-        poorman::filter(n == 1 | table_db == dbsource & n > 1) %>%
-        poorman::select(colname_db, colname) %>%
-        poorman::distinct()
+        dplyr::filter(n == 1 | table_db == dbsource & n > 1) %>%
+        dplyr::select(colname_db, colname) %>%
+        dplyr::distinct()
     }
 
     # # Standardize column names
@@ -218,10 +218,10 @@ standardize_columns <- function(data,
     # Standard labels in Norwegian are always generated as they are used to impute missing labels in other languages
     standard <- column_standards %>%
       # Filter to include only information for relevant column names and with property information
-      poorman::filter(colname %in% collabels$V1) %>%
-      poorman::filter(!is.na(label_1_no)) %>%
-      poorman::select(table_db, colname, label_1_no) %>%
-      poorman::distinct()
+      dplyr::filter(colname %in% collabels$V1) %>%
+      dplyr::filter(!is.na(label_1_no)) %>%
+      dplyr::select(table_db, colname, label_1_no) %>%
+      dplyr::distinct()
 
     # Keep information on relevant table name and combine information for all other tables
     standard[which(standard$table_db != dbsource), "table_db"] <- NA
@@ -230,21 +230,21 @@ standardize_columns <- function(data,
     if (dim(standard)[1] > 0) {
       standard <- standard %>%
         # Identify column names with only one suggested column width
-        poorman::add_count(colname, name = "n") %>%
-        poorman::ungroup() %>%
+        dplyr::add_count(colname, name = "n") %>%
+        dplyr::ungroup() %>%
         # Select column width either if only one suggested or for the current table
-        poorman::filter(n == 1 | table_db == dbsource & n > 1) %>%
-        poorman::select(colname = colname, label = label_1_no) %>%
-        poorman::distinct()
+        dplyr::filter(n == 1 | table_db == dbsource & n > 1) %>%
+        dplyr::select(colname = colname, label = label_1_no) %>%
+        dplyr::distinct()
     }
 
     ## English column labels ----
     if (language == "en") {
       standard_en <- column_standards %>%
-        poorman::filter(colname %in% collabels$V1) %>%
-        poorman::filter(!is.na(label_1_en)) %>%
-        poorman::select(table_db, colname, label_1_en) %>%
-        poorman::distinct()
+        dplyr::filter(colname %in% collabels$V1) %>%
+        dplyr::filter(!is.na(label_1_en)) %>%
+        dplyr::select(table_db, colname, label_1_en) %>%
+        dplyr::distinct()
 
       # Keep information on relevant table name and combine information for all other tables
       standard_en[which(standard_en$table_db != dbsource), "table_db"] <- NA
@@ -253,18 +253,18 @@ standardize_columns <- function(data,
       if (dim(standard_en)[1] > 0) {
         standard_en <- standard_en %>%
           # Identify column names with only one suggested column width
-          poorman::add_count(colname, name = "n") %>%
-          poorman::ungroup() %>%
-          poorman::filter(n == 1 | table_db == dbsource & n > 1) %>%
-          poorman::select(colname, label_1_en) %>%
-          poorman::distinct()
+          dplyr::add_count(colname, name = "n") %>%
+          dplyr::ungroup() %>%
+          dplyr::filter(n == 1 | table_db == dbsource & n > 1) %>%
+          dplyr::select(colname, label_1_en) %>%
+          dplyr::distinct()
       }
 
       # Impute missing labels with Norwegian labels
       standard <- standard_en %>%
-        poorman::full_join(standard, by = c("colname" = "colname")) %>%
-        poorman::mutate(label = poorman::coalesce(label_1_en, label)) %>%
-        poorman::select(colname, label)
+        dplyr::full_join(standard, by = c("colname" = "colname")) %>%
+        dplyr::mutate(label = dplyr::coalesce(label_1_en, label)) %>%
+        dplyr::select(colname, label)
     }
 
     ## Impute Sentence case for those without defined label ----
@@ -302,13 +302,13 @@ standardize_columns <- function(data,
     # Standardize colwidths
     standard <- column_standards %>%
       # Filter to include only information for relevant column names and with property information
-      poorman::filter(colname %in% colwidths$V1) %>%
-      poorman::filter(!is.na(colwidth_Excel)) %>%
-      poorman::select(table_db = table_db, colname = colname, colwidth = colwidth_Excel)
+      dplyr::filter(colname %in% colwidths$V1) %>%
+      dplyr::filter(!is.na(colwidth_Excel)) %>%
+      dplyr::select(table_db = table_db, colname = colname, colwidth = colwidth_Excel)
     # uses which below as there seems to be a bug so that case_when doesn't work properly within a function
-    # poorman::mutate(table_db = poorman::case_when(table_db == "dbsource" ~ table_db,
+    # dplyr::mutate(table_db = dplyr::case_when(table_db == "dbsource" ~ table_db,
     #                                               TRUE ~ as.character(NA))) %>%
-    # poorman::distinct()
+    # dplyr::distinct()
     # Keep information on relevant table name and combine information for all other tables
     standard[which(standard$table_db != dbsource), "table_db"] <- NA
     standard <- unique(standard)
@@ -317,12 +317,12 @@ standardize_columns <- function(data,
     if (dim(standard)[1] > 0) {
       standard <- standard %>%
         # Identify column names with only one suggested column width
-        poorman::add_count(colname, name = "n") %>%
-        poorman::ungroup() %>%
+        dplyr::add_count(colname, name = "n") %>%
+        dplyr::ungroup() %>%
         # Select column width either if only one suggested or for the current table
-        poorman::filter(n == 1 | table_db == dbsource & n > 1) %>%
-        poorman::select(colname, colwidth) %>%
-        poorman::distinct()
+        dplyr::filter(n == 1 | table_db == dbsource & n > 1) %>%
+        dplyr::select(colname, colwidth) %>%
+        dplyr::distinct()
     }
 
     # New column with standard column names
@@ -356,17 +356,17 @@ standardize_columns <- function(data,
       # Standard labels in Norwegian are always generated as they are used to impute missing labels in other languages
       standard <- column_standards %>%
         # Filter to include only information for relevant column names and with property information
-        poorman::filter(table_db == dbsource) %>%
-        poorman::filter(colname %in% columnorder$V1) %>%
-        poorman::filter(!is.na(colorder)) %>%
-        poorman::select(colname, colorder) %>%
-        poorman::distinct() %>%
+        dplyr::filter(table_db == dbsource) %>%
+        dplyr::filter(colname %in% columnorder$V1) %>%
+        dplyr::filter(!is.na(colorder)) %>%
+        dplyr::select(colname, colorder) %>%
+        dplyr::distinct() %>%
         # removes colorders with more than one suggested position
-        poorman::add_count(colname, name = "n") %>%
-        poorman::filter(n == 1) %>%
-        poorman::select(colname, colorder)
+        dplyr::add_count(colname, name = "n") %>%
+        dplyr::filter(n == 1) %>%
+        dplyr::select(colname, colorder)
       # Sort according to first column, replaced by order
-      # poorman::arrange(colorder)
+      # dplyr::arrange(colorder)
 
       standard <- standard[order(standard$colorder),]
 
diff --git a/README.md b/README.md
diff --git a/man/add_PJS_code_description.Rd b/man/add_PJS_code_description.Rd