From 3e72322e0dd5bac36ae4e4c6e53e5d8a6bdbfae9 Mon Sep 17 00:00:00 2001 From: edhell Date: Tue, 6 Jun 2023 17:46:04 -0300 Subject: [PATCH] version 0.7 --- DESCRIPTION | 2 +- NEWS.md | 2 +- R/codegen.R | 7 ++--- R/json_files.R | 6 ++++- vignettes/xgb-tidymodel.Rmd | 52 +++++++++++++++++++++++++++++-------- 5 files changed, 52 insertions(+), 17 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index a2e67b3..4e46b05 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: sasctl Title: The sasctl package enables easy communication between the SAS Viya platform APIs and the R runtime -Version: 0.6.4.9000 +Version: 0.7.0 Author: Eduardo Hellas Authors@R: c( person(given = "Eduardo", diff --git a/NEWS.md b/NEWS.md index dbd7720..48129f9 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# sasctl (development version) +# sasctl 0.7.0 * Fixed correct release retrieval when using `session` to connect to Viya 2020.x * Added `codegen` function in experimental state. Works for simple `lm`, `glm` models and [tidymodels](https://www.tidymodels.org/) `workflow` with regression or classification model mode` diff --git a/R/codegen.R b/R/codegen.R index 85a5bbc..f98315e 100644 --- a/R/codegen.R +++ b/R/codegen.R @@ -201,7 +201,7 @@ codegen.workflow <- function(tm_workflow, path = "scoreCode.R", rds = "model.rds output_list <- list(EM_CLASSIFICATION = predictions[[".pred"]], EM_EVENTPROBABILITY = predictions[[".pred_<>"]], - EM_PROBABILITY = subset(predictions, select = -c(.pred))[boolClass], + EM_PROBABILITY = apply(subset(predictions, select = -c(.pred)), 1, max), I_<> = predictions[[".pred"]], <> = predictions[[".pred"]], <> @@ -237,9 +237,10 @@ codegen.workflow <- function(tm_workflow, path = "scoreCode.R", rds = "model.rds if (!exists("sasctlRmodel")) { assign("sasctlRmodel", readRDS(file = paste(rdsPath, "<>", sep = "")), envir = .GlobalEnv) - <> + } - + <> + data <- data.frame(<>) predictions <- predict(sasctlRmodel, new_data = data, type = "<>") diff --git 
a/R/json_files.R b/R/json_files.R index 86f5bf0..9c35ccd 100644 --- a/R/json_files.R +++ b/R/json_files.R @@ -37,11 +37,15 @@ write_in_out_json <- function(data, input = TRUE, path = './', noFile = FALSE){ if (is.factor(x)) as.character(x) else x })) ### variable lengths, 8 is default for numeric + + size <- sapply(data, FUN = function(x){ - if (is.character(x)) max(sapply(x, nchar)) else {8}} + if (is.character(x)) max(sapply(x, nchar), na.rm = TRUE) else {8}} ) + size[sapply(size, function(x) is.na(x) | x == 0 )] <- 8 + df <- data.frame(name = names(vars), length = size, type = vars, diff --git a/vignettes/xgb-tidymodel.Rmd b/vignettes/xgb-tidymodel.Rmd index b0efa9d..66e86eb 100644 --- a/vignettes/xgb-tidymodel.Rmd +++ b/vignettes/xgb-tidymodel.Rmd @@ -14,7 +14,7 @@ knitr::opts_chunk$set( ) ``` -R-sasctl has tools to help you create all the necessary files to upload an R model successfully to SAS Viya Model Manager and be able to take advantage of the many . +R-sasctl has tools to help you create all the necessary files to upload an R model successfully to SAS Viya Model Manager and be able to take advantage of its many features. ## Simple data preparation @@ -126,6 +126,8 @@ Scoring codes are required to run R and Python models in SAS Model Manager, `sas You will notice that the scoring code has many `EM_*` and `P_<><>` variables. They're not required, but it makes very consistent on how models made in SAS Viya UI are created. Making it easier to mix these models. + +## Generating the Score code ```{r vars, echo=FALSE, include=FALSE} used_vars <- paste0("`",colnames(xgb_fitted[["pre"]][["mold"]][["predictors"]]), "`", collapse = ", ") expected_vars <- paste0("`",colnames(hmeq)[-1], "`", collapse = ", ") @@ -139,24 +141,52 @@ We can use the `inputs` to generate alternate input variables. 
In this case it i code <- codegen(xgb_fitted, path = paste0(path, "scoreCode.R"), inputs = colnames(hmeq)[-1], + referenceLevel = 1, rds = modelrda) code ``` -Now we create some additional files which are required to configure SAS Model Manager when uploading the files. For the variables specifically, should match the inputs from the model and outputs (outputs may be needed to match what comes out the scoring code) +### Testing the scoring code + +```{r test_code} + +## getting the .rda Path to be called in the function +## this is simulating what Viya passes to R when calling it +rdsPath <- path + +## Calling the generated code string as code to +## create the function locally +codeExpression <- str2expression(code) +eval(codeExpression) + +## this is a helper to create the variables +## cat(paste0(colnames(hmeq)[-1], " = hmeq[, '", colnames(hmeq)[-1],"']", collapse = ",\n " )) + +scoreRes <- scoreFunction(LOAN = hmeq[, 'LOAN'], + MORTDUE = hmeq[, 'MORTDUE'], + VALUE = hmeq[, 'VALUE'], + REASON = hmeq[, 'REASON'], + JOB = hmeq[, 'JOB'], + YOJ = hmeq[, 'YOJ'], + DEROG = hmeq[, 'DEROG'], + DELINQ = hmeq[, 'DELINQ'], + CLAGE = hmeq[, 'CLAGE'], + NINQ = hmeq[, 'NINQ'], + CLNO = hmeq[, 'CLNO'], + DEBTINC = hmeq[, 'DEBTINC']) + +scoreRes <- as.data.frame(scoreRes) +head(scoreRes) +``` -```{r creating_files5} -## note required to be correct values, just correct format -scoreddf$I_BAD <- as.character(scoreddf$BAD) -scoreddf$BAD <- as.character(scoreddf$BAD) -scoreddf$EM_EVENTPROBABILITY <- 0.1 -scoreddf$EM_PROBABILITY <- 0.1 -scoreddf$EM_CLASSIFICATION <- as.character(scoreddf$BAD) -write_in_out_json(hmeq[,-1], input = TRUE, path = path) -write_in_out_json(scoreddf[-3], input = FALSE, path = path) +Now we create some additional files which are required to configure SAS Model Manager when uploading the files. Specifically, the variables should match the model's inputs and outputs. We can use the result from the scoring code test from the previous chunk. 
Otherwise you can create a `data.frame` with the expected outputs and pass it. +```{r creating_files5} + +write_in_out_json(hmeq[,-1], input = TRUE, path = path) +write_in_out_json(scoreRes, input = FALSE, path = path) write_fileMetadata_json(scoreCodeName = "scoreCode.R", scoreResource = modelrda,