From b9198732f3f3206b0e30f923a7953c47b2d1b566 Mon Sep 17 00:00:00 2001 From: Robert Link Date: Wed, 20 Sep 2017 17:12:26 -0400 Subject: [PATCH 1/4] Add IIASA format to options. Mostly working. Produces one extraneous row at the top of the file. --- DESCRIPTION | 3 +- R/mcl.R | 172 +++++++++++++++++++++++++++++------ R/output.R | 12 ++- man/generate.Rd | 74 +++++++++------ man/iiasa_sortcols.Rd | 15 +++ man/iiasafy.Rd | 26 ++++++ man/output_csv.Rd | 6 +- man/proc_var_iiasa.Rd | 15 +++ tests/testthat/test_output.R | 8 +- 9 files changed, 263 insertions(+), 68 deletions(-) create mode 100644 man/iiasa_sortcols.Rd create mode 100644 man/iiasafy.Rd create mode 100644 man/proc_var_iiasa.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 2246102..4dfcc61 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: iamrpt Title: Convert GCAM results to the format required by various IAM experiment databases -Version: 0.1.0 +Version: 0.1.0-9000 Authors@R: c( person("Robert", "Link", email = "robert.link@pnnl.gov", role = c("aut", "cre")), person("Xavier", "Gutierrez", email = "xavier.gutierrez@pnnl.gov", role = c("aut")) @@ -18,6 +18,7 @@ Imports: rgcam (>= 0.4.2), readr (>= 1.1.1), dplyr (>= 0.5), + tidyr (>= 0.6), stringr (>= 1.2.0), assertthat (>= 0.2.0), lubridate (>= 1.6.0), diff --git a/R/mcl.R b/R/mcl.R index 21872b0..2a51ccf 100644 --- a/R/mcl.R +++ b/R/mcl.R @@ -52,20 +52,33 @@ #' arugments to the system, or set as R options. The names of the options and #' their functions are: #' \describe{ -#' \item{\strong{iamrpt.fileformat}}{File format for output. Options are +#' \item{\code{iamrpt.fileformat}}{File format for output. Options are #' \code{"CSV"} and \code{"XLSX"}} -#' \item{\strong{iamrpt.scenmerge}}{If \code{TRUE}, for each variable merge the +#' \item{\code{iamrpt.scenmerge}}{If \code{TRUE}, for each variable merge the #' results for all scenarios into a single table (distinguished by the value of #' the scenario column). 
Otherwise, create a separate table for each #' combination of scenario and variable.} -#' \item{\strong{iamrpt.tabs}}{If \code{TRUE}, write each table to a separate tab (if -#' outputting to an xlsx file) or file (if outputting to csv files). In the -#' former case each tab/file will be named with the output variable name and -#' scenario (if applicable). In the latter case all of the tables will be -#' written into a single tab or file, with the name of the scenario and variable -#' before each table.} +#' \item{\code{iamrpt.dataformat}}{Specify the data format; that is, how +#' the data is organized in the output files. Three options are available: +#' \describe{ +#' \item{\code{"tabs"}}{Each table generated goes into a separate tab (if +#' XLSX output is selected) or file (if CSV output is selected). The tab or file +#' will be named after the table.} +#' \item{\code{"merged"}}{The tables will be output sequentially into a +#' single tab or file. Each table will be preceded by its name. This is +#' similar to the format used by GCAM to output batch queries.} +#' \item{\code{"IIASA"}}{The database format used by IIASA. Each table is +#' spread to wide form (years as columns) and the tables are stacked into a +#' single merged table, with a column identifying the variable for each row.} +#' } +#' } +#' \item{\code{iamrpt.wideformat}}{If \code{TRUE}, reshape the tables into +#' wide format (years as columns) before output. Otherwise, leave them in long +#' format. If the IIASA data format is selected, then this option is ignored, +#' since the IIASA format requires wide data.} #' } #' +#' #' Output filenames will be chosen automatically. For an XLSX file the filename #' will be 'iamrpt.xlsx'. For CSV output with \code{tabs == FALSE} the result #' will be 'iamrpt.csv'. 
For CSV output with \code{tabs == TRUE} the output @@ -99,32 +112,37 @@ #' #' The filter functions currently recognized by the system are #' \describe{ -#' \item{\strong{==}}{String equality} -#' \item{\strong{!=}}{String inequality} -#' \item{\strong{<}}{Numeric less-than} -#' \item{\strong{>}}{Numeric greather-than} -#' \item{\strong{<=}}{Numeric less-than-or-equals} -#' \item{\strong{>=}}{Numeric greater-than-or-equals} -#' \item{\strong{matches}}{Regular expression match. Note that because of the -#' way we parse these strings you can't have a ',', ';', '(', or ')' in your +#' \item{\code{==}}{String equality} +#' \item{\code{!=}}{String inequality} +#' \item{\code{<}}{Numeric less-than} +#' \item{\code{>}}{Numeric greater-than} +#' \item{\code{<=}}{Numeric less-than-or-equals} +#' \item{\code{>=}}{Numeric greater-than-or-equals} +#' \item{\code{matches}}{Regular expression match. Note that because of the +#' way we parse these strings you can't have a \code{','}, \code{';'}, +#' \code{'('}, or \code{')'} in your #' regular expressions for this function or any of the ones below.} -#' \item{\strong{matchesi}}{Case-insensitive regular expression match.} -#' \item{\strong{notmatches}}{Regular expression inverted match. That is, +#' \item{\code{matchesi}}{Case-insensitive regular expression match.} +#' \item{\code{notmatches}}{Regular expression inverted match. That is, #' select the rows that do \emph{not} match the given regular expression.} -#' \item{\strong{notmatchesi}}{Case-insensitive regular expression inverted +#' \item{\code{notmatchesi}}{Case-insensitive regular expression inverted #' match.} #' } #' #' @param scenctl Name of the scenario control file. #' @param varctl Name of the variable control file. #' @param dbloc Directory holding the GCAM databases +#' @param outputdir Directory to write output to. Default is the current +#' working directory. +#' @param model Name of the model (e.g., \code{'GCAM'}). 
This is required for +#' the IIASA data format. It is ignored for all other formats. #' @param fileformat Desired format for output files. #' @param scenmerge Flag: if true, merge scenarios; otherwise, leave scenarios #' as separate tables. -#' @param tabs Flag: if true, put each table into a separate tab or file. -#' Otherwise, put them all into a single long tab/file. -#' @param outputdir Directory to write output to. Default is the current -#' working directory. +#' @param dataformat Indicates desired data format. Supported formats are +#' \code{'tabs'}, \code{'merged'}, or \code{'IIASA'} +#' @param wideformat Flag: if true, convert data to wide format before output; +#' otherwise, leave in long format. #' @return NULL; the report will be written to output files as described in the #' Output section. #' @importFrom magrittr %>% @@ -132,16 +150,32 @@ generate <- function(scenctl, varctl, dbloc, + outputdir = getwd(), + model = 'GCAM', fileformat = getOption('iamrpt.fileformat', 'CSV'), scenmerge = getOption('iamrpt.scenmerge', TRUE), - tabs = getOption('iamrpt.tabs', TRUE), - outputdir = getwd()) + dataformat = getOption('iamrpt.dataformat', 'tabs'), + wideformat = getOption('iamrpt.wideformat', TRUE) + ) { + year <- value <- NULL # silence package check notes. suppressMessages({scenctl <- readr::read_csv(scenctl)}) suppressMessages({varctl <- readr::read_csv(varctl)}) validatectl(scenctl, varctl) + ## special condition: If using the IIASA format, all variables must be + ## aggregated to region. If all left blank, then replace them silently. + ## Otherwise warn and replace. Blank cells may be NA, so keep if() NA-free. + if(dataformat == 'IIASA') { + if(!all(is.na(varctl$`aggregation keys`) | varctl$`aggregation keys` == '') && + !all(varctl$`aggregation keys` %in% 'scenario, region')) { + warning('Variables must be aggregated to region for IIASA output format. 
', + 'Aggregation keys will be replaced with "scenario, region".') + } + varctl[['aggregation keys']] <- 'scenario, region' + } + gcvars <- varctl[['GCAM variable']] ## Collect the queries that we will need to run. @@ -160,19 +194,43 @@ generate <- function(scenctl, if(scenmerge) - merge_scenarios(rslts) + rslts <- merge_scenarios(rslts) + if(dataformat == 'IIASA') { + ## convert results to IIASA format. If we didn't merge scenarios, write + ## each one to a separate file named for the scenario; otherwise write a + ## single file. + . <- NULL # suppress notes + if(scenmerge) { + rslts <- iiasafy(rslts) %>% + dplyr::mutate(Model=model) %>% + iiasa_sortcols() %>% + list(allscen=.) + dataformat <- 'merged' + } + else { + rslts <- lapply(rslts, iiasafy) %>% + lapply(function(df) { + dplyr::mutate(df, Model=model) %>% + iiasa_sortcols() + }) + dataformat <- 'tabs' + } + } + else if(wideformat) { + rslts <- lapply(rslts, function(df) {tidyr::spread(df, year, value)}) + } if(fileformat == 'XLSX') { - output_xlsx(rslts, tabs, outputdir) + output_xlsx(rslts, dataformat, outputdir) } else if(fileformat == 'CSV') { - output_csv(rslts, tabs, outputdir) + output_csv(rslts, dataformat, outputdir) } else { warning('Unknown file format ', fileformat, ' requested. 
', 'Writing as CSV.') - output_csv(rslts, tabs, outputdir) + output_csv(rslts, dataformat, outputdir) } message('FIN.') @@ -282,6 +340,7 @@ validatectl <- function(scenctl, varctl) validate1(scenctl, 'scenario control', scencols, scenrqd) validate1(varctl, 'variable control', varcols, varrqd) + invisible(NULL) } @@ -315,3 +374,58 @@ validate1 <- function(ctl, ctlname, expectcols, rqdcols) { stop('Missing data prohibited in these ', ctlname, ' columns: ', missingstr) } } + +#' Convert a list of tables to a single table in IIASA format +#' +#' The result of this transformation will be a single table with the following +#' columns: +#' +#' \itemize{ +#' \item{Model} +#' \item{Scenario} +#' \item{Region} +#' \item{Variable (taken from the output name of the input)} +#' \item{Unit} +#' \item{NNNN - one for each year} +#' } +#' +#' @param datalist List of data frames, one for each variable. +#' @keywords internal +iiasafy <- function(datalist) +{ + varlist <- lapply(datalist, proc_var_iiasa) + + varlist <- lapply(names(varlist), # Add variable name (need access to names(varlist) for this.) + function(var) { + dplyr::mutate(varlist[[var]], Variable=var) + }) %>% + dplyr::bind_rows() # Combine into a single table +} + + +#' Select the columns needed for the IIASA format +#' +#' Starting with data in long format, keep only the columns needed to form the +#' IIASA format, namely, scenario, region, year, value, and Units. Then rename +#' variables according to the IIASA conventions, and spread to wide format. We don't +#' add the model or variable names at this point, however. 
+#' @keywords internal +proc_var_iiasa <- function(df) +{ + scenario <- region <- variable <- year <- value <- Units <- NULL # silence + # check notes + df <- df %>% + dplyr::select(scenario, region, year, value, Units) %>% + dplyr::rename(Scenario=scenario, Region=region, Unit=Units) %>% + tidyr::spread(year, value) +} + +#' Put columns in canonical order for IIASA data format +#' +#' @param df Data frame +#' @keywords internal +iiasa_sortcols <- function(df) +{ + cols <- unique(c('Model', 'Scenario', 'Region', 'Variable', 'Unit', names(df))) + dplyr::select(df, dplyr::one_of(cols)) +} diff --git a/R/output.R b/R/output.R index 5f9540d..54407b4 100644 --- a/R/output.R +++ b/R/output.R @@ -4,12 +4,12 @@ #' #' @param rslts Results tables from \code{\link{generate}}. This must be either #' a list of data frames or a list of lists of data frames. -#' @param tabs Flag indicating whether variables should be written to separate -#' tabs/files. +#' @param dataformat Indicator of data format: If 'tabs', write to separate files; if 'merged' +#' write merged results to a single file. #' @param dirname Directory to write output file(s) into. #' @importFrom assertthat assert_that #' @keywords internal -output_csv <- function(rslts, tabs, dirname) +output_csv <- function(rslts, dataformat, dirname) { assert_that(is.list(rslts), !is.data.frame(rslts)) @@ -30,7 +30,7 @@ output_csv <- function(rslts, tabs, dirname) ## Now we should have a list of data frames. Output them to file(s) one ## by one. 
- if(tabs) { + if(dataformat=='tabs') { ## One file for each table for(tblname in names(rslts)) { filename <- alternate_filename(file.path(dirname, paste0(tblname, @@ -54,7 +54,9 @@ output_csv <- function(rslts, tabs, dirname) cat('\n', file=fcon) } - cat(tblname, '\n', file=fcon, sep='') + if(!('Variable' %in% names(rslts[[tblname]]))) { + cat(tblname, '\n', file=fcon, sep='') + } readr::write_csv(rslts[[tblname]], fcon) } close(fcon) diff --git a/man/generate.Rd b/man/generate.Rd index f4d5bb2..46fac89 100644 --- a/man/generate.Rd +++ b/man/generate.Rd @@ -4,9 +4,11 @@ \alias{generate} \title{Generate a report for a GCAM experiment} \usage{ -generate(scenctl, varctl, dbloc, fileformat = getOption("iamrpt.fileformat", - "CSV"), scenmerge = getOption("iamrpt.scenmerge", TRUE), - tabs = getOption("iamrpt.tabs", TRUE), outputdir = getwd()) +generate(scenctl, varctl, dbloc, outputdir = getwd(), model = "GCAM", + fileformat = getOption("iamrpt.fileformat", "CSV"), + scenmerge = getOption("iamrpt.scenmerge", TRUE), + dataformat = getOption("iamrpt.dataformat", "tabs"), + wideformat = getOption("iamrpt.wideformat", TRUE)) } \arguments{ \item{scenctl}{Name of the scenario control file.} @@ -15,16 +17,22 @@ generate(scenctl, varctl, dbloc, fileformat = getOption("iamrpt.fileformat", \item{dbloc}{Directory holding the GCAM databases} +\item{outputdir}{Directory to write output to. Default is the current +working directory.} + +\item{model}{Name of the model (e.g., \code{'GCAM'}). This is required for +the IIASA data format. It is ignored for all other formats.} + \item{fileformat}{Desired format for output files.} \item{scenmerge}{Flag: if true, merge scenarios; otherwise, leave scenarios as separate tables.} -\item{tabs}{Flag: if true, put each table into a separate tab or file. -Otherwise, put them all into a single long tab/file.} +\item{dataformat}{Indicates desired data format. 
Supported formats are +\code{'tabs'}, \code{'merged'}, or \code{'IIASA'}} -\item{outputdir}{Directory to write output to. Default is the current -working directory.} +\item{wideformat}{Flag: if true, convert data to wide format before output; +otherwise, leave in long format.} } \value{ NULL; the report will be written to output files as described in the @@ -83,20 +91,33 @@ The system has several options for formatting output. These can be passed as arugments to the system, or set as R options. The names of the options and their functions are: \describe{ - \item{\strong{iamrpt.fileformat}}{File format for output. Options are + \item{\code{iamrpt.fileformat}}{File format for output. Options are \code{"CSV"} and \code{"XLSX"}} - \item{\strong{iamrpt.scenmerge}}{If \code{TRUE}, for each variable merge the + \item{\code{iamrpt.scenmerge}}{If \code{TRUE}, for each variable merge the results for all scenarios into a single table (distinguished by the value of the scenario column). Otherwise, create a separate table for each combination of scenario and variable.} - \item{\strong{iamrpt.tabs}}{If \code{TRUE}, write each table to a separate tab (if -outputting to an xlsx file) or file (if outputting to csv files). In the -former case each tab/file will be named with the output variable name and -scenario (if applicable). In the latter case all of the tables will be -written into a single tab or file, with the name of the scenario and variable -before each table.} + \item{\code{iamrpt.dataformat}}{Specify the data format; that is, how +the data is organized in the output files. Three options are available: + \describe{ + \item{\code{"tabs"}}{Each table generated goes into a separate tab (if +XLS output is selected) or file (if CSV output is selected). The tab or file +will be named with the output of the table.} + \item{\code{"merged"}}{The tables will be output sequentially into a +single tab or file. Each table will be preceded by its name. 
This is +similar to the format used by GCAM to output batch queries.} + \item{\code{"IIASA"}}{The database format used by IIASA. In this +format each table is spread into a row in a merged table, with a column to +identify the variable that each row comes from.} + } + } + \item{\code{iamrpt.wideformat}}{If \code{TRUE}, reshape the tables into +wide format (years as columns) before output. Otherwise, leave them in long +format. If the IIASA data format is selected, then this option is ignored, +since the IIASA format requires wide data.} } + Output filenames will be chosen automatically. For an XLSX file the filename will be 'iamrpt.xlsx'. For CSV output with \code{tabs == FALSE} the result will be 'iamrpt.csv'. For CSV output with \code{tabs == TRUE} the output @@ -132,19 +153,20 @@ trimmed. The filter functions currently recognized by the system are \describe{ - \item{\strong{==}}{String equality} - \item{\strong{!=}}{String inequality} - \item{\strong{<}}{Numeric less-than} - \item{\strong{>}}{Numeric greather-than} - \item{\strong{<=}}{Numeric less-than-or-equals} - \item{\strong{>=}}{Numeric greater-than-or-equals} - \item{\strong{matches}}{Regular expression match. Note that because of the -way we parse these strings you can't have a ',', ';', '(', or ')' in your + \item{\code{==}}{String equality} + \item{\code{!=}}{String inequality} + \item{\code{<}}{Numeric less-than} + \item{\code{>}}{Numeric greather-than} + \item{\code{<=}}{Numeric less-than-or-equals} + \item{\code{>=}}{Numeric greater-than-or-equals} + \item{\code{matches}}{Regular expression match. Note that because of the +way we parse these strings you can't have a \code{','}, \code{';'}, +\code{'('}, or \code{')'} in your regular expressions for this function or any of the ones below.} - \item{\strong{matchesi}}{Case-insensitive regular expression match.} - \item{\strong{notmatches}}{Regular expression inverted match. 
That is, + \item{\code{matchesi}}{Case-insensitive regular expression match.} + \item{\code{notmatches}}{Regular expression inverted match. That is, select the rows that do \emph{not} match the given regular expression.} - \item{\strong{notmatchesi}}{Case-insensitive regular expression inverted + \item{\code{notmatchesi}}{Case-insensitive regular expression inverted match.} } } diff --git a/man/iiasa_sortcols.Rd b/man/iiasa_sortcols.Rd new file mode 100644 index 0000000..58cce57 --- /dev/null +++ b/man/iiasa_sortcols.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mcl.R +\name{iiasa_sortcols} +\alias{iiasa_sortcols} +\title{Put columns in canonical order for IIASA data format} +\usage{ +iiasa_sortcols(df) +} +\arguments{ +\item{df}{Data frame} +} +\description{ +Put columns in canonical order for IIASA data format +} +\keyword{internal} diff --git a/man/iiasafy.Rd b/man/iiasafy.Rd new file mode 100644 index 0000000..0c809ac --- /dev/null +++ b/man/iiasafy.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mcl.R +\name{iiasafy} +\alias{iiasafy} +\title{Convert a list of tables to a single table in IIASA format} +\usage{ +iiasafy(datalist) +} +\arguments{ +\item{datalist}{List of data frames, one for each variable.} +} +\description{ +The result of this transformation will be a single table with the following +columns: +} +\details{ +\itemize{ + \item{Model} + \item{Scenario} + \item{Region} + \item{Variable (taken from the output name of the input)} + \item{Unit} + \item{NNNN - one for each year} +} +} +\keyword{internal} diff --git a/man/output_csv.Rd b/man/output_csv.Rd index 84c1b6b..7daf7ba 100644 --- a/man/output_csv.Rd +++ b/man/output_csv.Rd @@ -5,7 +5,7 @@ \alias{output_xlsx} \title{Output function for CSV format} \usage{ -output_csv(rslts, tabs, dirname) +output_csv(rslts, dataformat, dirname) output_xlsx(rslts, tabs, dirname) } @@ -13,8 +13,8 @@ 
output_xlsx(rslts, tabs, dirname) \item{rslts}{Results tables from \code{\link{generate}}. This must be either a list of data frames or a list of lists of data frames.} -\item{tabs}{Flag indicating whether variables should be written to separate -tabs/files.} +\item{dataformat}{Indicator of data format: If 'tabs', write to separate files; if 'merged' +write merged results to a single file.} \item{dirname}{Directory to write output file(s) into.} } diff --git a/man/proc_var_iiasa.Rd b/man/proc_var_iiasa.Rd new file mode 100644 index 0000000..2bb487d --- /dev/null +++ b/man/proc_var_iiasa.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/mcl.R +\name{proc_var_iiasa} +\alias{proc_var_iiasa} +\title{Select the columns needed for the IIASA format} +\usage{ +proc_var_iiasa(df) +} +\description{ +Starting with data in long format, keep only the columns needed to form the +IIASA format, namely, scenario, region, year, value, and Units. Then rename +variables according to the IIASA conventions, and spread to wide format. We don't +add the model or variable names at this point, however. 
+} +\keyword{internal} diff --git a/tests/testthat/test_output.R b/tests/testthat/test_output.R index ee45395..132f1b8 100644 --- a/tests/testthat/test_output.R +++ b/tests/testthat/test_output.R @@ -112,7 +112,7 @@ test_that('output_csv works for separate tabs mode.', { flist <- file.path(dir, paste0(vlist, '.csv')) on.exit(unlink(flist)) - output_csv(rslt, TRUE, dir) + output_csv(rslt, 'tabs', dir) for(i in seq_along(vlist)) { file <- flist[i] @@ -131,7 +131,7 @@ test_that('output_csv works for separate tabs mode.', { flist <- file.path(dir, paste0(vlist, '.csv')) on.exit(unlink(flist), add=TRUE) - output_csv(rsltmrg, TRUE, dir) + output_csv(rsltmrg, 'tabs', dir) for(i in seq_along(vlist)) { file <- flist[i] @@ -150,7 +150,7 @@ test_that('output_csv works for single tab mode.', ## unmerged filename <- file.path(dir, 'iamrpt.csv') on.exit(unlink(filename)) - output_csv(rslt, FALSE, dir) + output_csv(rslt, 'merged', dir) expect_true(file.exists(filename)) ## spot check a few lines in the data @@ -170,7 +170,7 @@ test_that('output_csv works for single tab mode.', ## merged version filename <- file.path(dir, 'iamrpt001.csv') on.exit(unlink(filename), add=TRUE) - output_csv(rsltmrg, FALSE, dir) + output_csv(rsltmrg, 'merged', dir) expect_true(file.exists(filename)) ## spot check important lines From 3b843514abcbd381be3bc3827aa517849532ea19 Mon Sep 17 00:00:00 2001 From: Robert Link Date: Wed, 20 Sep 2017 20:57:51 -0400 Subject: [PATCH 2/4] Add an example variable control file for IIASA format output. 
--- inst/extdata/example-iiasa-variable.ctl | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 inst/extdata/example-iiasa-variable.ctl diff --git a/inst/extdata/example-iiasa-variable.ctl b/inst/extdata/example-iiasa-variable.ctl new file mode 100644 index 0000000..4dc959d --- /dev/null +++ b/inst/extdata/example-iiasa-variable.ctl @@ -0,0 +1,7 @@ +GCAM variable,output variable,aggregation keys,aggregation function,start year,end year,filters,output units +Population,Population,,,2000,2050,,thous +pcGDP(PPP),GDP|PPP,,,2000,2050,,Thous80US$/per +Electricity,Electricity|Generation,,,2000,2050,(matches; sector; electricity),MWh +Electricity,Electricity|Total,,,2000,2050,, +Electricity,Electricity|Rooftop PV,,,2000,2050,(matches; sector; elect_td_bld),MWh +Electricity,Electricity|Rooftop PV|Ridiculous,,,2000,2050,"(notmatches; sector; electricity), (notmatches; sector; industrial energy use)",MWh From e970aeca1a4a7c6c3ccf3c46355259341e4e0251 Mon Sep 17 00:00:00 2001 From: Robert Link Date: Thu, 21 Sep 2017 08:50:22 -0400 Subject: [PATCH 3/4] Move iiasafy functions to output.R --- DESCRIPTION | 2 +- R/mcl.R | 54 ---------------------------------- R/output.R | 56 ++++++++++++++++++++++++++++++++++++ man/iiasa_sortcols.Rd | 2 +- man/iiasafy.Rd | 2 +- man/proc_var_iiasa.Rd | 2 +- tests/testthat/test_output.R | 34 ++++++++++++++++++++++ 7 files changed, 94 insertions(+), 58 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4dfcc61..deb6f4a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: iamrpt Title: Convert GCAM results to the format required by various IAM experiment databases -Version: 0.1.0-9000 +Version: 0.2.0 Authors@R: c( person("Robert", "Link", email = "robert.link@pnnl.gov", role = c("aut", "cre")), person("Xavier", "Gutierrez", email = "xavier.gutierrez@pnnl.gov", role = c("aut")) diff --git a/R/mcl.R b/R/mcl.R index 2a51ccf..9d8e58a 100644 --- a/R/mcl.R +++ b/R/mcl.R @@ -375,57 +375,3 @@ validate1 <- function(ctl, ctlname, 
expectcols, rqdcols) { } } -#' Convert a list of tables to a single table in IIASA format -#' -#' The result of this transformation will be a single table with the following -#' columns: -#' -#' \itemize{ -#' \item{Model} -#' \item{Scenario} -#' \item{Region} -#' \item{Variable (taken from the output name of the input)} -#' \item{Unit} -#' \item{NNNN - one for each year} -#' } -#' -#' @param datalist List of data frames, one for each variable. -#' @keywords internal -iiasafy <- function(datalist) -{ - varlist <- lapply(datalist, proc_var_iiasa) - - varlist <- lapply(names(varlist), # Add variable name (need access to names(varlist) for this.) - function(var) { - dplyr::mutate(varlist[[var]], Variable=var) - }) %>% - dplyr::bind_rows() # Combine into a single table -} - - -#' Select the columns needed for the IIASA format -#' -#' Starting with data in long format, keep only the columns needed to form the -#' IIASA format, namely, scenario, region, year, value, and Units. Then rename -#' variables according to the IIASA conventions, and spread to wide format. We don't -#' add the model or variable names at this point, however. 
-#' @keywords internal -proc_var_iiasa <- function(df) -{ - scenario <- region <- variable <- year <- value <- Units <- NULL # silence - # check notes - df <- df %>% - dplyr::select(scenario, region, year, value, Units) %>% - dplyr::rename(Scenario=scenario, Region=region, Unit=Units) %>% - tidyr::spread(year, value) -} - -#' Put columns in canonical order for IIASA data format -#' -#' @param df Data frame -#' @keywords internal -iiasa_sortcols <- function(df) -{ - cols <- unique(c('Model', 'Scenario', 'Region', 'Variable', 'Unit', names(df))) - dplyr::select(df, dplyr::one_of(cols)) -} diff --git a/R/output.R b/R/output.R index 54407b4..194b1e5 100644 --- a/R/output.R +++ b/R/output.R @@ -124,3 +124,59 @@ nameparse <- function(name) c(stringr::str_c(splt[1:(len-1)], collapse='.'), splt[len]) } } + + +#' Convert a list of tables to a single table in IIASA format +#' +#' The result of this transformation will be a single table with the following +#' columns: +#' +#' \itemize{ +#' \item{Model} +#' \item{Scenario} +#' \item{Region} +#' \item{Variable (taken from the output name of the input)} +#' \item{Unit} +#' \item{NNNN - one for each year} +#' } +#' +#' @param datalist List of data frames, one for each variable. +#' @keywords internal +iiasafy <- function(datalist) +{ + varlist <- lapply(datalist, proc_var_iiasa) + + varlist <- lapply(names(varlist), # Add variable name (need access to names(varlist) for this.) + function(var) { + dplyr::mutate(varlist[[var]], Variable=var) + }) %>% + dplyr::bind_rows() # Combine into a single table +} + + +#' Select the columns needed for the IIASA format +#' +#' Starting with data in long format, keep only the columns needed to form the +#' IIASA format, namely, scenario, region, year, value, and Units. Then rename +#' variables according to the IIASA conventions, and spread to wide format. We don't +#' add the model or variable names at this point, however. 
+#' @keywords internal +proc_var_iiasa <- function(df) +{ + scenario <- region <- year <- value <- Units <- NULL # silence + # check notes + df <- df %>% + dplyr::select(scenario, region, year, value, Units) %>% + dplyr::rename(Scenario=scenario, Region=region, Unit=Units) %>% + tidyr::spread(year, value) +} + +#' Put columns in canonical order for IIASA data format +#' +#' @param df Data frame +#' @keywords internal +iiasa_sortcols <- function(df) +{ + cols <- unique(c('Model', 'Scenario', 'Region', 'Variable', 'Unit', names(df))) + dplyr::select(df, dplyr::one_of(cols)) +} diff --git a/man/iiasa_sortcols.Rd b/man/iiasa_sortcols.Rd index 58cce57..de9b992 100644 --- a/man/iiasa_sortcols.Rd +++ b/man/iiasa_sortcols.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mcl.R +% Please edit documentation in R/output.R \name{iiasa_sortcols} \alias{iiasa_sortcols} \title{Put columns in canonical order for IIASA data format} diff --git a/man/iiasafy.Rd b/man/iiasafy.Rd index 0c809ac..8adc482 100644 --- a/man/iiasafy.Rd +++ b/man/iiasafy.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mcl.R +% Please edit documentation in R/output.R \name{iiasafy} \alias{iiasafy} \title{Convert a list of tables to a single table in IIASA format} diff --git a/man/proc_var_iiasa.Rd b/man/proc_var_iiasa.Rd index 2bb487d..07f0202 100644 --- a/man/proc_var_iiasa.Rd +++ b/man/proc_var_iiasa.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/mcl.R +% Please edit documentation in R/output.R \name{proc_var_iiasa} \alias{proc_var_iiasa} \title{Select the columns needed for the IIASA format} diff --git a/tests/testthat/test_output.R b/tests/testthat/test_output.R index 132f1b8..335f2d6 100644 --- a/tests/testthat/test_output.R +++ b/tests/testthat/test_output.R @@ -189,3 +189,37 @@ test_that('output_csv works for single tab mode.', 'billion 
m^2,Reference,Africa_Northern,resid,resid,resid_building,2075,5.3585') }) + + +test_that('Single table can be converted to iiasa format.', { + pop <- dplyr::filter(popq, year >= 2005, year <= 2020) + popiia <- proc_var_iiasa(pop) %>% + dplyr::mutate(Variable='Population', Model='GCAM') %>% + iiasa_sortcols() + + expect_equal(nrow(popiia), length(unique(pop$region))) + expect_identical(names(popiia), c('Model', 'Scenario', 'Region', 'Variable', + 'Unit', as.character(seq(2005,2020,5)))) + expect_equal(popiia[['2010']], dplyr::filter(pop, year==2010)[['value']]) +}) + +test_that('List of tables can be converted to iiasa format.', { + pop <- dplyr::filter(popq, year >= 2005, year <= 2020) + flrspc <- + dplyr::filter(flrspcq, year >= 2005, year <= 2020) %>% + aggregate('sum', 'scenario, region') + + allvar <- list(Population=pop, Floorspace=flrspc) + + iitbl <- iiasafy(allvar) %>% + dplyr::mutate(Model='GCAM') %>% + iiasa_sortcols() + + expect_true(is.data.frame(iitbl)) + expect_equal(nrow(iitbl), 2*length(unique(pop$region))) + expect_identical(names(iitbl), c('Model', 'Scenario', 'Region', 'Variable', + 'Unit', as.character(seq(2005,2020,5)))) + + expect_identical(unique(iitbl$Variable), c('Population', 'Floorspace')) + +}) From f4e20a2df61fae124fecde973c36a80077fcc6da Mon Sep 17 00:00:00 2001 From: Robert Link Date: Thu, 21 Sep 2017 09:44:22 -0400 Subject: [PATCH 4/4] Automate production of package internal data Verified identical with old internal data. --- data-raw/sysdata.R | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 data-raw/sysdata.R diff --git a/data-raw/sysdata.R b/data-raw/sysdata.R new file mode 100644 index 0000000..c1dbaa9 --- /dev/null +++ b/data-raw/sysdata.R @@ -0,0 +1,14 @@ +## Generate the internal data for the package +## This must be sourced as a script because of the way devtools::use_data works. +## Source it from the top level of a development copy of the package. 
+ + +source('data-raw/gdpdef.R') +gdpdef <- calc.gdpdef('data-raw/GDPDEF.csv') + +source('data-raw/energyconv.R') +energyconv <- prep.energyconv() + +devtools::use_data(gdpdef, energyconv, internal=TRUE, overwrite=TRUE, + compress='xz') +