Merge pull request #38 from JGCRI/iiasafy

Add IIASA output mode.
JGCRI · Sep 21, 2017 · 7e2b764 · 7e2b764
2 parents 11aeaf8 + f4e20a2
commit 7e2b764
Show file tree

Hide file tree

Showing 11 changed files with 320 additions and 68 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: iamrpt
 Title: Convert GCAM results to the format required by various IAM experiment databases
-Version: 0.1.0
+Version: 0.2.0
 Authors@R: c(
     person("Robert", "Link", email = "[email protected]", role = c("aut", "cre")),
     person("Xavier", "Gutierrez", email = "[email protected]", role = c("aut"))
@@ -18,6 +18,7 @@ Imports:
     rgcam (>= 0.4.2),
     readr (>= 1.1.1),
     dplyr (>= 0.5),
+    tidyr (>= 0.6),
     stringr (>= 1.2.0),
     assertthat (>= 0.2.0),
     lubridate (>= 1.6.0),

diff --git a/R/mcl.R b/R/mcl.R
@@ -52,20 +52,33 @@
 #' arguments to the system, or set as R options.  The names of the options and
 #' their functions are:
 #' \describe{
-#'     \item{\strong{iamrpt.fileformat}}{File format for output.  Options are
+#'     \item{\code{iamrpt.fileformat}}{File format for output.  Options are
 #' \code{"CSV"} and \code{"XLSX"}}
-#'     \item{\strong{iamrpt.scenmerge}}{If \code{TRUE}, for each variable merge the
+#'     \item{\code{iamrpt.scenmerge}}{If \code{TRUE}, for each variable merge the
 #' results for all scenarios into a single table (distinguished by the value of
 #' the scenario column).  Otherwise, create a separate table for each
 #' combination of scenario and variable.}
-#'     \item{\strong{iamrpt.tabs}}{If \code{TRUE}, write each table to a separate tab (if
-#' outputting to an xlsx file) or file (if outputting to csv files).  In the
-#' former case each tab/file will be named with the output variable name and
-#' scenario (if applicable).  In the latter case all of the tables will be
-#' written into a single tab or file, with the name of the scenario and variable
-#' before each table.}
+#'     \item{\code{iamrpt.dataformat}}{Specify the data format; that is, how
+#' the data is organized in the output files.  Three options are available:
+#'       \describe{
+#'         \item{\code{"tabs"}}{Each table generated goes into a separate tab (if
+#' XLSX output is selected) or file (if CSV output is selected).  The tab or file
+#' will be named with the name of the table.}
+#'         \item{\code{"merged"}}{The tables will be output sequentially into a
+#' single tab or file.  Each table will be preceded by its name.  This is
+#' similar to the format used by GCAM to output batch queries.}
+#'         \item{\code{"IIASA"}}{The database format used by IIASA.  In this
+#' format each table is spread into a row in a merged table, with a column to
+#' identify the variable that each row comes from.}
+#'    }
+#'  }
+#'    \item{\code{iamrpt.wideformat}}{If \code{TRUE}, reshape the tables into
+#' wide format (years as columns) before output.  Otherwise, leave them in long
+#' format.  If the IIASA data format is selected, then this option is ignored,
+#' since the IIASA format requires wide data.}
 #' }
 #'
+#'
 #' Output filenames will be chosen automatically.  For an XLSX file the filename
 #' will be 'iamrpt.xlsx'.  For CSV output with \code{dataformat == 'merged'} the result
 #' will be 'iamrpt.csv'.  For CSV output with \code{dataformat == 'tabs'} the output
@@ -99,49 +112,70 @@
 #'
 #' The filter functions currently recognized by the system are
 #' \describe{
-#'   \item{\strong{==}}{String equality}
-#'   \item{\strong{!=}}{String inequality}
-#'   \item{\strong{<}}{Numeric less-than}
-#'   \item{\strong{>}}{Numeric greather-than}
-#'   \item{\strong{<=}}{Numeric less-than-or-equals}
-#'   \item{\strong{>=}}{Numeric greater-than-or-equals}
-#'   \item{\strong{matches}}{Regular expression match.  Note that because of the
-#' way we parse these strings you can't have a ',', ';', '(', or ')' in your
+#'   \item{\code{==}}{String equality}
+#'   \item{\code{!=}}{String inequality}
+#'   \item{\code{<}}{Numeric less-than}
+#'   \item{\code{>}}{Numeric greater-than}
+#'   \item{\code{<=}}{Numeric less-than-or-equals}
+#'   \item{\code{>=}}{Numeric greater-than-or-equals}
+#'   \item{\code{matches}}{Regular expression match.  Note that because of the
+#' way we parse these strings you can't have a \code{','}, \code{';'},
+#' \code{'('}, or \code{')'} in your
 #' regular expressions for this function or any of the ones below.}
-#'   \item{\strong{matchesi}}{Case-insensitive regular expression match.}
-#'   \item{\strong{notmatches}}{Regular expression inverted match.  That is,
+#'   \item{\code{matchesi}}{Case-insensitive regular expression match.}
+#'   \item{\code{notmatches}}{Regular expression inverted match.  That is,
 #' select the rows that do \emph{not} match the given regular expression.}
-#'   \item{\strong{notmatchesi}}{Case-insensitive regular expression inverted
+#'   \item{\code{notmatchesi}}{Case-insensitive regular expression inverted
 #' match.}
 #' }
 #'
 #' @param scenctl Name of the scenario control file.
 #' @param varctl Name of the variable control file.
 #' @param dbloc Directory holding the GCAM databases
+#' @param outputdir Directory to write output to.  Default is the current
+#' working directory.
+#' @param model Name of the model (e.g., \code{'GCAM'}).  This is required for
+#' the IIASA data format.  It is ignored for all other formats.
 #' @param fileformat Desired format for output files.
 #' @param scenmerge Flag: if true, merge scenarios; otherwise, leave scenarios
 #' as separate tables.
-#' @param tabs Flag: if true, put each table into a separate tab or file.
-#' Otherwise, put them all into a single long tab/file.
-#' @param outputdir Directory to write output to.  Default is the current
-#' working directory.
+#' @param dataformat Indicates desired data format.  Supported formats are
+#' \code{'tabs'}, \code{'merged'}, or \code{'IIASA'}
+#' @param wideformat Flag: if true, convert data to wide format before output;
+#' otherwise, leave in long format.
 #' @return NULL; the report will be written to output files as described in the
 #' Output section.
 #' @importFrom magrittr %>%
 #' @export
 generate <- function(scenctl,
                      varctl,
                      dbloc,
+                     outputdir = getwd(),
+                     model = 'GCAM',
                      fileformat = getOption('iamrpt.fileformat', 'CSV'),
                      scenmerge = getOption('iamrpt.scenmerge', TRUE),
-                     tabs = getOption('iamrpt.tabs', TRUE),
-                     outputdir = getwd())
+                     dataformat = getOption('iamrpt.dataformat', 'tabs'),
+                     wideformat = getOption('iamrpt.wideformat', TRUE)
+                     )
 {
+    year <- value <- NULL               # silence package check notes.
     suppressMessages({scenctl <- readr::read_csv(scenctl)})
     suppressMessages({varctl <- readr::read_csv(varctl)})
 
     validatectl(scenctl, varctl)
 
+    ## special condition:  If using the IIASA format, all variables must be
+    ## aggregated to region.  If all left blank, then replace them silently.
+    ## Otherwise issue a warning and replace.
+    if(dataformat == 'IIASA') {
+        if(any(varctl$`aggregation keys` != 'scenario, region') &&
+           !(all(is.na(varctl$`aggregation keys`) | varctl$`aggregation keys` == ''))) {
+            warning('Variables must be aggregated to region for IIASA output format. ',
+                    'Aggregation keys will be replaced with "scenario, region".')
+        }
+        varctl[['aggregation keys']] <- 'scenario, region'
+    }
+
     gcvars <- varctl[['GCAM variable']]
 
     ## Collect the queries that we will need to run.
@@ -160,19 +194,43 @@ generate <- function(scenctl,
 
 
     if(scenmerge)
-        merge_scenarios(rslts)
+        rslts <- merge_scenarios(rslts)
 
+    if(dataformat == 'IIASA') {
+        ## convert results to IIASA format.  If we didn't merge scenarios, write
+        ## each one to a separate file named for the scenario; otherwise write a
+        ## single file.
+        . <- NULL    # suppress notes
+        if(scenmerge) {
+            rslts <- iiasafy(rslts) %>%
+                dplyr::mutate(Model=model) %>%
+                iiasa_sortcols() %>%
+                list(allscen=.)
+            dataformat <- 'merged'
+        }
+        else {
+            rslts <- lapply(rslts, iiasafy) %>%
+              lapply(function(df) {
+                  dplyr::mutate(df, Model=model) %>%
+                      iiasa_sortcols()
+              })
+            dataformat <- 'tabs'
+        }
+    }
+    else if(wideformat) {
+        rslts <- lapply(rslts, function(df) {tidyr::spread(df, year, value)})
+    }
 
     if(fileformat == 'XLSX') {
-        output_xlsx(rslts, tabs, outputdir)
+        output_xlsx(rslts, dataformat, outputdir)
     }
     else if(fileformat == 'CSV') {
-        output_csv(rslts, tabs, outputdir)
+        output_csv(rslts, dataformat, outputdir)
     }
     else {
         warning('Unknown file format ', fileformat, ' requested. ',
                 'Writing as CSV.')
-        output_csv(rslts, tabs, outputdir)
+        output_csv(rslts, dataformat, outputdir)
     }
 
     message('FIN.')
@@ -282,6 +340,7 @@ validatectl <- function(scenctl, varctl)
 
     validate1(scenctl, 'scenario control', scencols, scenrqd)
     validate1(varctl, 'variable control', varcols, varrqd)
+
     invisible(NULL)
 }
 
@@ -315,3 +374,4 @@ validate1 <- function(ctl, ctlname, expectcols, rqdcols) {
         stop('Missing data prohibited in these ', ctlname, ' columns: ', missingstr)
     }
 }
+
diff --git a/R/output.R b/R/output.R
@@ -4,12 +4,12 @@
 #'
 #' @param rslts Results tables from \code{\link{generate}}.  This must be either
 #' a list of data frames or a list of lists of data frames.
-#' @param tabs Flag indicating whether variables should be written to separate
-#' tabs/files.
+#' @param dataformat Indicator of data format:  If 'tabs', write to separate files; if 'merged'
+#' write merged results to a single file.
 #' @param dirname Directory to write output file(s) into.
 #' @importFrom assertthat assert_that
 #' @keywords internal
-output_csv <- function(rslts, tabs, dirname)
+output_csv <- function(rslts, dataformat, dirname)
 {
     assert_that(is.list(rslts), !is.data.frame(rslts))
 
@@ -30,7 +30,7 @@ output_csv <- function(rslts, tabs, dirname)
 
     ## Now we should have a list of data frames.  Output them to file(s) one
     ## by one.
-    if(tabs) {
+    if(dataformat=='tabs') {
         ## One file for each table
         for(tblname in names(rslts)) {
             filename <- alternate_filename(file.path(dirname, paste0(tblname,
@@ -54,7 +54,9 @@ output_csv <- function(rslts, tabs, dirname)
                 cat('\n', file=fcon)
             }
 
-            cat(tblname, '\n', file=fcon, sep='')
+            if(!('Variable' %in% names(rslts[[tblname]]))) {
+                cat(tblname, '\n', file=fcon, sep='')
+            }
             readr::write_csv(rslts[[tblname]], fcon)
         }
         close(fcon)
@@ -122,3 +124,59 @@ nameparse <- function(name)
         c(stringr::str_c(splt[1:(len-1)], collapse='.'), splt[len])
     }
 }
+
+
+#' Convert a list of tables to a single table in IIASA format
+#'
+#' The result of this transformation will be a single table with the following
+#' columns:
+#'
+#' \itemize{
+#'   \item{Model}
+#'   \item{Scenario}
+#'   \item{Region}
+#'   \item{Variable (taken from the output name of the input)}
+#'   \item{Unit}
+#'   \item{NNNN - one for each year}
+#' }
+#'
+#' Note that this function does not itself add the Model column or arrange the
+#' columns in the order shown above; \code{generate} does both afterwards, using
+#' \code{dplyr::mutate} and \code{iiasa_sortcols}.  The table returned here has
+#' the Scenario, Region, Unit, and year columns produced by
+#' \code{proc_var_iiasa}, followed by the Variable column.
+#'
+#' @param datalist List of data frames, one for each variable.  The list must
+#' be named; the names are used as the values of the Variable column.
+#' @return A single data frame formed by row-binding the per-variable tables.
+#' The value is returned invisibly because the function body ends with an
+#' assignment.
+#' @keywords internal
+iiasafy <- function(datalist)
+{
+    ## Reduce each table to the IIASA columns and spread years into columns.
+    varlist <- lapply(datalist, proc_var_iiasa)
+
+    varlist <- lapply(names(varlist),   # Add variable name (need access to names(varlist) for this.)
+                      function(var) {
+                          dplyr::mutate(varlist[[var]], Variable=var)
+                      }) %>%
+      dplyr::bind_rows()              # Combine into a single table
+}
+
+
+#' Select the columns needed for the IIASA format
+#'
+#' Starting with data in long format, keep only the columns needed to form the
+#' IIASA format, namely, scenario, region, year, value, and Units.  Then rename
+#' variables according to the IIASA conventions, and spread to wide format.  We don't
+#' add the model or variable names at this point, however.
+#'
+#' @param df Data frame in long format.  It must contain the columns
+#' \code{scenario}, \code{region}, \code{year}, \code{value}, and \code{Units};
+#' any other columns are dropped.
+#' @return Data frame in wide format with columns \code{Scenario},
+#' \code{Region}, \code{Unit}, and one column for each distinct value of
+#' \code{year}.  The value is returned invisibly because the function body ends
+#' with an assignment.
+#' @keywords internal
+proc_var_iiasa <- function(df)
+{
+    scenario <- region <- year <- value <- Units <- NULL # silence
+                                        # check notes
+    df <- df %>%
+        dplyr::select(scenario, region, year, value, Units) %>%        # drop everything else
+        dplyr::rename(Scenario=scenario, Region=region, Unit=Units) %>% # IIASA column names
+        tidyr::spread(year, value)                                      # years become columns
+}
+
+#' Put columns in canonical order for IIASA data format
+#'
+#' The columns Model, Scenario, Region, Variable, and Unit are moved to the
+#' front, in that order.  All remaining columns (e.g., the year columns) follow
+#' in the order in which they already appear in \code{df}.
+#'
+#' @param df Data frame
+#' @return Data frame with the same data as \code{df} and the columns
+#' reordered as described above.
+#' @keywords internal
+iiasa_sortcols <- function(df)
+{
+    ## unique() keeps the first occurrence of each name, so the five leading
+    ## names stay in front and are not repeated when they also occur in names(df).
+    cols <- unique(c('Model', 'Scenario', 'Region', 'Variable', 'Unit', names(df)))
+    ## NOTE(review): if df lacks one of the five leading columns, one_of() is
+    ## expected to warn rather than fail -- confirm against the dplyr version in use.
+    dplyr::select(df, dplyr::one_of(cols))
+}
diff --git a/data-raw/sysdata.R b/data-raw/sysdata.R
@@ -0,0 +1,14 @@
+## Generate the internal data for the package
+## This must be sourced as a script because of the way devtools::use_data works.
+## Source it from the top level of a development copy of the package.
+## (All of the paths below are relative, so they resolve only from that directory.)
+
+
+## Build the gdpdef object from data-raw/GDPDEF.csv.  calc.gdpdef is presumably
+## defined by the script sourced on the next line -- TODO confirm.
+source('data-raw/gdpdef.R')
+gdpdef <- calc.gdpdef('data-raw/GDPDEF.csv')
+
+## Build the energyconv object.  prep.energyconv is presumably defined by the
+## script sourced on the next line -- TODO confirm.
+source('data-raw/energyconv.R')
+energyconv <- prep.energyconv()
+
+## Save both objects as internal package data, overwriting any existing copy
+## and using xz compression.
+devtools::use_data(gdpdef, energyconv, internal=TRUE, overwrite=TRUE,
+                   compress='xz')
+
diff --git a/inst/extdata/example-iiasa-variable.ctl b/inst/extdata/example-iiasa-variable.ctl
@@ -0,0 +1,7 @@
+GCAM variable,output variable,aggregation keys,aggregation function,start year,end year,filters,output units
+Population,Population,,,2000,2050,,thous
+pcGDP(PPP),GDP|PPP,,,2000,2050,,Thous80US$/per
+Electricity,Electricity|Generation,,,2000,2050,(matches; sector; electricity),MWh
+Electricity,Electricity|Total,,,2000,2050,,
+Electricity,Electricity|Rooftop PV,,,2000,2050,(matches; sector; elect_td_bld),MWh
+Electricity,Electricity|Rooftop PV|Ridiculous,,,2000,2050,"(notmatches; sector; electricity), (notmatches; sector; industrial energy use)",MWh