From b9198732f3f3206b0e30f923a7953c47b2d1b566 Mon Sep 17 00:00:00 2001
From: Robert Link <robert.link@pnnl.gov>
Date: Wed, 20 Sep 2017 17:12:26 -0400
Subject: [PATCH 1/4] Add IIASA format to options.

Mostly working.  Produces one extraneous row at the top of the file.
---
 DESCRIPTION                  |   3 +-
 R/mcl.R                      | 172 +++++++++++++++++++++++++++++------
 R/output.R                   |  12 ++-
 man/generate.Rd              |  74 +++++++++------
 man/iiasa_sortcols.Rd        |  15 +++
 man/iiasafy.Rd               |  26 ++++++
 man/output_csv.Rd            |   6 +-
 man/proc_var_iiasa.Rd        |  15 +++
 tests/testthat/test_output.R |   8 +-
 9 files changed, 263 insertions(+), 68 deletions(-)
 create mode 100644 man/iiasa_sortcols.Rd
 create mode 100644 man/iiasafy.Rd
 create mode 100644 man/proc_var_iiasa.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 2246102..4dfcc61 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: iamrpt
 Title: Convert GCAM results to the format required by various IAM experiment databases
-Version: 0.1.0
+Version: 0.1.0-9000
 Authors@R: c(
     person("Robert", "Link", email = "robert.link@pnnl.gov", role = c("aut", "cre")),
     person("Xavier", "Gutierrez", email = "xavier.gutierrez@pnnl.gov", role = c("aut"))
@@ -18,6 +18,7 @@ Imports:
     rgcam (>= 0.4.2),
     readr (>= 1.1.1),
     dplyr (>= 0.5),
+    tidyr (>= 0.6),
     stringr (>= 1.2.0),
     assertthat (>= 0.2.0),
     lubridate (>= 1.6.0),
diff --git a/R/mcl.R b/R/mcl.R
index 21872b0..2a51ccf 100644
--- a/R/mcl.R
+++ b/R/mcl.R
@@ -52,20 +52,33 @@
 #' arugments to the system, or set as R options.  The names of the options and
 #' their functions are:
 #' \describe{
-#'     \item{\strong{iamrpt.fileformat}}{File format for output.  Options are
+#'     \item{\code{iamrpt.fileformat}}{File format for output.  Options are
 #' \code{"CSV"} and \code{"XLSX"}}
-#'     \item{\strong{iamrpt.scenmerge}}{If \code{TRUE}, for each variable merge the
+#'     \item{\code{iamrpt.scenmerge}}{If \code{TRUE}, for each variable merge the
 #' results for all scenarios into a single table (distinguished by the value of
 #' the scenario column).  Otherwise, create a separate table for each
 #' combination of scenario and variable.}
-#'     \item{\strong{iamrpt.tabs}}{If \code{TRUE}, write each table to a separate tab (if
-#' outputting to an xlsx file) or file (if outputting to csv files).  In the
-#' former case each tab/file will be named with the output variable name and
-#' scenario (if applicable).  In the latter case all of the tables will be
-#' written into a single tab or file, with the name of the scenario and variable
-#' before each table.}
+#'     \item{\code{iamrpt.dataformat}}{Specify the data format; that is, how
+#' the data is organized in the output files.  Three options are available:
+#'       \describe{
+#'         \item{\code{"tabs"}}{Each table generated goes into a separate tab (if
+#' XLS output is selected) or file (if CSV output is selected).  The tab or file
+#' will be named with the output of the table.}
+#'         \item{\code{"merged"}}{The tables will be output sequentially into a
+#' single tab or file.  Each table will be preceded by its name.  This is
+#' similar to the format used by GCAM to output batch queries.}
+#'         \item{\code{"IIASA"}}{The database format used by IIASA.  In this
+#' format each table is spread into a row in a merged table, with a column to
+#' identify the variable that each row comes from.}
+#'    }
+#'  }
+#'    \item{\code{iamrpt.wideformat}}{If \code{TRUE}, reshape the tables into
+#' wide format (years as columns) before output.  Otherwise, leave them in long
+#' format.  If the IIASA data format is selected, then this option is ignored,
+#' since the IIASA format requires wide data.}
 #' }
 #'
+#'
 #' Output filenames will be chosen automatically.  For an XLSX file the filename
 #' will be 'iamrpt.xlsx'.  For CSV output with \code{tabs == FALSE} the result
 #' will be 'iamrpt.csv'.  For CSV output with \code{tabs == TRUE} the output
@@ -99,32 +112,37 @@
 #'
 #' The filter functions currently recognized by the system are
 #' \describe{
-#'   \item{\strong{==}}{String equality}
-#'   \item{\strong{!=}}{String inequality}
-#'   \item{\strong{<}}{Numeric less-than}
-#'   \item{\strong{>}}{Numeric greather-than}
-#'   \item{\strong{<=}}{Numeric less-than-or-equals}
-#'   \item{\strong{>=}}{Numeric greater-than-or-equals}
-#'   \item{\strong{matches}}{Regular expression match.  Note that because of the
-#' way we parse these strings you can't have a ',', ';', '(', or ')' in your
+#'   \item{\code{==}}{String equality}
+#'   \item{\code{!=}}{String inequality}
+#'   \item{\code{<}}{Numeric less-than}
+#'   \item{\code{>}}{Numeric greater-than}
+#'   \item{\code{<=}}{Numeric less-than-or-equals}
+#'   \item{\code{>=}}{Numeric greater-than-or-equals}
+#'   \item{\code{matches}}{Regular expression match.  Note that because of the
+#' way we parse these strings you can't have a \code{','}, \code{';'},
+#' \code{'('}, or \code{')'} in your
 #' regular expressions for this function or any of the ones below.}
-#'   \item{\strong{matchesi}}{Case-insensitive regular expression match.}
-#'   \item{\strong{notmatches}}{Regular expression inverted match.  That is,
+#'   \item{\code{matchesi}}{Case-insensitive regular expression match.}
+#'   \item{\code{notmatches}}{Regular expression inverted match.  That is,
 #' select the rows that do \emph{not} match the given regular expression.}
-#'   \item{\strong{notmatchesi}}{Case-insensitive regular expression inverted
+#'   \item{\code{notmatchesi}}{Case-insensitive regular expression inverted
 #' match.}
 #' }
 #'
 #' @param scenctl Name of the scenario control file.
 #' @param varctl Name of the variable control file.
 #' @param dbloc Directory holding the GCAM databases
+#' @param outputdir Directory to write output to.  Default is the current
+#' working directory.
+#' @param model Name of the model (e.g., \code{'GCAM'}).  This is required for
+#' the IIASA data format.  It is ignored for all other formats.
 #' @param fileformat Desired format for output files.
 #' @param scenmerge Flag: if true, merge scenarios; otherwise, leave scenarios
 #' as separate tables.
-#' @param tabs Flag: if true, put each table into a separate tab or file.
-#' Otherwise, put them all into a single long tab/file.
-#' @param outputdir Directory to write output to.  Default is the current
-#' working directory.
+#' @param dataformat Indicates desired data format.  Supported formats are
+#' \code{'tabs'}, \code{'merged'}, or \code{'IIASA'}
+#' @param wideformat Flag: if true, convert data to wide format before output;
+#' otherwise, leave in long format.
 #' @return NULL; the report will be written to output files as described in the
 #' Output section.
 #' @importFrom magrittr %>%
@@ -132,16 +150,32 @@
 generate <- function(scenctl,
                      varctl,
                      dbloc,
+                     outputdir = getwd(),
+                     model = 'GCAM',
                      fileformat = getOption('iamrpt.fileformat', 'CSV'),
                      scenmerge = getOption('iamrpt.scenmerge', TRUE),
-                     tabs = getOption('iamrpt.tabs', TRUE),
-                     outputdir = getwd())
+                     dataformat = getOption('iamrpt.dataformat', 'tabs'),
+                     wideformat = getOption('iamrpt.wideformat', TRUE)
+                     )
 {
+    year <- value <- NULL               # silence package check notes.
     suppressMessages({scenctl <- readr::read_csv(scenctl)})
     suppressMessages({varctl <- readr::read_csv(varctl)})
 
     validatectl(scenctl, varctl)
 
+    ## Special condition:  the IIASA format requires every variable to be
+    ## aggregated to region.  Keys that are all blank (NA or '') or all already
+    ## correct are replaced silently; anything else gets a warning first.
+    if(dataformat == 'IIASA') {
+        aggkeys <- varctl[['aggregation keys']]  # %in% below is NA-safe, != is not
+        if(!all(aggkeys %in% 'scenario, region') && !all(aggkeys %in% c('', NA))) {
+            warning('Variables must be aggregated to region for IIASA output format. ',
+                    'Aggregation keys will be replaced with "scenario, region".')
+        }
+        varctl[['aggregation keys']] <- 'scenario, region'
+    }
+
     gcvars <- varctl[['GCAM variable']]
 
     ## Collect the queries that we will need to run.
@@ -160,19 +194,43 @@ generate <- function(scenctl,
 
 
     if(scenmerge)
-        merge_scenarios(rslts)
+        rslts <- merge_scenarios(rslts)
 
+    if(dataformat == 'IIASA') {
+        ## convert results to IIASA format.  If we didn't merge scenarios, write
+        ## each one to a separate file named for the scenario; otherwise write a
+        ## single file.
+        . <- NULL    # suppress notes
+        if(scenmerge) {
+            rslts <- iiasafy(rslts) %>%
+                dplyr::mutate(Model=model) %>%
+                iiasa_sortcols() %>%
+                list(allscen=.)
+            dataformat <- 'merged'
+        }
+        else {
+            rslts <- lapply(rslts, iiasafy) %>%
+              lapply(function(df) {
+                  dplyr::mutate(df, Model=model) %>%
+                      iiasa_sortcols()
+              })
+            dataformat <- 'tabs'
+        }
+    }
+    else if(wideformat) {
+        rslts <- lapply(rslts, function(df) {tidyr::spread(df, year, value)})
+    }
 
     if(fileformat == 'XLSX') {
-        output_xlsx(rslts, tabs, outputdir)
+        output_xlsx(rslts, dataformat, outputdir)
     }
     else if(fileformat == 'CSV') {
-        output_csv(rslts, tabs, outputdir)
+        output_csv(rslts, dataformat, outputdir)
     }
     else {
         warning('Unknown file format ', fileformat, ' requested. ',
                 'Writing as CSV.')
-        output_csv(rslts, tabs, outputdir)
+        output_csv(rslts, dataformat, outputdir)
     }
 
     message('FIN.')
@@ -282,6 +340,7 @@ validatectl <- function(scenctl, varctl)
 
     validate1(scenctl, 'scenario control', scencols, scenrqd)
     validate1(varctl, 'variable control', varcols, varrqd)
+
     invisible(NULL)
 }
 
@@ -315,3 +374,58 @@ validate1 <- function(ctl, ctlname, expectcols, rqdcols) {
         stop('Missing data prohibited in these ', ctlname, ' columns: ', missingstr)
     }
 }
+
+#' Convert a list of tables to a single table in IIASA format
+#'
+#' The result of this transformation will be a single table with the following
+#' columns:
+#'
+#' \itemize{
+#'   \item{Model}
+#'   \item{Scenario}
+#'   \item{Region}
+#'   \item{Variable (taken from the output name of the input)}
+#'   \item{Unit}
+#'   \item{NNNN - one for each year}
+#' }
+#'
+#' @param datalist List of data frames, one for each variable.
+#' @keywords internal
+iiasafy <- function(datalist)
+{
+    varlist <- lapply(datalist, proc_var_iiasa)
+
+    varlist <- lapply(names(varlist),   # Add variable name (need access to names(varlist) for this.)
+                      function(var) {
+                          dplyr::mutate(varlist[[var]], Variable=var)
+                      }) %>%
+      dplyr::bind_rows()              # Combine into a single table
+}
+
+
+#' Select the columns needed for the IIASA format
+#'
+#' Starting with data in long format, keep only the columns needed to form the
+#' IIASA format, namely, scenario, region, year, value, and Units.  Then rename
+#' variables according to the IIASA conventions, and spread to wide format.  We don't
+#' add the model or variable names at this point, however.
+#' @keywords internal
+proc_var_iiasa <- function(df)
+{
+    scenario <- region <- variable <- year <- value <- Units <- NULL # silence
+                                        # check notes
+    df <- df %>%
+        dplyr::select(scenario, region, year, value, Units) %>%
+        dplyr::rename(Scenario=scenario, Region=region, Unit=Units) %>%
+        tidyr::spread(year, value)
+}
+
+#' Put columns in canonical order for IIASA data format
+#'
+#' @param df Data frame
+#' @keywords internal
+iiasa_sortcols <- function(df)
+{
+    cols <- unique(c('Model', 'Scenario', 'Region', 'Variable', 'Unit', names(df)))
+    dplyr::select(df, dplyr::one_of(cols))
+}
diff --git a/R/output.R b/R/output.R
index 5f9540d..54407b4 100644
--- a/R/output.R
+++ b/R/output.R
@@ -4,12 +4,12 @@
 #'
 #' @param rslts Results tables from \code{\link{generate}}.  This must be either
 #' a list of data frames or a list of lists of data frames.
-#' @param tabs Flag indicating whether variables should be written to separate
-#' tabs/files.
+#' @param dataformat Indicator of data format:  If 'tabs', write to separate files; if 'merged'
+#' write merged results to a single file.
 #' @param dirname Directory to write output file(s) into.
 #' @importFrom assertthat assert_that
 #' @keywords internal
-output_csv <- function(rslts, tabs, dirname)
+output_csv <- function(rslts, dataformat, dirname)
 {
     assert_that(is.list(rslts), !is.data.frame(rslts))
 
@@ -30,7 +30,7 @@ output_csv <- function(rslts, tabs, dirname)
 
     ## Now we should have a list of data frames.  Output them to file(s) one
     ## by one.
-    if(tabs) {
+    if(dataformat=='tabs') {
         ## One file for each table
         for(tblname in names(rslts)) {
             filename <- alternate_filename(file.path(dirname, paste0(tblname,
@@ -54,7 +54,9 @@ output_csv <- function(rslts, tabs, dirname)
                 cat('\n', file=fcon)
             }
 
-            cat(tblname, '\n', file=fcon, sep='')
+            if(!('Variable' %in% names(rslts[[tblname]]))) {
+                cat(tblname, '\n', file=fcon, sep='')
+            }
             readr::write_csv(rslts[[tblname]], fcon)
         }
         close(fcon)
diff --git a/man/generate.Rd b/man/generate.Rd
index f4d5bb2..46fac89 100644
--- a/man/generate.Rd
+++ b/man/generate.Rd
@@ -4,9 +4,11 @@
 \alias{generate}
 \title{Generate a report for a GCAM experiment}
 \usage{
-generate(scenctl, varctl, dbloc, fileformat = getOption("iamrpt.fileformat",
-  "CSV"), scenmerge = getOption("iamrpt.scenmerge", TRUE),
-  tabs = getOption("iamrpt.tabs", TRUE), outputdir = getwd())
+generate(scenctl, varctl, dbloc, outputdir = getwd(), model = "GCAM",
+  fileformat = getOption("iamrpt.fileformat", "CSV"),
+  scenmerge = getOption("iamrpt.scenmerge", TRUE),
+  dataformat = getOption("iamrpt.dataformat", "tabs"),
+  wideformat = getOption("iamrpt.wideformat", TRUE))
 }
 \arguments{
 \item{scenctl}{Name of the scenario control file.}
@@ -15,16 +17,22 @@ generate(scenctl, varctl, dbloc, fileformat = getOption("iamrpt.fileformat",
 
 \item{dbloc}{Directory holding the GCAM databases}
 
+\item{outputdir}{Directory to write output to.  Default is the current
+working directory.}
+
+\item{model}{Name of the model (e.g., \code{'GCAM'}).  This is required for
+the IIASA data format.  It is ignored for all other formats.}
+
 \item{fileformat}{Desired format for output files.}
 
 \item{scenmerge}{Flag: if true, merge scenarios; otherwise, leave scenarios
 as separate tables.}
 
-\item{tabs}{Flag: if true, put each table into a separate tab or file.
-Otherwise, put them all into a single long tab/file.}
+\item{dataformat}{Indicates desired data format.  Supported formats are
+\code{'tabs'}, \code{'merged'}, or \code{'IIASA'}}
 
-\item{outputdir}{Directory to write output to.  Default is the current
-working directory.}
+\item{wideformat}{Flag: if true, convert data to wide format before output;
+otherwise, leave in long format.}
 }
 \value{
 NULL; the report will be written to output files as described in the
@@ -83,20 +91,33 @@ The system has several options for formatting output.  These can be passed as
 arugments to the system, or set as R options.  The names of the options and
 their functions are:
 \describe{
-    \item{\strong{iamrpt.fileformat}}{File format for output.  Options are
+    \item{\code{iamrpt.fileformat}}{File format for output.  Options are
 \code{"CSV"} and \code{"XLSX"}}
-    \item{\strong{iamrpt.scenmerge}}{If \code{TRUE}, for each variable merge the
+    \item{\code{iamrpt.scenmerge}}{If \code{TRUE}, for each variable merge the
 results for all scenarios into a single table (distinguished by the value of
 the scenario column).  Otherwise, create a separate table for each
 combination of scenario and variable.}
-    \item{\strong{iamrpt.tabs}}{If \code{TRUE}, write each table to a separate tab (if
-outputting to an xlsx file) or file (if outputting to csv files).  In the
-former case each tab/file will be named with the output variable name and
-scenario (if applicable).  In the latter case all of the tables will be
-written into a single tab or file, with the name of the scenario and variable
-before each table.}
+    \item{\code{iamrpt.dataformat}}{Specify the data format; that is, how
+the data is organized in the output files.  Three options are available:
+      \describe{
+        \item{\code{"tabs"}}{Each table generated goes into a separate tab (if
+XLS output is selected) or file (if CSV output is selected).  The tab or file
+will be named with the output of the table.}
+        \item{\code{"merged"}}{The tables will be output sequentially into a
+single tab or file.  Each table will be preceded by its name.  This is
+similar to the format used by GCAM to output batch queries.}
+        \item{\code{"IIASA"}}{The database format used by IIASA.  In this
+format each table is spread into a row in a merged table, with a column to
+identify the variable that each row comes from.}
+   }
+ }
+   \item{\code{iamrpt.wideformat}}{If \code{TRUE}, reshape the tables into
+wide format (years as columns) before output.  Otherwise, leave them in long
+format.  If the IIASA data format is selected, then this option is ignored,
+since the IIASA format requires wide data.}
 }
 
+
 Output filenames will be chosen automatically.  For an XLSX file the filename
 will be 'iamrpt.xlsx'.  For CSV output with \code{tabs == FALSE} the result
 will be 'iamrpt.csv'.  For CSV output with \code{tabs == TRUE} the output
@@ -132,19 +153,20 @@ trimmed.
 
 The filter functions currently recognized by the system are
 \describe{
-  \item{\strong{==}}{String equality}
-  \item{\strong{!=}}{String inequality}
-  \item{\strong{<}}{Numeric less-than}
-  \item{\strong{>}}{Numeric greather-than}
-  \item{\strong{<=}}{Numeric less-than-or-equals}
-  \item{\strong{>=}}{Numeric greater-than-or-equals}
-  \item{\strong{matches}}{Regular expression match.  Note that because of the
-way we parse these strings you can't have a ',', ';', '(', or ')' in your
+  \item{\code{==}}{String equality}
+  \item{\code{!=}}{String inequality}
+  \item{\code{<}}{Numeric less-than}
+  \item{\code{>}}{Numeric greater-than}
+  \item{\code{<=}}{Numeric less-than-or-equals}
+  \item{\code{>=}}{Numeric greater-than-or-equals}
+  \item{\code{matches}}{Regular expression match.  Note that because of the
+way we parse these strings you can't have a \code{','}, \code{';'},
+\code{'('}, or \code{')'} in your
 regular expressions for this function or any of the ones below.}
-  \item{\strong{matchesi}}{Case-insensitive regular expression match.}
-  \item{\strong{notmatches}}{Regular expression inverted match.  That is,
+  \item{\code{matchesi}}{Case-insensitive regular expression match.}
+  \item{\code{notmatches}}{Regular expression inverted match.  That is,
 select the rows that do \emph{not} match the given regular expression.}
-  \item{\strong{notmatchesi}}{Case-insensitive regular expression inverted
+  \item{\code{notmatchesi}}{Case-insensitive regular expression inverted
 match.}
 }
 }
diff --git a/man/iiasa_sortcols.Rd b/man/iiasa_sortcols.Rd
new file mode 100644
index 0000000..58cce57
--- /dev/null
+++ b/man/iiasa_sortcols.Rd
@@ -0,0 +1,15 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mcl.R
+\name{iiasa_sortcols}
+\alias{iiasa_sortcols}
+\title{Put columns in canonical order for IIASA data format}
+\usage{
+iiasa_sortcols(df)
+}
+\arguments{
+\item{df}{Data frame}
+}
+\description{
+Put columns in canonical order for IIASA data format
+}
+\keyword{internal}
diff --git a/man/iiasafy.Rd b/man/iiasafy.Rd
new file mode 100644
index 0000000..0c809ac
--- /dev/null
+++ b/man/iiasafy.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mcl.R
+\name{iiasafy}
+\alias{iiasafy}
+\title{Convert a list of tables to a single table in IIASA format}
+\usage{
+iiasafy(datalist)
+}
+\arguments{
+\item{datalist}{List of data frames, one for each variable.}
+}
+\description{
+The result of this transformation will be a single table with the following
+columns:
+}
+\details{
+\itemize{
+  \item{Model}
+  \item{Scenario}
+  \item{Region}
+  \item{Variable (taken from the output name of the input)}
+  \item{Unit}
+  \item{NNNN - one for each year}
+}
+}
+\keyword{internal}
diff --git a/man/output_csv.Rd b/man/output_csv.Rd
index 84c1b6b..7daf7ba 100644
--- a/man/output_csv.Rd
+++ b/man/output_csv.Rd
@@ -5,7 +5,7 @@
 \alias{output_xlsx}
 \title{Output function for CSV format}
 \usage{
-output_csv(rslts, tabs, dirname)
+output_csv(rslts, dataformat, dirname)
 
 output_xlsx(rslts, tabs, dirname)
 }
@@ -13,8 +13,8 @@ output_xlsx(rslts, tabs, dirname)
 \item{rslts}{Results tables from \code{\link{generate}}.  This must be either
 a list of data frames or a list of lists of data frames.}
 
-\item{tabs}{Flag indicating whether variables should be written to separate
-tabs/files.}
+\item{dataformat}{Indicator of data format:  If 'tabs', write to separate files; if 'merged'
+write merged results to a single file.}
 
 \item{dirname}{Directory to write output file(s) into.}
 }
diff --git a/man/proc_var_iiasa.Rd b/man/proc_var_iiasa.Rd
new file mode 100644
index 0000000..2bb487d
--- /dev/null
+++ b/man/proc_var_iiasa.Rd
@@ -0,0 +1,15 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/mcl.R
+\name{proc_var_iiasa}
+\alias{proc_var_iiasa}
+\title{Select the columns needed for the IIASA format}
+\usage{
+proc_var_iiasa(df)
+}
+\description{
+Starting with data in long format, keep only the columns needed to form the
+IIASA format, namely, scenario, region, year, value, and Units.  Then rename
+variables according to the IIASA conventions, and spread to wide format.  We don't
+add the model or variable names at this point, however.
+}
+\keyword{internal}
diff --git a/tests/testthat/test_output.R b/tests/testthat/test_output.R
index ee45395..132f1b8 100644
--- a/tests/testthat/test_output.R
+++ b/tests/testthat/test_output.R
@@ -112,7 +112,7 @@ test_that('output_csv works for separate tabs mode.', {
     flist <- file.path(dir, paste0(vlist, '.csv'))
     on.exit(unlink(flist))
 
-    output_csv(rslt, TRUE, dir)
+    output_csv(rslt, 'tabs', dir)
 
     for(i in seq_along(vlist)) {
         file <- flist[i]
@@ -131,7 +131,7 @@ test_that('output_csv works for separate tabs mode.', {
     flist <- file.path(dir, paste0(vlist, '.csv'))
     on.exit(unlink(flist), add=TRUE)
 
-    output_csv(rsltmrg, TRUE, dir)
+    output_csv(rsltmrg, 'tabs', dir)
 
     for(i in seq_along(vlist))  {
         file <- flist[i]
@@ -150,7 +150,7 @@ test_that('output_csv works for single tab mode.',
     ## unmerged
     filename <- file.path(dir, 'iamrpt.csv')
     on.exit(unlink(filename))
-    output_csv(rslt, FALSE, dir)
+    output_csv(rslt, 'merged', dir)
     expect_true(file.exists(filename))
 
     ## spot check a few lines in the data
@@ -170,7 +170,7 @@ test_that('output_csv works for single tab mode.',
     ## merged version
     filename <- file.path(dir, 'iamrpt001.csv')
     on.exit(unlink(filename), add=TRUE)
-    output_csv(rsltmrg, FALSE, dir)
+    output_csv(rsltmrg, 'merged', dir)
     expect_true(file.exists(filename))
 
     ## spot check important lines

From 3b843514abcbd381be3bc3827aa517849532ea19 Mon Sep 17 00:00:00 2001
From: Robert Link <robert.link@pnnl.gov>
Date: Wed, 20 Sep 2017 20:57:51 -0400
Subject: [PATCH 2/4] Add an example variable control file for IIASA format
 output.

---
 inst/extdata/example-iiasa-variable.ctl | 7 +++++++
 1 file changed, 7 insertions(+)
 create mode 100644 inst/extdata/example-iiasa-variable.ctl

diff --git a/inst/extdata/example-iiasa-variable.ctl b/inst/extdata/example-iiasa-variable.ctl
new file mode 100644
index 0000000..4dc959d
--- /dev/null
+++ b/inst/extdata/example-iiasa-variable.ctl
@@ -0,0 +1,7 @@
+GCAM variable,output variable,aggregation keys,aggregation function,start year,end year,filters,output units
+Population,Population,,,2000,2050,,thous
+pcGDP(PPP),GDP|PPP,,,2000,2050,,Thous80US$/per
+Electricity,Electricity|Generation,,,2000,2050,(matches; sector; electricity),MWh
+Electricity,Electricity|Total,,,2000,2050,,
+Electricity,Electricity|Rooftop PV,,,2000,2050,(matches; sector; elect_td_bld),MWh
+Electricity,Electricity|Rooftop PV|Ridiculous,,,2000,2050,"(notmatches; sector; electricity), (notmatches; sector; industrial energy use)",MWh

From e970aeca1a4a7c6c3ccf3c46355259341e4e0251 Mon Sep 17 00:00:00 2001
From: Robert Link <robert.link@pnnl.gov>
Date: Thu, 21 Sep 2017 08:50:22 -0400
Subject: [PATCH 3/4] Move iiasafy functions to output.R

---
 DESCRIPTION                  |  2 +-
 R/mcl.R                      | 54 ----------------------------------
 R/output.R                   | 56 ++++++++++++++++++++++++++++++++++++
 man/iiasa_sortcols.Rd        |  2 +-
 man/iiasafy.Rd               |  2 +-
 man/proc_var_iiasa.Rd        |  2 +-
 tests/testthat/test_output.R | 34 ++++++++++++++++++++++
 7 files changed, 94 insertions(+), 58 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 4dfcc61..deb6f4a 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: iamrpt
 Title: Convert GCAM results to the format required by various IAM experiment databases
-Version: 0.1.0-9000
+Version: 0.2.0
 Authors@R: c(
     person("Robert", "Link", email = "robert.link@pnnl.gov", role = c("aut", "cre")),
     person("Xavier", "Gutierrez", email = "xavier.gutierrez@pnnl.gov", role = c("aut"))
diff --git a/R/mcl.R b/R/mcl.R
index 2a51ccf..9d8e58a 100644
--- a/R/mcl.R
+++ b/R/mcl.R
@@ -375,57 +375,3 @@ validate1 <- function(ctl, ctlname, expectcols, rqdcols) {
     }
 }
 
-#' Convert a list of tables to a single table in IIASA format
-#'
-#' The result of this transformation will be a single table with the following
-#' columns:
-#'
-#' \itemize{
-#'   \item{Model}
-#'   \item{Scenario}
-#'   \item{Region}
-#'   \item{Variable (taken from the output name of the input)}
-#'   \item{Unit}
-#'   \item{NNNN - one for each year}
-#' }
-#'
-#' @param datalist List of data frames, one for each variable.
-#' @keywords internal
-iiasafy <- function(datalist)
-{
-    varlist <- lapply(datalist, proc_var_iiasa)
-
-    varlist <- lapply(names(varlist),   # Add variable name (need access to names(varlist) for this.)
-                      function(var) {
-                          dplyr::mutate(varlist[[var]], Variable=var)
-                      }) %>%
-      dplyr::bind_rows()              # Combine into a single table
-}
-
-
-#' Select the columns needed for the IIASA format
-#'
-#' Starting with data in long format, keep only the columns needed to form the
-#' IIASA format, namely, scenario, region, year, value, and Units.  Then rename
-#' variables according to the IIASA conventions, and spread to wide format.  We don't
-#' add the model or variable names at this point, however.
-#' @keywords internal
-proc_var_iiasa <- function(df)
-{
-    scenario <- region <- variable <- year <- value <- Units <- NULL # silence
-                                        # check notes
-    df <- df %>%
-        dplyr::select(scenario, region, year, value, Units) %>%
-        dplyr::rename(Scenario=scenario, Region=region, Unit=Units) %>%
-        tidyr::spread(year, value)
-}
-
-#' Put columns in canonical order for IIASA data format
-#'
-#' @param df Data frame
-#' @keywords internal
-iiasa_sortcols <- function(df)
-{
-    cols <- unique(c('Model', 'Scenario', 'Region', 'Variable', 'Unit', names(df)))
-    dplyr::select(df, dplyr::one_of(cols))
-}
diff --git a/R/output.R b/R/output.R
index 54407b4..194b1e5 100644
--- a/R/output.R
+++ b/R/output.R
@@ -124,3 +124,59 @@ nameparse <- function(name)
         c(stringr::str_c(splt[1:(len-1)], collapse='.'), splt[len])
     }
 }
+
+
+#' Convert a list of tables to a single table in IIASA format
+#'
+#' The result of this transformation will be a single table with the following
+#' columns:
+#'
+#' \itemize{
+#'   \item{Model}
+#'   \item{Scenario}
+#'   \item{Region}
+#'   \item{Variable (taken from the output name of the input)}
+#'   \item{Unit}
+#'   \item{NNNN - one for each year}
+#' }
+#'
+#' @param datalist List of data frames, one for each variable.
+#' @keywords internal
+iiasafy <- function(datalist)
+{
+    ## Reduce every table to the wide IIASA layout before tagging it.
+    widetbls <- lapply(datalist, proc_var_iiasa)
+    tagvar <- function(varname) {
+        ## The variable name is only available as the list element's name.
+        dplyr::mutate(widetbls[[varname]], Variable=varname)
+    }
+    varlist <- dplyr::bind_rows(lapply(names(widetbls), tagvar))
+}
+
+
+#' Select the columns needed for the IIASA format
+#'
+#' Starting with data in long format, keep only the columns needed to form the
+#' IIASA format, namely, scenario, region, year, value, and Units.  Then rename
+#' variables according to the IIASA conventions, and spread to wide format.  We don't
+#' add the model or variable names at this point, however.
+#' @keywords internal
+proc_var_iiasa <- function(df)
+{
+    ## Bind the bare column names used below so that the package check does
+    ## not emit notes about them.
+    Units <- value <- year <- region <- scenario <- NULL
+    keep <- dplyr::select(df, scenario, region, year, value, Units)
+    keep <- dplyr::rename(keep, Scenario=scenario, Region=region, Unit=Units)
+    df <- tidyr::spread(keep, year, value)
+}
+
+#' Put columns in canonical order for IIASA data format
+#'
+#' @param df Data frame
+#' @keywords internal
+iiasa_sortcols <- function(df)
+{
+    lead <- c('Model', 'Scenario', 'Region', 'Variable', 'Unit')  # id columns go first
+    dplyr::select(df, dplyr::one_of(c(lead, setdiff(names(df), lead))))
+}
diff --git a/man/iiasa_sortcols.Rd b/man/iiasa_sortcols.Rd
index 58cce57..de9b992 100644
--- a/man/iiasa_sortcols.Rd
+++ b/man/iiasa_sortcols.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/mcl.R
+% Please edit documentation in R/output.R
 \name{iiasa_sortcols}
 \alias{iiasa_sortcols}
 \title{Put columns in canonical order for IIASA data format}
diff --git a/man/iiasafy.Rd b/man/iiasafy.Rd
index 0c809ac..8adc482 100644
--- a/man/iiasafy.Rd
+++ b/man/iiasafy.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/mcl.R
+% Please edit documentation in R/output.R
 \name{iiasafy}
 \alias{iiasafy}
 \title{Convert a list of tables to a single table in IIASA format}
diff --git a/man/proc_var_iiasa.Rd b/man/proc_var_iiasa.Rd
index 2bb487d..07f0202 100644
--- a/man/proc_var_iiasa.Rd
+++ b/man/proc_var_iiasa.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/mcl.R
+% Please edit documentation in R/output.R
 \name{proc_var_iiasa}
 \alias{proc_var_iiasa}
 \title{Select the columns needed for the IIASA format}
diff --git a/tests/testthat/test_output.R b/tests/testthat/test_output.R
index 132f1b8..335f2d6 100644
--- a/tests/testthat/test_output.R
+++ b/tests/testthat/test_output.R
@@ -189,3 +189,37 @@ test_that('output_csv works for single tab mode.',
                      'billion m^2,Reference,Africa_Northern,resid,resid,resid_building,2075,5.3585')
 
 })
+
+
+test_that('Single table can be converted to iiasa format.', {
+    pop <- dplyr::filter(popq, year >= 2005, year <= 2020)
+    popiia <- proc_var_iiasa(pop) %>%
+      dplyr::mutate(Variable='Population', Model='GCAM') %>%  # exported, so use ::
+      iiasa_sortcols()
+    ## Expect one wide row per region: id columns first, then one column per year.
+    expect_equal(nrow(popiia), length(unique(pop$region)))
+    expect_identical(names(popiia), c('Model', 'Scenario', 'Region', 'Variable',
+                                      'Unit', as.character(seq(2005,2020,5))))
+    expect_equal(popiia[['2010']], dplyr::filter(pop, year==2010)[['value']])
+})
+
+test_that('List of tables can be converted to iiasa format.', {
+    ## Inputs: population as queried, floorspace aggregated to region.
+    pop <- dplyr::filter(popq, year >= 2005, year <= 2020)
+    flrspc <- dplyr::filter(flrspcq, year >= 2005, year <= 2020)
+    flrspc <- aggregate(flrspc, 'sum', 'scenario, region')
+    allvar <- list(Population=pop, Floorspace=flrspc)
+
+    iitbl <- iiasa_sortcols(dplyr::mutate(iiasafy(allvar), Model='GCAM'))
+
+    ## Expected shape: one row per region for each of the two variables.
+    nregion <- length(unique(pop$region))
+    yearcols <- as.character(seq(2005,2020,5))
+    idcols <- c('Model', 'Scenario', 'Region', 'Variable', 'Unit')
+
+    expect_true(is.data.frame(iitbl))
+    expect_equal(nrow(iitbl), 2*nregion)
+    expect_identical(names(iitbl), c(idcols, yearcols))
+    ## Variables must appear in the order of the input list.
+    expect_identical(unique(iitbl$Variable), c('Population', 'Floorspace'))
+})

From f4e20a2df61fae124fecde973c36a80077fcc6da Mon Sep 17 00:00:00 2001
From: Robert Link <robert.link@pnnl.gov>
Date: Thu, 21 Sep 2017 09:44:22 -0400
Subject: [PATCH 4/4] Automate production of package internal data

Verified identical with old internal data.
---
 data-raw/sysdata.R | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 data-raw/sysdata.R

diff --git a/data-raw/sysdata.R b/data-raw/sysdata.R
new file mode 100644
index 0000000..c1dbaa9
--- /dev/null
+++ b/data-raw/sysdata.R
@@ -0,0 +1,14 @@
+## Generate the internal data for the package
+## This must be sourced as a script because of the way devtools::use_data works.
+## Source it from the top level of a development copy of the package; the
+## relative 'data-raw/...' paths below assume that working directory.
+
+source('data-raw/gdpdef.R')
+gdpdef <- calc.gdpdef('data-raw/GDPDEF.csv')
+
+source('data-raw/energyconv.R')
+energyconv <- prep.energyconv()
+## Save both objects as the package's internal data, overwriting any old copy.
+devtools::use_data(gdpdef, energyconv, internal=TRUE, overwrite=TRUE,
+                   compress='xz')
+