Skip to content

Latest commit

 

History

History
892 lines (797 loc) · 20.9 KB

Paxtls1of3.md

File metadata and controls

892 lines (797 loc) · 20.9 KB

Statistical companion

n/a 2017-02-23

  • Prerequisites
    • How to (re-)generate the companion report
    • R packages
  • Data description
    • ALGO=IBEA
    • ALGO=NSGAII
  • Analysis of variances (for ALGO = IBEA)
    • Checks
    • Anova table: Hypervolume (HV)
    • Anova table: PCORRECT
    • Significant differences: Tukey HSD
  • Detach R packages

Prerequisites

How to (re-)generate the companion report

(This assumes that the R package rmarkdown is installed; if in doubt, run from within R: install.packages("rmarkdown").)

  1. Clone the repo or download its content
  2. cd replication/data
  3. Rscript -e "rmarkdown::render('companion.Rmd','all', params = list(spl = 'Paxtls1of3'))"
  4. Open companion.html

R packages

# Packages this report depends on.
wants <- c("ggplot2", "reshape2", "car", "pander")
# system.file() returns "" for a package that is not installed; this tests
# presence without building the full installed.packages() matrix.
has <- vapply(wants, function(pkg) nzchar(system.file(package = pkg)),
              logical(1), USE.NAMES = FALSE)
# Install only what is missing, downloading over HTTPS rather than plain HTTP.
if (!all(has)) {
    install.packages(wants[!has], repos = "https://cran.us.r-project.org")
}
library(reshape2)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.2
library(car)
library(pander)

# Read the results file named by the `data` report parameter. read.csv2()
# expects ';' as field separator and ',' as decimal mark.
all <- read.csv2(params$data)
# Keep only the rows whose SPL column equals the `spl` report parameter.
df <- subset(all, SPL == params$spl)
# Since R 4.0.0 read.csv2() leaves strings as character vectors. Assigning
# levels() to a character vector only sets an attribute and renames nothing,
# so make sure DIST is a factor before relabelling its levels.
if (!is.factor(df$DIST)) {
    df$DIST <- factor(df$DIST)
}
# Relabel the DIST levels by position (levels are in sorted order).
# NOTE(review): this assumes DIST has exactly three levels in this order; confirm
# against the data file.
levels(df$DIST) <- c("normal", "uniform", "x264")

Data description

ALGO=IBEA

# IBEA observations for the three FINT settings shown in this section.
DATA <- subset(
    df,
    FINT %in% c("F", "FI20", "FI100") & ALGO == "IBEA"
)
# Number of observations in each DIST (rows) x FINT (columns) cell.
acast(DATA, DIST ~ FINT, length)
##           F FI100 FI20
## normal  200   200  200
## uniform 200   200  200
## x264    200   200  200
# HV values per DIST x FINT panel: a violin with a narrow boxplot on top.
# The median is drawn as a point and the mean as a plus sign (shape 3).
ggplot(
    na.omit(subset(DATA, VARIABLE == "HV")),
    aes(x = 1, y = value)
) +
    geom_violin() +
    geom_boxplot(width = 0.2) +
    facet_wrap(DIST ~ FINT, ncol = 3, drop = TRUE) +
    stat_summary(fun.y = "median", geom = "point") +
    stat_summary(fun.y = "mean", geom = "point", shape = 3) +
    xlab("HV")

# PCORRECT values per DIST x FINT panel: a violin with a narrow boxplot on top.
# The median is drawn as a point and the mean as a plus sign (shape 3).
ggplot(
    na.omit(subset(DATA, VARIABLE == "PCORRECT")),
    aes(x = 1, y = value)
) +
    geom_violin() +
    geom_boxplot(width = 0.2) +
    facet_wrap(DIST ~ FINT, ncol = 3, drop = TRUE) +
    stat_summary(fun.y = "median", geom = "point") +
    stat_summary(fun.y = "mean", geom = "point", shape = 3) +
    xlab("PCORRECT")

# Complete TimeToAnyC observations for the current DATA selection.
tta <- na.omit(subset(DATA, VARIABLE == "TimeToAnyC"))

# Draw the plot only when at least one observation is left.
if (nrow(tta) > 0) {
    ggplot(tta, aes(x = 1, y = value)) +
        geom_violin() +
        geom_boxplot(width = 0.2) +
        facet_wrap(DIST ~ FINT, ncol = 3, drop = TRUE) +
        stat_summary(fun.y = "median", geom = "point") +
        stat_summary(fun.y = "mean", geom = "point", shape = 3) +
        xlab("TimeToAnyC")
}

ALGO=NSGAII

# NSGAII observations for the three FINT settings shown in this section.
DATA <- subset(
    df,
    FINT %in% c("F", "FI20", "FI100") & ALGO == "NSGAII"
)
# Number of observations in each DIST (rows) x FINT (columns) cell.
acast(DATA, DIST ~ FINT, length)
##           F FI100 FI20
## normal  200   200  200
## uniform 200   200  200
## x264    200   200  200
# HV values per DIST x FINT panel: a violin with a narrow boxplot on top.
# The median is drawn as a point and the mean as a plus sign (shape 3).
ggplot(
    na.omit(subset(DATA, VARIABLE == "HV")),
    aes(x = 1, y = value)
) +
    geom_violin() +
    geom_boxplot(width = 0.2) +
    facet_wrap(DIST ~ FINT, ncol = 3, drop = TRUE) +
    stat_summary(fun.y = "median", geom = "point") +
    stat_summary(fun.y = "mean", geom = "point", shape = 3) +
    xlab("HV")

## & value < 1000

# PCORRECT values per DIST x FINT panel: a violin with a narrow boxplot on top.
# The median is drawn as a point and the mean as a plus sign (shape 3).
ggplot(
    na.omit(subset(DATA, VARIABLE == "PCORRECT")),
    aes(x = 1, y = value)
) +
    geom_violin() +
    geom_boxplot(width = 0.2) +
    facet_wrap(DIST ~ FINT, ncol = 3, drop = TRUE) +
    stat_summary(fun.y = "median", geom = "point") +
    stat_summary(fun.y = "mean", geom = "point", shape = 3) +
    xlab("PCORRECT")

## & value < 50000

# Complete TimeToAnyC observations for the current DATA selection.
tta <- na.omit(subset(DATA, VARIABLE == "TimeToAnyC"))

# Draw the plot only when at least one observation is left.
if (nrow(tta) > 0) {
    ggplot(tta, aes(x = 1, y = value)) +
        geom_violin() +
        geom_boxplot(width = 0.2) +
        facet_wrap(DIST ~ FINT, ncol = 3, drop = TRUE) +
        stat_summary(fun.y = "median", geom = "point") +
        stat_summary(fun.y = "mean", geom = "point", shape = 3) +
        xlab("TimeToAnyC")
}

Analysis of variances (for ALGO = IBEA)

# Data for the analysis of variance: IBEA runs, restricted to two FINT
# settings (F, FI100) and two DIST settings (normal, x264).
DATA <- subset(
    df,
    ALGO == "IBEA" &
        FINT %in% c("F", "FI100") &
        DIST %in% c("normal", "x264")
)
# Gaussian linear models with both main effects and their interaction,
# fitted separately to the HV and the PCORRECT observations.
d.hv.glm <- glm(
    value ~ FINT * DIST,
    family = gaussian,
    data = subset(DATA, VARIABLE == "HV")
)
d.pc.glm <- glm(
    value ~ FINT * DIST,
    family = gaussian,
    data = subset(DATA, VARIABLE == "PCORRECT")
)

Checks

Normality of residuals

# Residuals of the two fitted models.
hv.res <- residuals(d.hv.glm)
pc.res <- residuals(d.pc.glm)

# One plot per model, HV first.
# NOTE(review): QQplot() is not defined in the visible source and is not an
# export of car (which provides qqPlot()); confirm where it comes from.
QQplot(hv.res)
QQplot(pc.res)

Homogeneity of variances

# HV observations, with a factor identifying each FINT x DIST combination.
hv <- subset(DATA, VARIABLE == "HV")
hv$combn <- with(hv, interaction(FINT, DIST))

# The same for the PCORRECT observations.
pc <- subset(DATA, VARIABLE == "PCORRECT")
pc$combn <- with(pc, interaction(FINT, DIST))

# One boxplot per FINT x DIST combination, laid out in a single row, to
# compare the spread of HV across cells.
ggplot(data = hv, aes(x = 1, y = value)) +
    geom_boxplot() +
    facet_wrap(~ combn, nrow = 1) +
    theme_bw()

# The same comparison for PCORRECT.
ggplot(data = pc, aes(x = 1, y = value)) +
    geom_boxplot() +
    facet_wrap(~ combn, nrow = 1) +
    theme_bw()

# Levene tests for homogeneity of variance across the FINT x DIST cells.
# Columns, in order: HV centred on the median (leveneTest's default) and on
# the mean, then PCORRECT centred on the median and on the mean. Only the
# first row of each result (Df, F, p-value) is kept.
l.test <- vapply(
    list(
        leveneTest(value ~ FINT * DIST, data = hv),
        leveneTest(value ~ FINT * DIST, data = hv, center = mean),
        leveneTest(value ~ FINT * DIST, data = pc),
        leveneTest(value ~ FINT * DIST, data = pc, center = mean)
    ),
    function(fit) as.numeric(fit[1, ]),
    numeric(3)
)

dimnames(l.test) <- list(
    c("Df", "F", "p-value"),
    c("median", "mean", "median", "mean")
)
pander(l.test)
  median mean median mean
Df 3 3 3 3
F 3.11 3.25 16.27 17.75
p-value 0.02753 0.02292 1.744e-09 3.12e-10

Anova table: Hypervolume (HV)

# HV values per DIST x FINT panel (two columns) for the ANOVA selection:
# a violin with a narrow boxplot on top. The median is drawn as a point and
# the mean as a plus sign (shape 3).
ggplot(
    na.omit(subset(DATA, VARIABLE == "HV")),
    aes(x = 1, y = value)
) +
    geom_violin() +
    geom_boxplot(width = 0.2) +
    facet_wrap(DIST ~ FINT, ncol = 2, drop = TRUE) +
    stat_summary(fun.y = "median", geom = "point") +
    stat_summary(fun.y = "mean", geom = "point", shape = 3) +
    xlab("HV")

# Table formatting for the rest of the report: four digits, and trailing
# zeros are kept.
panderOptions("digits", 4)
## panderOptions('round', 4)
panderOptions("keep.trailing.zeros", TRUE)

# Analysis-of-deviance table of the HV model, with F tests.
pander(
    anova(d.hv.glm, test = "F")
)
Analysis of Deviance Table
  Df Deviance Resid. Df Resid. Dev F Pr(>F)
NULL NA NA 199 0.5619 NA NA
FINT 1 0.1119 198 0.45 941866 0
DIST 1 0.2245 197 0.2255 1889151 0
FINT:DIST 1 0.2255 196 2.329e-05 1897600 0
# Two-way ANOVA (main effects plus interaction) on the HV observations.
# Unused factor levels are dropped from the data before fitting.
aov.hv <- aov(
    value ~ FINT * DIST,
    data = droplevels(subset(DATA, VARIABLE == "HV"))
)
pander(aov.hv)
Analysis of Variance Model
  Df Sum Sq Mean Sq F value Pr(>F)
FINT 1 0.1119 0.1119 941866 0
DIST 1 0.2245 0.2245 1889151 0
FINT:DIST 1 0.2255 0.2255 1897600 0
Residuals 196 2.329e-05 1.188e-07 NA NA
# Print the result of my.etasq() for the HV ANOVA, one value per model term.
# my.etasq() is defined outside the visible source; presumably it returns
# eta-squared effect sizes -- TODO confirm against its definition.
pander(my.etasq(aov.hv))
FINT DIST FINT:DIST
0.199184327307677 0.399514421071929 0.401301251620393
# Pass the HV observations to two plotting helpers. Both are defined outside
# the visible source; by their names they presumably draw an interaction plot
# and a nested boxplot -- TODO confirm.
my.interactionPlot(subset(DATA, VARIABLE == "HV"))

my.nestedBoxplot(subset(DATA, VARIABLE == "HV"))

Anova table: PCORRECT

# Fit the Gaussian linear model for PCORRECT. This repeats the model, data
# and formula of the earlier d.pc.glm fit.
d.pc.glm <- glm(
    value ~ FINT * DIST,
    family = gaussian,
    data = subset(DATA, VARIABLE == "PCORRECT")
)

# PCORRECT values per DIST x FINT panel (two columns): a violin with a narrow
# boxplot on top. The median is drawn as a point and the mean as a plus sign
# (shape 3).
ggplot(
    na.omit(subset(DATA, VARIABLE == "PCORRECT")),
    aes(x = 1, y = value)
) +
    geom_violin() +
    geom_boxplot(width = 0.2) +
    facet_wrap(DIST ~ FINT, ncol = 2, drop = TRUE) +
    stat_summary(fun.y = "median", geom = "point") +
    stat_summary(fun.y = "mean", geom = "point", shape = 3) +
    xlab("PCORRECT")

# Analysis-of-deviance table of the PCORRECT model, with F tests.
pander(anova(d.pc.glm, test = "F"))
Analysis of Deviance Table
  Df Deviance Resid. Df Resid. Dev F Pr(>F)
NULL NA NA 199 794.7 NA NA
FINT 1 580.6 198 214.1 667.2 5.133e-65
DIST 1 2.549 197 211.6 2.93 0.08854
FINT:DIST 1 41.02 196 170.5 47.15 8.481e-11
# Two-way ANOVA (main effects plus interaction) on the PCORRECT observations.
# Unused factor levels are dropped from the data before fitting.
aov.pc <- aov(
    value ~ FINT * DIST,
    data = droplevels(subset(DATA, VARIABLE == "PCORRECT"))
)
pander(aov.pc)
Analysis of Variance Model
  Df Sum Sq Mean Sq F value Pr(>F)
FINT 1 580.6 580.6 667.2 5.133e-65
DIST 1 2.549 2.549 2.93 0.08854
FINT:DIST 1 41.02 41.02 47.15 8.481e-11
Residuals 196 170.5 0.8701 NA NA
# Print the result of my.etasq() for the PCORRECT ANOVA, one value per term.
# my.etasq() is defined outside the visible source; presumably it returns
# eta-squared effect sizes -- TODO confirm against its definition.
# NOTE(review): the three printed values sum to 1 although the ANOVA table
# reports a residual sum of squares of 170.5, so the shares appear to be
# taken over the effect sums of squares only -- confirm this is intended.
pander(my.etasq(aov.pc))
FINT DIST FINT:DIST
0.930187535950312 0.00408437903630415 0.0657280850133835
# Pass the PCORRECT observations to two plotting helpers. Both are defined
# outside the visible source; by their names they presumably draw an
# interaction plot and a nested boxplot -- TODO confirm.
my.interactionPlot(subset(DATA, VARIABLE == "PCORRECT"))

my.nestedBoxplot(subset(DATA, VARIABLE == "PCORRECT"))

Significant differences: Tukey HSD

# Tukey honest-significant-difference comparisons for the HV ANOVA. The
# model is refitted here on the HV observations with unused levels dropped.
hsd.hv <- TukeyHSD(
    aov(
        value ~ FINT * DIST,
        data = droplevels(subset(DATA, VARIABLE == "HV"))
    )
)
# Pairwise differences between the FINT levels.
pander(hsd.hv$FINT)
  diff lwr upr p adj
FI100-F -0.04731 -0.04741 -0.04721 7.572e-14
# Pairwise differences between the DIST levels for HV.
pander(hsd.hv$DIST)
  diff lwr upr p adj
x264-normal -0.067 -0.0671 -0.06691 7.572e-14
# Pairwise differences between the FINT:DIST combinations for HV. The
# backticks are needed because the component name contains a colon.
pander(hsd.hv$`FINT:DIST`)
  diff lwr upr p adj
FI100:normal-F:normal 0.01984 0.01966 0.02002 7.572e-14
F:x264-F:normal 0.0001497 -2.897e-05 0.0003283 0.135
FI100:x264-F:normal -0.1143 -0.1145 -0.1141 7.572e-14
F:x264-FI100:normal -0.01969 -0.01987 -0.01951 7.572e-14
FI100:x264-FI100:normal -0.1342 -0.1343 -0.134 7.572e-14
FI100:x264-F:x264 -0.1145 -0.1146 -0.1143 7.572e-14
# Tukey honest-significant-difference comparisons for the PCORRECT ANOVA. The
# model is refitted here on the PCORRECT observations with unused levels
# dropped.
hsd.pc <- TukeyHSD(
    aov(
        value ~ FINT * DIST,
        data = droplevels(subset(DATA, VARIABLE == "PCORRECT"))
    )
)
# Pairwise differences between the FINT levels.
pander(hsd.pc$FINT)
  diff lwr upr p adj
FI100-F 3.408 3.147 3.668 7.572e-14
# Pairwise differences between the DIST levels for PCORRECT.
pander(hsd.pc$DIST)
  diff lwr upr p adj
x264-normal -0.2258 -0.486 0.03436 0.08854
# Pairwise differences between the FINT:DIST combinations for PCORRECT. The
# backticks are needed because the component name contains a colon.
pander(hsd.pc$`FINT:DIST`)
  diff lwr upr p adj
FI100:normal-F:normal 4.313 3.83 4.797 7.572e-14
F:x264-F:normal 0.68 0.1966 1.163 0.00193
FI100:x264-F:normal 3.182 2.698 3.665 7.572e-14
F:x264-FI100:normal -3.633 -4.117 -3.15 7.572e-14
FI100:x264-FI100:normal -1.132 -1.615 -0.6482 3.983e-08
FI100:x264-F:x264 2.502 2.018 2.985 7.572e-14

Detach R packages

# Detach the packages this report attached, in the original order. A package
# is detached only if it is on the search path, so nothing is printed when it
# was never attached. Any other failure is reported as a warning instead of
# being swallowed by a bare try().
invisible(lapply(
    c("ggplot2", "reshape2", "car", "pander"),
    function(pkg) {
        entry <- paste0("package:", pkg)
        if (entry %in% search()) {
            tryCatch(
                detach(entry, character.only = TRUE),
                error = function(e) {
                    warning("Could not detach ", entry, ": ",
                            conditionMessage(e), call. = FALSE)
                }
            )
        }
    }
))