Skip to content

Latest commit

 

History

History
892 lines (797 loc) · 20.9 KB

Pecos1of3.md

File metadata and controls

892 lines (797 loc) · 20.9 KB

Statistical companion

n/a 2017-02-23

  • Prerequisites
    • How to (re-)generate the companion report
    • R packages
  • Data description
    • ALGO=IBEA
    • ALGO=NSGAII
  • Analysis of variances (for ALGO = IBEA)
    • Checks
    • Anova table: Hypervolume (HV)
    • Anova table: PCORRECT
    • Significant differences: Tukey HSD
  • Detach R packages

Prerequisites

How to (re-)generate the companion report

(This assumes that the R package rmarkdown is installed; if in doubt, run from within R: install.packages("rmarkdown").)

  1. Clone the repo or download its content
  2. cd replication/data
  3. Rscript -e "rmarkdown::render('companion.Rmd','all', params = list(spl = 'Pecos1of3'))"
  4. Open companion.html

R packages

# Install any of the required CRAN packages that are missing, then attach all
# four of them.
wants <- c("ggplot2", "reshape2", "car", "pander")
has   <- wants %in% rownames(installed.packages())
# Download over HTTPS rather than plain HTTP so the packages cannot be
# tampered with in transit.
if (any(!has)) install.packages(wants[!has], repos = "https://cran.us.r-project.org")
library(reshape2)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.2
library(car)
library(pander)

# Read the measurements file named by the `data` report parameter and keep the
# rows whose SPL column equals the `spl` report parameter.
# stringsAsFactors = TRUE is requested explicitly: since R 4.0.0 read.csv2()
# no longer converts strings to factors by default, and the `levels<-` call
# below only relabels DIST when DIST is a factor.
all <- read.csv2(params$data, stringsAsFactors = TRUE)
df <- subset(all, SPL == params$spl)
# Positional relabelling: assumes DIST has exactly three levels whose existing
# order corresponds to normal / uniform / x264 -- TODO confirm against the data.
levels(df$DIST) <- c("normal", "uniform", "x264")

Data description

ALGO=IBEA

# Keep the IBEA rows for the three FINT settings F, FI20 and FI100, then count
# the rows in every DIST x FINT cell.
DATA <- subset(
  df,
  ALGO == "IBEA" & FINT %in% c("F", "FI20", "FI100")
)
acast(DATA, DIST ~ FINT, fun.aggregate = length)
##           F FI100 FI20
## normal  200   200  200
## uniform 200   200  200
## x264    200   200  200
# HV values per DIST x FINT panel: violin with a narrow box plot on top, plus
# one point for the median and one (shape 3) for the mean.
# NOTE(review): `fun.y` was superseded by `fun` in ggplot2 3.3.0; it is kept
# here to stay compatible with the ggplot2 version the report was built with.
ggplot(
  data = na.omit(subset(DATA, VARIABLE == "HV")),
  mapping = aes(x = 1, y = value)
) +
  geom_violin() +
  geom_boxplot(width = 0.2) +
  facet_wrap(DIST ~ FINT, ncol = 3, drop = TRUE) +
  stat_summary(geom = "point", fun.y = "median") +
  stat_summary(geom = "point", fun.y = "mean", shape = 3) +
  xlab("HV")

# Same layout for the PCORRECT rows.
ggplot(
  data = na.omit(subset(DATA, VARIABLE == "PCORRECT")),
  mapping = aes(x = 1, y = value)
) +
  geom_violin() +
  geom_boxplot(width = 0.2) +
  facet_wrap(DIST ~ FINT, ncol = 3, drop = TRUE) +
  stat_summary(geom = "point", fun.y = "median") +
  stat_summary(geom = "point", fun.y = "mean", shape = 3) +
  xlab("PCORRECT")

# TimeToAnyC rows with incomplete cases removed.
tta <- na.omit(subset(DATA, VARIABLE == "TimeToAnyC"))

# Only draw the plot when at least one complete row is left.
if (nrow(tta) > 0) {
  ggplot(data = tta, mapping = aes(x = 1, y = value)) +
    geom_violin() +
    geom_boxplot(width = 0.2) +
    facet_wrap(DIST ~ FINT, ncol = 3, drop = TRUE) +
    stat_summary(geom = "point", fun.y = "median") +
    stat_summary(geom = "point", fun.y = "mean", shape = 3) +
    xlab("TimeToAnyC")
}

ALGO=NSGAII

# Keep the NSGAII rows for the three FINT settings F, FI20 and FI100, then
# count the rows in every DIST x FINT cell.
DATA <- subset(
  df,
  ALGO == "NSGAII" & FINT %in% c("F", "FI20", "FI100")
)
acast(DATA, DIST ~ FINT, fun.aggregate = length)
##           F FI100 FI20
## normal  200   200  200
## uniform 200   200  200
## x264    200   200  200
# HV values per DIST x FINT panel: violin with a narrow box plot on top, plus
# one point for the median and one (shape 3) for the mean.
ggplot(
  data = na.omit(subset(DATA, VARIABLE == "HV")),
  mapping = aes(x = 1, y = value)
) +
  geom_violin() +
  geom_boxplot(width = 0.2) +
  facet_wrap(DIST ~ FINT, ncol = 3, drop = TRUE) +
  stat_summary(geom = "point", fun.y = "median") +
  stat_summary(geom = "point", fun.y = "mean", shape = 3) +
  xlab("HV")

## & value < 1000

# Same layout for the PCORRECT rows.
ggplot(
  data = na.omit(subset(DATA, VARIABLE == "PCORRECT")),
  mapping = aes(x = 1, y = value)
) +
  geom_violin() +
  geom_boxplot(width = 0.2) +
  facet_wrap(DIST ~ FINT, ncol = 3, drop = TRUE) +
  stat_summary(geom = "point", fun.y = "median") +
  stat_summary(geom = "point", fun.y = "mean", shape = 3) +
  xlab("PCORRECT")

## & value < 50000

# TimeToAnyC rows with incomplete cases removed.
tta <- na.omit(subset(DATA, VARIABLE == "TimeToAnyC"))

# Only draw the plot when at least one complete row is left.
if (nrow(tta) > 0) {
  ggplot(data = tta, mapping = aes(x = 1, y = value)) +
    geom_violin() +
    geom_boxplot(width = 0.2) +
    facet_wrap(DIST ~ FINT, ncol = 3, drop = TRUE) +
    stat_summary(geom = "point", fun.y = "median") +
    stat_summary(geom = "point", fun.y = "mean", shape = 3) +
    xlab("TimeToAnyC")
}

Analysis of variances (for ALGO = IBEA)

# Data for the analysis of variance: IBEA rows with FINT in {F, FI100} and
# DIST in {normal, x264}.
DATA <- subset(
  df,
  ALGO == "IBEA" & FINT %in% c("F", "FI100") & DIST %in% c("normal", "x264")
)
# Gaussian GLMs with both main effects and their interaction, one per measure.
d.hv.glm <- glm(
  formula = value ~ FINT * DIST,
  data = subset(DATA, VARIABLE == "HV"),
  family = gaussian
)
d.pc.glm <- glm(
  formula = value ~ FINT * DIST,
  data = subset(DATA, VARIABLE == "PCORRECT"),
  family = gaussian
)

Checks

Normality of residuals

# Residuals of the HV model, passed to QQplot() for the normality check.
# NOTE(review): QQplot() is not defined in the visible part of this report;
# presumably a helper from a hidden setup chunk (car's own function is spelled
# qqPlot) -- confirm.
hv.res <- residuals(d.hv.glm)
QQplot(hv.res)

# Same check for the residuals of the PCORRECT model.
pc.res <- residuals(d.pc.glm)
QQplot(pc.res)

Homogeneity of variances

# Per-measure subsets, each row tagged with its FINT x DIST combination.
hv <- subset(DATA, VARIABLE == "HV")
hv$combn <- with(hv, interaction(FINT, DIST))
pc <- subset(DATA, VARIABLE == "PCORRECT")
pc$combn <- with(pc, interaction(FINT, DIST))

# One box plot per combination, laid out in a single row, to compare spreads.
ggplot(hv, aes(x = 1, y = value)) +
  geom_boxplot() +
  facet_wrap(~ combn, nrow = 1) +
  theme_bw()

ggplot(pc, aes(x = 1, y = value)) +
  geom_boxplot() +
  facet_wrap(~ combn, nrow = 1) +
  theme_bw()

# Levene tests on the FINT * DIST design. The first row of each leveneTest()
# result is coerced to a numeric vector and becomes one column of the table:
# columns 1-2 come from the HV rows, columns 3-4 from the PCORRECT rows. Within
# each pair the first call leaves `center` at its default and the second
# passes center = mean.
l.test <- cbind(
  as.numeric(leveneTest(value ~ FINT * DIST, data = hv)[1, ]),
  as.numeric(leveneTest(value ~ FINT * DIST, data = hv, center = mean)[1, ]),
  as.numeric(leveneTest(value ~ FINT * DIST, data = pc)[1, ]),
  as.numeric(leveneTest(value ~ FINT * DIST, data = pc, center = mean)[1, ])
)

# Row and column labels are set in one step.
dimnames(l.test) <- list(
  c("Df", "F", "p-value"),
  c("median", "mean", "median", "mean")
)
pander(l.test)
  median mean median mean
Df 3 3 3 3
F 8.044 8.355 2.419 4.129
p-value 4.411e-05 2.959e-05 0.06742 0.007245

Anova table: Hypervolume (HV)

# HV values for the four DIST x FINT cells used in the ANOVA (two panels per
# row), drawn with the same layers as the descriptive plots.
ggplot(
  data = na.omit(subset(DATA, VARIABLE == "HV")),
  mapping = aes(x = 1, y = value)
) +
  geom_violin() +
  geom_boxplot(width = 0.2) +
  facet_wrap(DIST ~ FINT, ncol = 2, drop = TRUE) +
  stat_summary(geom = "point", fun.y = "median") +
  stat_summary(geom = "point", fun.y = "mean", shape = 3) +
  xlab("HV")

# Table formatting for the pander() calls, then the analysis-of-deviance table
# of the HV model with F tests.
panderOptions("digits", 4)
## panderOptions('round', 4)
panderOptions("keep.trailing.zeros", TRUE)
pander(anova(d.hv.glm, test = "F"))
Analysis of Deviance Table
  Df Deviance Resid. Df Resid. Dev F Pr(>F)
NULL NA NA 199 0.2286 NA NA
FINT 1 0.2192 198 0.009401 26154 1.452e-210
DIST 1 0.007718 197 0.001683 920.9 5.405e-76
FINT:DIST 1 4.025e-05 196 0.001643 4.803 0.02958
# The same FINT * DIST model fitted with aov() on the HV rows; droplevels()
# removes factor levels left unused by the subsetting.
aov.hv <- aov(
  formula = value ~ FINT * DIST,
  data = droplevels(subset(DATA, VARIABLE == "HV"))
)
pander(aov.hv)
Analysis of Variance Model
  Df Sum Sq Mean Sq F value Pr(>F)
FINT 1 0.2192 0.2192 26154 1.452e-210
DIST 1 0.007718 0.007718 920.9 5.405e-76
FINT:DIST 1 4.025e-05 4.025e-05 4.803 0.02958
Residuals 196 0.001643 8.381e-06 NA NA
# Print what my.etasq() returns for the HV model, one value per model term.
# NOTE(review): my.etasq is defined outside the visible part of this report;
# presumably an eta-squared effect size -- confirm.
pander(my.etasq(aov.hv))
FINT DIST FINT:DIST
0.96581568427347 0.0340069384588839 0.000177377267646053
# Plot helpers applied to the HV rows; both functions are defined outside the
# visible part of this report.
my.interactionPlot(subset(DATA, VARIABLE == "HV"))

my.nestedBoxplot(subset(DATA, VARIABLE == "HV"))

Anova table: PCORRECT

# Gaussian GLM for PCORRECT with both main effects and their interaction.
d.pc.glm <- glm(
  formula = value ~ FINT * DIST,
  data = subset(DATA, VARIABLE == "PCORRECT"),
  family = gaussian
)

# PCORRECT values for the four DIST x FINT cells used in the ANOVA (two panels
# per row): violin, narrow box plot, median point and mean point (shape 3).
ggplot(
  data = na.omit(subset(DATA, VARIABLE == "PCORRECT")),
  mapping = aes(x = 1, y = value)
) +
  geom_violin() +
  geom_boxplot(width = 0.2) +
  facet_wrap(DIST ~ FINT, ncol = 2, drop = TRUE) +
  stat_summary(geom = "point", fun.y = "median") +
  stat_summary(geom = "point", fun.y = "mean", shape = 3) +
  xlab("PCORRECT")

# Analysis-of-deviance table of the PCORRECT model, with F tests.
pander(anova(d.pc.glm, test = "F"))
Analysis of Deviance Table
  Df Deviance Resid. Df Resid. Dev F Pr(>F)
NULL NA NA 199 63.85 NA NA
FINT 1 1.031 198 62.81 3.282 0.07155
DIST 1 1.235 197 61.58 3.93 0.04882
FINT:DIST 1 0.01419 196 61.57 0.04517 0.8319
# The same FINT * DIST model fitted with aov() on the PCORRECT rows;
# droplevels() removes factor levels left unused by the subsetting.
aov.pc <- aov(
  formula = value ~ FINT * DIST,
  data = droplevels(subset(DATA, VARIABLE == "PCORRECT"))
)
pander(aov.pc)
Analysis of Variance Model
  Df Sum Sq Mean Sq F value Pr(>F)
FINT 1 1.031 1.031 3.282 0.07155
DIST 1 1.235 1.235 3.93 0.04882
FINT:DIST 1 0.01419 0.01419 0.04517 0.8319
Residuals 196 61.57 0.3141 NA NA
# Print what my.etasq() returns for the PCORRECT model, one value per term.
# NOTE(review): my.etasq is defined outside the visible part of this report;
# presumably an eta-squared effect size -- confirm.
pander(my.etasq(aov.pc))
FINT DIST FINT:DIST
0.452256486408053 0.541519693200419 0.00622382039152753
# Plot helpers applied to the PCORRECT rows; both functions are defined outside
# the visible part of this report.
my.interactionPlot(subset(DATA, VARIABLE == "PCORRECT"))

my.nestedBoxplot(subset(DATA, VARIABLE == "PCORRECT"))

Significant differences: Tukey HSD

# Tukey HSD comparisons for the HV model, followed by the FINT main-effect
# table.
hsd.hv <- TukeyHSD(
  aov(
    formula = value ~ FINT * DIST,
    data = droplevels(subset(DATA, VARIABLE == "HV"))
  )
)
pander(hsd.hv[["FINT"]])
  diff lwr upr p adj
FI100-F 0.06621 0.0654 0.06702 7.572e-14
# DIST main-effect comparisons for HV.
pander(hsd.hv[["DIST"]])
  diff lwr upr p adj
x264-normal 0.01242 0.01162 0.01323 7.572e-14
# Pairwise comparisons between the FINT:DIST cells for HV.
pander(hsd.hv[["FINT:DIST"]])
  diff lwr upr p adj
FI100:normal-F:normal 0.06531 0.06381 0.06681 7.572e-14
F:x264-F:normal 0.01153 0.01003 0.01303 7.572e-14
FI100:x264-F:normal 0.07863 0.07713 0.08013 7.572e-14
F:x264-FI100:normal -0.05379 -0.05529 -0.05229 7.572e-14
FI100:x264-FI100:normal 0.01332 0.01182 0.01482 7.572e-14
FI100:x264-F:x264 0.06711 0.06561 0.06861 7.572e-14
# Tukey HSD comparisons for the PCORRECT model, followed by the FINT
# main-effect table.
hsd.pc <- TukeyHSD(
  aov(
    formula = value ~ FINT * DIST,
    data = droplevels(subset(DATA, VARIABLE == "PCORRECT"))
  )
)
pander(hsd.pc[["FINT"]])
  diff lwr upr p adj
FI100-F 0.1436 -0.01271 0.2999 0.07155
# DIST main-effect comparisons for PCORRECT.
pander(hsd.pc[["DIST"]])
  diff lwr upr p adj
x264-normal 0.1571 0.0008222 0.3134 0.04882
# Pairwise comparisons between the FINT:DIST cells for PCORRECT.
pander(hsd.pc[["FINT:DIST"]])
  diff lwr upr p adj
FI100:normal-F:normal 0.1604 -0.13 0.4509 0.4813
F:x264-F:normal 0.174 -0.1165 0.4644 0.4085
FI100:x264-F:normal 0.3007 0.01028 0.5912 0.03928
F:x264-FI100:normal 0.01353 -0.2769 0.304 0.9994
FI100:x264-FI100:normal 0.1403 -0.1502 0.4307 0.5949
FI100:x264-F:x264 0.1268 -0.1637 0.4172 0.6709

Detach R packages

# Detach the four packages attached at the top of the report. Each call is
# wrapped in try() so a package that is not attached does not stop the rest.
invisible(lapply(
  c("ggplot2", "reshape2", "car", "pander"),
  function(pkg) try(detach(paste0("package:", pkg), character.only = TRUE))
))