Skip to content
/ localFDA Public

Localization processes for functional data analysis. Software companion for the paper “Localization processes for functional data analysis” by Elías, A., Jiménez, R., and Yukich, J. (2020)

License

Notifications You must be signed in to change notification settings

aefdz/localFDA

Repository files navigation

localFDA

License Travis build status

Overview

Software companion for the paper “Localization processes for functional data analysis” by Elías, Antonio, Jiménez, Raúl, and Yukich, Joe (2020) <arXiv:2007.16059>. It provides code for computing localization processes and localization distances, and for applying them to classification and outlier detection problems.

Installation

# Install the package from its GitHub repository (uses the devtools package)
devtools::install_github("aefdz/localFDA")
##       v  checking for file 'C:\Users\anton\AppData\Local\Temp\Rtmp4617Sq\remotes2e00503a197c\aefdz-localFDA-25b0d40/DESCRIPTION' (425ms)
##       -  preparing 'localFDA':
##    checking DESCRIPTION meta-information ...     checking DESCRIPTION meta-information ...   v  checking DESCRIPTION meta-information
##       -  checking for LF line-endings in source and make files and shell scripts
##   -  checking for empty or unneeded directories
##       -  looking to see if a 'data/datalist' file should be added
##       -  building 'localFDA_0.0.0.9000.tar.gz'
##      
## 
# Load the package, plus the packages the examples below call without a
# namespace prefix: ggplot2 (ggplot, geom_line, ...), dplyr (filter on data
# frames) and patchwork (wrap_plots).
library(localFDA)
library(ggplot2)
library(dplyr)
library(patchwork)

Test usage

Load the example data and plot it.

# Example data set (presumably provided by localFDA). The reshaping below
# treats each column of X as one curve and each row as one evaluation point,
# with the grid values stored in the row names.
X <- exampleData
n <- ncol(X)  # number of curves
p <- nrow(X)  # number of evaluation points per curve
t <- as.numeric(rownames(X))  # evaluation grid

# Long format for ggplot2: one row per (curve, grid point) pair.
# Assuming X is a matrix, c(X) unrolls it column by column, so each curve
# name is repeated p times while the grid t is repeated once per curve.
df_functions <- data.frame(
  ids = rep(colnames(X), each = p),
  y = c(X),
  x = rep(t, n)
)

# Every curve is drawn in the same translucent black, so no colour mapping
# is needed; a constant colour would override it anyway.
functions_plot <- ggplot(df_functions) +
  geom_line(aes(x = x, y = y, group = ids), color = "black", alpha = 0.25) +
  xlab("t") +
  theme(legend.position = "none")

functions_plot

Compute kth empirical localization processes

Empirical version of Equation (1) of the paper, computed here for a single focal curve:

# Label (column name in X) of the curve used as focal
focal <- "1"

# Keep only the "lc" element of the returned object
localizarionProcesses_focal <- localizationProcesses(X, focal)[["lc"]]

Plot localization processes of order 1, 50, 100 and 200:

# Long format: one row per (order k, grid point) pair. The column names of
# the localization-process object label the order (matched below as "k=<k>").
df_lc <- data.frame(
  k = rep(colnames(localizarionProcesses_focal), each = p),
  y = c(localizarionProcesses_focal),
  x = rep(t, n - 1)
)

# Orders to display, one panel each
ks <- c(1, 50, 100, 200)

# The focal curve does not depend on k, so extract it once.
focal_curve <- filter(df_functions, ids == focal)

# Build one panel per order. Iterating over ks itself (instead of a
# hard-coded 1:4) keeps the panels in sync when ks is edited.
lc_plots <- lapply(ks, function(k_order) {
  functions_plot +
    geom_line(data = filter(df_lc, k == paste0("k=", k_order)),
              aes(x = x, y = y, group = k),
              color = "blue", size = 1) +
    geom_line(data = focal_curve,
              aes(x = x, y = y, group = ids),
              color = "red", linetype = "dashed", size = 1) +
    ggtitle(paste("k = ", k_order))
})

# Arrange the panels in a single figure
wrap_plots(lc_plots)

Compute kth empirical localization distances

Empirical version of Equation (18) of the paper, computed here for a single focal curve:

# Localization distances of the focal curve
localizationDistances_focal <- localizationDistances(X, focal)

# Show the first six distances
head(localizationDistances_focal, n = 6L)
##          k=1          k=2          k=3          k=4          k=5          k=6 
## 0.0005082926 0.0011346495 0.0017636690 0.0023955745 0.0030095117 0.0035089220

Plot the localization distances:

# One row per localization distance. seq_along() indexes the distances
# directly, so x always has the same length as the vector and does not rely
# on a global n defined in an earlier snippet.
df_ld <- data.frame(
  k = names(localizationDistances_focal),
  y = localizationDistances_focal,
  x = seq_along(localizationDistances_focal)
)

ldistances_plot <- ggplot(df_ld, aes(x = x, y = y)) +
  geom_point() +
  ggtitle("Localization distances for one focal") +
  xlab("kth") +
  ylab("L")

ldistances_plot

Sample μ and σ

localizationStatistics_full <- localizationStatistics(X, robustify = TRUE)

# Orders k whose mean and sd estimates are displayed
ks_shown <- c(1, 100, 200, 400, 600)

localizationStatistics_full[["trim_mean"]][ks_shown]
##         k=1       k=100       k=200       k=400       k=600 
## 0.001083517 0.098465426 0.184940365 0.350528860 0.526580274
localizationStatistics_full[["trim_sd"]][ks_shown]
##          k=1        k=100        k=200        k=400        k=600 
## 0.0005326429 0.0329170846 0.0490732397 0.0686018224 0.0806314699

Classification

X <- classificationData

# Random split: 90 curves for training, the remaining ones for testing.
# sample() is not seeded here, so the split differs between runs.
ids_training <- sample(colnames(X), 90)
ids_testing <- setdiff(colnames(X), ids_training)

trainingSample <- X[, ids_training]
testSample <- X[, ids_testing]
colnames(testSample) <- NULL  # blind the test curves by dropping their labels

classNames <- c("G1", "G2")

classification_results <- localizationClassifier(
  trainingSample,
  testSample,
  classNames,
  k_opt = 3
)

# Put the held-out column names next to the predicted classes
checking <- data.frame(
  real_classs = ids_testing,
  predicted_class = classification_results$test$predicted_class
)

checking
##    real_classs predicted_class
## 1        12_G1              G1
## 2        14_G1              G1
## 3        21_G1              G1
## 4        44_G1              G1
## 5        54_G2              G2
## 6        56_G2              G2
## 7        72_G2              G2
## 8        81_G2              G2
## 9        94_G2              G2
## 10      100_G2              G2

Outlier detection

X <- outlierData

# Run the outlier detector with the local and whisker rule settings
outliers <- outlierLocalizationDistance(
  X,
  localrule = 0.95,
  whiskerrule = 1.5
)

# Labels of the curves reported in the "outliers_ld_rule" element
outliers[["outliers_ld_rule"]]
## [1] "1_magnitude" "1_shape"     "2_magnitude" "2_shape"

Plot the results:

# Long format of the current data set, placed on an equispaced grid over [0, 1]
t_grid <- seq(from = 0, to = 1, length.out = nrow(X))
df_functions <- data.frame(
  ids = rep(colnames(X), each = nrow(X)),
  y = c(X),
  x = rep(t_grid, ncol(X))
)

# Curves reported as outliers; only these get a colour and a legend entry
df_outliers <- df_functions[df_functions$ids %in% outliers$outliers_ld_rule, ]

# All curves in black first, then the detected outliers drawn on top in colour
functions_plot <- ggplot(df_functions) +
  geom_line(aes(x = x, y = y, group = ids), color = "black") +
  xlab("t") +
  theme(legend.position = "bottom") +
  geom_line(data = df_outliers,
            aes(x = x, y = y, group = ids, color = ids),
            size = 1) +
  guides(color = guide_legend(title = "Detected outliers"))

functions_plot

References

Elías, Antonio, Jiménez, Raúl and Yukich, Joe (2020). Localization processes for functional data analysis. <https://arxiv.org/abs/2007.16059>.

About

Localization processes for functional data analysis. Software companion for the paper “Localization processes for functional data analysis” by Elías, A., Jiménez, R., and Yukich, J. (2020)

Topics

Resources

License

Stars

Watchers

Forks

Languages