-
Notifications
You must be signed in to change notification settings - Fork 0
/
Assignment1.r
85 lines (68 loc) · 2.21 KB
/
Assignment1.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# NOTE(review): hard-coded, machine-specific working directory. setwd() raises
# an error when this path does not exist, so the script only runs as-is on the
# original author's machine. Relative data directories passed to the functions
# in this file are resolved against this working directory.
setwd("C:/Users/ali/Documents/Aaron/R Projects/JHK R programming/Prog Assgnt 1")
#' Mean of one pollutant pooled across monitor files
#'
#' Reads the selected monitor CSV files from `directory`, pools every
#' non-missing value of the `pollutant` column, and returns the mean of the
#' pooled values.
#'
#' @param directory Path to the folder that holds the monitor CSV files. It is
#'   used exactly as given (relative paths resolve against the working
#'   directory).
#' @param pollutant Name of the column to average.
#' @param id Positions of the files to read within the alphabetically sorted
#'   CSV listing of `directory`.
#' @return The mean of all non-`NA` values of `pollutant` in the selected
#'   files.
pollutantmean <- function(directory, pollutant = "sulfate", id = 1:332) {
  if (!dir.exists(directory)) {
    stop("Directory not found: ", directory, call. = FALSE)
  }

  # full.names = TRUE builds correct paths whether or not `directory` ends in
  # a slash; the pattern keeps stray non-CSV files from shifting the index.
  csv_paths <- list.files(
    directory,
    pattern = "\\.csv$",
    full.names = TRUE,
    ignore.case = TRUE
  )

  if (anyNA(id) || any(id < 1 | id > length(csv_paths))) {
    stop(
      "`id` must lie between 1 and ", length(csv_paths),
      " (the number of CSV files in '", directory, "').",
      call. = FALSE
    )
  }

  # Collect the non-missing readings per file, then flatten once instead of
  # growing a vector inside a loop.
  readings <- lapply(csv_paths[id], function(path) {
    monitor <- read.csv(path, header = TRUE)
    if (!all(pollutant %in% names(monitor))) {
      stop(
        "Column '", pollutant, "' not found in ", path,
        call. = FALSE
      )
    }
    values <- monitor[[pollutant]]
    values[!is.na(values)]
  })

  mean(unlist(readings))
}
#' Count complete observations per monitor file
#'
#' For each selected monitor CSV file in `directory`, counts the rows that
#' have no missing value in any column.
#'
#' @param directory Path to the folder that holds the monitor CSV files. It is
#'   used exactly as given (relative paths resolve against the working
#'   directory).
#' @param id Positions of the files to read within the alphabetically sorted
#'   CSV listing of `directory`.
#' @return A data frame with one row per element of `id`, in the same order,
#'   and two columns: `id` (the requested position) and `nobs` (the number of
#'   complete rows in that file).
complete <- function(directory, id = 1:332) {
  if (!dir.exists(directory)) {
    stop("Directory not found: ", directory, call. = FALSE)
  }

  # full.names = TRUE builds correct paths whether or not `directory` ends in
  # a slash; the pattern keeps stray non-CSV files from shifting the index.
  csv_paths <- list.files(
    directory,
    pattern = "\\.csv$",
    full.names = TRUE,
    ignore.case = TRUE
  )

  if (anyNA(id) || any(id < 1 | id > length(csv_paths))) {
    stop(
      "`id` must lie between 1 and ", length(csv_paths),
      " (the number of CSV files in '", directory, "').",
      call. = FALSE
    )
  }

  # One typed count per file; the data frame is built in a single step rather
  # than by rbind()-ing a row on every iteration.
  nobs <- vapply(
    csv_paths[id],
    function(path) sum(complete.cases(read.csv(path, header = TRUE))),
    integer(1),
    USE.NAMES = FALSE
  )

  data.frame(id = id, nobs = nobs)
}
#' Correlation between columns 2 and 3 for well-observed monitors
#'
#' Reads every monitor CSV file in `directory`, drops rows containing any
#' missing value, and, for each file whose number of complete rows exceeds
#' `threshold`, computes the correlation between the file's second and third
#' columns.
#'
#' @param directory Path to the folder that holds the monitor CSV files. It is
#'   used exactly as given (relative paths resolve against the working
#'   directory).
#' @param threshold A file contributes a correlation only when its count of
#'   complete rows is strictly greater than this value.
#' @return A numeric vector with one correlation per qualifying file, in the
#'   alphabetical order of the files; `numeric(0)` when no file qualifies.
corr <- function(directory, threshold = 0) {
  if (!dir.exists(directory)) {
    stop("Directory not found: ", directory, call. = FALSE)
  }

  # full.names = TRUE builds correct paths whether or not `directory` ends in
  # a slash; the pattern keeps stray non-CSV files out of the scan.
  csv_paths <- list.files(
    directory,
    pattern = "\\.csv$",
    full.names = TRUE,
    ignore.case = TRUE
  )

  # Files that miss the threshold yield NULL, which unlist() drops, so the
  # result holds exactly one value per qualifying file.
  per_file <- lapply(csv_paths, function(path) {
    complete_rows <- na.omit(read.csv(path, header = TRUE))
    if (nrow(complete_rows) > threshold) {
      cor(complete_rows[, 2], complete_rows[, 3])
    } else {
      NULL
    }
  })

  # as.numeric() turns the "nothing qualified" case into numeric(0) instead
  # of NULL, so callers always receive a numeric vector.
  as.numeric(unlist(per_file))
}
##Test
#source("pollutantmean.R")
#pollutantmean("specdata", "sulfate", 1:10)
## [1] 4.064
#pollutantmean("specdata", "nitrate", 70:72)
## [1] 1.706
#pollutantmean("specdata", "nitrate", 23)
## [1] 1.281