-
Notifications
You must be signed in to change notification settings - Fork 1
/
run_analysis.R
75 lines (39 loc) · 2.57 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# Preparation: read the raw files and build labelled train/test data frames.
# All paths are relative to the current working directory.

# Subject identifiers, one per observation row. Converted to integer so that
# later grouping/sorting is numeric (2 before 10) instead of lexical.
trainSubject <- as.integer(readLines('train/subject_train.txt'))
testSubject <- as.integer(readLines('test/subject_test.txt'))

# Feature names, used as column names for the measurement tables. Each line
# starts with a numeric index; strip the index AND the whitespace after it so
# the names do not begin with a stray blank.
features <- readLines('features.txt')
features <- trimws(sub('^\\s*[0-9]*\\s*', '', features))

# Activity code -> activity name lookup. Line i of the file is assumed to
# describe activity code i, so the vector can be indexed by the code directly.
# Only the leading index and its separator are removed from each line.
actLabel <- readLines('activity_labels.txt')
actLabel <- trimws(sub('^\\s*[0-9]*\\s*', '', actLabel))

# Activity codes for every observation row.
# NOTE(review): the distributed data set spells these files with a lowercase
# 'y' (y_train.txt / y_test.txt); the previous 'Y_...' spelling only resolved
# on case-insensitive file systems. Confirm against your local copy.
trainLabel <- as.integer(readLines('train/y_train.txt'))
testLabel <- as.integer(readLines('test/y_test.txt'))

# Map activity codes to descriptive names with plain vector indexing.
trainLabel <- actLabel[trainLabel]
testLabel <- actLabel[testLabel]

# Measurement tables. read.table() passes col.names through make.names()
# (check.names = TRUE), so characters such as '(', ')' and '-' become '.' and
# duplicated names are made unique.
trainData <- read.table('train/X_train.txt', col.names = features,
                        na.strings = 'NA')
testData <- read.table('test/X_test.txt', col.names = features,
                       na.strings = 'NA')

# Fail early if anything is misaligned: a length mismatch could otherwise be
# silently recycled into the data frame, and an unknown activity code would
# turn into NA without notice.
stopifnot(
  length(trainSubject) == nrow(trainData),
  length(trainLabel) == nrow(trainData),
  length(testSubject) == nrow(testData),
  length(testLabel) == nrow(testData),
  !anyNA(trainLabel),
  !anyNA(testLabel)
)

# Attach subject ID and activity name to every observation.
trainData[['subjectID']] <- trainSubject
testData[['subjectID']] <- testSubject
trainData[['activity']] <- trainLabel
testData[['activity']] <- testLabel
########################################################################
# statistics session
# library() stops with an error when dplyr is missing; require() would only
# return FALSE and let the script fail later with a less helpful message.
library(dplyr)

# 1. Merge the training and the test sets to create one data set.
fullData <- rbind(trainData, testData)

# 2. Extract only the measurements on the mean and standard deviation for each
#    measurement, plus the two identifier columns.
# NOTE(review): the pattern 'mean' also matches any column whose name contains
# 'meanFreq'; confirm whether those columns are wanted in the output.
keepCols <- grep('mean|std|subjectID|activity', names(fullData))
meanAndStandardDeviation <- fullData[, keepCols, drop = FALSE]

# 3, 4. Descriptive activity names and variable names are expected to have been
#       applied already when the train/test tables were prepared.

# 5. From the data set in step 4, create a second, independent tidy data set
#    with the average of each variable for each activity and each subject.
#    across(everything(), mean) averages every non-grouping column and replaces
#    the deprecated summarize_each(funs(mean)); .groups = 'drop' returns an
#    ungrouped table so later code is not affected by leftover grouping.
averageByGroup <- meanAndStandardDeviation %>%
  group_by(subjectID, activity) %>%
  summarise(across(everything(), mean), .groups = 'drop')

# Export the summary as a space-delimited text file (write.table's default
# separator), with a header row and without row names.
write.table(averageByGroup, file = 'averageByGroup.txt', row.names = FALSE)