-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_analysis.R
63 lines (46 loc) · 2.21 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# GettingAndCleaningDataCourseraProject
library(data.table)
library(dplyr)
# 1. Merges the training and the test sets to create one data set.
subject_test <- read.table("test/subject_test.txt")
x_test <- read.table("test/X_test.txt")
y_test <- read.table("test/y_test.txt")
test = data.table(subject=subject_test,y_test,x_test)
remove("x_test", "y_test", "subject_test")
subject_train <- read.table("train/subject_train.txt")
x_train <- read.table("train/X_train.txt")
y_train <- read.table("train/y_train.txt")
train = data.table(subject=subject_train,y_train,x_train)
remove("x_train", "y_train", "subject_train")
allData <- rbind(test, train)
remove("train", "test")
# 2. Extracts only the measurements on the mean and standard deviation
# for each measurement.
features <- read.table("features.txt")
wantedFeatures <- grep("(mean|std)\\(", features$V2)
# ... save the column names for later
wantedFeatureNames <- as.character(features$V2[wantedFeatures])
# move the features two columns to the right to make room for the subject and activity.code:
wantedFeatures <- wantedFeatures + 2
# The following line is need to avoid duplicate names.
# Duplicate names cause dplyr select() to fail.
names(allData) <- make.names(names(allData), unique = TRUE)
# Select the columns with mean and std in the name
paredData <- select(allData, c(1, 2, wantedFeatures))
remove("allData")
# 3. Uses descriptive activity names to name the activities in the data set
activityLabels <- read.table("activity_labels.txt")
# name the activity code columns to avoid errors in merge:
names(activityLabels) <- c("activity.code", "activity.name")
names(paredData)[2] <- "activity.code"
paredData2 <- merge(activityLabels, paredData, by = "activity.code")
remove("paredData")
# 4. Appropriately labels the data set with descriptive variable names.
# ... rename the subject column:
names(paredData2)[3] <- "subject"
# ... rename the feature columns:
names(paredData2)[4:length(names(paredData2))] <- wantedFeatureNames
# 5. From the data set in step 4, creates a second, independent tidy data
# set with the average of each variable for each activity and each subject.
groups <- group_by(paredData2, activity.name, subject)
summary <- summarize_all(groups, mean)