-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path05-open.R
106 lines (89 loc) · 3.08 KB
/
05-open.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# Code for Task 5 Open-ended exploration
# Investigating the correlation of the starting integers of the sequence
# with the number of even and odd numbers in the sequence
# Create odd_counts and even_counts: the number of odd and of even values in
# each sequence
# Count how many elements of the numeric vector `x` are odd.
# Returns a single count; an NA in `x` makes the result NA because sum() is
# called without na.rm.
odd_numbers_in_seq <- function(x) {
  # A value is odd when dividing it by 2 leaves a non-zero remainder.
  is_odd <- (x %% 2) != 0
  sum(is_odd)
}
# Number of odd values in each stored sequence, one entry per row of
# collatz_df.
# vapply() walks collatz_df$seq by position, so the result always lines up
# with the rows of the data frame. The earlier loop wrote to odd_counts[i]
# using the *value* of `start`, which is only aligned when start is exactly
# 1..n, and it grew the vector on every iteration.
# numeric(1) keeps the result a double vector, as before.
odd_counts <- vapply(collatz_df$seq, odd_numbers_in_seq, numeric(1),
                     USE.NAMES = FALSE)
# Count how many elements of the numeric vector `x` are even.
# Returns a single count; an NA in `x` makes the result NA because sum() is
# called without na.rm.
even_numbers_in_seq <- function(x) {
  # A value is even when it is exactly divisible by 2.
  is_even <- (x %% 2) == 0
  sum(is_even)
}
# Number of even values in each stored sequence, one entry per row of
# collatz_df.
# vapply() walks collatz_df$seq by position, so the result always lines up
# with the rows of the data frame. The earlier loop wrote to even_counts[i]
# using the *value* of `start`, which is only aligned when start is exactly
# 1..n, and it grew the vector on every iteration.
# numeric(1) keeps the result a double vector, as before.
even_counts <- vapply(collatz_df$seq, even_numbers_in_seq, numeric(1),
                      USE.NAMES = FALSE)
# Compute the pairwise correlations between the starting integers,
# even_counts, odd_counts and the other remaining columns of collatz_df
# Add the two count vectors as columns, drop the seq and parity columns, and
# pass what is left to cor() to get the correlation matrix.
correlation_start <- cor(
  collatz_df %>%
    mutate(even_counts = even_counts, odd_counts = odd_counts) %>%
    select(-c(seq, parity))
)
correlation_start
# The correlation of start and even_counts is 0.22123--------------------------
# The correlation coefficient is low
# There is a weak positive relationship between the starting integers and the
# number of even numbers in the sequence
# Scatter plot of the even-value count against the starting integer, with a
# linear-model fit (no confidence band) drawn in red.
start_even_counts <- ggplot(
  collatz_df %>%
    mutate(even_counts = even_counts) %>%
    select(-c(seq, parity)),
  aes(x = start, y = even_counts)
) +
  geom_point(color = "skyblue") +
  geom_smooth(color = "red", method = "lm", fullrange = TRUE, se = FALSE) +
  labs(x = "Starting integers", y = "Even numbers in the sequence") +
  theme_minimal()
# The correlation of start and odd_counts is 0.17986--------------------------
# The correlation coefficient is low
# There is a weak positive relationship between the starting integers and the
# number of odd numbers in the sequence
# Scatter plot of the odd-value count against the starting integer, with a
# linear-model fit (no confidence band) drawn in red.
start_odd_counts <- ggplot(
  collatz_df %>%
    mutate(odd_counts = odd_counts) %>%
    select(-c(seq, parity)),
  aes(x = start, y = odd_counts)
) +
  geom_point(color = "skyblue") +
  geom_smooth(color = "red", method = "lm", fullrange = TRUE, se = FALSE) +
  labs(x = "Starting integers", y = "Odd numbers in the seq") +
  theme_minimal()
# The correlation between odd_counts and even_counts is 0.99879 -------------
# The correlation coefficient is very high
# There is a very strong positive relationship between the number of even and
# the number of odd numbers in the sequence
# Scatter plot of the odd-value count against the even-value count of each
# sequence, with a linear-model fit (no confidence band) drawn in red.
even_odd_counts <- ggplot(
  collatz_df %>%
    mutate(odd_counts = odd_counts, even_counts = even_counts) %>%
    select(-c(seq, parity)),
  aes(x = even_counts, y = odd_counts)
) +
  geom_point(color = "skyblue") +
  geom_smooth(color = "red", method = "lm", fullrange = TRUE, se = FALSE) +
  labs(x = "Even numbers", y = "Odd numbers") +
  theme_minimal()
library(ggpubr)
# Arrange the three scatter plots on a 2 x 2 grid, each labelled with its
# correlation coefficient. The labels use "r" because the values are the
# coefficients reported by cor(), not p-values.
correlation_panel <- ggarrange(start_even_counts, start_odd_counts,
                               even_odd_counts,
                               labels = c("r = 0.22123", "r = 0.17986",
                                          "r = 0.99879"),
                               hjust = -2,
                               font.label = list(size = 9),
                               ncol = 2, nrow = 2)
# Display the arranged figure when the script is run at top level.
correlation_panel
# Pass the arranged figure explicitly: ggsave() otherwise saves last_plot(),
# which is not guaranteed to be this combined panel.
ggsave("correlation_start_evenodd.png",
       plot = correlation_panel,
       width = 1980,
       height = 1980,
       units = "px",
       bg = "white",
       dpi = 300)