-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcollaborative_filtering.R
82 lines (57 loc) · 3.02 KB
/
collaborative_filtering.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
library(recommenderlab)
library(Matrix)
# Prepare data for models ----
# NOTE(review): `read_csv()`, `%>%` and the dplyr verbs are used below, but only
# recommenderlab and Matrix are attached in this file; presumably the tidyverse
# is loaded by whatever sources this script. Confirm.
games <- read_csv("tables/games.csv")
games_id <- read_csv("tables/games_id.csv")

# NOTE(review): `users_id` and `game_ratings_df` are used below without being
# created here; presumably they come from these .Rdata files. Confirm.
load("data/data_v2.Rdata")
load("data/game_ratings_df.Rdata")

# Game re-editions share a title: keep only the first row found for each title.
# NOTE(review): that row is the highest-ranked edition only if the tables are
# already ordered by rank. Confirm.
games <- distinct(games, title, .keep_all = TRUE)
games_id <- distinct(games_id, title, .keep_all = TRUE)

# Attach the board game rank to every id and derive `game_matrix_id`, the
# position of the game once the games are ordered by that rank.
games_id <- games_id %>%
  left_join(select(games, title, board_game_rank)) %>%
  mutate(
    game_id = as.integer(game_id),
    game_matrix_id = rank(board_game_rank)
  ) %>%
  arrange(game_matrix_id)

# Add the user columns and the matrix id of the rated game to each rating.
game_ratings_df <- game_ratings_df %>%
  left_join(users_id) %>%
  left_join(select(games_id, game_id, game_matrix_id))

# We only consider first n games
n <- 1000

games_id_n <- games_id %>%
  filter(game_matrix_id <= n) %>%
  arrange(game_matrix_id)

game_ratings_df_n <- game_ratings_df %>%
  filter(game_matrix_id <= n)

# Users that have at least one rating among the first n games.
users_id_n <- users_id %>%
  semi_join(select(game_ratings_df_n, user_id))

# write_csv(games_id_n,"tables/games_id_n.csv")
# write_csv(game_ratings_df_n,"tables/game_ratings_df_n.csv")
# game_ids <- games_id_n %>% pull(game_matrix_id)
# IBCF train model----------
#___________________________
# Build the user x game rating matrix from the ratings of the first n games.
# Columns are indexed by `game_matrix_id`, not by the raw `game_id`: the column
# names assigned below come from `games_id_n$game_matrix_id`, and
# `recommend_ibcf()` builds its one-row matrix with the same index, so indexing
# by `game_id` would give a matrix whose width and columns do not line up.
# `dims` pins the width to the number of retained games, so games without any
# rating still get a column. The row count stays at the largest `user_id`.
game_ratings_sparse_matrix <- Matrix::sparseMatrix(
  i = game_ratings_df_n %>% pull(user_id),
  j = game_ratings_df_n %>% pull(game_matrix_id),
  x = game_ratings_df_n %>% pull(rating),
  dims = c(
    max(game_ratings_df_n %>% pull(user_id)),
    nrow(games_id_n)
  )
)
game_ratings_matrix <- new("realRatingMatrix", data = game_ratings_sparse_matrix)
# game_ratings_matrix[2]
# colnames(game_ratings_matrix) <- games_id_n %>% pull(title)
colnames(game_ratings_matrix) <- games_id_n %>% pull(game_matrix_id)

# Fit the item-based model on at most the first 100000 users. Capping at the
# actual number of rows avoids an out-of-bounds subscript on smaller data sets.
rec_fit <- Recommender(
  game_ratings_matrix[seq_len(min(100000, nrow(game_ratings_matrix))), ],
  method = "IBCF"
  # param=list(normalize="Z-score",
  # method="pearson",
  # nn=50,
  # minRating=3,
  # sample=F
)

# Persist the fitted model.
# NOTE(review): `dir_create()` is not defined or attached in this file;
# presumably it is fs::dir_create() loaded by the caller. Confirm.
dir_create("models")
save(rec_fit, file = "models/ibcf.RData")
# Recommend games for a single user with the fitted IBCF model.
#
# Arguments:
#   table_preferences  Data frame with one row per rated game and the columns
#                      `game_matrix_id` (column index into the rating matrix)
#                      and `game_rating`.
#   n                  Number of columns of the rating matrix. Defaults to the
#                      number of rows of the global `games_id_n`, whose
#                      `game_matrix_id` values are used as the column names.
#   top_n              Number of recommendations requested from `predict()`.
#
# Returns the first (and only) element of the prediction converted to a list.
#
# Depends on the globals `rec_fit` and `games_id_n`.
recommend_ibcf <- function(table_preferences,
                           n = nrow(games_id_n),
                           top_n = 10) {
  # The previous default `n = n` referred to itself and failed with
  # "promise already under evaluation" whenever `n` was not supplied.
  required_cols <- c("game_matrix_id", "game_rating")
  missing_cols <- setdiff(required_cols, names(table_preferences))
  if (length(missing_cols) > 0) {
    stop(
      "table_preferences is missing column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # One-row sparse matrix for this user. The row index needs one entry per
  # rating, hence nrow(); length() of a data frame counts its columns.
  user_sparse <- Matrix::sparseMatrix(
    i = rep(1, nrow(table_preferences)),
    j = table_preferences[["game_matrix_id"]],
    x = table_preferences[["game_rating"]],
    dims = c(1, n)
  )
  user_ratings <- new("realRatingMatrix", data = user_sparse)
  colnames(user_ratings) <- games_id_n[["game_matrix_id"]]

  predictions <- recommenderlab::predict(rec_fit, user_ratings, n = top_n)
  as(predictions, "list")[[1]]
}
# We study the distribution of the number of ratings per user
# game_ratings_df %>% count(user_id) %>% filter(n<200) %>% ggplot(aes(x=n)) +geom_histogram(binwidth = 1)