# test.py
# Example script: movie recommendations from a truncated-SVD model of a user-item ratings matrix.
import pandas as pd
import numpy as np
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity
class MovieRecommender:
    """Recommend movie titles to a user from a truncated-SVD model of ratings.

    The ratings CSV at ``matrix_path`` must provide ``userId``, ``movieId`` and
    ``rating`` columns; the metadata CSV at ``metadata_path`` must provide
    ``id`` and ``title`` columns.

    NOTE(review): titles are looked up by matching the ratings ``movieId``
    against the metadata ``id``. This assumes both files share one id space --
    confirm for the datasets in use.
    """

    def __init__(self, matrix_path, metadata_path):
        """Store the CSV paths and create the (unfitted) SVD model.

        Args:
            matrix_path: Path to the ratings CSV.
            metadata_path: Path to the movie metadata CSV.
        """
        self.matrix_path = matrix_path
        self.metadata_path = metadata_path
        self.svd = TruncatedSVD(n_components=100, random_state=42)
        # Kept so existing readers of this attribute keep working; the
        # ranking in recommend_movies() does not use it.
        self.user_similarity_threshold = 0.3

    def load_data(self):
        """Read the ratings CSV into ``self.matrix``.

        Returns:
            True on success, False (after printing a message) when the
            ratings file does not exist.
        """
        try:
            self.matrix = pd.read_csv(self.matrix_path)
        except FileNotFoundError:
            print("No recommendations available if matrix file doesn't exist")
            return False
        return True

    def preprocess_data(self, user_id=None):
        """Load both CSV files and build the user x movie rating matrix.

        Args:
            user_id: Optional user to validate. When given and absent from
                the ratings, preprocessing stops and False is returned.
                When None, no per-user validation is performed.

        Returns:
            True when ``self.pivot_matrix`` and ``self.movies_metadata`` are
            ready, False otherwise.
        """
        if not self.load_data():
            return False

        # The user is passed in explicitly; relying on a module-level
        # ``user_id`` breaks as soon as the class is imported elsewhere.
        if user_id is not None and not (self.matrix['userId'] == user_id).any():
            print("No recommendations available for new user")
            return False

        # Rows are users, columns are movies; a missing rating becomes 0,
        # which the rest of the class treats as "not rated".
        self.pivot_matrix = self.matrix.pivot(
            index='userId', columns='movieId', values='rating'
        ).fillna(0)

        try:
            self.movies_metadata = pd.read_csv(self.metadata_path, low_memory=False)
        except FileNotFoundError:
            # Same failure style as a missing ratings file.
            print("No recommendations available if metadata file doesn't exist")
            return False

        # Non-numeric ids become NaN so they can never match a movieId.
        self.movies_metadata['id'] = pd.to_numeric(
            self.movies_metadata['id'], errors='coerce'
        )
        return True

    def fit_svd(self, user_id=None):
        """Preprocess the data and fit the SVD model on the rating matrix.

        Args:
            user_id: Optional user forwarded to :meth:`preprocess_data` for
                validation.

        Returns:
            True when the model was fitted, False when preprocessing failed.
        """
        if not self.preprocess_data(user_id):
            return False
        self.svd.fit(self.pivot_matrix)
        return True

    def recommend_movies(self, user_id, n=10):
        """Return up to ``n`` titles the user has not rated, best first.

        Predicted ratings are the user's row projected into the latent space
        and back into movie space, so there is one score per movie column.

        Args:
            user_id: Value from the ratings ``userId`` column.
            n: Maximum number of titles to return; values <= 0 yield [].

        Returns:
            A list of title strings, possibly empty.
        """
        if n <= 0:
            # ``[-0:]`` would select everything, so handle this up front.
            return []
        if not self.fit_svd(user_id):
            return []

        user_index = self.pivot_matrix.index.get_loc(user_id)
        user_ratings = self.pivot_matrix.iloc[[user_index]]

        # Low-rank reconstruction of this user's ratings over all movies.
        latent_user = self.svd.transform(user_ratings)
        predicted_ratings = self.svd.inverse_transform(latent_user)[0]

        rated_mask = user_ratings.to_numpy()[0] != 0
        ranked_positions = self._rank_unrated(predicted_ratings, rated_mask)

        titles = []
        if len(ranked_positions) > 0:
            # Column positions -> movieId labels before touching metadata.
            ranked_movie_ids = self.pivot_matrix.columns[ranked_positions]
            titles = self._titles_for(ranked_movie_ids, n)

        if not titles:
            print("No recommendations available")
        return titles

    @staticmethod
    def _rank_unrated(predicted_ratings, rated_mask):
        """Return positions of unrated movies sorted by descending score."""
        scores = np.asarray(predicted_ratings, dtype=float)
        candidates = np.flatnonzero(~np.asarray(rated_mask, dtype=bool))
        # Stable sort keeps column order among equal scores.
        order = np.argsort(-scores[candidates], kind="stable")
        return candidates[order]

    def _titles_for(self, movie_ids, n):
        """Return the first ``n`` titles found for ``movie_ids``, in order.

        Ids without a usable metadata row (missing id or title) are skipped.
        """
        usable = self.movies_metadata.dropna(subset=['id', 'title'])
        titles_by_id = {}
        for movie_id, title in zip(usable['id'], usable['title']):
            # First occurrence wins when the metadata repeats an id.
            titles_by_id.setdefault(movie_id, title)

        titles = []
        for movie_id in movie_ids:
            title = titles_by_id.get(movie_id)
            if title is None:
                continue
            titles.append(title)
            if len(titles) == n:
                break
        return titles
# Example usage. Guarded so that importing this module does not read the
# datasets or fit the model; the names stay at module level on purpose.
if __name__ == "__main__":
    matrix_path = "datasets/ratings_small.csv"
    metadata_path = "datasets/movie_metadata.csv"
    user_id = 25
    recommender = MovieRecommender(matrix_path, metadata_path)
    recommended_movies = recommender.recommend_movies(user_id)
    print(recommended_movies)