-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
53 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
import numpy as np | ||
|
||
from mrjob.job import MRJob | ||
from itertools import combinations, permutations | ||
|
||
from scipy.stats.stats import pearsonr | ||
|
||
|
||
class RestaurantSimilarities(MRJob):
    """Two-step map-reduce job computing pairwise restaurant similarities.

    Each input line has the form
    ``user_id,business_id,stars,business_avg,user_avg``.

    Step 1 groups the ratings by user. Step 2 pairs up the businesses each
    user rated, groups the paired ratings by business pair, and emits the
    Pearson correlation of the user-centred ratings together with the number
    of users the pair has in common.
    """

    def steps(self):
        """Return the map-reduce steps of the job, in execution order."""
        # NOTE(review): self.mr() is the step constructor this file was
        # written against; newer mrjob releases appear to use
        # mrjob.step.MRStep instead -- confirm against the installed version.
        return [
            self.mr(mapper=self.line_mapper,
                    reducer=self.users_items_collector),
            self.mr(mapper=self.pair_items_mapper,
                    reducer=self.calc_sim_collector),
        ]

    def line_mapper(self, _, line):
        """Split one CSV input line and key the rating by its user.

        Yields ``user_id, (business_id, stars, business_avg, user_avg)``;
        every field is still a string at this point.
        """
        user_id, business_id, stars, business_avg, user_avg = line.split(',')
        yield user_id, (business_id, stars, business_avg, user_avg)

    def users_items_collector(self, user_id, values):
        """Collect every rating made by one user into a single list.

        ``values`` iterates over the ``(business_id, stars, business_avg,
        user_avg)`` records emitted by ``line_mapper``. Yields
        ``user_id, [(business_id, (stars, user_avg)), ...]`` with the numeric
        fields converted to floats. ``business_avg`` is not used by the
        later steps, so it is dropped here.
        """
        ratings = [
            (business_id, (float(stars), float(user_avg)))
            for business_id, stars, _business_avg, user_avg in values
        ]
        yield user_id, ratings

    def pair_items_mapper(self, user_id, values):
        """Emit every pair of businesses rated by the same user.

        The ``user_id`` key is ignored. ``values`` is the list produced by
        ``users_items_collector``. Yields
        ``(business_1, business_2), (rating_1, rating_2)`` where each rating
        is that user's ``(stars, user_avg)`` for the matching business.
        """
        # Sort by business id so a pair always gets the same key regardless
        # of the order in which a user's ratings arrived; otherwise (a, b)
        # and (b, a) would land in different reducer groups.
        ordered = sorted(values, key=lambda rating: rating[0])
        for (biz1, rating1), (biz2, rating2) in combinations(ordered, 2):
            yield (biz1, biz2), (rating1, rating2)

    def calc_sim_collector(self, key, values):
        """Compute the similarity of one business pair.

        ``key`` is the ``(rest1, rest2)`` pair and ``values`` iterates over
        the ``((stars1, user_avg1), (stars2, user_avg2))`` records emitted by
        ``pair_items_mapper``, one per user who rated both businesses.

        Yields ``(rest1, rest2), (rho, n_common)`` where ``rho`` is the
        Pearson correlation of the user-centred ratings and ``n_common`` is
        the number of common reviewers. ``rho`` is 0.0 when the correlation
        is undefined (fewer than two common reviewers, or constant ratings).
        """
        (rest1, rest2), common_ratings = key, values

        # Centre each rating on the reviewer's own average so that generous
        # and harsh reviewers are compared on the same footing.
        diffs1 = []
        diffs2 = []
        for (stars1, user_avg1), (stars2, user_avg2) in common_ratings:
            diffs1.append(float(stars1) - float(user_avg1))
            diffs2.append(float(stars2) - float(user_avg2))

        n_common = len(diffs1)
        rho = 0.0
        # pearsonr needs at least two points, and gives NaN when either
        # vector is constant; treat both cases as "no similarity signal".
        if n_common >= 2:
            rho = float(pearsonr(diffs1, diffs2)[0])
            if np.isnan(rho):
                rho = 0.0

        yield (rest1, rest2), (rho, n_common)
|
||
|
||
# Script entry point: this guard must stay in place so that the job is
# launched when the file is executed directly.
if __name__ == "__main__":
    RestaurantSimilarities.run()