Skip to content

Commit

Permalink
added skeleton.py
Browse files Browse the repository at this point in the history
  • Loading branch information
rahuldave committed Oct 18, 2013
1 parent 41028ab commit 938f342
Showing 1 changed file with 53 additions and 0 deletions.
53 changes: 53 additions & 0 deletions skeleton.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import numpy as np

from mrjob.job import MRJob
from itertools import combinations, permutations

from scipy.stats.stats import pearsonr


class RestaurantSimilarities(MRJob):
    """Two-step MapReduce job that scores similarity between restaurant pairs.

    Each input line is a comma-separated record::

        user_id,business_id,stars,business_avg,user_avg

    Step 1 groups the ratings by user.  Step 2 emits every pair of
    restaurants rated by the same user and, per pair, computes the Pearson
    correlation of the user-centred ratings (``stars - user_avg``) over all
    users who rated both restaurants.

    The final output is ``(rest1, rest2), (rho, n_common)`` where
    ``n_common`` is the number of users the pair has in common.
    """

    def steps(self):
        """Return the map-reduce steps of the job, in execution order."""
        # NOTE(review): later mrjob releases replace ``self.mr`` with
        # ``mrjob.step.MRStep`` -- confirm against the installed version
        # before upgrading the dependency.
        thesteps = [
            self.mr(mapper=self.line_mapper,
                    reducer=self.users_items_collector),
            self.mr(mapper=self.pair_items_mapper,
                    reducer=self.calc_sim_collector),
        ]
        return thesteps

    def line_mapper(self, _, line):
        """Split one CSV input line and key the rating by its user.

        Yields ``user_id, (business_id, stars, business_avg, user_avg)``;
        all fields are still strings at this point.
        """
        user_id, business_id, stars, business_avg, user_avg = line.split(',')
        yield user_id, (business_id, stars, business_avg, user_avg)

    def users_items_collector(self, user_id, values):
        """Collect every rating made by one user into a single list.

        ``values`` iterates over the tuples yielded by ``line_mapper``.
        Only the fields needed downstream are kept, so the method yields
        ``user_id, [(business_id, (stars, user_avg)), ...]``.
        """
        ratings = [
            (business_id, (stars, user_avg))
            for business_id, stars, _business_avg, user_avg in values
        ]
        yield user_id, ratings

    def pair_items_mapper(self, user_id, values):
        """Emit every pair of restaurants rated by the same user.

        The ``user_id`` key is ignored.  ``values`` is the rating list built
        by ``users_items_collector``.  Yields
        ``(biz1, biz2), (rating1, rating2)`` where each rating is the
        ``(stars, user_avg)`` pair belonging to that restaurant.
        """
        # Sort by business id so a given pair always produces the same key,
        # whatever order the user's ratings arrived in; otherwise (a, b) and
        # (b, a) would be reduced separately.
        ordered = sorted(values, key=lambda rating: rating[0])
        for (biz1, rating1), (biz2, rating2) in combinations(ordered, 2):
            yield (biz1, biz2), (rating1, rating2)

    def calc_sim_collector(self, key, values):
        """Compute the Pearson similarity for one restaurant pair.

        ``key`` is the ``(rest1, rest2)`` pair and ``values`` iterates over
        the ``(rating1, rating2)`` tuples emitted by ``pair_items_mapper``,
        one per user who rated both restaurants.

        Yields ``(rest1, rest2), (rho, n_common)``.  ``rho`` is 0.0 when the
        correlation is undefined (fewer than two common users, or a constant
        rating series).
        """
        (rest1, rest2), common_ratings = key, values

        diffs1 = []
        diffs2 = []
        for (stars1, user_avg1), (stars2, user_avg2) in common_ratings:
            # Fields arrive as strings from the CSV split; centre each
            # rating on the user's own average before correlating.
            diffs1.append(float(stars1) - float(user_avg1))
            diffs2.append(float(stars2) - float(user_avg2))

        n_common = len(diffs1)
        rho = 0.0
        # Pearson's r needs at least two points; skip the call otherwise.
        if n_common >= 2:
            rho = float(pearsonr(diffs1, diffs2)[0])
            # A constant series has zero variance and gives NaN.
            if np.isnan(rho):
                rho = 0.0

        yield (rest1, rest2), (rho, n_common)


# Script entry point: mrjob needs this guard so the job is launched only when
# the file is executed directly, not when it is imported.
if __name__ == '__main__':
    RestaurantSimilarities.run()

0 comments on commit 938f342

Please sign in to comment.