Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use withscores to optmize leaders call where rank can be inferred. #64

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 56 additions & 23 deletions leaderboard/competition_ranking_leaderboard.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
from .leaderboard import Leaderboard
from redis import StrictRedis, Redis, ConnectionPool
import math


class CompetitionRankingLeaderboard(Leaderboard):
Expand Down Expand Up @@ -78,22 +76,15 @@ def ranked_in_list_in(self, leaderboard_name, members, **options):
scores = []

pipeline = self.redis_connection.pipeline()

for member in members:
if self.order == self.ASC:
pipeline.zrank(leaderboard_name, member)
else:
pipeline.zrevrank(leaderboard_name, member)

pipeline.zscore(leaderboard_name, member)

responses = pipeline.execute()

for index, member in enumerate(members):
data = {}
data[self.MEMBER_KEY] = member

score = responses[index * 2 + 1]
score = responses[index]
if score is not None:
score = float(score)
else:
Expand All @@ -108,21 +99,11 @@ def ranked_in_list_in(self, leaderboard_name, members, **options):
for index, rank in enumerate(self.__rankings_for_members_having_scores_in(leaderboard_name, members, scores)):
ranks_for_members[index][self.RANK_KEY] = rank

if ('with_member_data' in options) and (True == options['with_member_data']):
for index, member_data in enumerate(self.members_data_for_in(leaderboard_name, members)):
ranks_for_members[index][self.MEMBER_DATA_KEY] = member_data
if options.get('with_member_data', False):
self._with_member_data(leaderboard_name, members, ranks_for_members)

if 'sort_by' in options:
if self.RANK_KEY == options['sort_by']:
ranks_for_members = sorted(
ranks_for_members,
key=lambda member: member[
self.RANK_KEY])
elif self.SCORE_KEY == options['sort_by']:
ranks_for_members = sorted(
ranks_for_members,
key=lambda member: member[
self.SCORE_KEY])
self._sort_by(ranks_for_members, options['sort_by'])

return ranks_for_members

Expand Down Expand Up @@ -150,3 +131,55 @@ def __rankings_for_members_having_scores_in(self, leaderboard_name, members, sco
responses = pipeline.execute()

return [self.__up_rank(response) for response in responses]

def _members_from_rank_range_internal(
self, leaderboard_name, start_rank, end_rank, members_only=False, **options):
'''
Format ordered members with score as efficiently as possible.
'''
response = self._range_method(
self.redis_connection,
leaderboard_name,
start_rank,
end_rank,
withscores=not members_only)

if members_only or not response:
return [{self.MEMBER_KEY: member} for member in response]

# Find out where the current rank started using the first two ranks
current_rank = None
current_score = None
current_rank_start = 0
for index, (member, score) in enumerate(response):
if current_score is None:
current_rank = self.rank_for_in(leaderboard_name, member)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

One of the reasons for the current implementation is that once you have a page of members, the pipeline to retrieve the score and rank gives you those values for the page of members at that time. By splitting up the score and retrieving the rank separately, it's possible the rank changes here by a change in score for a member (or members).

Copy link
Member Author

@GraylinKim GraylinKim Jun 10, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is true, but in a different way, with the current implementation. We first fetch the members then rank/score them all at the same time. If scores change in between those two queries we could be:

  • Including players that are no longer in the range.
  • Missing players that are now in the range
  • Showing members out of order (although I think it previously resorted in RAM to catch this edge case)

I do see that in this current implementation though it is equally possible that the first two scores and ranks change in the gap causing wrong/duplicate ranks downstream. I'll think a bit more on this point.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the correct path forward in either case is to use LUA scripts which can have transaction-like semantics. This would ensure that we have a consistent view of the data while we do more advanced calculations and would be even more performant.

Moving forward on that path would be a huge undertaking though, essentially moving chunks of our python to LUA scripts. Thoughts?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A change to using LUA scripts would essentially be a rewrite of most of the core functionality and while it's unclear if there'd be breaking changes, the changes required there feel like that'd warrant a major version bump. So I'm a big 👎 at the moment for these changes outside of LUA, but the smaller readability, variable name, and helper method changes seem worthy of a separate PR.

current_score = score
elif score != current_score:
next_rank = self.rank_for_in(leaderboard_name, member)
current_rank_start = current_rank - next_rank + index
break

members = []
ranks_for_members = []
for index, (member, score) in enumerate(response):
members.append(member)
if score != current_score:
current_rank += (index - current_rank_start)
current_rank_start = index
current_score = score

member_entry = {
self.MEMBER_KEY: member,
self.RANK_KEY: current_rank,
self.SCORE_KEY: score,
}
ranks_for_members.append(member_entry)

if options.get('with_member_data', False):
self._with_member_data(leaderboard_name, members, ranks_for_members)

if 'sort_by' in options:
self._sort_by(ranks_for_members, options['sort_by'])

return ranks_for_members
117 changes: 68 additions & 49 deletions leaderboard/leaderboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class Leaderboard(object):
RANK_KEY = 'rank'

@classmethod
def pool(self, host, port, db, pools={}, **options):
def pool(cls, host, port, db, pools={}, **options):
'''
Fetch a redis connection pool for the unique combination of host
and port. Will create a new one if there isn't one already.
Expand Down Expand Up @@ -74,7 +74,7 @@ def __init__(self, leaderboard_name, **options):
self.DEFAULT_GLOBAL_MEMBER_DATA)

self.order = self.options.pop('order', self.DESC).lower()
if not self.order in [self.ASC, self.DESC]:
if self.order not in [self.ASC, self.DESC]:
GraylinKim marked this conversation as resolved.
Show resolved Hide resolved
raise ValueError(
"%s is not one of [%s]" % (self.order, ",".join([self.ASC, self.DESC])))

Expand Down Expand Up @@ -800,14 +800,11 @@ def leaders_in(self, leaderboard_name, current_page, **options):

ending_offset = (starting_offset + page_size) - 1

raw_leader_data = self._range_method(
self.redis_connection,
return self._members_from_rank_range_internal(
leaderboard_name,
int(starting_offset),
int(ending_offset),
withscores=False)
return self._parse_raw_members(
leaderboard_name, raw_leader_data, **options)
**options)

def all_leaders(self, **options):
'''
Expand All @@ -826,10 +823,8 @@ def all_leaders_from(self, leaderboard_name, **options):
@param options [Hash] Options to be used when retrieving the leaders from the named leaderboard.
@return the named leaderboard.
'''
raw_leader_data = self._range_method(
self.redis_connection, leaderboard_name, 0, -1, withscores=False)
return self._parse_raw_members(
leaderboard_name, raw_leader_data, **options)
return self._members_from_rank_range_internal(
leaderboard_name, 0, -1, **options)

def members_from_score_range(
self, minimum_score, maximum_score, **options):
Expand Down Expand Up @@ -900,22 +895,8 @@ def members_from_rank_range_in(
if ending_rank > self.total_members_in(leaderboard_name):
ending_rank = self.total_members_in(leaderboard_name) - 1

raw_leader_data = []
if self.order == self.DESC:
raw_leader_data = self.redis_connection.zrevrange(
leaderboard_name,
starting_rank,
ending_rank,
withscores=False)
else:
raw_leader_data = self.redis_connection.zrange(
leaderboard_name,
starting_rank,
ending_rank,
withscores=False)

return self._parse_raw_members(
leaderboard_name, raw_leader_data, **options)
return self._members_from_rank_range_internal(
leaderboard_name, starting_rank, ending_rank, **options)

def top(self, number, **options):
'''
Expand Down Expand Up @@ -1012,14 +993,11 @@ def around_me_in(self, leaderboard_name, member, **options):

ending_offset = (starting_offset + page_size) - 1

raw_leader_data = self._range_method(
self.redis_connection,
return self._members_from_rank_range_internal(
leaderboard_name,
int(starting_offset),
int(ending_offset),
withscores=False)
return self._parse_raw_members(
leaderboard_name, raw_leader_data, **options)
**options)

def ranked_in_list(self, members, **options):
'''
Expand Down Expand Up @@ -1072,26 +1050,31 @@ def ranked_in_list_in(self, leaderboard_name, members, **options):

ranks_for_members.append(data)

if ('with_member_data' in options) and (True == options['with_member_data']):
for index, member_data in enumerate(self.members_data_for_in(leaderboard_name, members)):
try:
ranks_for_members[index][self.MEMBER_DATA_KEY] = member_data
except:
pass
if options.get('with_member_data', False):
self._with_member_data(leaderboard_name, members, ranks_for_members)

if 'sort_by' in options:
sort_value_if_none = float('-inf') if self.order == self.ASC else float('+inf')
if self.RANK_KEY == options['sort_by']:
ranks_for_members = sorted(
ranks_for_members,
key=lambda member: member.get(self.RANK_KEY) if member.get(self.RANK_KEY) is not None else sort_value_if_none
)
elif self.SCORE_KEY == options['sort_by']:
ranks_for_members = sorted(
ranks_for_members,
key=lambda member: member.get(self.SCORE_KEY) if member.get(self.SCORE_KEY) is not None else sort_value_if_none
)
self._sort_by(ranks_for_members, options['sort_by'])

return ranks_for_members

def _with_member_data(self, leaderboard_name, members, ranks_for_members):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do like splitting out the _with_member_data and _sort_by methods since those can be shared among implementations.

for index, member_data in enumerate(self.members_data_for_in(leaderboard_name, members)):
try:
ranks_for_members[index][self.MEMBER_DATA_KEY] = member_data
except:
pass

def _sort_by(self, ranks_for_members, sort_by):
sort_value_if_none = float('-inf') if self.order == self.ASC else float('+inf')
if self.RANK_KEY == sort_by:
ranks_for_members.sort(
key=lambda member: member.get(self.RANK_KEY) if member.get(self.RANK_KEY) is not None else sort_value_if_none
)
elif self.SCORE_KEY == sort_by:
ranks_for_members.sort(
key=lambda member: member.get(self.SCORE_KEY) if member.get(self.SCORE_KEY) is not None else sort_value_if_none
)
return ranks_for_members

def merge_leaderboards(self, destination, keys, aggregate='SUM'):
Expand Down Expand Up @@ -1153,3 +1136,39 @@ def _parse_raw_members(
return self.ranked_in_list_in(leaderboard_name, members, **options)
else:
return []

def _members_from_rank_range_internal(
self, leaderboard_name, start_rank, end_rank, members_only=False, **options):
'''
Format ordered members with score as efficiently as possible.
'''
response = self._range_method(
self.redis_connection,
leaderboard_name,
start_rank,
end_rank,
withscores=not members_only)

if members_only or not response:
return [{self.MEMBER_KEY: member} for member in response]

current_rank = start_rank
members = []
ranks_for_members = []
for index, (member, score) in enumerate(response):
members.append(member)
current_rank += 1
member_entry = {
self.MEMBER_KEY: member,
self.RANK_KEY: current_rank,
self.SCORE_KEY: score,
}
ranks_for_members.append(member_entry)

if options.get('with_member_data', False):
self._with_member_data(leaderboard_name, members, ranks_for_members)

if 'sort_by' in options:
self._sort_by(ranks_for_members, options['sort_by'])

return ranks_for_members
63 changes: 45 additions & 18 deletions leaderboard/tie_ranking_leaderboard.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from .leaderboard import Leaderboard
from .leaderboard import grouper
from redis import StrictRedis, Redis, ConnectionPool
import math
from redis import Redis


class TieRankingLeaderboard(Leaderboard):
Expand Down Expand Up @@ -271,24 +270,11 @@ def ranked_in_list_in(self, leaderboard_name, members, **options):

ranks_for_members.append(data)

if ('with_member_data' in options) and (True == options['with_member_data']):
for index, member_data in enumerate(self.members_data_for_in(leaderboard_name, members)):
try:
ranks_for_members[index][self.MEMBER_DATA_KEY] = member_data
except:
pass
if options.get('with_member_data', False):
self._with_member_data(leaderboard_name, members, ranks_for_members)

if 'sort_by' in options:
if self.RANK_KEY == options['sort_by']:
ranks_for_members = sorted(
ranks_for_members,
key=lambda member: member[
self.RANK_KEY])
elif self.SCORE_KEY == options['sort_by']:
ranks_for_members = sorted(
ranks_for_members,
key=lambda member: member[
self.SCORE_KEY])
self._sort_by(ranks_for_members, options['sort_by'])

return ranks_for_members

Expand All @@ -300,3 +286,44 @@ def _ties_leaderboard_key(self, leaderboard_name):
@return a key in the form of +leaderboard_name:ties_namespace+
'''
return '%s:%s' % (leaderboard_name, self.ties_namespace)

def _members_from_rank_range_internal(
self, leaderboard_name, start_rank, end_rank, members_only=False, **options):
'''
Format ordered members with score as efficiently as possible.
'''
response = self._range_method(
self.redis_connection,
leaderboard_name,
start_rank,
end_rank,
withscores=not members_only)

if members_only or not response:
return [{self.MEMBER_KEY: member} for member in response]

starting_member, starting_score = response[0]
current_rank = self.rank_for_in(leaderboard_name, starting_member)
current_score = starting_score
members = []
ranks_for_members = []
for index, (member, member_score) in enumerate(response):
if member_score != current_score:
current_rank += 1
current_score = member_score

members.append(member)
member_entry = {
self.MEMBER_KEY: member,
self.RANK_KEY: current_rank,
self.SCORE_KEY: member_score,
}
ranks_for_members.append(member_entry)

if options.get('with_member_data', False):
self._with_member_data(leaderboard_name, members, ranks_for_members)

if 'sort_by' in options:
self._sort_by(ranks_for_members, options['sort_by'])

return ranks_for_members