-
Notifications
You must be signed in to change notification settings - Fork 43
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Use withscores to optmize leaders call where rank can be inferred. #64
Open
GraylinKim
wants to merge
4
commits into
main
Choose a base branch
from
more-efficient-leaders-impl
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from 2 commits
Commits
Show all changes
4 commits
Select commit
Hold shift + click to select a range
b353bad
Use withscores to optimize leaders call where rank can be inferred.
GraylinKim d56dab4
Leaderboard specific implementations.
GraylinKim 6354405
Revert changes pulled out into separate patches.
GraylinKim 452a214
Improve variable naming for clarity.
GraylinKim File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,7 +32,7 @@ class Leaderboard(object): | |
RANK_KEY = 'rank' | ||
|
||
@classmethod | ||
def pool(self, host, port, db, pools={}, **options): | ||
def pool(cls, host, port, db, pools={}, **options): | ||
''' | ||
Fetch a redis connection pool for the unique combination of host | ||
and port. Will create a new one if there isn't one already. | ||
|
@@ -74,7 +74,7 @@ def __init__(self, leaderboard_name, **options): | |
self.DEFAULT_GLOBAL_MEMBER_DATA) | ||
|
||
self.order = self.options.pop('order', self.DESC).lower() | ||
if not self.order in [self.ASC, self.DESC]: | ||
if self.order not in [self.ASC, self.DESC]: | ||
GraylinKim marked this conversation as resolved.
Show resolved
Hide resolved
|
||
raise ValueError( | ||
"%s is not one of [%s]" % (self.order, ",".join([self.ASC, self.DESC]))) | ||
|
||
|
@@ -800,14 +800,11 @@ def leaders_in(self, leaderboard_name, current_page, **options): | |
|
||
ending_offset = (starting_offset + page_size) - 1 | ||
|
||
raw_leader_data = self._range_method( | ||
self.redis_connection, | ||
return self._members_from_rank_range_internal( | ||
leaderboard_name, | ||
int(starting_offset), | ||
int(ending_offset), | ||
withscores=False) | ||
return self._parse_raw_members( | ||
leaderboard_name, raw_leader_data, **options) | ||
**options) | ||
|
||
def all_leaders(self, **options): | ||
''' | ||
|
@@ -826,10 +823,8 @@ def all_leaders_from(self, leaderboard_name, **options): | |
@param options [Hash] Options to be used when retrieving the leaders from the named leaderboard. | ||
@return the named leaderboard. | ||
''' | ||
raw_leader_data = self._range_method( | ||
self.redis_connection, leaderboard_name, 0, -1, withscores=False) | ||
return self._parse_raw_members( | ||
leaderboard_name, raw_leader_data, **options) | ||
return self._members_from_rank_range_internal( | ||
leaderboard_name, 0, -1, **options) | ||
|
||
def members_from_score_range( | ||
self, minimum_score, maximum_score, **options): | ||
|
@@ -900,22 +895,8 @@ def members_from_rank_range_in( | |
if ending_rank > self.total_members_in(leaderboard_name): | ||
ending_rank = self.total_members_in(leaderboard_name) - 1 | ||
|
||
raw_leader_data = [] | ||
if self.order == self.DESC: | ||
raw_leader_data = self.redis_connection.zrevrange( | ||
leaderboard_name, | ||
starting_rank, | ||
ending_rank, | ||
withscores=False) | ||
else: | ||
raw_leader_data = self.redis_connection.zrange( | ||
leaderboard_name, | ||
starting_rank, | ||
ending_rank, | ||
withscores=False) | ||
|
||
return self._parse_raw_members( | ||
leaderboard_name, raw_leader_data, **options) | ||
return self._members_from_rank_range_internal( | ||
leaderboard_name, starting_rank, ending_rank, **options) | ||
|
||
def top(self, number, **options): | ||
''' | ||
|
@@ -1012,14 +993,11 @@ def around_me_in(self, leaderboard_name, member, **options): | |
|
||
ending_offset = (starting_offset + page_size) - 1 | ||
|
||
raw_leader_data = self._range_method( | ||
self.redis_connection, | ||
return self._members_from_rank_range_internal( | ||
leaderboard_name, | ||
int(starting_offset), | ||
int(ending_offset), | ||
withscores=False) | ||
return self._parse_raw_members( | ||
leaderboard_name, raw_leader_data, **options) | ||
**options) | ||
|
||
def ranked_in_list(self, members, **options): | ||
''' | ||
|
@@ -1072,26 +1050,31 @@ def ranked_in_list_in(self, leaderboard_name, members, **options): | |
|
||
ranks_for_members.append(data) | ||
|
||
if ('with_member_data' in options) and (True == options['with_member_data']): | ||
for index, member_data in enumerate(self.members_data_for_in(leaderboard_name, members)): | ||
try: | ||
ranks_for_members[index][self.MEMBER_DATA_KEY] = member_data | ||
except: | ||
pass | ||
if options.get('with_member_data', False): | ||
self._with_member_data(leaderboard_name, members, ranks_for_members) | ||
|
||
if 'sort_by' in options: | ||
sort_value_if_none = float('-inf') if self.order == self.ASC else float('+inf') | ||
if self.RANK_KEY == options['sort_by']: | ||
ranks_for_members = sorted( | ||
ranks_for_members, | ||
key=lambda member: member.get(self.RANK_KEY) if member.get(self.RANK_KEY) is not None else sort_value_if_none | ||
) | ||
elif self.SCORE_KEY == options['sort_by']: | ||
ranks_for_members = sorted( | ||
ranks_for_members, | ||
key=lambda member: member.get(self.SCORE_KEY) if member.get(self.SCORE_KEY) is not None else sort_value_if_none | ||
) | ||
self._sort_by(ranks_for_members, options['sort_by']) | ||
|
||
return ranks_for_members | ||
|
||
def _with_member_data(self, leaderboard_name, members, ranks_for_members): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I do like splitting out the |
||
for index, member_data in enumerate(self.members_data_for_in(leaderboard_name, members)): | ||
try: | ||
ranks_for_members[index][self.MEMBER_DATA_KEY] = member_data | ||
except: | ||
pass | ||
|
||
def _sort_by(self, ranks_for_members, sort_by): | ||
sort_value_if_none = float('-inf') if self.order == self.ASC else float('+inf') | ||
if self.RANK_KEY == sort_by: | ||
ranks_for_members.sort( | ||
key=lambda member: member.get(self.RANK_KEY) if member.get(self.RANK_KEY) is not None else sort_value_if_none | ||
) | ||
elif self.SCORE_KEY == sort_by: | ||
ranks_for_members.sort( | ||
key=lambda member: member.get(self.SCORE_KEY) if member.get(self.SCORE_KEY) is not None else sort_value_if_none | ||
) | ||
return ranks_for_members | ||
|
||
def merge_leaderboards(self, destination, keys, aggregate='SUM'): | ||
|
@@ -1153,3 +1136,39 @@ def _parse_raw_members( | |
return self.ranked_in_list_in(leaderboard_name, members, **options) | ||
else: | ||
return [] | ||
|
||
def _members_from_rank_range_internal( | ||
self, leaderboard_name, start_rank, end_rank, members_only=False, **options): | ||
''' | ||
Format ordered members with score as efficiently as possible. | ||
''' | ||
response = self._range_method( | ||
self.redis_connection, | ||
leaderboard_name, | ||
start_rank, | ||
end_rank, | ||
withscores=not members_only) | ||
|
||
if members_only or not response: | ||
return [{self.MEMBER_KEY: member} for member in response] | ||
|
||
current_rank = start_rank | ||
members = [] | ||
ranks_for_members = [] | ||
for index, (member, score) in enumerate(response): | ||
members.append(member) | ||
current_rank += 1 | ||
member_entry = { | ||
self.MEMBER_KEY: member, | ||
self.RANK_KEY: current_rank, | ||
self.SCORE_KEY: score, | ||
} | ||
ranks_for_members.append(member_entry) | ||
|
||
if options.get('with_member_data', False): | ||
self._with_member_data(leaderboard_name, members, ranks_for_members) | ||
|
||
if 'sort_by' in options: | ||
self._sort_by(ranks_for_members, options['sort_by']) | ||
|
||
return ranks_for_members |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
One of the reasons for the current implementation is that once you have a page of members, the pipeline to retrieve the score and rank gives you those values for the page of members at that time. By splitting up the score and retrieving the rank separately, it's possible the rank changes here by a change in score for a member (or members).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is true, but in a different way, with the current implementation. We first fetch the members then rank/score them all at the same time. If scores change in between those two queries we could be:
I do see that in this current implementation though it is equally possible that the first two scores and ranks change in the gap causing wrong/duplicate ranks downstream. I'll think a bit more on this point.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the correct path forward in either case is to use LUA scripts which can have transaction-like semantics. This would ensure that we have a consistent view of the data while we do more advanced calculations and would be even more performant.
Moving forward on that path would be a huge undertaking though, essentially moving chunks of our python to LUA scripts. Thoughts?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A change to using LUA scripts would essentially be a rewrite of most of the core functionality and while it's unclear if there'd be breaking changes, the changes required there feel like that'd warrant a major version bump. So I'm a big 👎 at the moment for these changes outside of LUA, but the smaller readability, variable name, and helper method changes seem worthy of a separate PR.