Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
123 changes: 14 additions & 109 deletions app/api/domains/osu.py
Original file line number Diff line number Diff line change
Expand Up @@ -842,125 +842,30 @@ async def osuSubmitModularSelector(
stats = score.player.stats[score.mode]
prev_stats = copy.copy(stats)

# stuff update for all submitted scores
stats.playtime += score.time_elapsed // 1000
stats.plays += 1
stats.tscore += score.score
stats.total_hits += score.n300 + score.n100 + score.n50

if score.mode.as_vanilla in (1, 3):
# taiko uses geki & katu for hitting big notes with 2 keys
# mania uses geki & katu for rainbow 300 & 200
stats.total_hits += score.ngeki + score.nkatu

stats_updates: dict[str, Any] = {
"plays": stats.plays,
"playtime": stats.playtime,
"tscore": stats.tscore,
"total_hits": stats.total_hits,
}
# recalculate stats
stats = await score.player.recalc_stats_sql(score.mode)

if score.passed and score.bmap.has_leaderboard:
# player passed & map is ranked, approved, or loved.

if score.max_combo > stats.max_combo:
stats.max_combo = score.max_combo
stats_updates["max_combo"] = stats.max_combo

if score.bmap.awards_ranked_pp and score.status == SubmissionStatus.BEST:
# map is ranked or approved, and it's our (new)
# best score on the map. update the player's
# ranked score, grades, pp, acc and global rank.

additional_rscore = score.score
if score.prev_best:
# we previously had a score, so remove
# it's score from our ranked score.
additional_rscore -= score.prev_best.score

if score.grade != score.prev_best.grade:
if score.grade >= Grade.A:
stats.grades[score.grade] += 1
grade_col = format(score.grade, "stats_column")
stats_updates[grade_col] = stats.grades[score.grade]

if score.prev_best.grade >= Grade.A:
stats.grades[score.prev_best.grade] -= 1
grade_col = format(score.prev_best.grade, "stats_column")
stats_updates[grade_col] = stats.grades[score.prev_best.grade]
else:
# this is our first submitted score on the map
if score.grade >= Grade.A:
stats.grades[score.grade] += 1
grade_col = format(score.grade, "stats_column")
stats_updates[grade_col] = stats.grades[score.grade]

stats.rscore += additional_rscore
stats_updates["rscore"] = stats.rscore

# fetch scores sorted by pp for total acc/pp calc
# NOTE: we select all plays (and not just top100)
# because bonus pp counts the total amount of ranked
# scores. I'm aware this scales horribly, and it'll
# likely be split into two queries in the future.
best_scores = await app.state.services.database.fetch_all(
"SELECT s.pp, s.acc FROM scores s "
"INNER JOIN maps m ON s.map_md5 = m.md5 "
"WHERE s.userid = :user_id AND s.mode = :mode "
"AND s.status = 2 AND m.status IN (2, 3) " # ranked, approved
"ORDER BY s.pp DESC",
{"user_id": score.player.id, "mode": score.mode},
)

# calculate new total weighted accuracy
weighted_acc = sum(
row["acc"] * 0.95**i for i, row in enumerate(best_scores)
)
bonus_acc = 100.0 / (20 * (1 - 0.95 ** len(best_scores)))
stats.acc = (weighted_acc * bonus_acc) / 100
stats_updates["acc"] = stats.acc

# calculate new total weighted pp
weighted_pp = sum(row["pp"] * 0.95**i for i, row in enumerate(best_scores))
bonus_pp = 416.6667 * (1 - 0.9994 ** len(best_scores))
stats.pp = round(weighted_pp + bonus_pp)
stats_updates["pp"] = stats.pp

# update global & country ranking
stats.rank = await score.player.update_rank(score.mode)

await stats_repo.partial_update(
score.player.id,
score.mode.value,
plays=stats_updates.get("plays", UNSET),
playtime=stats_updates.get("playtime", UNSET),
tscore=stats_updates.get("tscore", UNSET),
total_hits=stats_updates.get("total_hits", UNSET),
max_combo=stats_updates.get("max_combo", UNSET),
xh_count=stats_updates.get("xh_count", UNSET),
x_count=stats_updates.get("x_count", UNSET),
sh_count=stats_updates.get("sh_count", UNSET),
s_count=stats_updates.get("s_count", UNSET),
a_count=stats_updates.get("a_count", UNSET),
rscore=stats_updates.get("rscore", UNSET),
acc=stats_updates.get("acc", UNSET),
pp=stats_updates.get("pp", UNSET),
)
# update global & country ranking
stats.rank = await score.player.update_rank(score.mode)

if not score.player.restricted:
# enqueue new stats info to all other users
app.state.sessions.players.enqueue(app.packets.user_stats(score.player))

# update beatmap with new stats
score.bmap.plays += 1
plays_incr = 1
passes_incr = 0
if score.passed:
score.bmap.passes += 1
passes_incr = 1

# update beatmap with new stats
score.bmap.plays += plays_incr
score.bmap.passes += passes_incr

await app.state.services.database.execute(
"UPDATE maps SET plays = :plays, passes = :passes WHERE md5 = :map_md5",
"UPDATE maps SET plays = plays + :plays_incr, passes = passes + :passes_incr WHERE md5 = :map_md5",
{
"plays": score.bmap.plays,
"passes": score.bmap.passes,
"plays_incr": plays_incr,
"passes_incr": passes_incr,
"map_md5": score.bmap.md5,
},
)
Expand Down
29 changes: 29 additions & 0 deletions app/objects/player.py
Original file line number Diff line number Diff line change
Expand Up @@ -948,6 +948,35 @@ async def update_rank(self, mode: GameMode) -> int:

return await self.get_global_rank(mode)

async def recalc_stats_sql(self, mode: GameMode) -> ModeData:
    """Recalculate `self`'s stats for `mode` from sql.

    Triggers the sql-side recalculation, then refreshes the cached
    in-memory mode data from the stored stats row. If no row exists,
    the cached values are returned unchanged.
    """
    await stats_repo.sql_recalculate_mode(
        player_id=self.id,
        mode=mode.value,
    )
    mode_data = self.stats[mode]

    row = await stats_repo.fetch_one(player_id=self.id, mode=mode.value)
    if row is None:
        # no stored stats for this mode; keep the cached values as-is
        return mode_data

    for attr in (
        "tscore",
        "rscore",
        "pp",
        "acc",
        "plays",
        "playtime",
        "max_combo",
        "total_hits",
    ):
        setattr(mode_data, attr, row[attr])

    mode_data.grades = {
        grade: row[column]
        for grade, column in (
            (Grade.XH, "xh_count"),
            (Grade.X, "x_count"),
            (Grade.SH, "sh_count"),
            (Grade.S, "s_count"),
            (Grade.A, "a_count"),
        )
    }
    return mode_data

async def stats_from_sql_full(self) -> None:
"""Retrieve `self`'s stats (all modes) from sql."""
for row in await stats_repo.fetch_many(player_id=self.id):
Expand Down
97 changes: 97 additions & 0 deletions app/repositories/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,4 +234,101 @@ async def partial_update(
return cast(Stat | None, stat)


async def sql_recalculate_mode(player_id: int, mode: int) -> None:
Copy link
Member

@cmyui cmyui Apr 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While I agree this is objectively better from a performance and atomicity perspective, I feel that most developers will have more trouble maintaining this; it's relatively complex SQL, and procedural Python code is a more common skillset for the devs who work on b.py (and maintainability is the #1 goal for b.py).

Copy link
Contributor Author

@arily arily Apr 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can have a scaled down version (which only calculates pp and pp acc), or split query into smaller ones.

async def sql_recalculate_mode_pp(player_id: int, mode: int) -> None:
    """Recalculate a player's total pp & weighted accuracy for `mode` in sql.

    Ranks the player's best (status = 2) scores on ranked/approved maps
    by pp, applies the standard 0.95^(rank-1) weighting plus bonus pp,
    and writes the results directly into the player's `stats` row.
    """
    # NOTE: plain string (not an f-string); all dynamic values are bound
    # as query parameters.
    sql = """
    WITH
    ordered_pp AS (
        SELECT
            s1.id scoreId,
            s1.userid,
            s1.mode,
            s1.pp,
            s1.acc
        FROM scores s1
        INNER JOIN maps m ON s1.map_md5 = m.md5
        WHERE
            s1.status = 2
            AND m.status IN (2, 3)
            -- NOTE(review): excludes pp=0 scores from the bonus-pp count,
            -- unlike the previous python implementation -- confirm intended
            AND s1.pp > 0
            AND s1.mode = :mode
            AND s1.userid = :user_id
        ORDER BY
            s1.pp DESC,
            s1.acc DESC,
            s1.id DESC
    ),
    bests AS (
        SELECT
            scoreId,
            pp,
            acc,
            ROW_NUMBER() OVER (ORDER BY pp DESC) AS global_rank
        FROM ordered_pp
    ),
    user_calc AS (
        SELECT
            SUM(POW(0.95, global_rank - 1) * pp) AS weightedPP,
            (1 - POW(0.9994, COUNT(*))) * 416.6667 AS bnsPP,
            SUM(POW(0.95, global_rank - 1) * acc)
                / SUM(POW(0.95, global_rank - 1)) AS acc
        FROM bests
    ),
    calculated AS (
        SELECT
            *,
            weightedPP + bnsPP AS pp
        FROM user_calc
    )
    UPDATE stats s
    INNER JOIN calculated c ON 1
    SET
        -- COALESCE: with no qualifying scores the aggregates are NULL;
        -- fall back to 0 rather than writing NULL into the stats row
        s.pp = COALESCE(c.pp, 0),
        s.acc = COALESCE(c.acc, 0)
    WHERE s.id = :user_id AND s.mode = :mode
    """

    _ = await app.state.services.database.execute(
        sql, {"user_id": player_id, "mode": mode}
    )

async def sql_recalculate_mode_statistics(player_id: int, mode: int) -> None:
    """Recalculate a player's aggregate statistics for `mode` in sql.

    Aggregates every submitted score for the mode (play count, total &
    ranked score, total hits, playtime, max combo, grade counts) and
    writes the result into the player's `stats` row.
    """
    # NOTE: plain string (not an f-string); all dynamic values are bound
    # as query parameters.
    sql = """
    WITH
    concrete_stats AS (
        SELECT
            COUNT(*) AS plays,
            SUM(s2.score) AS total_score,
            -- ranked score counts only best (status = 2) scores on
            -- ranked/approved (status 2 or 3) maps
            SUM(IF(m2.status IN (2, 3) AND s2.status = 2, s2.score, 0)) AS ranked_score,
            SUM(s2.n300 + s2.n100 + s2.n50 + IF(s2.mode IN (1, 3, 5), s2.ngeki + s2.nkatu, 0)) AS total_hits,
            -- time_elapsed is stored in milliseconds; playtime is seconds
            SUM(s2.time_elapsed) / 1000 AS play_time,
            MAX(s2.max_combo) AS max_combo,
            SUM(s2.grade = "XH") AS xh_count,
            SUM(s2.grade = "X") AS x_count,
            SUM(s2.grade = "SH") AS sh_count,
            SUM(s2.grade = "S") AS s_count,
            SUM(s2.grade = "A") AS a_count
        FROM scores s2
        INNER JOIN maps m2 ON s2.map_md5 = m2.md5
        WHERE s2.mode = :mode
          AND s2.userid = :user_id
    )
    UPDATE stats s
    INNER JOIN concrete_stats cs ON 1
    SET
        s.plays = COALESCE(cs.plays, 0),
        s.tscore = COALESCE(cs.total_score, 0),
        s.rscore = COALESCE(cs.ranked_score, 0),
        s.playtime = COALESCE(cs.play_time, 0),
        s.max_combo = COALESCE(cs.max_combo, 0),
        s.total_hits = COALESCE(cs.total_hits, 0),
        s.xh_count = COALESCE(cs.xh_count, 0),
        s.x_count = COALESCE(cs.x_count, 0),
        s.sh_count = COALESCE(cs.sh_count, 0),
        s.s_count = COALESCE(cs.s_count, 0),
        s.a_count = COALESCE(cs.a_count, 0)
    WHERE s.id = :user_id AND s.mode = :mode
    """

    _ = await app.state.services.database.execute(
        sql, {"user_id": player_id, "mode": mode}
    )

if this is acceptable we could strike a balance between procedural code & performance.

Our server ran into performance / memory issues recently. With this update our score submission speed went from ~5s for a heavy user to ~600ms for a single mode, and a few orders of magnitude better when users play multiplayer (don't forget that we have a global submission thread lock). Not to mention that we also saved a bunch of IO and memory.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this also comes with added benefits: for example, when a map changes status, rather than setting up complex triggers and handlers and taking care of any potential edge cases, statistics will simply be corrected the next time the user submits a score.

Copy link
Member

@cmyui cmyui Apr 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

my feedback is more that i don't think many devs are familiar with sql, and this uses more advanced features like window functions, CTEs, etc. -- i suspect having it as pure-python is more familiar for most

this also comes with added benefits for example when a map changes states, rather than setup complex trigger and handlers, taking care of any potential edge cases, statistics will be corrected next time this user submits a score.

i don't disagree w/ this, but it can be implemented in pure py as well

Copy link
Member

@cmyui cmyui Apr 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think the most important issue in recalc to fix would be recalculation of score statuses when scores of a user shift around

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

my feedback is more that i don't think many devs are familiar with sql, and this uses more advanced features like window functions, CTEs, etc. -- i suspect having it as pure-python is more familiar for most

this also comes with added benefits for example when a map changes states, rather than setup complex trigger and handlers, taking care of any potential edge cases, statistics will be corrected next time this user submits a score.

i don't disagree w/ this, but it can be implemented in pure py as well

imo it's too cluttered for something which can be implemented in like 20-50 lines of python, and yeah sql can be confusing at times

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But the perf & scalability trade off is worth the effort I believe.

it's a good point, let me consider it & review more deeply soon

Copy link
Member

@cmyui cmyui May 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the issue with memory usage you're having here is because it loads all best scores for a given mode at once, this can be fixed with OFFSET anyway

pagination is a good idea, as time-to-execute is less of a problem than resource utilization at large scale imo (from akatsuki experience) -- not sure but i'm not entirely remembering the technical considerations of this process (e.g. row/table locking w.r.t. transactions, requirements of upfront processing, etc.), so i'll have to refresh myself

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think the most important issue in recalc to fix would be recalculation of score statuses when scores of a user shift around

A PR for that already exists for quite some time. #573

I believe the simplicity of the current pure python way easily outweighs any performance gains from this. As someone that ran bancho.py at its limits, score submission was rather the least of issues.

Copy link
Contributor Author

@arily arily May 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think the most important issue in recalc to fix would be recalculation of score statuses when scores of a user shift around

we also have solution for this one as well. https://github.com/ppy-sb/nyamatrix/blob/main/nyamatrix/qb/update_score_status.py
This was built to reduce memory usage on our server during recalc. With the built-in recalc script, memory usage will go above 4GB while running prod bpy, and our server (configured with 4c8g) doesn't have enough memory. The new one uses 113M doing the same.
This one uses sql as well so it might not be your choice of tech stack.

the issue with memory usage you're having here is because it loads all best scores for a given mode at once, this can be fixed with OFFSET anyway

There are several technical difficulties with this approach, if I understand the OFFSET suggestion correctly:

  • you still have to maintain a user-mode-beatmap-scores and user-mode-beatmap-best maps in order to decide which score should be set to best.
  • a large OFFSET in SQL is VERY SLOW; this is why modern apps and websites often no longer let you jump to a precise page, and have chosen cursor-based infinite scrolling instead.

There's, however, a better way imo: chunk by user ids, as they don't interfere with each other.

# https://osu.ppy.sh/wiki/en/Gameplay/Score/Ranked_score
# The ranked score is the total sum of the *best scores* for all ranked, loved, and approved difficulties played online.
sql = f"""
WITH
ordered_pp AS (
SELECT
s1.id scoreId,
s1.userid,
s1.mode,
s1.pp,
s1.acc
FROM
scores s1
INNER JOIN maps m ON s1.map_md5 = m.md5
WHERE
s1.status = 2
AND m.status IN (2, 3)
AND s1.pp > 0
AND s1.mode = :mode
AND s1.userid = :user_id
ORDER BY
s1.pp DESC,
s1.acc DESC,
s1.id DESC
),
bests AS (
SELECT
scoreId,
pp,
acc,
ROW_NUMBER() OVER (
ORDER BY
pp DESC
) AS global_rank
FROM
ordered_pp
),
user_calc AS (
SELECT
SUM(POW (0.95, global_rank - 1) * pp) AS weightedPP,
(1 - POW (0.9994, COUNT(*))) * 416.6667 AS bnsPP,
SUM(POW (0.95, global_rank - 1) * acc) / SUM(POW (0.95, global_rank - 1)) AS acc
FROM
bests
),
calculated AS (
SELECT
*,
weightedPP + bnsPP AS pp
FROM
user_calc
),
concrete_stats AS (
SELECT
COUNT(*) AS count,
SUM(s2.score) AS total_score,
SUM(IF(m2.status IN (2, 3) AND s2.status = 2, s2.score, 0)) AS ranked_score,
SUM(s2.n300 + s2.n100 + s2.n50 + (IF(s2.mode IN (1, 3, 5), s2.ngeki + s2.nkatu, 0))) AS total_hits,
SUM(s2.time_elapsed) / 1000 AS play_time,
MAX(s2.max_combo) AS max_combo,
SUM(s2.grade = "XH") AS xh_count,
SUM(s2.grade = "X") AS x_count,
SUM(s2.grade = "SH") AS sh_count,
SUM(s2.grade = "S") AS s_count,
SUM(s2.grade = "A") AS a_count
FROM
scores s2
LEFT JOIN maps m2 ON s2.map_md5 = m2.md5
WHERE s2.mode = :mode
AND s2.userid = :user_id
)
UPDATE stats s
INNER JOIN concrete_stats cs ON 1
INNER JOIN calculated c ON 1
SET
s.tscore = COALESCE(cs.total_score, 0),
s.rscore = COALESCE(cs.ranked_score, 0),
s.pp = COALESCE(c.pp, 0),
s.acc = COALESCE(c.acc, 0),
s.plays = COALESCE(cs.count, 0),
s.playtime = COALESCE(cs.play_time, 0),
s.max_combo = COALESCE(cs.max_combo, 0),
s.total_hits = COALESCE(cs.total_hits, 0),
s.xh_count = COALESCE(cs.xh_count, 0),
s.x_count = COALESCE(cs.x_count, 0),
s.sh_count = COALESCE(cs.sh_count, 0),
s.s_count = COALESCE(cs.s_count, 0),
s.a_count = COALESCE(cs.a_count, 0)
WHERE s.id = :user_id AND s.mode = :mode
"""

_ = await app.state.services.database.execute(
sql, {"user_id": player_id, "mode": mode}
)


# TODO: delete?