Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor load balance #424

Merged
merged 5 commits into from
Jun 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 57 additions & 51 deletions schedule/reschedule.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from anki.decks import DeckManager
from anki.utils import ids2str
from aqt.gui_hooks import browser_menus_did_init
from collections import defaultdict


class FSRS:
Expand All @@ -19,8 +20,9 @@ class FSRS:
p_obey_easy_days: float
easy_specific_due_dates: List[int]
p_obey_specific_due_dates: float
due_cnt_perday_from_first_day: Dict[int, int]
learned_cnt_perday_from_today: Dict[int, int]
due_cnt_per_day: Dict[int, int]
due_today: int
reviewed_today: int
card: Card
elapsed_days: int
allow_to_past: bool
Expand All @@ -42,34 +44,32 @@ def __init__(self) -> None:
def set_load_balance(self, did_query=None):
self.enable_load_balance = True
true_due = "CASE WHEN odid==0 THEN due ELSE odue END"
self.due_cnt_perday_from_first_day = {
day: cnt
for day, cnt in mw.col.db.all(
f"""SELECT {true_due}, count()
FROM cards
WHERE type = 2
AND queue != -1
{did_query if did_query is not None else ""}
GROUP BY {true_due}"""
)
}
for day in list(self.due_cnt_perday_from_first_day.keys()):
if day < mw.col.sched.today:
self.due_cnt_perday_from_first_day[mw.col.sched.today] = (
self.due_cnt_perday_from_first_day.get(mw.col.sched.today, 0)
+ self.due_cnt_perday_from_first_day[day]
self.due_cnt_per_day = defaultdict(
int,
{
day: cnt
for day, cnt in mw.col.db.all(
f"""SELECT {true_due}, count()
FROM cards
WHERE type = 2
AND queue != -1
{did_query if did_query is not None else ""}
GROUP BY {true_due}"""
)
self.due_cnt_perday_from_first_day.pop(day)
self.learned_cnt_perday_from_today = {
day: cnt
for day, cnt in mw.col.db.all(
f"""SELECT (id/1000-{mw.col.sched.day_cutoff})/86400, count(distinct cid)
FROM revlog
WHERE ease > 0
AND (type < 3 OR factor != 0)
GROUP BY (id/1000-{mw.col.sched.day_cutoff})/86400"""
)
}
},
)
self.due_today = sum(
due_cnt
for due, due_cnt in self.due_cnt_per_day.items()
if due <= mw.col.sched.today
)
self.reviewed_today = mw.col.db.scalar(
f"""SELECT count(distinct cid)
FROM revlog
WHERE ease > 0
AND (type < 3 OR factor != 0)
AND id/1000 >= {mw.col.sched.day_cutoff - 86400}"""
)

def set_fuzz_factor(self, cid: int, reps: int):
random.seed(rotate_number_by_k(cid, 8) + reps)
Expand All @@ -86,18 +86,23 @@ def apply_fuzz(self, ivl):
else:
return int(self.fuzz_factor * (max_ivl - min_ivl + 1) + min_ivl)
else:
# Load balance
due = self.card.odue if self.card.odid else self.card.due
if due - self.card.ivl + max_ivl <= mw.col.sched.today:
# If the latest possible due date is in the past, skip load balance
return ivl

if self.apply_easy_days:
last_review = get_last_review_date(self.card)
due = self.card.odue if self.card.odid else self.card.due
if due > last_review + max_ivl + 2:
current_ivl = due - last_review
min_ivl, max_ivl = get_fuzz_range(
current_ivl, self.elapsed_days, current_ivl
)
min_num_cards = math.inf

min_workload = math.inf
best_ivl = (max_ivl + min_ivl) // 2 if self.allow_to_past else max_ivl
step = (max_ivl - min_ivl) // 100 + 1
due = self.card.due if self.card.odid == 0 else self.card.odue

if self.easy_days_review_ratio == 0:
obey_easy_days = True
Expand All @@ -107,8 +112,9 @@ def apply_fuzz(self, ivl):
obey_specific_due_dates = (
random.random() < self.p_obey_specific_due_dates
)

for check_ivl in reversed(range(min_ivl, max_ivl + step, step)):
check_due = due + check_ivl - self.card.ivl
check_due = due - self.card.ivl + check_ivl
if (
obey_specific_due_dates
and check_due in self.easy_specific_due_dates
Expand All @@ -121,20 +127,18 @@ def apply_fuzz(self, ivl):

due_date = sched_current_date() + timedelta(days=day_offset)
if obey_easy_days and due_date.weekday() in self.easy_days:
# If the due date is on an easy day, skip
continue

due_cards = self.due_cnt_perday_from_first_day.get(
max(check_due, mw.col.sched.today), 0
)
rated_cards = (
self.learned_cnt_perday_from_today.get(0, 0)
if day_offset <= 0
else 0
)
num_cards = due_cards + rated_cards
if num_cards < min_num_cards:
if check_due > mw.col.sched.today:
# If the due date is in the future, the workload is the number of cards due on that day
workload = self.due_cnt_per_day[check_due]
else:
# If the due date is today or in the past, the workload is the number of cards due today (including overdue cards) plus the number of cards already reviewed today
workload = self.due_today + self.reviewed_today
if workload < min_workload:
best_ivl = check_ivl
min_num_cards = num_cards
min_workload = workload
return best_ivl

def next_interval(self, stability):
Expand Down Expand Up @@ -223,7 +227,7 @@ def reschedule_background(
if specific_due not in fsrs.easy_specific_due_dates:
fsrs.easy_specific_due_dates.append(specific_due)

fsrs.p_obey_specific_due_dates = obey_specific_due_dates(
fsrs.p_obey_specific_due_dates = p_obey_specific_due_dates(
len(fsrs.easy_specific_due_dates), fsrs.easy_days_review_ratio
)
if len(easy_specific_due_dates) > 0:
Expand Down Expand Up @@ -339,14 +343,16 @@ def reschedule_card(cid, fsrs: FSRS, recompute=False):
fsrs.set_card(card)
fsrs.set_fuzz_factor(cid, card.reps)
new_ivl = fsrs.next_interval(s)
due_before = max(card.odue if card.odid else card.due, mw.col.sched.today)
due_before = card.odue if card.odid else card.due
card = update_card_due_ivl(card, new_ivl)
due_after = max(card.odue if card.odid else card.due, mw.col.sched.today)
due_after = card.odue if card.odid else card.due
if fsrs.enable_load_balance:
fsrs.due_cnt_perday_from_first_day[due_before] -= 1
fsrs.due_cnt_perday_from_first_day[due_after] = (
fsrs.due_cnt_perday_from_first_day.get(due_after, 0) + 1
)
fsrs.due_cnt_per_day[due_before] -= 1
fsrs.due_cnt_per_day[due_after] += 1
if due_before <= mw.col.sched.today and due_after > mw.col.sched.today:
fsrs.due_today -= 1
if due_before > mw.col.sched.today and due_after <= mw.col.sched.today:
fsrs.due_today += 1
return card


Expand Down
34 changes: 28 additions & 6 deletions utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,14 +192,36 @@ def rotate_number_by_k(N, K):


def p_obey_easy_days(num_of_easy_days, easy_days_review_ratio):
    """
    Calculate the probability of obeying easy days to ensure the review ratio.

    Parameters:
    - num_of_easy_days: the number of easy days in a week (n)
    - easy_days_review_ratio: the ratio of reviews on easy days (a)

    Math:
    - A week has 7 days, n easy days, 7 - n non-easy days
    - Assume we have y reviews per non-easy day, the number of reviews per easy day is a * y
    - The total number of reviews in a week is y * (7 - n) + a * y * n
    - The probability of a review on an easy day is the number of reviews on easy days
      divided by the total number of reviews:
      (a * y * n) / (y * (7 - n) + a * y * n) = (a * n) / (a * n + 7 - n)
    - The probability of skipping a review on an easy day is
      1 - (a * n) / (a * n + 7 - n) = (7 - n) / (a * n + 7 - n)

    Returns:
    - (7 - n) / (a * n + 7 - n)
    - 1.0 when the denominator is zero (n = 7 and a = 0), instead of raising
      ZeroDivisionError. With a = 0 the formula yields 1 for every n < 7, so
      the same value is used for the degenerate all-easy-days case.
    """
    non_easy_days = 7 - num_of_easy_days
    # Same evaluation order as the plain formula so results are bit-identical.
    weighted_days = easy_days_review_ratio * num_of_easy_days + 7 - num_of_easy_days
    if weighted_days == 0:
        # Every day is easy and the ratio is 0: 0 / 0 would crash here.
        return 1.0
    return non_easy_days / weighted_days


def obey_specific_due_dates(num_of_specific_due_dates, easy_days_review_ratio):
    """
    Return (8 + n) / (a * n + 8 + n).

    Parameters:
    - num_of_specific_due_dates: the number of specific due dates (n)
    - easy_days_review_ratio: the ratio of reviews on easy days (a)
    """
    count = num_of_specific_due_dates
    numerator = 8 + count
    denominator = easy_days_review_ratio * count + 8 + count
    return numerator / denominator
def p_obey_specific_due_dates(num_of_specific_due_dates, easy_days_review_ratio):
    """
    Probability of obeying (i.e. skipping) specific due dates so that the
    configured review ratio is kept.

    Parameters:
    - num_of_specific_due_dates: the number of specific due dates (n)
    - easy_days_review_ratio: the ratio of reviews on easy days (a)

    Math:
    - With n specific due dates, the rescheduling window is 8 + n days:
      8 regular days plus the n specific due dates
    - Assume y reviews per regular day, so a specific due date gets a * y reviews
    - Total reviews in the window: y * 8 + a * y * n
    - Share of reviews that land on specific due dates:
      (a * y * n) / (y * 8 + a * y * n) = (a * n) / (a * n + 8)
    - Probability of skipping a review on a specific due date:
      1 - (a * n) / (a * n + 8) = 8 / (a * n + 8)
    """
    regular_weight = 8
    specific_weight = easy_days_review_ratio * num_of_specific_due_dates
    return regular_weight / (specific_weight + regular_weight)
Loading