Skip to content

Commit

Permalink
Enable retries for send_instructor_email_digests()
Browse files Browse the repository at this point in the history
Enable retries for the `send_instructor_email_digests()` task.

We want to take the risk of potentially sending users multiple copies of
the same email digest (up to 3 copies since this commit sets
`max_retries` to `2`) rather than risking failing to send them any email
at all.

To that end, configure the `send_instructor_email_digests()` task to use
up to 2 Celery retries if the task fails.

Use a long delay between retries (hours rather than seconds or minutes)
to maximise the chances of the retry succeeding (for example if the
failure was caused by a Mailchimp outage then retrying after  a long
delay maximises the chances of the outage being resolved by the time we
retry).

Retrying is made slightly subtle by the fact that
`send_instructor_email_digests()` processes emails in batches. If just 1
email in a batch of 1000 fails to send then we want to retry just that
one email and not send duplicates of the other 999. Fortunately this is
easy to achieve by mutating the tasks `h_userids` kwarg when retrying.
  • Loading branch information
seanh committed Apr 12, 2023
1 parent 7d1b9ec commit 95bdcdb
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 9 deletions.
35 changes: 28 additions & 7 deletions lms/tasks/email_digests.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
import logging
import random
from datetime import datetime, timedelta, timezone
from typing import List, Optional

from h_pyramid_sentry import report_exception
from sqlalchemy import select

from lms.models import ApplicationInstance, AssignmentMembership, LTIRole, User
from lms.services import DigestService
from lms.services.digest import SendDigestsError
from lms.tasks.celery import app

LOG = logging.getLogger(__name__)
Expand Down Expand Up @@ -69,8 +72,9 @@ def send_instructor_email_digest_tasks(batch_size):
)


@app.task
@app.task(bind=True, max_retries=2)
def send_instructor_email_digests(
self,
*,
h_userids: List[str],
updated_after: str,
Expand All @@ -95,12 +99,29 @@ def send_instructor_email_digests(
updated_after = datetime.fromisoformat(updated_after)
updated_before = datetime.fromisoformat(updated_before)

# How long to wait (in seconds) before retrying the task if it fails.
retry_countdown = (3600 * (self.request.retries + 1)) + random.randint(0, 900)

with app.request_context() as request: # pylint:disable=no-member
with request.tm:
digest_service = request.find_service(DigestService)
digest_service.send_instructor_email_digests(
h_userids,
updated_after,
updated_before,
override_to_email=override_to_email,
)

try:
digest_service.send_instructor_email_digests(
h_userids,
updated_after,
updated_before,
override_to_email=override_to_email,
)
except SendDigestsError as err:
self.retry(
kwargs={
**self.request.kwargs,
"h_userids": list(err.errors.keys()),
},
countdown=retry_countdown,
)
except Exception as exc: # pylint:disable=broad-exception-caught
LOG.exception(exc)
report_exception(exc)
self.retry(countdown=retry_countdown)
69 changes: 67 additions & 2 deletions tests/unit/lms/tasks/email_digests_test.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import logging
from contextlib import contextmanager
from datetime import datetime
from unittest.mock import call, sentinel

import pytest
from freezegun import freeze_time
from h_matchers import Any

from lms.services.digest import SendDigestsError
from lms.services.mailchimp import MailchimpError
from lms.tasks.email_digests import (
send_instructor_email_digest_tasks,
send_instructor_email_digests,
Expand Down Expand Up @@ -170,7 +174,7 @@ def test_it(self, digest_service):
updated_after = datetime(year=2023, month=3, day=1)
updated_before = datetime(year=2023, month=3, day=2)

send_instructor_email_digests(
send_instructor_email_digests( # pylint:disable=no-value-for-parameter
h_userids=sentinel.h_userids,
updated_after=updated_after.isoformat(),
updated_before=updated_before.isoformat(),
Expand All @@ -196,12 +200,73 @@ def test_it_crashes_if_updated_after_or_updated_before_is_invalid(
self, updated_after, updated_before
):
with pytest.raises(ValueError, match="^Invalid isoformat string"):
send_instructor_email_digests(
send_instructor_email_digests( # pylint:disable=no-value-for-parameter
h_userids=sentinel.h_userids,
updated_after=updated_after,
updated_before=updated_before,
)

def test_it_retries_if_sending_fails(self, digest_service, retry):
updated_after = datetime(year=2023, month=3, day=1)
updated_before = datetime(year=2023, month=3, day=2)
# send_instructor_email_digests() raises a SendDigestsError whose
# errors attribute is a dict mapping the h_userid's that failed to send
# to their corresponding MailchimpError's.
digest_service.send_instructor_email_digests.side_effect = SendDigestsError(
errors={
sentinel.failed_h_userid_1: MailchimpError(),
sentinel.failed_h_userid_2: MailchimpError(),
}
)

send_instructor_email_digests( # pylint:disable=no-value-for-parameter
h_userids=sentinel.h_userids,
updated_after=updated_after.isoformat(),
updated_before=updated_before.isoformat(),
override_to_email=sentinel.override_to_email,
)

# It retries the task with the same arguments except that h_userids is
# now only those userids that failed to send.
retry.assert_called_once_with(
kwargs={
"h_userids": Any.list.containing(
[sentinel.failed_h_userid_1, sentinel.failed_h_userid_2]
).only(),
"updated_after": updated_after.isoformat(),
"updated_before": updated_before.isoformat(),
"override_to_email": sentinel.override_to_email,
},
countdown=Any.int(),
)

def test_it_retries_if_sending_fails_with_an_unexpected_exception(
self, caplog, digest_service, report_exception, retry
):
exception = RuntimeError("Unexpected")
digest_service.send_instructor_email_digests.side_effect = exception

send_instructor_email_digests( # pylint:disable=no-value-for-parameter
h_userids=sentinel.h_userids,
updated_after="2023-02-27T00:00:00",
updated_before="2023-02-28T00:00:00",
override_to_email=sentinel.override_to_email,
)

assert caplog.record_tuples == [
("lms.tasks.email_digests", logging.ERROR, "Unexpected")
]
report_exception.assert_called_once_with(exception)
retry.assert_called_once_with(countdown=Any.int())

@pytest.fixture
def report_exception(self, patch):
return patch("lms.tasks.email_digests.report_exception")

@pytest.fixture
def retry(self, patch):
return patch("lms.tasks.email_digests.send_instructor_email_digests.retry")


@pytest.fixture(autouse=True)
def app(patch, pyramid_request):
Expand Down

0 comments on commit 95bdcdb

Please sign in to comment.