Skip to content

Commit

Permalink
Merge pull request #5382 from grafana/jorlando/auto-create-direct-pag…
Browse files Browse the repository at this point in the history
…ing-integration-routes

feat: create direct paging integrations with two default routes (+ migrate existing ones)
  • Loading branch information
joeyorlando authored Jan 2, 2025
2 parents d928b6d + d624c7f commit 31af852
Show file tree
Hide file tree
Showing 5 changed files with 204 additions and 31 deletions.
38 changes: 34 additions & 4 deletions docs/sources/configure/integrations/references/manual/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,16 @@ to the team's ChatOps channels and start an appropriate escalation chain.

## Set up direct paging for a team

By default all teams will have a direct paging integration created for them. However, these are not configured by default.
If a team does not have their direct paging integration configured, such that it is "contactable" (ie. it has an
escalation chain assigned to it, or has at least one Chatops integration connected to send notifications to), you will
By default all teams will have a direct paging integration created for them. Each direct paging integration will be
created with two routes:

- a non-default route which has a Jinja2 filtering term of `{{ payload.oncall.important }}`
(see [Important Escalations](#important-escalations) below for more details)
- a default route to capture all other alerts

However, these integrations are not configured by default to be "contactable" (ie. their routes will have no
escalation chains assigned to them, nor any Chatops integrations connected to send notifications to).
If a team does not have their direct paging integration configured such that it is "contactable", you will
not be able to direct page this team. If this happens, consider taking the following steps for the team (or reach out
to the relevant team and suggest doing so).

Expand All @@ -102,4 +109,27 @@ and select the same team for a test run.

### Important escalations

TODO:
Sometimes you really need to get the attention of a particular team. When directly paging a team, it is possible to
page them using an "important escalation". Practically speaking, this will create an alert, using the specified team's
direct paging integration as such:

```json
{
"oncall": {
"title": "IRM is paging Network team to join escalation",
"message": "I really need someone from your team to come take a look! The k8s cluster is down!",
"uid": "8a20b8d1-56fd-482e-824e-43fbd1bd7b10",
"author_username": "irm",
"permalink": null,
"important": true
}
}
```

When you are directly paging a team, either via the web UI, chatops apps, or the API, you can specify that this
escalation be "important", which will effectively set the value of `oncall.important` to `true`. As mentioned above in
[Set up direct paging for a team](#set-up-direct-paging-for-a-team), direct paging integrations come pre-configured with
two routes, with the non-default route having a Jinja2 filtering term of `{{ payload.oncall.important }}`.

This allows teams to be contacted via different escalation chains, depending on whether or not the user paging them
believes that this is an "important escalation".
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# Generated by Django 4.2.17 on 2024-12-20 14:19

import logging

from django.db import migrations
from django.db.models import Count

logger = logging.getLogger(__name__)


def upsert_direct_paging_integration_routes(apps, schema_editor):
    """Add the "important" route to direct paging integrations that still have a single route.

    For every direct paging integration with exactly one route, the existing
    default route (order=0) is moved to order=1 and a new non-default route
    (order=0) with the ``{{ payload.oncall.important }}`` Jinja2 filtering term
    is created.

    Integrations with more than one route are left untouched: users have
    already created their own routes and those must not be overwritten.

    Args:
        apps: historical app registry supplied by ``migrations.RunPython``.
        schema_editor: schema editor supplied by ``migrations.RunPython`` (unused).
    """
    AlertReceiveChannel = apps.get_model("alerts", "AlertReceiveChannel")
    ChannelFilter = apps.get_model("alerts", "ChannelFilter")

    DIRECT_PAGING_INTEGRATION_TYPE = "direct_paging"
    IMPORTANT_FILTERING_TERM = "{{ payload.oncall.important }}"
    FILTERING_TERM_TYPE_JINJA2 = 1  # 1 = ChannelFilter.FILTERING_TERM_TYPE_JINJA2
    CREATE_BATCH_SIZE = 5000
    # Kept small so that each `IN (...)` list stays well below per-query
    # parameter limits that some database backends impose.
    ID_BATCH_SIZE = 500

    logger.info("Fetching direct paging integrations which have not had their routes updated.")

    # Snapshot the primary keys ONCE. Every later step (counting, updating the
    # default routes, creating the new routes) works from this same list, so the
    # steps cannot disagree about which integrations are being migrated, and the
    # aggregate query is not re-run for each step.
    #
    # NOTE(review): this also keeps the UPDATE below from using a subquery that
    # joins the very table being updated, which some backends restrict (MySQL
    # documents such a restriction) - confirm against the supported databases.
    integration_ids = list(
        AlertReceiveChannel.objects
        .filter(integration=DIRECT_PAGING_INTEGRATION_TYPE)
        .annotate(num_routes=Count("channel_filters"))
        .filter(num_routes=1)
        .values_list("pk", flat=True)
    )

    integration_count = len(integration_ids)
    if integration_count == 0:
        logger.info("No integrations found which meet this criteria. No routes will be upserted.")
        return

    logger.info(f"Found {integration_count} direct paging integrations that meet this criteria.")

    id_batches = [
        integration_ids[start:start + ID_BATCH_SIZE]
        for start in range(0, integration_count, ID_BATCH_SIZE)
    ]

    def default_routes_for(batch):
        # Direct Paging Integrations are currently created with a single default route (order=0)
        # see AlertReceiveChannelManager.create_missing_direct_paging_integrations
        return ChannelFilter.objects.filter(
            alert_receive_channel_id__in=batch,
            is_default=True,
            order=0,
        )

    # we first need to update the default route to be order=1, and then we will subsequently
    # bulk-create the non-default route (order=0) which will have a filtering term set
    routes_count = sum(default_routes_for(batch).count() for batch in id_batches)
    logger.info(
        f"Swapping the order=0 value to order=1 for {routes_count} Direct Paging Integrations default routes"
    )

    updated_rows = sum(default_routes_for(batch).update(order=1) for batch in id_batches)
    logger.info(f"Swapped order=0 to order=1 for {updated_rows} Direct Paging Integrations default routes")

    # Bulk create the new non-default routes
    logger.info(
        f"Creating new non-default routes for {integration_count} Direct Paging Integrations"
    )
    created_objs = ChannelFilter.objects.bulk_create(
        [
            ChannelFilter(
                # only the foreign key value is needed, so the integration rows are never loaded
                alert_receive_channel_id=integration_id,
                filtering_term=IMPORTANT_FILTERING_TERM,
                filtering_term_type=FILTERING_TERM_TYPE_JINJA2,
                is_default=False,
                order=0,
            )
            for integration_id in integration_ids
        ],
        batch_size=CREATE_BATCH_SIZE,
    )
    logger.info(f"Created {len(created_objs)} new non-default routes for Direct Paging Integrations")

    logger.info("Migration for direct paging integration routes completed.")


class Migration(migrations.Migration):
    """Data migration: upsert the two-route layout on direct paging integrations."""

    # Must run after the previous migration of the "alerts" app.
    dependencies = [("alerts", "0071_migrate_labels")]

    operations = [
        migrations.RunPython(
            code=upsert_direct_paging_integration_routes,
            # Reversing this migration does nothing: the routes are left as they are.
            reverse_code=migrations.RunPython.noop,
        ),
    ]
51 changes: 40 additions & 11 deletions engine/apps/alerts/models/alert_receive_channel.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ class AlertReceiveChannelManager(models.Manager):
def create_missing_direct_paging_integrations(organization: "Organization") -> None:
from apps.alerts.models import ChannelFilter

logger.info(f"Starting create_missing_direct_paging_integrations for organization: {organization.id}")

# fetch teams without direct paging integration
teams_missing_direct_paging = list(
organization.teams.exclude(
Expand All @@ -134,10 +136,17 @@ def create_missing_direct_paging_integrations(organization: "Organization") -> N
).values_list("team_id", flat=True)
)
)
number_of_teams_missing_direct_paging = len(teams_missing_direct_paging)
logger.info(
f"Found {number_of_teams_missing_direct_paging} teams missing direct paging integrations.",
)

if not teams_missing_direct_paging:
logger.info("No missing direct paging integrations found. Exiting.")
return

# create missing integrations
logger.info(f"Creating missing direct paging integrations for {number_of_teams_missing_direct_paging} teams.")
AlertReceiveChannel.objects.bulk_create(
[
AlertReceiveChannel(
Expand All @@ -151,29 +160,49 @@ def create_missing_direct_paging_integrations(organization: "Organization") -> N
batch_size=5000,
ignore_conflicts=True, # ignore if direct paging integration already exists for team
)
logger.info("Missing direct paging integrations creation step completed.")

# fetch integrations for teams (some of them are created above, but some may already exist previously)
alert_receive_channels = organization.alert_receive_channels.filter(
team__in=teams_missing_direct_paging, integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING
)
logger.info(f"Fetched {alert_receive_channels.count()} direct paging integrations for the specified teams.")

# we create two routes for each Direct Paging Integration
# 1. route for important alerts (using the payload.oncall.important alert field value) - non-default
# 2. route for all other alerts - default
routes_to_create = []
for alert_receive_channel in alert_receive_channels:
routes_to_create.extend(
[
ChannelFilter(
alert_receive_channel=alert_receive_channel,
filtering_term="{{ payload.oncall.important }}",
filtering_term_type=ChannelFilter.FILTERING_TERM_TYPE_JINJA2,
is_default=False,
order=0,
),
ChannelFilter(
alert_receive_channel=alert_receive_channel,
filtering_term=None,
is_default=True,
order=1,
),
]
)

# create default routes
logger.info(f"Creating {len(routes_to_create)} channel filter routes.")
ChannelFilter.objects.bulk_create(
[
ChannelFilter(
alert_receive_channel=alert_receive_channel,
filtering_term=None,
is_default=True,
order=0,
)
for alert_receive_channel in alert_receive_channels
],
routes_to_create,
batch_size=5000,
ignore_conflicts=True, # ignore if default route already exists for integration
ignore_conflicts=True, # ignore if routes already exist for integration
)
logger.info("Direct paging routes creation completed.")

# add integrations to metrics cache
logger.info("Adding integrations to metrics cache.")
metrics_add_integrations_to_cache(list(alert_receive_channels), organization)
logger.info("Integrations have been added to the metrics cache.")

def get_queryset(self):
return AlertReceiveChannelQueryset(self.model, using=self._db).filter(
Expand Down
38 changes: 29 additions & 9 deletions engine/apps/alerts/tests/test_alert_receiver_channel.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,27 +259,47 @@ def test_create_missing_direct_paging_integrations(
):
organization = make_organization()

# team with no direct paging integration
# two teams with no direct paging integration
team1 = make_team(organization)
team2 = make_team(organization)

# team with direct paging integration
team2 = make_team(organization)
team3 = make_team(organization)
alert_receive_channel = make_alert_receive_channel(
organization, team=team2, integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING
organization, team=team3, integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING
)
make_channel_filter(alert_receive_channel, is_default=True, order=0)

# create missing direct paging integration for organization
AlertReceiveChannel.objects.create_missing_direct_paging_integrations(organization)

assert organization.alert_receive_channels.count() == 3

# check that missing integrations and default routes were created
assert organization.alert_receive_channels.count() == 2
mock_metrics_add_integrations_to_cache.assert_called_once()
#
# NOTE: we explicitly don't test team3, it already has a Direct Paging integration associated with it
# and AlertReceiveChannel.objects.create_missing_direct_paging_integrations is not responsible for filling
# in missing routes.
#
# See apps/alerts/migrations/0072_upsert_direct_paging_integration_routes.py which is a data migration that does
# exactly this.
for team in [team1, team2]:
alert_receive_channel = organization.alert_receive_channels.get(
team=team, integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING
)
assert alert_receive_channel.channel_filters.get().is_default
alert_receive_channel = organization.alert_receive_channels.get(team=team)

direct_paging_integration_routes = alert_receive_channel.channel_filters.all()

assert direct_paging_integration_routes.count() == 2

for route in direct_paging_integration_routes:
if route.is_default:
assert route.order == 1
assert route.filtering_term is None
else:
assert route.order == 0
assert route.filtering_term == "{{ payload.oncall.important }}"
assert route.filtering_term_type == route.FILTERING_TERM_TYPE_JINJA2

mock_metrics_add_integrations_to_cache.assert_called_once()


@pytest.mark.django_db
Expand Down
24 changes: 17 additions & 7 deletions engine/apps/user_management/tests/test_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,23 +203,33 @@ def test_sync_teams_for_organization(make_organization, make_team, make_alert_re
assert created_team.team_id == api_teams[2]["id"]
assert created_team.name == api_teams[2]["name"]

def _assert_teams_direct_paging_integration_is_configured_properly(integration):
assert integration.channel_filters.count() == 2

for route in integration.channel_filters.all():
if route.is_default:
assert route.order == 1
assert route.filtering_term is None
else:
assert route.order == 0
assert route.filtering_term == "{{ payload.oncall.important }}"
assert route.filtering_term_type == route.FILTERING_TERM_TYPE_JINJA2

# check that direct paging is created for created team
direct_paging_integration = AlertReceiveChannel.objects.get(
organization=organization,
integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING,
team=created_team,
)
assert direct_paging_integration.channel_filters.count() == 1
assert direct_paging_integration.channel_filters.first().order == 0
assert direct_paging_integration.channel_filters.first().is_default
_assert_teams_direct_paging_integration_is_configured_properly(direct_paging_integration)

# check that direct paging is created for existing team
direct_paging_integration = AlertReceiveChannel.objects.get(
organization=organization, integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING, team=teams[2]
organization=organization,
integration=AlertReceiveChannel.INTEGRATION_DIRECT_PAGING,
team=teams[2],
)
assert direct_paging_integration.channel_filters.count() == 1
assert direct_paging_integration.channel_filters.first().order == 0
assert direct_paging_integration.channel_filters.first().is_default
_assert_teams_direct_paging_integration_is_configured_properly(direct_paging_integration)


@pytest.mark.django_db
Expand Down

0 comments on commit 31af852

Please sign in to comment.