Skip to content

Commit

Permalink
Labs Analytics Python Wrapper (#46)
Browse files Browse the repository at this point in the history
* Analytics app

* lint

* Pulled out pool size to class field

* Analytics app rename

* JWT Access and Refresh routes

* AnalyticsTxn

* Working DLA to LAS pipeline

* Remove JWT Views until further conversation

* Clean up settings

* In-memory caching JWT decode/serialization

* Lint

* Don't update django-sentry

* Test cases

* Mock more

* Remove validate

* Temp remove codecov (which is deprecated in favor of coverage)
  • Loading branch information
judtinzhang authored Apr 25, 2024
1 parent a0f6589 commit b78d468
Show file tree
Hide file tree
Showing 10 changed files with 167 additions and 34 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/cdkactions_build-and-publish.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ jobs:
run: pip install poetry tox tox-gh-actions codecov
- name: Test
run: tox
- name: Upload Code Coverage
run: codecov
# - name: Upload Code Coverage
# run: codecov
publish:
runs-on: ubuntu-latest
container:
Expand Down
28 changes: 0 additions & 28 deletions .github/workflows/cdkactions_validate.yaml

This file was deleted.

Empty file added analytics/__init__.py
Empty file.
99 changes: 99 additions & 0 deletions analytics/analytics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import json
import time
from concurrent.futures import ThreadPoolExecutor
from enum import IntEnum
from typing import Optional

from django.utils import timezone
from requests import Session

from identity.identity import _refresh_if_outdated, container


class Product(IntEnum):
    """Integer codes identifying the product an analytics transaction is for.

    The numeric value is what ``AnalyticsTxn`` stores and sends; the member
    name is what ``str()`` yields.
    """

    OTHER = 0
    MOBILE_IOS = 1
    MOBILE_ANDROID = 2
    MOBILE_BACKEND = 3
    PORTAL = 4
    PCR = 5
    PDP = 6
    PCA = 7
    PCP = 8
    OHQ = 9
    CLUBS = 10

    def __str__(self):
        """Render the member as its bare name, e.g. ``"PCR"``."""
        return f"{self.name}"


class AnalyticsTxn:
    """A single analytics transaction to be sent to the analytics engine.

    Args:
        product: The ``Product`` the transaction belongs to; only its integer
            ``value`` is stored.
        pennkey: The user's pennkey, or ``None`` when there is none.
        timestamp: An object exposing ``.timestamp()`` (e.g. an aware
            ``datetime``). Defaults to ``timezone.now()`` evaluated at
            construction time.
        data: The payload entries. Defaults to a new empty list.
            # presumably a list of {"key": ..., "value": ...} dicts, as used
            # in the tests; verify against the analytics engine schema.
    """

    def __init__(
        self,
        product: Product,
        pennkey: Optional[str],
        timestamp=None,
        data=None,
    ):
        # Default values are evaluated once, when the function is defined.
        # Using ``timezone.now()`` / ``list()`` directly as defaults would
        # stamp every transaction with the import-time timestamp and share a
        # single list between all instances, so resolve them per call instead.
        if timestamp is None:
            timestamp = timezone.now()

        self.product = product.value
        self.pennkey = pennkey
        # Stored as a POSIX timestamp (float) so the object is JSON-serializable.
        self.timestamp = timestamp.timestamp()
        self.data = [] if data is None else data

    def to_json(self):
        """Return the instance attributes as a JSON-compatible ``dict``.

        The attributes are round-tripped through ``json.dumps``/``json.loads``,
        which both validates that they are serializable and yields a copy that
        is independent of this object.
        """
        return json.loads(json.dumps(vars(self)))


class NoRebuildAuthSession(Session):
    """``Session`` subclass that turns the ``rebuild_auth`` hook into a no-op."""

    def rebuild_auth(self, prepared_request, response):
        """
        Deliberately do nothing.

        Because this override has no body, requests keeps the Authorization
        header on the request it issues after a redirect.
        Take care: that can leak credentials to an untrusted host!
        """
        return None


class LabsAnalytics:
    """
    Python wrapper for async requests to Labs Analytics Engine

    The class behaves as a process-wide singleton: ``__new__`` always returns
    the same object. Transactions are posted from a thread pool so ``submit``
    does not wait for the HTTP request. The access token's expiry and the
    request headers derived from it are cached on the instance and refreshed
    shortly before the token expires.
    """

    ANALYTICS_URL = "https://analytics.pennlabs.org/analytics"
    POOL_SIZE = 10
    # Seconds before the token's expiry at which it is proactively refreshed.
    REFRESH_BUFFER = 30

    def __new__(cls, *args, **kwargs):
        """Create the shared instance on first use and return it thereafter."""
        if not hasattr(cls, "instance"):
            cls.instance = super(LabsAnalytics, cls).__new__(cls)
        return cls.instance

    def __init__(self):
        """Set up the worker pool, HTTP session and cached token state.

        Python invokes ``__init__`` on every ``LabsAnalytics()`` call even
        though ``__new__`` returns the shared instance, so each call replaces
        the executor and session and re-reads the token.
        """
        self.executor = ThreadPoolExecutor(max_workers=self.POOL_SIZE)
        self.session = NoRebuildAuthSession()

        self.expires_at = None
        self.headers = dict()

        # Local caching of expiration date and headers
        self._refresh_expires_at()
        self._refresh_headers()

    def _refresh_expires_at(self):
        """Cache the ``exp`` claim of the current access JWT."""
        self.expires_at = json.loads(container.access_jwt.claims)["exp"]

    def _refresh_headers(self):
        """Rebuild the cached request headers from the current access JWT."""
        self.headers = {
            "Authorization": f"Bearer {container.access_jwt.serialize()}",
            "Content-Type": "application/json",
        }

    def _token_needs_refresh(self):
        """Return True when no expiry is cached or it is within the buffer."""
        if self.expires_at is None:
            return True
        return time.time() >= self.expires_at - self.REFRESH_BUFFER

    def submit(self, txn: AnalyticsTxn):
        """Queue ``txn`` to be posted to the analytics engine.

        The cached credentials are refreshed first when the token is about to
        expire (or has already expired). The original check used ``<``, which
        refreshed only while the token was still fresh and never afterwards.

        Args:
            txn: The transaction to send; its ``to_json()`` result is the
                request body.
        """
        # Offer a 30 second buffer to refresh
        if self._token_needs_refresh():
            _refresh_if_outdated()
            self._refresh_expires_at()
            self._refresh_headers()

        self.executor.submit(self.send_message, txn.to_json())

    def send_message(self, json):
        """POST ``json`` to ``ANALYTICS_URL`` using the cached headers.

        Runs on a worker thread; the response is not inspected.
        The ``json`` parameter shadows the ``json`` module inside this method;
        the name is kept so existing keyword callers continue to work.
        """
        # NOTE(review): no timeout is passed, so a stalled connection can
        # occupy a pool worker indefinitely - consider adding one.
        self.session.post(url=self.ANALYTICS_URL, json=json, headers=self.headers)
6 changes: 6 additions & 0 deletions analytics/apps.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from django.apps import AppConfig


class AnalyticsConfig(AppConfig):
    """Django app configuration for the ``analytics`` package."""

    # Human-readable label for the app.
    verbose_name = "Analytics Engine Functions"
    # Dotted path of the application this config applies to.
    name = "analytics"
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ exclude = ["tox.ini"]
packages = [
{ include = "accounts" },
{ include = "identity" },
{ include = "analytics" },
]

[tool.poetry.dependencies]
Expand Down
Empty file added tests/analytics/__init__.py
Empty file.
53 changes: 53 additions & 0 deletions tests/analytics/test_analytics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import random
from unittest import mock

from django.test import TestCase

from analytics.analytics import AnalyticsTxn, LabsAnalytics, Product


class AnalyticsTxnTestCase(TestCase):
    """Checks the JSON form produced by ``AnalyticsTxn.to_json``."""

    def test_analytics_txn(self):
        """A backend transaction without a pennkey serializes as expected."""
        txn = AnalyticsTxn(
            product=Product.MOBILE_BACKEND,
            pennkey=None,
            data=[{"key": "backend", "value": "data"}],
        )
        payload = txn.to_json()

        # The product is serialized as its integer value.
        self.assertEqual(Product.MOBILE_BACKEND.value, int(payload["product"]))
        self.assertIsNone(payload["pennkey"])
        # The timestamp default must be filled in even though none was passed.
        self.assertIn("timestamp", payload)


class AnalyticsSubmission(TestCase):
    """Exercises ``LabsAnalytics.submit`` with token handling mocked out."""

    # mock.patch decorators are applied bottom-up, so the first injected
    # argument is the mock for the decorator closest to the function
    # (_refresh_headers) and the second is for _refresh_expires_at.
    @mock.patch("analytics.analytics.LabsAnalytics._refresh_expires_at")
    @mock.patch("analytics.analytics.LabsAnalytics._refresh_headers")
    def setUp(self, mock_headers, mock_expires_at):
        """Build the wrapper without touching the real access token."""
        # NOTE: for real (unmocked) testing, attest first:
        # from identity.identity import attest
        # attest()
        self.analytics_wrapper = LabsAnalytics()
        self.NUM_TRIES = 1000

    def rand_int(self):
        """Return a random integer in ``[1, NUM_TRIES]``."""
        return random.randint(1, self.NUM_TRIES)

    def generate_data(self):
        """Return keyword arguments for an ``AnalyticsTxn`` with random data."""
        return {
            "product": Product.MOBILE_BACKEND,
            "pennkey": None,
            "data": [{"key": f"{self.rand_int()}", "value": f"{self.rand_int()}"}],
        }

    # NOTE(review): ``submit`` itself is patched here, so this test only
    # verifies that the mock was invoked NUM_TRIES times; the real submit
    # logic (token refresh, executor dispatch) is not executed.
    @mock.patch("analytics.analytics.LabsAnalytics.submit")
    def test_submit(self, mock_submit):
        """Submit NUM_TRIES transactions and check each call was recorded."""
        for _ in range(self.NUM_TRIES):
            txn = AnalyticsTxn(**self.generate_data())
            self.analytics_wrapper.submit(txn)

        self.assertEqual(self.NUM_TRIES, mock_submit.call_count)

        # Release the worker threads created in setUp.
        self.analytics_wrapper.executor.shutdown(wait=True)
2 changes: 2 additions & 0 deletions tests/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
"django.contrib.admin",
"django.contrib.messages",
"accounts.apps.AccountsConfig",
"identity.apps.IdentityConfig",
"analytics.apps.AnalyticsConfig",
"tests",
)

Expand Down
8 changes: 4 additions & 4 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
isolated_build = true
envlist =
lint,
py311-django{22,30,31,32,502},
py311-django{502},
sentry-django{30,31},

[testenv]
allowlist_externals = poetry
commands =
poetry install
poetry run pytest --cov=accounts --cov=identity --cov-append {posargs}
poetry run pytest --cov=accounts --cov=identity --cov=analytics --cov-append {posargs}
setenv =
DJANGO_SETTINGS_MODULE = tests.settings
PYTHONPATH = {toxinidir}
Expand All @@ -32,15 +32,15 @@ inline-quotes = double

[isort]
default_section = THIRDPARTY
known_first_party = accounts, identity
known_first_party = accounts, identity, analytics
line_length = 88
lines_after_imports = 2
multi_line_output = 3
include_trailing_comma = True
use_parentheses = True

[coverage:run]
source = accounts, identity
source = accounts, identity, analytics

[pytest]
django_find_project = false
Expand Down

0 comments on commit b78d468

Please sign in to comment.