Skip to content

Commit

Permalink
Azure Function App which runs periodically to aggregate the bytes rea…
Browse files Browse the repository at this point in the history
…d per IP address over a window of time (#215)

* init

* update

* update flake8 config

* code format changes

* fix long lines

* fix imports

* function app changes

* code format changes

* test

* remove readme

* remove redundant packages and revert python version

* revert python version

* remove redundant type hints

* use settings class inherited from baseSettings

* change kql

* use placeholder for timer schedule

* change test parameter

* update assertions and logger

* remove import

* remove dash in table name

* update test id

* format

* add no-integration flag

* add dependencies

* test

* change trigger to run every hour

* use azure clients as context manager

* add  context managers in test

* role assignment for function app

* change LAW name

* change role

* change provider's name

* change name of LAW

* better readability

* better readability

* format

* add logging

* UPDATE function settings

* suppress mypy warning

* update env variables for function app

* typo

---------

Co-authored-by: elay <[email protected]>
  • Loading branch information
elayrocks and elay authored Jun 26, 2024
1 parent 69098c7 commit a08f05e
Show file tree
Hide file tree
Showing 19 changed files with 386 additions and 5 deletions.
3 changes: 2 additions & 1 deletion .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,5 @@ extend-ignore = E203, W503
exclude =
.git
__pycache__
setup.py
setup.py
.venv
31 changes: 31 additions & 0 deletions deployment/terraform/resources/functions.tf
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ resource "azurerm_function_app" "pcfuncs" {
"IMAGE_OUTPUT_STORAGE_URL" = var.image_output_storage_url,
"IMAGE_API_ROOT_URL" = var.funcs_data_api_url,
"IMAGE_TILE_REQUEST_CONCURRENCY" = tostring(var.funcs_tile_request_concurrency),

# IPBan function
"STORAGE_ACCOUNT_URL" = var.func_storage_account_url,
"BANNED_IP_TABLE" = var.banned_ip_table,
"LOG_ANALYTICS_WORKSPACE_ID" = var.prod_log_analytics_workspace_id,
}

os_type = "linux"
Expand Down Expand Up @@ -77,3 +82,29 @@ resource "azurerm_role_assignment" "function-app-animation-container-access" {
azurerm_function_app.pcfuncs
]
}

# Assigns the "Storage Table Data Contributor" role on the `pc` storage account
# to the function app's first (index 0) identity principal.
# NOTE(review): presumably this lets the IPBan function create and update the
# banned-IP table — confirm azurerm_storage_account.pc is the account behind
# the STORAGE_ACCOUNT_URL app setting.
resource "azurerm_role_assignment" "function-app-storage-table-data-contributor" {
scope = azurerm_storage_account.pc.id
role_definition_name = "Storage Table Data Contributor"
principal_id = azurerm_function_app.pcfuncs.identity[0].principal_id

# Explicit ordering on the function app, in addition to the reference in
# principal_id above.
depends_on = [
azurerm_function_app.pcfuncs
]
}

# Looks up an existing Log Analytics workspace by name and resource group.
# The lookup goes through the aliased `planetary_computer_subscription`
# provider rather than the default azurerm provider.
data "azurerm_log_analytics_workspace" "prod_log_analytics_workspace" {
provider = azurerm.planetary_computer_subscription
name = var.prod_log_analytics_workspace_name
resource_group_name = var.pc_resources_rg
}

# Assigns the "Log Analytics Reader" role on the looked-up Log Analytics
# workspace to the function app's first (index 0) identity principal.
# NOTE(review): presumably required for the IPBan function's log query —
# confirm the function authenticates with this identity when deployed.
resource "azurerm_role_assignment" "function-app-log-analytics-access" {
scope = data.azurerm_log_analytics_workspace.prod_log_analytics_workspace.id
role_definition_name = "Log Analytics Reader"
principal_id = azurerm_function_app.pcfuncs.identity[0].principal_id

# Explicit ordering on the function app, in addition to the reference in
# principal_id above.
depends_on = [
azurerm_function_app.pcfuncs
]
}
6 changes: 6 additions & 0 deletions deployment/terraform/resources/providers.tf
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@ provider "azurerm" {
# storage_use_azuread = true
}

# Second azurerm provider configuration, selected with
# `provider = azurerm.planetary_computer_subscription`, pinned to a fixed
# subscription ID.
# NOTE(review): the subscription ID is hard-coded here; consider passing it in
# as a variable if other environments need a different subscription.
provider "azurerm" {
alias = "planetary_computer_subscription"
subscription_id = "9da7523a-cb61-4c3e-b1d4-afa5fc6d2da9"
features {}
}

terraform {
required_version = ">= 0.13"

Expand Down
21 changes: 21 additions & 0 deletions deployment/terraform/resources/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,11 @@ variable "pc_test_resources_rg" {
default = "pc-test-manual-resources"
}

# Resource group name used when looking up the production Log Analytics
# workspace data source. Defaults to "pc-manual-resources".
variable "pc_resources_rg" {
type = string
default = "pc-manual-resources"
}

variable "pc_test_resources_kv" {
type = string
default = "pc-test-deploy-secrets"
Expand Down Expand Up @@ -123,6 +128,22 @@ variable "image_output_storage_url" {
type = string
}

# Name of the Log Analytics workspace to look up (used by the
# azurerm_log_analytics_workspace data source). Required: no default.
variable "prod_log_analytics_workspace_name" {
type = string
}

# Workspace ID exposed to the function app as the
# LOG_ANALYTICS_WORKSPACE_ID app setting. Required: no default.
variable "prod_log_analytics_workspace_id" {
type = string
}

# Table name exposed to the function app as the BANNED_IP_TABLE app setting.
# Required: no default.
variable "banned_ip_table" {
type = string
}

# Storage endpoint URL exposed to the function app as the
# STORAGE_ACCOUNT_URL app setting. Required: no default.
variable "func_storage_account_url" {
type = string
}

# -----------------
# Local variables

Expand Down
4 changes: 4 additions & 0 deletions deployment/terraform/staging/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ module "resources" {
animation_output_storage_url = "https://pcfilestest.blob.core.windows.net/output/animations"
image_output_storage_url = "https://pcfilestest.blob.core.windows.net/output/images"

prod_log_analytics_workspace_name = "pc-api-loganalytics"
prod_log_analytics_workspace_id = "78d48390-b6bb-49a9-b7fd-a86f6522e9c4"
func_storage_account_url = "https://pctapisstagingsa.table.core.windows.net/"
banned_ip_table = "blobstoragebannedip"
}

terraform {
Expand Down
1 change: 1 addition & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ services:
- ./pccommon:/home/site/pccommon
- ./pcfuncs:/home/site/wwwroot
- .:/opt/src
- ~/.azure:/home/.azure

nginx:
image: pc-apis-nginx
Expand Down
4 changes: 4 additions & 0 deletions pc-funcs.dev.env
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,7 @@ IMAGE_OUTPUT_STORAGE_URL="http://azurite:10000/devstoreaccount1/output/images"
IMAGE_OUTPUT_ACCOUNT_KEY="Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="
IMAGE_API_ROOT_URL="https://planetarycomputer-staging.microsoft.com/api/data/v1"
IMAGE_TILE_REQUEST_CONCURRENCY=2

STORAGE_ACCOUNT_URL=https://pctapisstagingsa.table.core.windows.net/
BANNED_IP_TABLE=blobstoragebannedip
LOG_ANALYTICS_WORKSPACE_ID=78d48390-b6bb-49a9-b7fd-a86f6522e9c4
1 change: 1 addition & 0 deletions pcfuncs/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ FROM mcr.microsoft.com/azure-functions/python:4-python3.8

# git required for pip installs from git
RUN apt update && apt install -y git
RUN curl -sL https://aka.ms/InstallAzureCLIDeb | bash

ENV AzureWebJobsScriptRoot=/home/site/wwwroot \
AzureFunctionsJobHost__Logging__Console__IsEnabled=true
Expand Down
29 changes: 29 additions & 0 deletions pcfuncs/ipban/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import datetime
import logging

import azure.functions as func
from azure.data.tables import TableServiceClient
from azure.identity import DefaultAzureCredential
from azure.monitor.query import LogsQueryClient

from .config import settings
from .models import UpdateBannedIPTask

logger = logging.getLogger(__name__)


def main(mytimer: func.TimerRequest) -> None:
    """Timer-triggered entry point that refreshes the banned-IP table.

    Builds a Log Analytics query client and a table client for the configured
    banned-IP table (creating the table if it does not exist yet), then runs
    ``UpdateBannedIPTask`` with them.

    Args:
        mytimer: The timer trigger payload supplied by the Functions host.
            It is not inspected here.
    """
    # datetime.utcnow() is deprecated; ask for an aware UTC timestamp directly.
    utc_timestamp: str = datetime.datetime.now(datetime.timezone.utc).isoformat()
    logger.info("Updating the ip ban list at %s", utc_timestamp)

    # The credential owns its own HTTP transport, so it is closed together
    # with the clients instead of being leaked on every timer invocation.
    with DefaultAzureCredential() as credential:
        with LogsQueryClient(credential) as logs_query_client:
            with TableServiceClient(
                endpoint=settings.storage_account_url, credential=credential
            ) as table_service_client:
                with table_service_client.create_table_if_not_exists(
                    settings.banned_ip_table
                ) as table_client:
                    task = UpdateBannedIPTask(logs_query_client, table_client)
                    task.run()
18 changes: 18 additions & 0 deletions pcfuncs/ipban/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# config.py
from pydantic import BaseSettings, Field


class Settings(BaseSettings):
    """Environment-driven configuration for the IP ban function."""

    # Connection settings: no defaults, so each must be supplied.
    storage_account_url: str = Field(env="STORAGE_ACCOUNT_URL")
    banned_ip_table: str = Field(env="BANNED_IP_TABLE")
    log_analytics_workspace_id: str = Field(env="LOG_ANALYTICS_WORKSPACE_ID")

    # Look-back window, in hours, for the blob log query.
    time_window_in_hours: int = Field(24, env="TIME_WINDOW_IN_HOURS")
    # Read volume, in GB, above which an IP address is listed.
    threshold_read_count_in_gb: int = Field(5120, env="THRESHOLD_READ_COUNT_IN_GB")


# Create a global settings instance
settings = Settings() # type: ignore
11 changes: 11 additions & 0 deletions pcfuncs/ipban/function.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"scriptFile": "__init__.py",
"bindings": [
{
"name": "mytimer",
"type": "timerTrigger",
"direction": "in",
"schedule": "0 */1 * * *"
}
]
}
70 changes: 70 additions & 0 deletions pcfuncs/ipban/models.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import logging
from typing import Any, List, Set

from azure.data.tables import TableClient, UpdateMode
from azure.monitor.query import LogsQueryClient
from azure.monitor.query._models import LogsTableRow

from .config import settings


class UpdateBannedIPTask:
    """Synchronise the banned-IP table with recent blob read activity.

    The task queries ``StorageBlobLogs`` for the data volume read per caller
    IP address over the configured time window, then makes the table mirror
    the set of addresses whose volume exceeds the configured threshold.
    """

    def __init__(
        self,
        logs_query_client: LogsQueryClient,
        table_client: TableClient,
    ) -> None:
        """Store the clients used to query logs and to write the table."""
        self.log_query_client = logs_query_client
        self.table_client = table_client

    def run(self) -> List[LogsTableRow]:
        """Query the logs, update the table, and return the query rows."""
        query_result: List[LogsTableRow] = self.get_blob_logs_query_result()
        # Lazy %-formatting: the rows are only rendered if the record is emitted.
        logging.info("Kusto query result: %s", query_result)
        self.update_banned_ips(query_result)
        return query_result

    def get_blob_logs_query_result(self) -> List[LogsTableRow]:
        """Return one row per public IP address that exceeded the threshold.

        Each row holds the IP address followed by its summed ``GetBlob``
        response size expressed in GB.

        Raises:
            RuntimeError: If the service returned only a partial result.
                The table is reconciled against the query output, so acting
                on incomplete data could wrongly remove existing entries.
        """
        query: str = (
            "\nStorageBlobLogs\n"
            f"| where TimeGenerated > ago({settings.time_window_in_hours}h)\n"
            '| extend IpAddress = tostring(split(CallerIpAddress, ":")[0])\n'
            "| where OperationName == 'GetBlob'\n"
            "| where not(ipv4_is_private(IpAddress))\n"
            "| summarize readcount = sum(ResponseBodySize) / (1024 * 1024 * 1024)\n"
            "by IpAddress\n"
            f"| where readcount > {settings.threshold_read_count_in_gb}\n"
        )
        response: Any = self.log_query_client.query_workspace(
            settings.log_analytics_workspace_id, query, timespan=None
        )
        # A partial result exposes `partial_data` / `partial_error` instead of
        # `tables`; fail with a clear message rather than an AttributeError.
        tables = getattr(response, "tables", None)
        if tables is None:
            partial_error = getattr(response, "partial_error", None)
            raise RuntimeError(
                "Log Analytics query returned a partial result; "
                f"refusing to update the ban list: {partial_error}"
            )
        return tables[0].rows

    def update_banned_ips(self, query_result: List[LogsTableRow]) -> None:
        """Make the table contain exactly the IP addresses in ``query_result``.

        Addresses already present are replaced with fresh values, new ones
        are inserted, and addresses absent from ``query_result`` are deleted.

        Args:
            query_result: Rows whose first element is the IP address and
                whose second element is the read count.
        """
        existing_ips = {
            entity["RowKey"] for entity in self.table_client.list_entities()
        }
        result_ips: Set[str] = set()
        for result in query_result:
            ip_address: str = result[0]
            read_count: int = int(result[1])
            result_ips.add(ip_address)
            # The IP address serves as both partition key and row key.
            entity = {
                "PartitionKey": ip_address,
                "RowKey": ip_address,
                "ReadCount": read_count,
                "Threshold": settings.threshold_read_count_in_gb,
                "TimeWindow": settings.time_window_in_hours,
            }

            if ip_address in existing_ips:
                self.table_client.update_entity(entity, mode=UpdateMode.REPLACE)
            else:
                self.table_client.create_entity(entity)

        # Remove addresses that no longer exceed the threshold.
        for ip_address in existing_ips - result_ips:
            self.table_client.delete_entity(
                partition_key=ip_address, row_key=ip_address
            )

        logging.info("IP ban list has been updated successfully")
3 changes: 2 additions & 1 deletion pcfuncs/requirements-deploy.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ pillow==10.3.0
pyproj==3.3.1
pydantic>=1.9,<2.0.0
rasterio==1.3.*

azure-monitor-query==1.3.0
pytest-mock==3.14.0
# The deploy process needs symlinks to bring in
# pctasks libraries. Symlink is created in deploy script
./pccommon_linked
3 changes: 2 additions & 1 deletion pcfuncs/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ pillow==10.3.0
pyproj==3.3.1
pydantic>=1.9,<2.0.0
rasterio==1.3.*

azure-monitor-query==1.3.0
pytest-mock==3.14.0
# Deployment needs to copy the local code into
# the app code directory, so requires a separate
# requirements file.
Expand Down
29 changes: 29 additions & 0 deletions pcfuncs/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from typing import List

import pytest


def pytest_addoption(parser: pytest.Parser) -> None:
    """Register the ``--no-integration`` command-line flag with pytest."""
    flag_help = "don't run integration tests"
    parser.addoption(
        "--no-integration", action="store_true", default=False, help=flag_help
    )


def pytest_configure(config: pytest.Config) -> None:
    """Declare the ``integration`` marker in pytest's ``markers`` ini setting."""
    marker_spec = "integration: mark as an integration test"
    config.addinivalue_line("markers", marker_spec)


def pytest_collection_modifyitems(
    config: pytest.Config, items: List[pytest.Item]
) -> None:
    """Skip tests marked ``integration`` when ``--no-integration`` is given.

    Args:
        config: The active pytest configuration, used to read the flag.
        items: The collected test items; matching items get a skip marker.
    """
    if not config.getoption("--no-integration"):
        return

    # The tests are skipped *because* the flag was passed, so the reason must
    # say so (the previous text claimed the flag was needed to run them).
    skip_integration = pytest.mark.skip(
        reason="integration tests skipped: --no-integration option was given"
    )
    for item in items:
        if "integration" in item.keywords:
            item.add_marker(skip_integration)
Empty file added pcfuncs/tests/ipban/__init__.py
Empty file.
Loading

0 comments on commit a08f05e

Please sign in to comment.