Skip to content

Commit

Permalink
[COST-4333] - Adding dtypes for Azure (#4817)
Browse files Browse the repository at this point in the history
* Update Azure Dtypes for Trino
  • Loading branch information
lcouzens committed Dec 5, 2023
1 parent 11f2b4d commit 7a33cb7
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 100 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@

WITH cte_azure_instances AS (
SELECT DISTINCT split_part(coalesce(azure.resourceid, azure.instanceid), '/', 9) as instance,
SELECT DISTINCT split_part(coalesce(nullif(azure.resourceid, ''), azure.instanceid), '/', 9) as instance,
azure.source
FROM hive.{{schema | sqlsafe}}.azure_line_items AS azure
WHERE coalesce(azure.date, azure.usagedatetime) >= {{start_date}}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,17 +21,17 @@ INSERT INTO postgres.{{schema | sqlsafe}}.reporting_azurecostentrylineitem_daily
WITH cte_line_items AS (
SELECT date(coalesce(date, usagedatetime)) as usage_date,
INTEGER '{{bill_id | sqlsafe}}' as cost_entry_bill_id,
coalesce(subscriptionid, subscriptionguid) as subscription_guid,
coalesce(nullif(subscriptionid, ''), subscriptionguid) as subscription_guid,
resourcelocation as resource_location,
coalesce(servicename, metercategory) as service_name,
coalesce(nullif(servicename, ''), metercategory) as service_name,
json_extract_scalar(json_parse(additionalinfo), '$.ServiceType') as instance_type,
cast(coalesce(quantity, usagequantity) as DECIMAL(24,9)) as usage_quantity,
cast(coalesce(costinbillingcurrency, pretaxcost) as DECIMAL(24,9)) as pretax_cost,
coalesce(billingcurrencycode, currency, billingcurrency) as currency,
cast(coalesce(nullif(quantity, 0), usagequantity) as DECIMAL(24,9)) as usage_quantity,
cast(coalesce(nullif(costinbillingcurrency, 0), pretaxcost) as DECIMAL(24,9)) as pretax_cost,
coalesce(nullif(billingcurrencycode, ''), nullif(currency, ''), billingcurrency) as currency,
json_parse(tags) as tags,
coalesce(resourceid, instanceid) as instance_id,
coalesce(nullif(resourceid, ''), instanceid) as instance_id,
cast(source as UUID) as source_uuid,
coalesce(subscriptionname, subscriptionid, subscriptionguid) as subscription_name,
coalesce(nullif(subscriptionname, ''), nullif(subscriptionid, ''), subscriptionguid) as subscription_name,
CASE
WHEN regexp_like(split_part(unitofmeasure, ' ', 1), '^\d+(\.\d+)?$') AND NOT (unitofmeasure = '100 Hours' AND metercategory='Virtual Machines') AND NOT split_part(unitofmeasure, ' ', 2) = ''
THEN cast(split_part(unitofmeasure, ' ', 1) as INTEGER)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -149,10 +149,10 @@ INSERT INTO hive.{{schema | sqlsafe}}.azure_openshift_daily_resource_matched_tem
)
SELECT cast(uuid() as varchar) as uuid,
coalesce(azure.date, azure.usagedatetime) as usage_start,
split_part(coalesce(resourceid, instanceid), '/', 9) as resource_id,
coalesce(servicename, metercategory) as service_name,
split_part(coalesce(nullif(resourceid, ''), instanceid), '/', 9) as resource_id,
coalesce(nullif(servicename, ''), metercategory) as service_name,
max(json_extract_scalar(json_parse(azure.additionalinfo), '$.ServiceType')) as instance_type,
coalesce(azure.subscriptionid, azure.subscriptionguid) as subscription_guid,
coalesce(nullif(azure.subscriptionid, ''), azure.subscriptionguid) as subscription_guid,
azure.resourcelocation as resource_location,
max(CASE
WHEN split_part(unitofmeasure, ' ', 2) = 'Hours'
Expand All @@ -163,9 +163,9 @@ SELECT cast(uuid() as varchar) as uuid,
THEN split_part(unitofmeasure, ' ', 2)
ELSE unitofmeasure
END) as unit_of_measure,
sum(coalesce(azure.quantity, azure.usagequantity)) as usage_quantity,
coalesce(azure.billingcurrencycode, azure.currency) as currency,
sum(coalesce(azure.costinbillingcurrency, azure.pretaxcost)) as pretax_cost,
sum(coalesce(nullif(azure.quantity, 0), azure.usagequantity)) as usage_quantity,
coalesce(nullif(azure.billingcurrencycode, ''), azure.currency) as currency,
sum(coalesce(nullif(azure.costinbillingcurrency, 0), azure.pretaxcost)) as pretax_cost,
azure.tags,
max(azure.resource_id_matched) as resource_id_matched,
{{ocp_source_uuid}} as ocp_source,
Expand All @@ -179,11 +179,11 @@ WHERE azure.source = {{azure_source_uuid}}
AND coalesce(azure.date, azure.usagedatetime) < date_add('day', 1, {{end_date}})
AND azure.resource_id_matched = TRUE
GROUP BY coalesce(azure.date, azure.usagedatetime),
split_part(coalesce(resourceid, instanceid), '/', 9),
coalesce(servicename, metercategory),
coalesce(subscriptionid, subscriptionguid),
split_part(coalesce(nullif(resourceid, ''), instanceid), '/', 9),
coalesce(nullif(servicename, ''), metercategory),
coalesce(nullif(subscriptionid, ''), subscriptionguid),
azure.resourcelocation,
coalesce(azure.billingcurrencycode, azure.currency),
coalesce(nullif(azure.billingcurrencycode, ''), azure.currency),
azure.tags
;

Expand Down Expand Up @@ -219,9 +219,9 @@ WITH cte_enabled_tag_keys AS (
SELECT cast(uuid() as varchar) as uuid,
coalesce(azure.date, azure.usagedatetime) as usage_start,
split_part(coalesce(resourceid, instanceid), '/', 9) as resource_id,
coalesce(servicename, metercategory) as service_name,
coalesce(nullif(servicename, ''), metercategory) as service_name,
max(json_extract_scalar(json_parse(azure.additionalinfo), '$.ServiceType')) as instance_type,
coalesce(azure.subscriptionid, azure.subscriptionguid) as subscription_guid,
coalesce(nullif(azure.subscriptionid, ''), azure.subscriptionguid) as subscription_guid,
azure.resourcelocation as resource_location,
max(CASE
WHEN split_part(unitofmeasure, ' ', 2) = 'Hours'
Expand All @@ -232,9 +232,9 @@ SELECT cast(uuid() as varchar) as uuid,
THEN split_part(unitofmeasure, ' ', 2)
ELSE unitofmeasure
END) as unit_of_measure,
sum(coalesce(azure.quantity, azure.usagequantity)) as usage_quantity,
coalesce(azure.billingcurrencycode, azure.currency) as currency,
sum(coalesce(azure.costinbillingcurrency, azure.pretaxcost)) as pretax_cost,
sum(coalesce(nullif(azure.quantity, 0), azure.usagequantity)) as usage_quantity,
coalesce(nullif(azure.billingcurrencycode, ''), azure.currency) as currency,
sum(coalesce(nullif(azure.costinbillingcurrency, 0), azure.pretaxcost)) as pretax_cost,
json_format(
cast(
map_filter(
Expand All @@ -257,10 +257,10 @@ WHERE azure.source = {{azure_source_uuid}}
AND (azure.resource_id_matched = FALSE OR azure.resource_id_matched IS NULL)
GROUP BY coalesce(azure.date, azure.usagedatetime),
split_part(coalesce(resourceid, instanceid), '/', 9),
coalesce(servicename, metercategory),
coalesce(subscriptionid, subscriptionguid),
coalesce(nullif(servicename, ''), metercategory),
coalesce(nullif(subscriptionid, ''), subscriptionguid),
azure.resourcelocation,
coalesce(azure.billingcurrencycode, azure.currency),
coalesce(nullif(azure.billingcurrencycode, ''), azure.currency),
12, -- tags
azure.matched_tag
;
Expand Down
6 changes: 3 additions & 3 deletions koku/masu/test/util/azure/test_azure_post_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from masu.util.azure.azure_post_processor import AzurePostProcessor
from masu.util.azure.common import INGRESS_REQUIRED_COLUMNS
from reporting.provider.all.models import EnabledTagKeys
from reporting.provider.azure.models import TRINO_COLUMNS
from reporting.provider.azure.models import TRINO_REQUIRED_COLUMNS


class TestAzurePostProcessor(MasuTestCase):
Expand Down Expand Up @@ -44,9 +44,9 @@ def test_azure_process_dataframe(self):
result, _ = self.post_processor.process_dataframe(df)
columns = list(result)
expected_columns = sorted(
col.replace("-", "_").replace("/", "_").replace(":", "_").lower() for col in TRINO_COLUMNS
col.replace("-", "_").replace("/", "_").replace(":", "_").lower() for col in TRINO_REQUIRED_COLUMNS
)
self.assertEqual(columns, expected_columns)
self.assertEqual(sorted(columns), sorted(expected_columns))

def test_azure_date_converter(self):
"""Test that we convert the new Azure date format."""
Expand Down
13 changes: 7 additions & 6 deletions koku/masu/util/azure/azure_post_processor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import logging

import ciso8601
import pandas
Expand All @@ -10,7 +11,9 @@
from masu.util.common import populate_enabled_tag_rows_with_limit
from masu.util.common import safe_float
from masu.util.common import strip_characters_from_column_name
from reporting.provider.azure.models import TRINO_COLUMNS
from reporting.provider.azure.models import TRINO_REQUIRED_COLUMNS

LOG = logging.getLogger(__name__)


def azure_json_converter(tag_str):
Expand Down Expand Up @@ -101,11 +104,9 @@ def process_dataframe(self, data_frame):

data_frame = data_frame.rename(columns=column_name_map)

columns = set(data_frame)
columns = set(TRINO_COLUMNS).union(columns)
columns = sorted(columns)

data_frame = data_frame.reindex(columns=columns)
missing = set(TRINO_REQUIRED_COLUMNS).difference(data_frame)
to_add = {k: TRINO_REQUIRED_COLUMNS[k] for k in missing}
data_frame = data_frame.assign(**to_add)

unique_tags = set()
for tags_json in data_frame["tags"].values:
Expand Down
131 changes: 66 additions & 65 deletions koku/reporting/provider/azure/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
"""Models for Azure cost and usage entry tables."""
from uuid import uuid4

import pandas as pd
from django.contrib.postgres.fields import ArrayField
from django.db import models
from django.db.models import JSONField
Expand All @@ -14,71 +15,71 @@
TRINO_LINE_ITEM_DAILY_TABLE = TRINO_LINE_ITEM_TABLE
TRINO_OCP_ON_AZURE_DAILY_TABLE = "azure_openshift_daily"

TRINO_COLUMNS = [
"billingperiodstartdate",
"billingperiodenddate",
"usagedatetime",
"date",
"accountname",
"accountownerid",
"additionalinfo",
"availabilityzone",
"billingaccountid",
"billingaccountname",
"billingcurrencycode",
"billingcurrency",
"billingprofileid",
"billingprofilename",
"chargetype",
"consumedservice",
"costcenter",
"costinbillingcurrency",
"currency",
"effectiveprice",
"frequency",
"instanceid",
"invoicesectionid",
"invoicesectionname",
"isazurecrediteligible",
"metercategory",
"meterid",
"metername",
"meterregion",
"metersubcategory",
"offerid",
"partnumber",
"paygprice",
"planname",
"pretaxcost",
"pricingmodel",
"productname",
"productorderid",
"productordername",
"publishername",
"publishertype",
"quantity",
"reservationid",
"reservationname",
"resourcegroup",
"resourceid",
"resourcelocation",
"resourcename",
"resourcerate",
"resourcetype",
"servicefamily",
"serviceinfo1",
"serviceinfo2",
"servicename",
"servicetier",
"subscriptionguid",
"subscriptionid",
"subscriptionname",
"tags",
"term",
"unitofmeasure",
"unitprice",
"usagequantity",
]
# Columns expected in the Azure line-item data, mapped to the default value
# used for a column that is absent from an incoming data frame.
# AzurePostProcessor.process_dataframe computes the keys missing from the
# frame and adds each one via DataFrame.assign with the value given here.
#
# Every default is one of: pd.NaT, an empty string, or 0.0.
# NOTE(review): because a filled-in column holds ""/0.0 rather than NULL, the
# Trino SQL that reads these columns wraps them in nullif(col, '') /
# nullif(col, 0) before coalesce-ing to the alternate column name -- keep the
# two in sync when adding or retyping a column here.
TRINO_REQUIRED_COLUMNS = {
# Defaults of pd.NaT.
"billingperiodstartdate": pd.NaT,
"billingperiodenddate": pd.NaT,
"usagedatetime": pd.NaT,
"date": pd.NaT,
# Remaining columns default to "" except where a 0.0 default is given.
"accountname": "",
"accountownerid": "",
"additionalinfo": "",
"availabilityzone": "",
"billingaccountid": "",
"billingaccountname": "",
"billingcurrencycode": "",
"billingcurrency": "",
"billingprofileid": "",
"billingprofilename": "",
"chargetype": "",
"consumedservice": "",
"costcenter": "",
"costinbillingcurrency": 0.0,
"currency": "",
"effectiveprice": 0.0,
"frequency": "",
"instanceid": "",
"invoicesectionid": "",
"invoicesectionname": "",
"isazurecrediteligible": "",
"metercategory": "",
"meterid": "",
"metername": "",
"meterregion": "",
"metersubcategory": "",
"offerid": "",
"partnumber": "",
"paygprice": 0.0,
"planname": "",
"pretaxcost": 0.0,
"pricingmodel": "",
"productname": "",
"productorderid": "",
"productordername": "",
"publishername": "",
"publishertype": "",
"quantity": 0.0,
"reservationid": "",
"reservationname": "",
"resourcegroup": "",
"resourceid": "",
"resourcelocation": "",
"resourcename": "",
"resourcerate": 0.0,
"resourcetype": "",
"servicefamily": "",
"serviceinfo1": "",
"serviceinfo2": "",
"servicename": "",
"servicetier": "",
"subscriptionguid": "",
"subscriptionid": "",
"subscriptionname": "",
"tags": "",
"term": "",
"unitofmeasure": "",
"unitprice": 0.0,
"usagequantity": 0.0,
}

UI_SUMMARY_TABLES = (
"reporting_azure_compute_summary_p",
Expand Down

0 comments on commit 7a33cb7

Please sign in to comment.