Merge branch 'main' into create-pull-request/patch
lcouzens committed Jul 4, 2024
2 parents 378d658 + bdd992d commit ebfd687
Showing 5 changed files with 231 additions and 24 deletions.
25 changes: 25 additions & 0 deletions dev/scripts/nise_ymls/ocp_on_gcp/gcp_static_data.yml
@@ -4,6 +4,7 @@ generators:
start_date: {{start_date}}
end_date: {{end_date}}
price: 2
sku_id: CF4E-A0C7-E3BF
usage.amount_in_pricing_units: 1
usage.pricing_unit: hour
currency: USD
@@ -12,6 +13,30 @@ generators:
resource.name: projects/nise-populator/instances/gcp_compute1
resource.global_name: //compute.googleapis.com/projects/nise-populator/zones/australia-southeast1-a/instances/3447398860992947181
labels: [{"environment": "clyde", "app":"winter", "version":"green", "kubernetes-io-cluster-c32se93c-73z3-3s3d-cs23-d3245sj45349": "owned"}]
- ComputeEngineGenerator:
start_date: {{start_date}}
end_date: {{end_date}}
price: 2
sku_id: BBF8-C07D-1DF4 #inbound data transfer
usage.amount_in_pricing_units: 50
currency: USD
instance_type: m2-megamem-416
location.region: australia-southeast1-a
resource.name: projects/nise-populator/instances/gcp_compute1
resource.global_name: //compute.googleapis.com/projects/nise-populator/zones/australia-southeast1-a/instances/3447398860992947181
labels: [{"environment": "clyde", "app":"winter", "version":"green", "kubernetes-io-cluster-c32se93c-73z3-3s3d-cs23-d3245sj45349": "owned"}]
- ComputeEngineGenerator:
start_date: {{start_date}}
end_date: {{end_date}}
price: 30
sku_id: 9DE9-9092-B3BC # outbound data transfer
usage.amount_in_pricing_units: 10
currency: USD
instance_type: m2-megamem-416
location.region: australia-southeast1-a
resource.name: projects/nise-populator/instances/gcp_compute1
resource.global_name: //compute.googleapis.com/projects/nise-populator/zones/australia-southeast1-a/instances/3447398860992947181
labels: [{"environment": "clyde", "app":"winter", "version":"green", "kubernetes-io-cluster-c32se93c-73z3-3s3d-cs23-d3245sj45349": "owned"}]
- ComputeEngineGenerator:
start_date: {{start_date}}
end_date: {{end_date}}
@@ -19,6 +19,8 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary (
source_uuid,
infrastructure_raw_cost,
infrastructure_project_raw_cost,
infrastructure_data_in_gigabytes,
infrastructure_data_out_gigabytes,
infrastructure_usage_cost,
supplementary_usage_cost,
pod_usage_cpu_core_hours,
@@ -65,6 +67,14 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary (
rp.provider_id as source_uuid,
sum(ocp_gcp.unblended_cost + ocp_gcp.markup_cost + ocp_gcp.credit_amount) AS infrastructure_raw_cost,
sum(ocp_gcp.unblended_cost + ocp_gcp.project_markup_cost + ocp_gcp.pod_credit) AS infrastructure_project_raw_cost,
CASE
WHEN upper(data_transfer_direction) = 'IN' THEN sum(infrastructure_data_in_gigabytes)
ELSE NULL
END as infrastructure_data_in_gigabytes,
CASE
WHEN upper(data_transfer_direction) = 'OUT' THEN sum(infrastructure_data_out_gigabytes)
ELSE NULL
END as infrastructure_data_out_gigabytes,
'{"cpu": 0.000000000, "memory": 0.000000000, "storage": 0.000000000}'::jsonb as infrastructure_usage_cost,
'{"cpu": 0.000000000, "memory": 0.000000000, "storage": 0.000000000}'::jsonb as supplementary_usage_cost,
0 as pod_usage_cpu_core_hours,
@@ -101,5 +111,6 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary (
ocp_gcp.persistentvolumeclaim,
ocp_gcp.resource_id,
ocp_gcp.pod_labels,
ocp_gcp.data_transfer_direction,
rp.provider_id
;
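
For reference only (not part of this commit; the helper name is illustrative), the two new columns behave like this per summary row — the grouping includes data_transfer_direction, so each row fills at most one of the columns and leaves the other NULL:

from typing import Optional, Tuple

def split_by_direction(direction: Optional[str], gigabytes: float) -> Tuple[Optional[float], Optional[float]]:
    # Python analogue of the CASE expressions: returns (data_in_gb, data_out_gb) for one row.
    if direction is not None and direction.upper() == "IN":
        return gigabytes, None
    if direction is not None and direction.upper() == "OUT":
        return None, gigabytes
    return None, None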
@@ -181,6 +181,7 @@ INSERT INTO hive.{{schema | sqlsafe}}.gcp_openshift_daily_resource_matched_temp
instance_type,
service_id,
service_alias,
data_transfer_direction,
sku_id,
sku_alias,
region,
@@ -205,6 +206,11 @@ SELECT cast(uuid() as varchar),
json_extract_scalar(json_parse(gcp.system_labels), '$["compute.googleapis.com/machine_spec"]') as instance_type,
gcp.service_id,
max(nullif(gcp.service_description, '')) as service_alias,
CASE
WHEN service_description = 'Compute Engine' AND STRPOS(lower(sku_description), 'data transfer in') != 0 THEN 'IN'
WHEN service_description = 'Compute Engine' AND STRPOS(lower(sku_description), 'data transfer') != 0 THEN 'OUT'
ELSE NULL
END as data_transfer_direction,
max(nullif(gcp.sku_id, '')) as sku_id,
max(nullif(gcp.sku_description, '')) as sku_alias,
gcp.location_region as region,
@@ -233,7 +239,8 @@ GROUP BY gcp.usage_start_time,
gcp.service_id,
gcp.location_region,
gcp.invoice_month,
gcp.labels
gcp.labels,
10 -- data transfer direction
;
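
As an illustration of the direction detection added above (a sketch, not project code; the function name is made up): only Compute Engine line items are considered, a SKU description containing "data transfer in" is tagged IN, any other SKU description containing "data transfer" is tagged OUT, and everything else stays NULL.

from typing import Optional

def data_transfer_direction(service_description: str, sku_description: str) -> Optional[str]:
    # Python analogue of the SQL CASE; STRPOS(...) != 0 means "substring present".
    if service_description != "Compute Engine":
        return None
    sku = sku_description.lower()
    if "data transfer in" in sku:
        return "IN"
    if "data transfer" in sku:  # checked second, so the remaining transfer SKUs count as outbound
        return "OUT"
    return None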

INSERT INTO hive.{{schema | sqlsafe}}.gcp_openshift_daily_tag_matched_temp (
@@ -438,6 +445,8 @@ WHERE ocp.source = {{ocp_source_uuid}}
AND gcp.ocp_source = {{ocp_source_uuid}}
AND gcp.year = {{year}}
AND gcp.month = {{month}}
-- Filter out Node Network Costs because they cannot be tied to namespace level
AND data_transfer_direction IS NULL
GROUP BY gcp.uuid, ocp.namespace, ocp.data_source, ocp.pod_labels, ocp.volume_labels
;

@@ -590,6 +599,7 @@ INSERT INTO hive.{{schema | sqlsafe}}.reporting_ocpgcpcostlineitem_project_daily
instance_type,
service_id,
service_alias,
data_transfer_direction,
sku_id,
sku_alias,
region,
@@ -657,6 +667,7 @@ SELECT pds.gcp_uuid,
instance_type,
service_id,
service_alias,
NULL as data_transfer_direction,
sku_id,
sku_alias,
region,
@@ -711,6 +722,110 @@ JOIN cte_rankings as r
WHERE pds.ocp_source = {{ocp_source_uuid}} AND pds.year = {{year}} AND pds.month = {{month}}
;

-- Network costs are currently not mapped to pod metrics
-- and are filtered out of the above SQL since that is grouped by namespace
-- and costs are split out by pod metrics, this puts all network costs per node
-- into a "Network unattributed" project with no cost split and one record per
-- data direction
INSERT INTO hive.{{schema | sqlsafe}}.reporting_ocpgcpcostlineitem_project_daily_summary (
gcp_uuid,
cluster_id,
cluster_alias,
data_source,
namespace,
node,
persistentvolumeclaim,
persistentvolume,
storageclass,
resource_id,
usage_start,
usage_end,
account_id,
project_id,
project_name,
instance_type,
service_id,
service_alias,
data_transfer_direction,
sku_id,
sku_alias,
region,
unit,
usage_amount,
currency,
invoice_month,
credit_amount,
unblended_cost,
markup_cost,
project_markup_cost,
pod_cost,
pod_credit,
tags,
cost_category_id,
gcp_source,
ocp_source,
year,
month,
day
)
SELECT gcp.uuid as gcp_uuid,
max(ocp.cluster_id) as cluster_id,
max(ocp.cluster_alias) as cluster_alias,
max(ocp.data_source),
'Network unattributed' as namespace,
ocp.node as node,
max(nullif(ocp.persistentvolumeclaim, '')) as persistentvolumeclaim,
max(nullif(ocp.persistentvolume, '')) as persistentvolume,
max(nullif(ocp.storageclass, '')) as storageclass,
max(ocp.resource_id) as resource_id,
max(gcp.usage_start) as usage_start,
max(gcp.usage_start) as usage_end,
max(gcp.account_id) as account_id,
max(gcp.project_id) as project_id,
max(gcp.project_name) as project_name,
max(instance_type) as instance_type,
max(nullif(gcp.service_id, '')) as service_id,
max(gcp.service_alias) as service_alias,
max(data_transfer_direction) as data_transfer_direction,
max(gcp.sku_id) as sku_id,
max(gcp.sku_alias) as sku_alias,
max(nullif(gcp.region, '')) as region,
max(gcp.unit) as unit,
max(gcp.usage_amount) as usage_amount,
max(gcp.currency) as currency,
max(gcp.invoice_month) as invoice_month,
max(gcp.credit_amount) as credit_amount,
max(gcp.unblended_cost) as unblended_cost,
max(gcp.unblended_cost * {{markup | sqlsafe}}) as markup_cost,
max(gcp.unblended_cost * {{markup | sqlsafe}}) AS project_markup_cost,
max(gcp.unblended_cost) AS pod_cost,
cast(NULL AS double) AS pod_credit,
max(gcp.labels) as tags,
max(ocp.cost_category_id) as cost_category_id,
{{gcp_source_uuid}} as gcp_source,
{{ocp_source_uuid}} as ocp_source,
cast(year(max(gcp.usage_start)) as varchar) as year,
cast(month(max(gcp.usage_start)) as varchar) as month,
cast(day(max(gcp.usage_start)) as varchar) as day
FROM hive.{{ schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp
JOIN hive.{{schema | sqlsafe}}.gcp_openshift_daily_resource_matched_temp as gcp
ON gcp.usage_start = ocp.usage_start
AND (
(strpos(gcp.resource_name, ocp.node) != 0 AND ocp.data_source='Pod')
)
WHERE ocp.source = {{ocp_source_uuid}}
AND ocp.year = {{year}}
AND lpad(ocp.month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters
AND ocp.day IN {{days | inclause}}
AND (ocp.resource_id IS NOT NULL AND ocp.resource_id != '')
AND gcp.ocp_source = {{ocp_source_uuid}}
AND gcp.year = {{year}}
AND gcp.month = {{month}}
-- Filter for Node Network Costs to tie them to the Network unattributed project
AND data_transfer_direction IS NOT NULL
GROUP BY gcp.uuid, ocp.node
;
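
Put differently (an illustrative sketch, not repo code): a matched GCP line item with a data transfer direction bypasses the pod-metric cost split and is summarized per node under the synthetic "Network unattributed" project, one record per direction.

from typing import Optional

def attribution_project(data_transfer_direction: Optional[str], namespace: str) -> str:
    # Where a matched line item lands in the project daily summary.
    if data_transfer_direction is None:
        return namespace  # regular attribution, split across pods by usage metrics
    return "Network unattributed"  # node-level network cost, no pod split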

INSERT INTO postgres.{{schema | sqlsafe}}.reporting_ocpgcpcostlineitem_project_daily_summary_p (
uuid,
report_period_id,
@@ -733,6 +848,9 @@ INSERT INTO postgres.{{schema | sqlsafe}}.reporting_ocpgcpcostlineitem_project_d
instance_type,
service_id,
service_alias,
infrastructure_data_in_gigabytes,
infrastructure_data_out_gigabytes,
data_transfer_direction,
sku_id,
sku_alias,
region,
@@ -771,6 +889,25 @@ SELECT uuid(),
instance_type,
service_id,
service_alias,
CASE
WHEN upper(data_transfer_direction) = 'IN' THEN
-- GCP uses gibibyte but we are tracking this field in gigabytes
CASE unit
WHEN 'gibibyte' THEN usage_amount * 1.07374
ELSE usage_amount
END
ELSE 0
END as infrastructure_data_in_gigabytes,
CASE
WHEN upper(data_transfer_direction) = 'OUT' THEN
-- GCP uses gibibyte but we are tracking this field in gigabytes
CASE unit
WHEN 'gibibyte' THEN usage_amount * 1.07374
ELSE usage_amount
END
ELSE 0
END as infrastructure_data_out_gigabytes,
data_transfer_direction as data_transfer_direction,
sku_id,
sku_alias,
region,
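
The 1.07374 factor above is a rounded 1024^3/1000^3 (1 gibibyte = 1.073741824 gigabytes). A minimal sketch of the same conversion, assuming only the 'gibibyte' unit needs rescaling and anything else is already in gigabytes:

GIB_TO_GB = 1024**3 / 1000**3  # 1.073741824; the SQL uses the rounded constant 1.07374

def usage_in_gigabytes(usage_amount: float, unit: str) -> float:
    # Convert a GCP data-transfer usage amount to gigabytes, mirroring the summary SQL.
    if unit == "gibibyte":
        return usage_amount * GIB_TO_GB
    return usage_amount  # other units pass through unchanged, as in the SQL's ELSE branch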
23 changes: 14 additions & 9 deletions koku/subs/subs_data_messenger.py
@@ -57,7 +57,7 @@ def __init__(self, context, schema_name, tracing_id):
self.org_id = subs_cust.org_id
self.download_path = mkdtemp(prefix="subs")
self.instance_map = {}
self.date_map = defaultdict(list)
self.date_map = defaultdict(dict)

def determine_azure_instance_and_tenant_id(self, row):
"""For Azure we have to query the instance id if its not provided by a tag and the tenant_id."""
@@ -222,18 +222,23 @@ def process_azure_row(self, row):
"""Process an Azure row into subs kafka messages."""
msg_count = 0
        # Azure can inexplicably generate strange records with a second entry per day
# so we track the resource ids we've seen for a specific day so we don't send a record twice
if self.date_map.get(row["subs_start_time"]) and row["subs_resource_id"] in self.date_map.get(
row["subs_start_time"]
):
return msg_count
self.date_map[row["subs_start_time"]].append(row["subs_resource_id"])
# these two values should sum to the total usage so we need to track what was already
# sent for a specific instance so we get the full usage amount
range_start = 0
resource_id = row["subs_resource_id"]
start_time = row["subs_start_time"]
usage = int(row["subs_usage_quantity"])
if self.date_map.get(start_time) and resource_id in self.date_map.get(start_time):
range_start = self.date_map.get(start_time).get(resource_id)
self.date_map[start_time] = {resource_id: usage + range_start}
instance_id, tenant_id = self.determine_azure_instance_and_tenant_id(row)
if not instance_id:
return msg_count
# Azure is daily records but subs need hourly records
start = parser.parse(row["subs_start_time"])
for i in range(int(row["subs_usage_quantity"])):
start = parser.parse(start_time)
# if data for the day was previously sent, start at hour following previous events
start = start + timedelta(hours=range_start)
for i in range(range_start, range_start + usage):
end = start + timedelta(hours=1)
subs_dict = self.build_azure_subs_dict(
instance_id,
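
A standalone sketch of the new bookkeeping (illustrative only; not the messenger code): date_map now maps each start time to a dict of resource id -> hours already sent, so a second Azure record for the same resource and day resumes at the next unsent hour instead of being skipped.

from collections import defaultdict
from datetime import timedelta
from dateutil import parser

date_map = defaultdict(dict)  # {start_time: {resource_id: hours already sent}}

def hourly_ranges(start_time, resource_id, usage):
    # Yield (start, end) for each hourly record, continuing after previously sent hours.
    range_start = date_map[start_time].get(resource_id, 0)
    date_map[start_time][resource_id] = range_start + usage
    start = parser.parse(start_time) + timedelta(hours=range_start)
    for _ in range(usage):
        yield start, start + timedelta(hours=1)
        start += timedelta(hours=1)

# A 12-hour record for 2024-07-01 covers 00:00-12:00; a later 1-hour record for the
# same resource then yields 12:00-13:00, matching expected_start in the updated test.
list(hourly_ranges("2024-07-01T00:00:00Z", "i-55555556", 12))
list(hourly_ranges("2024-07-01T00:00:00Z", "i-55555556", 1))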
57 changes: 43 additions & 14 deletions koku/subs/test/test_subs_data_messenger.py
@@ -536,35 +536,64 @@ def test_process_and_send_subs_message_azure_with_id(self, mock_reader, mock_pro
def test_process_and_send_subs_message_azure_time_already_processed(
self, mock_msg_builder, mock_reader, mock_producer, mock_remove, mock_azure_id
):
"""Tests that the functions are not called for a provider that has already processed."""
"""Tests that the start for the range is updated."""
mock_azure_id.return_value = ("expected", "expected")
mock_msg_builder.return_value = {"fake": "msg"}
upload_keys = ["fake_key"]
self.azure_messenger.date_map["2023-07-01T01:00:00Z"] = "i-55555556"
self.azure_messenger.date_map = {"2024-07-01T00:00:00Z": {"i-55555556": 12}}
instance = "expected"
account = "9999999999999"
vcpu = "2"
rhel_version = "7"
sla = "Premium"
usage = "Production"
role = "Red Hat Enterprise Linux Server"
conversion = "true"
addon_id = "ELS"
tenant_id = "expected"
expected_start = "2024-07-01T12:00:00+00:00"
expected_end = "2024-07-01T13:00:00+00:00"
mock_reader.return_value = [
{
"resourceid": "i-55555556",
"subs_start_time": "2023-07-01T01:00:00Z",
"subs_end_time": "2023-07-01T02:00:00Z",
"subs_start_time": "2024-07-01T00:00:00Z",
"subs_end_time": "2024-07-02T00:00:00Z",
"subs_resource_id": "i-55555556",
"subs_account": "9999999999999",
"subs_account": account,
"physical_cores": "1",
"subs_vcpu": "2",
"variant": "Server",
"subs_usage": "Production",
"subs_sla": "Premium",
"subs_role": "Red Hat Enterprise Linux Server",
"subs_product_ids": "479-70",
"subs_addon": "false",
"subs_instance": "",
"subs_usage": usage,
"subs_usage_quantity": "1",
"subs_sla": sla,
"subs_role": role,
"subs_rhel_version": rhel_version,
"subs_addon_id": addon_id,
"subs_instance": instance,
"subs_conversion": conversion,
"source": self.azure_provider.uuid,
"resourcegroup": "my-fake-rg",
}
]
mock_op = mock_open(read_data="x,y,z")
with patch("builtins.open", mock_op):
self.azure_messenger.process_and_send_subs_message(upload_keys)
mock_azure_id.assert_not_called()
mock_msg_builder.assert_not_called()
mock_producer.assert_not_called()
mock_azure_id.assert_called_once()
mock_msg_builder.assert_called_with(
instance,
account,
expected_start,
expected_end,
vcpu,
rhel_version,
sla,
usage,
role,
conversion,
addon_id,
tenant_id,
)
mock_producer.assert_called_once()

def test_determine_product_ids(self):
"""Test that different combinations of inputs result in expected product IDs"""
