Merge branch 'main' into create-pull-request/patch
lcouzens committed Jul 4, 2024
2 parents 378d658 + bdd992d commit ebfd687
Showing 5 changed files with 231 additions and 24 deletions.
25 changes: 25 additions & 0 deletions dev/scripts/nise_ymls/ocp_on_gcp/gcp_static_data.yml
@@ -4,6 +4,7 @@ generators:
start_date: {{start_date}}
end_date: {{end_date}}
price: 2
sku_id: CF4E-A0C7-E3BF
usage.amount_in_pricing_units: 1
usage.pricing_unit: hour
currency: USD
@@ -12,6 +13,30 @@ generators:
resource.name: projects/nise-populator/instances/gcp_compute1
resource.global_name: //compute.googleapis.com/projects/nise-populator/zones/australia-southeast1-a/instances/3447398860992947181
labels: [{"environment": "clyde", "app":"winter", "version":"green", "kubernetes-io-cluster-c32se93c-73z3-3s3d-cs23-d3245sj45349": "owned"}]
- ComputeEngineGenerator:
start_date: {{start_date}}
end_date: {{end_date}}
price: 2
sku_id: BBF8-C07D-1DF4 #inbound data transfer
usage.amount_in_pricing_units: 50
currency: USD
instance_type: m2-megamem-416
location.region: australia-southeast1-a
resource.name: projects/nise-populator/instances/gcp_compute1
resource.global_name: //compute.googleapis.com/projects/nise-populator/zones/australia-southeast1-a/instances/3447398860992947181
labels: [{"environment": "clyde", "app":"winter", "version":"green", "kubernetes-io-cluster-c32se93c-73z3-3s3d-cs23-d3245sj45349": "owned"}]
- ComputeEngineGenerator:
start_date: {{start_date}}
end_date: {{end_date}}
price: 30
sku_id: 9DE9-9092-B3BC # outbound data transfer
usage.amount_in_pricing_units: 10
currency: USD
instance_type: m2-megamem-416
location.region: australia-southeast1-a
resource.name: projects/nise-populator/instances/gcp_compute1
resource.global_name: //compute.googleapis.com/projects/nise-populator/zones/australia-southeast1-a/instances/3447398860992947181
labels: [{"environment": "clyde", "app":"winter", "version":"green", "kubernetes-io-cluster-c32se93c-73z3-3s3d-cs23-d3245sj45349": "owned"}]
- ComputeEngineGenerator:
start_date: {{start_date}}
end_date: {{end_date}}
@@ -19,6 +19,8 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary (
source_uuid,
infrastructure_raw_cost,
infrastructure_project_raw_cost,
infrastructure_data_in_gigabytes,
infrastructure_data_out_gigabytes,
infrastructure_usage_cost,
supplementary_usage_cost,
pod_usage_cpu_core_hours,
@@ -65,6 +67,14 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary (
rp.provider_id as source_uuid,
sum(ocp_gcp.unblended_cost + ocp_gcp.markup_cost + ocp_gcp.credit_amount) AS infrastructure_raw_cost,
sum(ocp_gcp.unblended_cost + ocp_gcp.project_markup_cost + ocp_gcp.pod_credit) AS infrastructure_project_raw_cost,
CASE
WHEN upper(data_transfer_direction) = 'IN' THEN sum(infrastructure_data_in_gigabytes)
ELSE NULL
END as infrastructure_data_in_gigabytes,
CASE
WHEN upper(data_transfer_direction) = 'OUT' THEN sum(infrastructure_data_out_gigabytes)
ELSE NULL
END as infrastructure_data_out_gigabytes,
'{"cpu": 0.000000000, "memory": 0.000000000, "storage": 0.000000000}'::jsonb as infrastructure_usage_cost,
'{"cpu": 0.000000000, "memory": 0.000000000, "storage": 0.000000000}'::jsonb as supplementary_usage_cost,
0 as pod_usage_cpu_core_hours,
@@ -101,5 +111,6 @@ INSERT INTO {{schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary (
ocp_gcp.persistentvolumeclaim,
ocp_gcp.resource_id,
ocp_gcp.pod_labels,
ocp_gcp.data_transfer_direction,
rp.provider_id
;
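
For reference only (not part of this commit; the helper name is illustrative), the two new columns behave like this per summary row — the grouping includes data_transfer_direction, so each row fills at most one of the columns and leaves the other NULL:

from typing import Optional, Tuple

def split_by_direction(direction: Optional[str], gigabytes: float) -> Tuple[Optional[float], Optional[float]]:
    # Python analogue of the CASE expressions: returns (data_in_gb, data_out_gb) for one row.
    if direction is not None and direction.upper() == "IN":
        return gigabytes, None
    if direction is not None and direction.upper() == "OUT":
        return None, gigabytes
    return None, None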
@@ -181,6 +181,7 @@ INSERT INTO hive.{{schema | sqlsafe}}.gcp_openshift_daily_resource_matched_temp
instance_type,
service_id,
service_alias,
data_transfer_direction,
sku_id,
sku_alias,
region,
@@ -205,6 +206,11 @@ SELECT cast(uuid() as varchar),
json_extract_scalar(json_parse(gcp.system_labels), '$["compute.googleapis.com/machine_spec"]') as instance_type,
gcp.service_id,
max(nullif(gcp.service_description, '')) as service_alias,
CASE
WHEN service_description = 'Compute Engine' AND STRPOS(lower(sku_description), 'data transfer in') != 0 THEN 'IN'
WHEN service_description = 'Compute Engine' AND STRPOS(lower(sku_description), 'data transfer') != 0 THEN 'OUT'
ELSE NULL
END as data_transfer_direction,
max(nullif(gcp.sku_id, '')) as sku_id,
max(nullif(gcp.sku_description, '')) as sku_alias,
gcp.location_region as region,
@@ -233,7 +239,8 @@ GROUP BY gcp.usage_start_time,
gcp.service_id,
gcp.location_region,
gcp.invoice_month,
gcp.labels
gcp.labels,
10 -- data transfer direction
;
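
As an illustration of the direction detection added above (a sketch, not project code; the function name is made up): only Compute Engine line items are considered, a SKU description containing "data transfer in" is tagged IN, any other SKU description containing "data transfer" is tagged OUT, and everything else stays NULL.

from typing import Optional

def data_transfer_direction(service_description: str, sku_description: str) -> Optional[str]:
    # Python analogue of the SQL CASE; STRPOS(...) != 0 means "substring present".
    if service_description != "Compute Engine":
        return None
    sku = sku_description.lower()
    if "data transfer in" in sku:
        return "IN"
    if "data transfer" in sku:  # checked second, so the remaining transfer SKUs count as outbound
        return "OUT"
    return None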

INSERT INTO hive.{{schema | sqlsafe}}.gcp_openshift_daily_tag_matched_temp (
@@ -438,6 +445,8 @@ WHERE ocp.source = {{ocp_source_uuid}}
AND gcp.ocp_source = {{ocp_source_uuid}}
AND gcp.year = {{year}}
AND gcp.month = {{month}}
-- Filter out Node Network Costs because they cannot be tied to namespace level
AND data_transfer_direction IS NULL
GROUP BY gcp.uuid, ocp.namespace, ocp.data_source, ocp.pod_labels, ocp.volume_labels
;

@@ -590,6 +599,7 @@ INSERT INTO hive.{{schema | sqlsafe}}.reporting_ocpgcpcostlineitem_project_daily
instance_type,
service_id,
service_alias,
data_transfer_direction,
sku_id,
sku_alias,
region,
@@ -657,6 +667,7 @@ SELECT pds.gcp_uuid,
instance_type,
service_id,
service_alias,
NULL as data_transfer_direction,
sku_id,
sku_alias,
region,
@@ -711,6 +722,110 @@ JOIN cte_rankings as r
WHERE pds.ocp_source = {{ocp_source_uuid}} AND pds.year = {{year}} AND pds.month = {{month}}
;

-- Network costs are currently not mapped to pod metrics
-- and are filtered out of the above SQL since that is grouped by namespace
-- and costs are split out by pod metrics, this puts all network costs per node
-- into a "Network unattributed" project with no cost split and one record per
-- data direction
INSERT INTO hive.{{schema | sqlsafe}}.reporting_ocpgcpcostlineitem_project_daily_summary (
gcp_uuid,
cluster_id,
cluster_alias,
data_source,
namespace,
node,
persistentvolumeclaim,
persistentvolume,
storageclass,
resource_id,
usage_start,
usage_end,
account_id,
project_id,
project_name,
instance_type,
service_id,
service_alias,
data_transfer_direction,
sku_id,
sku_alias,
region,
unit,
usage_amount,
currency,
invoice_month,
credit_amount,
unblended_cost,
markup_cost,
project_markup_cost,
pod_cost,
pod_credit,
tags,
cost_category_id,
gcp_source,
ocp_source,
year,
month,
day
)
SELECT gcp.uuid as gcp_uuid,
max(ocp.cluster_id) as cluster_id,
max(ocp.cluster_alias) as cluster_alias,
max(ocp.data_source),
'Network unattributed' as namespace,
ocp.node as node,
max(nullif(ocp.persistentvolumeclaim, '')) as persistentvolumeclaim,
max(nullif(ocp.persistentvolume, '')) as persistentvolume,
max(nullif(ocp.storageclass, '')) as storageclass,
max(ocp.resource_id) as resource_id,
max(gcp.usage_start) as usage_start,
max(gcp.usage_start) as usage_end,
max(gcp.account_id) as account_id,
max(gcp.project_id) as project_id,
max(gcp.project_name) as project_name,
max(instance_type) as instance_type,
max(nullif(gcp.service_id, '')) as service_id,
max(gcp.service_alias) as service_alias,
max(data_transfer_direction) as data_transfer_direction,
max(gcp.sku_id) as sku_id,
max(gcp.sku_alias) as sku_alias,
max(nullif(gcp.region, '')) as region,
max(gcp.unit) as unit,
max(gcp.usage_amount) as usage_amount,
max(gcp.currency) as currency,
max(gcp.invoice_month) as invoice_month,
max(gcp.credit_amount) as credit_amount,
max(gcp.unblended_cost) as unblended_cost,
max(gcp.unblended_cost * {{markup | sqlsafe}}) as markup_cost,
max(gcp.unblended_cost * {{markup | sqlsafe}}) AS project_markup_cost,
max(gcp.unblended_cost) AS pod_cost,
cast(NULL AS double) AS pod_credit,
max(gcp.labels) as tags,
max(ocp.cost_category_id) as cost_category_id,
{{gcp_source_uuid}} as gcp_source,
{{ocp_source_uuid}} as ocp_source,
cast(year(max(gcp.usage_start)) as varchar) as year,
cast(month(max(gcp.usage_start)) as varchar) as month,
cast(day(max(gcp.usage_start)) as varchar) as day
FROM hive.{{ schema | sqlsafe}}.reporting_ocpusagelineitem_daily_summary as ocp
JOIN hive.{{schema | sqlsafe}}.gcp_openshift_daily_resource_matched_temp as gcp
ON gcp.usage_start = ocp.usage_start
AND (
(strpos(gcp.resource_name, ocp.node) != 0 AND ocp.data_source='Pod')
)
WHERE ocp.source = {{ocp_source_uuid}}
AND ocp.year = {{year}}
AND lpad(ocp.month, 2, '0') = {{month}} -- Zero pad the month when fewer than 2 characters
AND ocp.day IN {{days | inclause}}
AND (ocp.resource_id IS NOT NULL AND ocp.resource_id != '')
AND gcp.ocp_source = {{ocp_source_uuid}}
AND gcp.year = {{year}}
AND gcp.month = {{month}}
-- Filter for Node Network Costs to tie them to the Network unattributed project
AND data_transfer_direction IS NOT NULL
GROUP BY gcp.uuid, ocp.node
;
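
Put differently (an illustrative sketch, not repo code): a matched GCP line item with a data transfer direction bypasses the pod-metric cost split and is summarized per node under the synthetic "Network unattributed" project, one record per direction.

from typing import Optional

def attribution_project(data_transfer_direction: Optional[str], namespace: str) -> str:
    # Where a matched line item lands in the project daily summary.
    if data_transfer_direction is None:
        return namespace  # regular attribution, split across pods by usage metrics
    return "Network unattributed"  # node-level network cost, no pod split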

INSERT INTO postgres.{{schema | sqlsafe}}.reporting_ocpgcpcostlineitem_project_daily_summary_p (
uuid,
report_period_id,
@@ -733,6 +848,9 @@ INSERT INTO postgres.{{schema | sqlsafe}}.reporting_ocpgcpcostlineitem_project_d
instance_type,
service_id,
service_alias,
infrastructure_data_in_gigabytes,
infrastructure_data_out_gigabytes,
data_transfer_direction,
sku_id,
sku_alias,
region,
@@ -771,6 +889,25 @@ SELECT uuid(),
instance_type,
service_id,
service_alias,
CASE
WHEN upper(data_transfer_direction) = 'IN' THEN
-- GCP uses gibibyte but we are tracking this field in gigabytes
CASE unit
WHEN 'gibibyte' THEN usage_amount * 1.07374
ELSE usage_amount
END
ELSE 0
END as infrastructure_data_in_gigabytes,
CASE
WHEN upper(data_transfer_direction) = 'OUT' THEN
-- GCP uses gibibyte but we are tracking this field in gigabytes
CASE unit
WHEN 'gibibyte' THEN usage_amount * 1.07374
ELSE usage_amount
END
ELSE 0
END as infrastructure_data_out_gigabytes,
data_transfer_direction as data_transfer_direction,
sku_id,
sku_alias,
region,
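
The 1.07374 factor above is a rounded 1024^3/1000^3 (1 gibibyte = 1.073741824 gigabytes). A minimal sketch of the same conversion, assuming only the 'gibibyte' unit needs rescaling and anything else is already in gigabytes:

GIB_TO_GB = 1024**3 / 1000**3  # 1.073741824; the SQL uses the rounded constant 1.07374

def usage_in_gigabytes(usage_amount: float, unit: str) -> float:
    # Convert a GCP data-transfer usage amount to gigabytes, mirroring the summary SQL.
    if unit == "gibibyte":
        return usage_amount * GIB_TO_GB
    return usage_amount  # other units pass through unchanged, as in the SQL's ELSE branch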
23 changes: 14 additions & 9 deletions koku/subs/subs_data_messenger.py
@@ -57,7 +57,7 @@ def __init__(self, context, schema_name, tracing_id):
self.org_id = subs_cust.org_id
self.download_path = mkdtemp(prefix="subs")
self.instance_map = {}
self.date_map = defaultdict(list)
self.date_map = defaultdict(dict)

def determine_azure_instance_and_tenant_id(self, row):
"""For Azure we have to query the instance id if its not provided by a tag and the tenant_id."""
@@ -222,18 +222,23 @@ def process_azure_row(self, row):
"""Process an Azure row into subs kafka messages."""
msg_count = 0
        # Azure can inexplicably generate strange records with a second entry per day
# so we track the resource ids we've seen for a specific day so we don't send a record twice
if self.date_map.get(row["subs_start_time"]) and row["subs_resource_id"] in self.date_map.get(
row["subs_start_time"]
):
return msg_count
self.date_map[row["subs_start_time"]].append(row["subs_resource_id"])
# these two values should sum to the total usage so we need to track what was already
# sent for a specific instance so we get the full usage amount
range_start = 0
resource_id = row["subs_resource_id"]
start_time = row["subs_start_time"]
usage = int(row["subs_usage_quantity"])
if self.date_map.get(start_time) and resource_id in self.date_map.get(start_time):
range_start = self.date_map.get(start_time).get(resource_id)
self.date_map[start_time] = {resource_id: usage + range_start}
instance_id, tenant_id = self.determine_azure_instance_and_tenant_id(row)
if not instance_id:
return msg_count
# Azure is daily records but subs need hourly records
start = parser.parse(row["subs_start_time"])
for i in range(int(row["subs_usage_quantity"])):
start = parser.parse(start_time)
# if data for the day was previously sent, start at hour following previous events
start = start + timedelta(hours=range_start)
for i in range(range_start, range_start + usage):
end = start + timedelta(hours=1)
subs_dict = self.build_azure_subs_dict(
instance_id,
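
A standalone sketch of the new bookkeeping (illustrative only; not the messenger code): date_map now maps each start time to a dict of resource id -> hours already sent, so a second Azure record for the same resource and day resumes at the next unsent hour instead of being skipped.

from collections import defaultdict
from datetime import timedelta
from dateutil import parser

date_map = defaultdict(dict)  # {start_time: {resource_id: hours already sent}}

def hourly_ranges(start_time, resource_id, usage):
    # Yield (start, end) for each hourly record, continuing after previously sent hours.
    range_start = date_map[start_time].get(resource_id, 0)
    date_map[start_time][resource_id] = range_start + usage
    start = parser.parse(start_time) + timedelta(hours=range_start)
    for _ in range(usage):
        yield start, start + timedelta(hours=1)
        start += timedelta(hours=1)

# A 12-hour record for 2024-07-01 covers 00:00-12:00; a later 1-hour record for the
# same resource then yields 12:00-13:00, matching expected_start in the updated test.
list(hourly_ranges("2024-07-01T00:00:00Z", "i-55555556", 12))
list(hourly_ranges("2024-07-01T00:00:00Z", "i-55555556", 1))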
57 changes: 43 additions & 14 deletions koku/subs/test/test_subs_data_messenger.py
@@ -536,35 +536,64 @@ def test_process_and_send_subs_message_azure_with_id(self, mock_reader, mock_pro
def test_process_and_send_subs_message_azure_time_already_processed(
self, mock_msg_builder, mock_reader, mock_producer, mock_remove, mock_azure_id
):
"""Tests that the functions are not called for a provider that has already processed."""
"""Tests that the start for the range is updated."""
mock_azure_id.return_value = ("expected", "expected")
mock_msg_builder.return_value = {"fake": "msg"}
upload_keys = ["fake_key"]
self.azure_messenger.date_map["2023-07-01T01:00:00Z"] = "i-55555556"
self.azure_messenger.date_map = {"2024-07-01T00:00:00Z": {"i-55555556": 12}}
instance = "expected"
account = "9999999999999"
vcpu = "2"
rhel_version = "7"
sla = "Premium"
usage = "Production"
role = "Red Hat Enterprise Linux Server"
conversion = "true"
addon_id = "ELS"
tenant_id = "expected"
expected_start = "2024-07-01T12:00:00+00:00"
expected_end = "2024-07-01T13:00:00+00:00"
mock_reader.return_value = [
{
"resourceid": "i-55555556",
"subs_start_time": "2023-07-01T01:00:00Z",
"subs_end_time": "2023-07-01T02:00:00Z",
"subs_start_time": "2024-07-01T00:00:00Z",
"subs_end_time": "2024-07-02T00:00:00Z",
"subs_resource_id": "i-55555556",
"subs_account": "9999999999999",
"subs_account": account,
"physical_cores": "1",
"subs_vcpu": "2",
"variant": "Server",
"subs_usage": "Production",
"subs_sla": "Premium",
"subs_role": "Red Hat Enterprise Linux Server",
"subs_product_ids": "479-70",
"subs_addon": "false",
"subs_instance": "",
"subs_usage": usage,
"subs_usage_quantity": "1",
"subs_sla": sla,
"subs_role": role,
"subs_rhel_version": rhel_version,
"subs_addon_id": addon_id,
"subs_instance": instance,
"subs_conversion": conversion,
"source": self.azure_provider.uuid,
"resourcegroup": "my-fake-rg",
}
]
mock_op = mock_open(read_data="x,y,z")
with patch("builtins.open", mock_op):
self.azure_messenger.process_and_send_subs_message(upload_keys)
mock_azure_id.assert_not_called()
mock_msg_builder.assert_not_called()
mock_producer.assert_not_called()
mock_azure_id.assert_called_once()
mock_msg_builder.assert_called_with(
instance,
account,
expected_start,
expected_end,
vcpu,
rhel_version,
sla,
usage,
role,
conversion,
addon_id,
tenant_id,
)
mock_producer.assert_called_once()

def test_determine_product_ids(self):
"""Test that different combinations of inputs result in expected product IDs"""
