diff --git a/deploy/clowdapp.yaml b/deploy/clowdapp.yaml index fd33657a16..1066c07934 100644 --- a/deploy/clowdapp.yaml +++ b/deploy/clowdapp.yaml @@ -117,6 +117,8 @@ objects: value: ${CACHE_TIMEOUT} - name: TAG_ENABLED_LIMIT value: ${TAG_ENABLED_LIMIT} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} image: ${IMAGE}:${IMAGE_TAG} initContainers: - command: @@ -297,6 +299,8 @@ objects: value: ${ENABLE_S3_ARCHIVING} - name: PARQUET_PROCESSING_BATCH_SIZE value: ${PARQUET_PROCESSING_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY @@ -496,6 +500,8 @@ objects: value: ${TAG_ENABLED_LIMIT} - name: KAFKA_CONNECT value: ${KAFKA_CONNECT} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: PROMETHEUS_PUSHGATEWAY value: ${PROMETHEUS_PUSHGATEWAY} - name: UNLEASH_CACHE_DIR @@ -629,6 +635,8 @@ objects: value: ${ENABLE_S3_ARCHIVING} - name: PARQUET_PROCESSING_BATCH_SIZE value: ${PARQUET_PROCESSING_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY @@ -813,6 +821,8 @@ objects: value: ${RBAC_SERVICE_PATH} - name: RBAC_CACHE_TTL value: ${RBAC_CACHE_TTL} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: PROMETHEUS_MULTIPROC_DIR value: ${PROMETHEUS_DIR} - name: KOKU_ENABLE_SENTRY @@ -976,6 +986,8 @@ objects: value: ${RBAC_SERVICE_PATH} - name: RBAC_CACHE_TTL value: ${RBAC_CACHE_TTL} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: PROMETHEUS_MULTIPROC_DIR value: ${PROMETHEUS_DIR} - name: KOKU_ENABLE_SENTRY @@ -1120,6 +1132,8 @@ objects: value: ${ENABLE_S3_ARCHIVING} - name: PARQUET_PROCESSING_BATCH_SIZE value: ${PARQUET_PROCESSING_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY @@ -1296,6 +1310,8 @@ 
objects: value: ${ENABLE_S3_ARCHIVING} - name: PARQUET_PROCESSING_BATCH_SIZE value: ${PARQUET_PROCESSING_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY @@ -1478,6 +1494,8 @@ objects: value: ${ENABLE_S3_ARCHIVING} - name: PARQUET_PROCESSING_BATCH_SIZE value: ${PARQUET_PROCESSING_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY @@ -1662,6 +1680,8 @@ objects: value: ${PARQUET_PROCESSING_BATCH_SIZE} - name: PANDAS_COLUMN_BATCH_SIZE value: ${PANDAS_COLUMN_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY @@ -1848,6 +1868,8 @@ objects: value: ${PARQUET_PROCESSING_BATCH_SIZE} - name: PANDAS_COLUMN_BATCH_SIZE value: ${PANDAS_COLUMN_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY @@ -2032,6 +2054,8 @@ objects: value: ${ENABLE_S3_ARCHIVING} - name: PARQUET_PROCESSING_BATCH_SIZE value: ${PARQUET_PROCESSING_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY @@ -2216,6 +2240,8 @@ objects: value: ${ENABLE_S3_ARCHIVING} - name: PARQUET_PROCESSING_BATCH_SIZE value: ${PARQUET_PROCESSING_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY @@ -2402,6 +2428,8 @@ objects: value: ${PARQUET_PROCESSING_BATCH_SIZE} - name: PANDAS_COLUMN_BATCH_SIZE value: ${PANDAS_COLUMN_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY @@ -2592,6 +2620,8 @@ objects: value: 
${PARQUET_PROCESSING_BATCH_SIZE} - name: PANDAS_COLUMN_BATCH_SIZE value: ${PANDAS_COLUMN_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY @@ -2780,6 +2810,8 @@ objects: value: ${ENABLE_S3_ARCHIVING} - name: PARQUET_PROCESSING_BATCH_SIZE value: ${PARQUET_PROCESSING_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY @@ -2962,6 +2994,8 @@ objects: value: ${ENABLE_S3_ARCHIVING} - name: PARQUET_PROCESSING_BATCH_SIZE value: ${PARQUET_PROCESSING_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY @@ -3144,6 +3178,8 @@ objects: value: ${ENABLE_S3_ARCHIVING} - name: PARQUET_PROCESSING_BATCH_SIZE value: ${PARQUET_PROCESSING_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY @@ -3330,6 +3366,8 @@ objects: value: ${ENABLE_S3_ARCHIVING} - name: PARQUET_PROCESSING_BATCH_SIZE value: ${PARQUET_PROCESSING_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY @@ -3518,6 +3556,8 @@ objects: value: ${ENABLE_S3_ARCHIVING} - name: PARQUET_PROCESSING_BATCH_SIZE value: ${PARQUET_PROCESSING_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY @@ -3700,6 +3740,8 @@ objects: value: ${ENABLE_S3_ARCHIVING} - name: PARQUET_PROCESSING_BATCH_SIZE value: ${PARQUET_PROCESSING_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY @@ -3874,6 +3916,8 @@ objects: value: ${ENABLE_S3_ARCHIVING} - name: 
PARQUET_PROCESSING_BATCH_SIZE value: ${PARQUET_PROCESSING_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY @@ -4355,6 +4399,11 @@ parameters: name: PANDAS_COLUMN_BATCH_SIZE required: true value: "250" +- description: Enable copy-on-write in Pandas + displayName: Pandas copy-on-write + name: PANDAS_COPY_ON_WRITE + required: true + value: "1" - description: Processing batch size displayName: Processing batch size name: REPORT_PROCESSING_BATCH_SIZE diff --git a/deploy/kustomize/base/base.yaml b/deploy/kustomize/base/base.yaml index 8f42ba71dc..550ce518c2 100644 --- a/deploy/kustomize/base/base.yaml +++ b/deploy/kustomize/base/base.yaml @@ -422,6 +422,11 @@ parameters: name: PANDAS_COLUMN_BATCH_SIZE required: true value: "250" +- description: Enable copy-on-write in Pandas + displayName: Pandas copy-on-write + name: PANDAS_COPY_ON_WRITE + required: true + value: "1" - description: Processing batch size displayName: Processing batch size name: REPORT_PROCESSING_BATCH_SIZE diff --git a/deploy/kustomize/patches/koku.yaml b/deploy/kustomize/patches/koku.yaml index 8c20490aff..3bdff08f2f 100644 --- a/deploy/kustomize/patches/koku.yaml +++ b/deploy/kustomize/patches/koku.yaml @@ -115,6 +115,8 @@ value: ${CACHE_TIMEOUT} - name: TAG_ENABLED_LIMIT value: ${TAG_ENABLED_LIMIT} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} livenessProbe: httpGet: path: ${API_PATH_PREFIX}/v1/status/ diff --git a/deploy/kustomize/patches/listener.yaml b/deploy/kustomize/patches/listener.yaml index 80fae5e5c1..2f5d958139 100644 --- a/deploy/kustomize/patches/listener.yaml +++ b/deploy/kustomize/patches/listener.yaml @@ -55,6 +55,8 @@ value: ${ENABLE_S3_ARCHIVING} - name: PARQUET_PROCESSING_BATCH_SIZE value: ${PARQUET_PROCESSING_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY 
diff --git a/deploy/kustomize/patches/masu.yaml b/deploy/kustomize/patches/masu.yaml index 324e554c7c..222a3ace18 100644 --- a/deploy/kustomize/patches/masu.yaml +++ b/deploy/kustomize/patches/masu.yaml @@ -110,6 +110,8 @@ value: ${TAG_ENABLED_LIMIT} - name: KAFKA_CONNECT value: ${KAFKA_CONNECT} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: PROMETHEUS_PUSHGATEWAY value: ${PROMETHEUS_PUSHGATEWAY} - name: UNLEASH_CACHE_DIR diff --git a/deploy/kustomize/patches/scheduler.yaml b/deploy/kustomize/patches/scheduler.yaml index b4d19d3e21..8391265808 100644 --- a/deploy/kustomize/patches/scheduler.yaml +++ b/deploy/kustomize/patches/scheduler.yaml @@ -59,6 +59,8 @@ value: ${ENABLE_S3_ARCHIVING} - name: PARQUET_PROCESSING_BATCH_SIZE value: ${PARQUET_PROCESSING_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY diff --git a/deploy/kustomize/patches/sources-client.yaml b/deploy/kustomize/patches/sources-client.yaml index 8d107da5cb..926326e1ff 100644 --- a/deploy/kustomize/patches/sources-client.yaml +++ b/deploy/kustomize/patches/sources-client.yaml @@ -79,6 +79,8 @@ value: ${RBAC_SERVICE_PATH} - name: RBAC_CACHE_TTL value: ${RBAC_CACHE_TTL} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: PROMETHEUS_MULTIPROC_DIR value: ${PROMETHEUS_DIR} - name: KOKU_ENABLE_SENTRY diff --git a/deploy/kustomize/patches/sources-listener.yaml b/deploy/kustomize/patches/sources-listener.yaml index 444ad6c2fd..1155c0a95f 100644 --- a/deploy/kustomize/patches/sources-listener.yaml +++ b/deploy/kustomize/patches/sources-listener.yaml @@ -79,6 +79,8 @@ value: ${RBAC_SERVICE_PATH} - name: RBAC_CACHE_TTL value: ${RBAC_CACHE_TTL} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: PROMETHEUS_MULTIPROC_DIR value: ${PROMETHEUS_DIR} - name: KOKU_ENABLE_SENTRY diff --git a/deploy/kustomize/patches/worker-celery.yaml 
b/deploy/kustomize/patches/worker-celery.yaml index 3088e39b2f..40aa48a67c 100644 --- a/deploy/kustomize/patches/worker-celery.yaml +++ b/deploy/kustomize/patches/worker-celery.yaml @@ -61,6 +61,8 @@ value: ${ENABLE_S3_ARCHIVING} - name: PARQUET_PROCESSING_BATCH_SIZE value: ${PARQUET_PROCESSING_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY diff --git a/deploy/kustomize/patches/worker-cost-model-xl.yaml b/deploy/kustomize/patches/worker-cost-model-xl.yaml index b819c7cf34..3a2c0d624a 100644 --- a/deploy/kustomize/patches/worker-cost-model-xl.yaml +++ b/deploy/kustomize/patches/worker-cost-model-xl.yaml @@ -61,6 +61,8 @@ value: ${ENABLE_S3_ARCHIVING} - name: PARQUET_PROCESSING_BATCH_SIZE value: ${PARQUET_PROCESSING_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY diff --git a/deploy/kustomize/patches/worker-cost-model.yaml b/deploy/kustomize/patches/worker-cost-model.yaml index c183fb38cd..53843f5e5d 100644 --- a/deploy/kustomize/patches/worker-cost-model.yaml +++ b/deploy/kustomize/patches/worker-cost-model.yaml @@ -61,6 +61,8 @@ value: ${ENABLE_S3_ARCHIVING} - name: PARQUET_PROCESSING_BATCH_SIZE value: ${PARQUET_PROCESSING_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY diff --git a/deploy/kustomize/patches/worker-download-xl.yaml b/deploy/kustomize/patches/worker-download-xl.yaml index 1a75c990ab..9e9487701b 100644 --- a/deploy/kustomize/patches/worker-download-xl.yaml +++ b/deploy/kustomize/patches/worker-download-xl.yaml @@ -63,6 +63,8 @@ value: ${PARQUET_PROCESSING_BATCH_SIZE} - name: PANDAS_COLUMN_BATCH_SIZE value: ${PANDAS_COLUMN_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: 
${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY diff --git a/deploy/kustomize/patches/worker-download.yaml b/deploy/kustomize/patches/worker-download.yaml index 5aaf37b5aa..0684e545f7 100644 --- a/deploy/kustomize/patches/worker-download.yaml +++ b/deploy/kustomize/patches/worker-download.yaml @@ -63,6 +63,8 @@ value: ${PARQUET_PROCESSING_BATCH_SIZE} - name: PANDAS_COLUMN_BATCH_SIZE value: ${PANDAS_COLUMN_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY diff --git a/deploy/kustomize/patches/worker-hcs.yaml b/deploy/kustomize/patches/worker-hcs.yaml index 0704ba63d2..c9c999cd5c 100644 --- a/deploy/kustomize/patches/worker-hcs.yaml +++ b/deploy/kustomize/patches/worker-hcs.yaml @@ -63,6 +63,8 @@ value: ${ENABLE_S3_ARCHIVING} - name: PARQUET_PROCESSING_BATCH_SIZE value: ${PARQUET_PROCESSING_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY diff --git a/deploy/kustomize/patches/worker-ocp-xl.yaml b/deploy/kustomize/patches/worker-ocp-xl.yaml index be4e779af2..99916a8361 100644 --- a/deploy/kustomize/patches/worker-ocp-xl.yaml +++ b/deploy/kustomize/patches/worker-ocp-xl.yaml @@ -61,6 +61,8 @@ value: ${ENABLE_S3_ARCHIVING} - name: PARQUET_PROCESSING_BATCH_SIZE value: ${PARQUET_PROCESSING_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY diff --git a/deploy/kustomize/patches/worker-ocp.yaml b/deploy/kustomize/patches/worker-ocp.yaml index b5eec46b87..776c38f988 100644 --- a/deploy/kustomize/patches/worker-ocp.yaml +++ b/deploy/kustomize/patches/worker-ocp.yaml @@ -61,6 +61,8 @@ value: ${ENABLE_S3_ARCHIVING} - name: PARQUET_PROCESSING_BATCH_SIZE value: ${PARQUET_PROCESSING_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: 
TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY diff --git a/deploy/kustomize/patches/worker-priority-xl.yaml b/deploy/kustomize/patches/worker-priority-xl.yaml index 095e60a09d..79bb766d45 100644 --- a/deploy/kustomize/patches/worker-priority-xl.yaml +++ b/deploy/kustomize/patches/worker-priority-xl.yaml @@ -63,6 +63,8 @@ value: ${PARQUET_PROCESSING_BATCH_SIZE} - name: PANDAS_COLUMN_BATCH_SIZE value: ${PANDAS_COLUMN_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY diff --git a/deploy/kustomize/patches/worker-priority.yaml b/deploy/kustomize/patches/worker-priority.yaml index 60b5d476e4..6e35fa44e5 100644 --- a/deploy/kustomize/patches/worker-priority.yaml +++ b/deploy/kustomize/patches/worker-priority.yaml @@ -63,6 +63,8 @@ value: ${PARQUET_PROCESSING_BATCH_SIZE} - name: PANDAS_COLUMN_BATCH_SIZE value: ${PANDAS_COLUMN_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY diff --git a/deploy/kustomize/patches/worker-refresh-xl.yaml b/deploy/kustomize/patches/worker-refresh-xl.yaml index dd42f06940..57b8ceeede 100644 --- a/deploy/kustomize/patches/worker-refresh-xl.yaml +++ b/deploy/kustomize/patches/worker-refresh-xl.yaml @@ -61,6 +61,8 @@ value: ${ENABLE_S3_ARCHIVING} - name: PARQUET_PROCESSING_BATCH_SIZE value: ${PARQUET_PROCESSING_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY diff --git a/deploy/kustomize/patches/worker-refresh.yaml b/deploy/kustomize/patches/worker-refresh.yaml index bcc458b889..c94e88b8a7 100644 --- a/deploy/kustomize/patches/worker-refresh.yaml +++ b/deploy/kustomize/patches/worker-refresh.yaml @@ -61,6 +61,8 @@ value: ${ENABLE_S3_ARCHIVING} - name: PARQUET_PROCESSING_BATCH_SIZE value: ${PARQUET_PROCESSING_BATCH_SIZE} 
+ - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY diff --git a/deploy/kustomize/patches/worker-subs-extraction.yaml b/deploy/kustomize/patches/worker-subs-extraction.yaml index 51a403aa27..43bda6d031 100644 --- a/deploy/kustomize/patches/worker-subs-extraction.yaml +++ b/deploy/kustomize/patches/worker-subs-extraction.yaml @@ -59,6 +59,8 @@ value: ${ENABLE_S3_ARCHIVING} - name: PARQUET_PROCESSING_BATCH_SIZE value: ${PARQUET_PROCESSING_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY diff --git a/deploy/kustomize/patches/worker-subs-transmission.yaml b/deploy/kustomize/patches/worker-subs-transmission.yaml index 244833fcc5..2090ea0cde 100644 --- a/deploy/kustomize/patches/worker-subs-transmission.yaml +++ b/deploy/kustomize/patches/worker-subs-transmission.yaml @@ -59,6 +59,8 @@ value: ${ENABLE_S3_ARCHIVING} - name: PARQUET_PROCESSING_BATCH_SIZE value: ${PARQUET_PROCESSING_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY diff --git a/deploy/kustomize/patches/worker-summary-xl.yaml b/deploy/kustomize/patches/worker-summary-xl.yaml index a05f113cdf..92e5d9e480 100644 --- a/deploy/kustomize/patches/worker-summary-xl.yaml +++ b/deploy/kustomize/patches/worker-summary-xl.yaml @@ -61,6 +61,8 @@ value: ${ENABLE_S3_ARCHIVING} - name: PARQUET_PROCESSING_BATCH_SIZE value: ${PARQUET_PROCESSING_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY diff --git a/deploy/kustomize/patches/worker-summary.yaml b/deploy/kustomize/patches/worker-summary.yaml index 3922dee54d..60ff4f227c 100644 --- a/deploy/kustomize/patches/worker-summary.yaml +++ b/deploy/kustomize/patches/worker-summary.yaml 
@@ -61,6 +61,8 @@ value: ${ENABLE_S3_ARCHIVING} - name: PARQUET_PROCESSING_BATCH_SIZE value: ${PARQUET_PROCESSING_BATCH_SIZE} + - name: PANDAS_COPY_ON_WRITE + value: ${PANDAS_COPY_ON_WRITE} - name: TRINO_DATE_STEP value: ${TRINO_DATE_STEP} - name: KOKU_ENABLE_SENTRY diff --git a/docker-compose.yml b/docker-compose.yml index 90924f967c..21085b0865 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -40,6 +40,7 @@ services: - REDIS_HOST=${REDIS_HOST-redis} - REDIS_PORT=${REDIS_PORT-6379} - RBAC_CACHE_TTL + - PANDAS_COPY_ON_WRITE=1 - PROMETHEUS_MULTIPROC_DIR=/tmp - API_PATH_PREFIX=${API_PATH_PREFIX-/api/cost-management} - MAX_GROUP_BY_OVERRIDE=${MAX_GROUP_BY_OVERRIDE-3} @@ -107,6 +108,7 @@ services: - REDIS_HOST=${REDIS_HOST-redis} - REDIS_PORT=${REDIS_PORT-6379} - RBAC_CACHE_TTL + - PANDAS_COPY_ON_WRITE=1 - PROMETHEUS_MULTIPROC_DIR=/tmp - API_PATH_PREFIX=${API_PATH_PREFIX-/api/cost-management} - ACCOUNT_ENHANCED_METRICS=${ACCOUNT_ENHANCED_METRICS-False} @@ -172,6 +174,7 @@ services: - DJANGO_LOG_LEVEL=${DJANGO_LOG_LEVEL-INFO} - UNLEASH_LOG_LEVEL=${UNLEASH_LOG_LEVEL-WARNING} - DJANGO_SETTINGS_MODULE=koku.settings + - PANDAS_COPY_ON_WRITE=1 - PROMETHEUS_MULTIPROC_DIR=/tmp - PROMETHEUS_PUSHGATEWAY=${PROMETHEUS_PUSHGATEWAY-pushgateway:9091} - ENABLE_S3_ARCHIVING=${ENABLE_S3_ARCHIVING-False} @@ -263,6 +266,7 @@ services: - INSIGHTS_KAFKA_HOST=kafka - INSIGHTS_KAFKA_PORT=29092 - KAFKA_CONNECT=True + - PANDAS_COPY_ON_WRITE=1 - PROMETHEUS_MULTIPROC_DIR=/tmp - MASU_DATE_OVERRIDE - KOKU_LOG_LEVEL=${KOKU_LOG_LEVEL-INFO} @@ -322,6 +326,7 @@ services: - REDIS_PORT=${REDIS_PORT-6379} - INSIGHTS_KAFKA_HOST=${INSIGHTS_KAFKA_HOST-kafka} - INSIGHTS_KAFKA_PORT=${INSIGHTS_KAFKA_PORT-29092} + - PANDAS_COPY_ON_WRITE=1 - PROMETHEUS_MULTIPROC_DIR=/tmp - KOKU_LOG_LEVEL=${KOKU_LOG_LEVEL-DEBUG} - DJANGO_LOG_LEVEL=${DJANGO_LOG_LEVEL-INFO} @@ -379,6 +384,7 @@ services: - OCI_CLI_KEY_FILE=${OCI_CLI_KEY_FILE-} - OCI_PYTHON_SDK_NO_SERVICE_IMPORTS=True - DEMO_ACCOUNTS + - 
PANDAS_COPY_ON_WRITE=1 - PROMETHEUS_MULTIPROC_DIR=/tmp - KOKU_LOG_LEVEL=${KOKU_LOG_LEVEL-DEBUG} - DJANGO_LOG_LEVEL=${DJANGO_LOG_LEVEL-INFO} @@ -425,6 +431,7 @@ services: - REDIS_PORT=${REDIS_PORT-6379} - LOG_LEVEL=INFO - DJANGO_SETTINGS_MODULE=koku.settings + - PANDAS_COPY_ON_WRITE=1 - PROMETHEUS_MULTIPROC_DIR=/tmp - SCHEDULE_REPORT_CHECKS=True - SOURCE_STATUS_FREQUENCY_MINUTES diff --git a/koku/masu/util/gcp/gcp_post_processor.py b/koku/masu/util/gcp/gcp_post_processor.py index d4c8decf8f..e9c8e7839a 100644 --- a/koku/masu/util/gcp/gcp_post_processor.py +++ b/koku/masu/util/gcp/gcp_post_processor.py @@ -127,7 +127,8 @@ def _generate_daily_data(self, data_frame): rollup_frame["credits"] = rollup_frame["credits"].apply(json.loads) rollup_frame["daily_credits"] = 0.0 for i, credit_dict in enumerate(rollup_frame["credits"]): - rollup_frame["daily_credits"][i] = credit_dict.get("amount", 0.0) + rollup_frame.loc[i, "daily_credits"] = credit_dict.get("amount", 0.0) + resource_df = rollup_frame.get("resource_name") try: if not resource_df: