support for node limits
mudverma authored and chaitanyaenr committed May 31, 2024
1 parent 57de376 commit 31266fb
Showing 1 changed file with 43 additions and 11 deletions.
54 changes: 43 additions & 11 deletions kraken/chaos_recommender/prometheus.py
@@ -17,16 +17,41 @@ def convert_data_to_dataframe(data, label):


 def convert_data(data, service):
+    result = {}
+    for entry in data:
+        pod_name = entry['metric']['pod']
+        value = entry['value'][1]
+        result[pod_name] = value
+    return result.get(service)
+
+
+def convert_data_limits(data, node_data, service, prometheus):
     result = {}
     for entry in data:
         pod_name = entry['metric']['pod']
         value = entry['value'][1]
         result[pod_name] = value
-    return result.get(service, '100000000000')  # pods with no limit defined can consume as many resources as they want, hence the very high default value
+    return result.get(service, get_node_capacity(node_data, service, prometheus))  # pods with no limit defined now fall back to the capacity of their node

+def get_node_capacity(node_data, pod_name, prometheus):
+
+    # Get the name of the node the pod is running on
+    query = f'kube_pod_info{{pod="{pod_name}"}}'
+    result = prometheus.custom_query(query)
+    if not result:
+        return None
+
+    node_name = result[0]['metric']['node']
+
+    # Match that node against the pre-fetched node capacity metrics
+    for item in node_data:
+        if item['metric']['node'] == node_name:
+            return item['value'][1]
+
+    # Last-resort fallback when the node is absent from node_data
+    return '1000000000'


-def save_utilization_to_file(utilization, filename):
+def save_utilization_to_file(utilization, filename, prometheus):
     merged_df = pd.DataFrame(columns=['namespace', 'service', 'CPU', 'CPU_LIMITS', 'MEM', 'MEM_LIMITS', 'NETWORK'])
     for namespace in utilization:
         # Loading utilization_data[] for namespace
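The new fallback chain is easiest to see end to end: convert_data_limits() prefers a pod's own limit and only then asks get_node_capacity() for the capacity of the node hosting the pod. A minimal sketch of that behavior follows; FakePrometheus and all sample values are invented for illustration and only mimic the list-of-dicts shape that prometheus_api_client's custom_query() returns.

# Sketch only: sample data shaped like custom_query() results.
node_data = [
    {'metric': {'node': 'worker-0'}, 'value': [1717142400, '4000']},
    {'metric': {'node': 'worker-1'}, 'value': [1717142400, '8000']},
]

class FakePrometheus:
    # Stand-in for the PrometheusConnect client the module uses.
    def custom_query(self, query):
        # Pretend every pod we ask about runs on worker-1.
        return [{'metric': {'node': 'worker-1'}, 'value': [1717142400, '1']}]

limits = [{'metric': {'pod': 'etcd-0'}, 'value': [1717142400, '500']}]

convert_data_limits(limits, node_data, 'etcd-0', FakePrometheus())    # '500': explicit limit wins
convert_data_limits(limits, node_data, 'router-1', FakePrometheus())  # '8000': worker-1 capacity

Note that dict.get() evaluates its default argument eagerly, so get_node_capacity() issues its kube_pod_info query even when the pod has an explicit limit; a conditional lookup would avoid that extra round trip.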
@@ -41,9 +66,9 @@ def save_utilization_to_file(utilization, filename):
         new_row_df = pd.DataFrame({
             "namespace": namespace, "service": s,
             "CPU": convert_data(utilization_data[0], s),
-            "CPU_LIMITS": convert_data(utilization_data[1], s),
+            "CPU_LIMITS": convert_data_limits(utilization_data[1], utilization_data[5], s, prometheus),
             "MEM": convert_data(utilization_data[2], s),
-            "MEM_LIMITS": convert_data(utilization_data[3], s),
+            "MEM_LIMITS": convert_data_limits(utilization_data[3], utilization_data[6], s, prometheus),
             "NETWORK": convert_data(utilization_data[4], s)}, index=[0])
         merged_df = pd.concat([merged_df, new_row_df], ignore_index=True)
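The two new arguments index into the widened utilization_data list built in fetch_utilization_from_prometheus() below. Spelled out as an annotation (positions taken from the list assignment in the last hunk):

# utilization_data positions after this commit:
#   [0] pod CPU usage        [1] pod CPU limits
#   [2] pod memory usage     [3] pod memory limits
#   [4] pod network bytes    [5] node CPU capacity    [6] node memory capacity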

@@ -55,11 +80,11 @@ def save_utilization_to_file(utilization, filename):
     merged_df['NETWORK'] = merged_df['NETWORK'].astype(str)

     # Extract integer part before the decimal point
-    merged_df['CPU'] = merged_df['CPU'].str.split('.').str[0]
-    merged_df['MEM'] = merged_df['MEM'].str.split('.').str[0]
-    merged_df['CPU_LIMITS'] = merged_df['CPU_LIMITS'].str.split('.').str[0]
-    merged_df['MEM_LIMITS'] = merged_df['MEM_LIMITS'].str.split('.').str[0]
-    merged_df['NETWORK'] = merged_df['NETWORK'].str.split('.').str[0]
+    #merged_df['CPU'] = merged_df['CPU'].str.split('.').str[0]
+    #merged_df['MEM'] = merged_df['MEM'].str.split('.').str[0]
+    #merged_df['CPU_LIMITS'] = merged_df['CPU_LIMITS'].str.split('.').str[0]
+    #merged_df['MEM_LIMITS'] = merged_df['MEM_LIMITS'].str.split('.').str[0]
+    #merged_df['NETWORK'] = merged_df['NETWORK'].str.split('.').str[0]

     merged_df.to_csv(filename, sep='\t', index=False)

@@ -84,20 +109,27 @@ def fetch_utilization_from_prometheus(prometheus_endpoint, auth_token,
         cpu_limits_query = '(sum by (pod) (kube_pod_container_resource_limits{resource="cpu", namespace="%s"}))*1000' % (namespace)
         cpu_limits_result = prometheus.custom_query(cpu_limits_query)

+        node_cpu_limits_query = 'kube_node_status_capacity{resource="cpu", unit="core"}*1000'
+        node_cpu_limits_result = prometheus.custom_query(node_cpu_limits_query)
+
         mem_query = 'sum by (pod) (avg_over_time(container_memory_usage_bytes{image!="", namespace="%s"}[%s]))' % (namespace, scrape_duration)
         mem_result = prometheus.custom_query(mem_query)

         mem_limits_query = 'sum by (pod) (kube_pod_container_resource_limits{resource="memory", namespace="%s"})' % (namespace)
         mem_limits_result = prometheus.custom_query(mem_limits_query)

+        node_mem_limits_query = 'kube_node_status_capacity{resource="memory", unit="byte"}'
+        node_mem_limits_result = prometheus.custom_query(node_mem_limits_query)
+
         network_query = 'sum by (pod) ((avg_over_time(container_network_transmit_bytes_total{namespace="%s"}[%s])) + \
                         (avg_over_time(container_network_receive_bytes_total{namespace="%s"}[%s])))' % (namespace, scrape_duration, namespace, scrape_duration)
         network_result = prometheus.custom_query(network_query)

-        utilization[namespace] = [cpu_result, cpu_limits_result, mem_result, mem_limits_result, network_result]
+        utilization[namespace] = [cpu_result, cpu_limits_result, mem_result, mem_limits_result, network_result, node_cpu_limits_result, node_mem_limits_result]
         queries[namespace] = json_queries(cpu_query, cpu_limits_query, mem_query, mem_limits_query, network_query)

-    save_utilization_to_file(utilization, saved_metrics_path)
+    save_utilization_to_file(utilization, saved_metrics_path, prometheus)

     return saved_metrics_path, queries
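To experiment with the two new capacity queries outside the recommender, a self-contained sketch using the same prometheus-api-client library the module relies on; the endpoint and token below are placeholders, not values from this commit.

from prometheus_api_client import PrometheusConnect

# Placeholder endpoint and token; substitute a reachable Prometheus instance.
prometheus = PrometheusConnect(
    url="https://prometheus.example.com",
    headers={"Authorization": "Bearer <token>"},
    disable_ssl=True,
)

# Node CPU capacity in millicores, as in node_cpu_limits_query above.
cpu_capacity = prometheus.custom_query('kube_node_status_capacity{resource="cpu", unit="core"}*1000')

# Node memory capacity in bytes, as in node_mem_limits_query above.
mem_capacity = prometheus.custom_query('kube_node_status_capacity{resource="memory", unit="byte"}')

# Each entry looks like {'metric': {..., 'node': <name>}, 'value': [<timestamp>, '<capacity>']}.
for item in cpu_capacity:
    print(item['metric']['node'], item['value'][1])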


