-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path3-flow-analysis.py
126 lines (107 loc) · 5 KB
/
3-flow-analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Designed for the 3-flow experiment: this program plots histograms of the first-in-batch time gaps, in 3 phases.
# Phase 1 has 1 flow, phase 2 has 2 flows, and phase 3 has 3 flows.
# The program outputs 3 histograms and 3 means.
# A time gap here is the difference between 2 consecutive first-in-batch packets.
def process_interval(drp_file, dest_port, start_time, end_time, batch_gap=0.34):
    """Return the time gaps between the starts of consecutive drop batches.

    The trace is read as a tab-separated, header-less table with the columns
    ``timestamp``, ``seq`` and ``dest_port``.  Rows are kept when their
    ``dest_port`` equals *dest_port* and their timestamp lies in
    ``[start_time, end_time]`` (both bounds inclusive).  The remaining rows
    are ordered by timestamp and split into batches: a row opens a new batch
    when it follows the previous row by strictly more than *batch_gap*.

    Args:
        drp_file: Path or file-like object accepted by ``pandas.read_csv``.
        dest_port: Value of the ``dest_port`` column to keep.
        start_time: Inclusive lower bound on the timestamp.
        end_time: Inclusive upper bound on the timestamp, or ``None`` for
            no upper bound.
        batch_gap: Separation between two consecutive rows (in the same
            units as the timestamps) above which a new batch begins.
            Defaults to 0.34, the threshold that used to be hard-coded.

    Returns:
        A list of floats, one per pair of consecutive batches, holding the
        difference between their first timestamps.  The list is empty when
        fewer than two batches are found.
    """
    df = pd.read_csv(drp_file, sep='\t', header=None,
                     names=['timestamp', 'seq', 'dest_port'])

    # Build the row filter once; the upper bound is optional.
    in_window = (df['dest_port'] == dest_port) & (df['timestamp'] >= start_time)
    if end_time is not None:
        in_window &= df['timestamp'] <= end_time

    # Only the timestamps are needed to locate batch starts.
    timestamps = np.sort(df.loc[in_window, 'timestamp'].to_numpy(dtype=float))
    if timestamps.size == 0:
        return []

    # A row starts a batch when it trails its predecessor by more than
    # batch_gap.  The first row has no predecessor and always starts one.
    starts_batch = np.diff(timestamps, prepend=timestamps[0]) > batch_gap
    starts_batch[0] = True

    batch_starts = timestamps[starts_batch]
    return np.diff(batch_starts).tolist()
def plot_time_gap_distribution(time_gaps, title):
    """Print the time gaps and show their histogram with the mean marked.

    Args:
        time_gaps: Sequence of time gaps between batches to plot.
        title: Title placed above the histogram.
    """
    # Dump the raw values followed by a blank line.
    print(time_gaps)
    print()

    _, ax = plt.subplots(figsize=(10, 6))
    ax.hist(time_gaps, bins=20, edgecolor='black', alpha=0.7)

    # Mark the mean with a dashed vertical line and a label beside it.
    average_gap = np.mean(time_gaps)
    ax.axvline(average_gap, color='red', linestyle='dashed', linewidth=1)
    _, y_top = ax.get_ylim()
    ax.text(average_gap * 1.05, y_top * 0.9,
            f'Mean: {average_gap:.2f}s', color='red')

    ax.set_title(title)
    ax.set_xlabel('Time Gap Between Batches (s)')
    ax.set_ylabel('Frequency')
    plt.show()
def main():
    """Plot the batch time-gap histogram of one flow for each time window.

    For every (start, end) window, the gaps are computed with
    ``process_interval`` and handed to ``plot_time_gap_distribution``; a
    window that yields no gaps only prints a notice.
    """
    drp_file = 'CD-multiflow-drp.tr'  # Replace with your actual drp.tr file path
    dest_port = 50000
    # (start, end) windows; an end of None means "until the end of the trace".
    # NOTE(review): presumably one window per experiment phase -- confirm.
    intervals = [(20, 59.5), (80, 119.5), (140, None)]

    for start_time, end_time in intervals:
        time_gaps = process_interval(drp_file, dest_port, start_time, end_time)
        if len(time_gaps) != 0:
            title = f'Time Gap Distribution for Flow {dest_port}\nfrom {start_time}s to {end_time if end_time else "end"}s'
            plot_time_gap_distribution(time_gaps, title)
        else:
            print(f"No time gaps found for flow {dest_port} between {start_time}s and {end_time if end_time else 'end'}s.")
# Run the analysis only when this file is executed as a script.
if __name__ == '__main__':
    main()