-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathnewcwv.patch
351 lines (335 loc) · 10.3 KB
/
newcwv.patch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 2399468..fb82343 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -323,6 +323,19 @@ struct tcp_sock {
* socket. Used to retransmit SYNACKs etc.
*/
struct request_sock *fastopen_rsk;
+
+/* TCP newcwv related information */
+ u32 psample[4]; /* pipeACK samples circular buffer */
+ u32 time_stamp[4];
+ u32 head; /* index for psample buffer */
+
+ u32 pipeack; /* pipeACK value after filtering */
+ u32 loss_flight_size; /* flightsize at loss detection */
+ u32 prior_retrans; /* Retransmission before going into FR */
+ u32 prev_snd_una; /* snd_una of last record */
+ u32 prev_snd_nxt; /* snd_nxt of last record */
+ u32 cwnd_valid_ts; /* last time cwnd was found valid */
+
};
enum tsq_flags {
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 87d8774..6d71de5 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -235,6 +235,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
*/
#define TFO_SERVER_ALWAYS 0x1000
+/* NewCWV defaults */
+#define NCWV_UNDEF 0xFFFFFFFF
+#define NCWV_FIVEMINS (HZ*300)
+
extern struct inet_timewait_death_row tcp_death_row;
/* sysctl variables for tcp */
@@ -284,6 +288,7 @@ extern int sysctl_tcp_challenge_ack_limit;
extern unsigned int sysctl_tcp_notsent_lowat;
extern int sysctl_tcp_min_tso_segs;
extern int sysctl_tcp_autocorking;
+extern int sysctl_tcp_newcwv;
extern atomic_long_t tcp_memory_allocated;
extern struct percpu_counter tcp_sockets_allocated;
@@ -975,7 +980,7 @@ static inline u32 tcp_wnd_end(const struct tcp_sock *tp)
{
return tp->snd_una + tp->snd_wnd;
}
-bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight);
+bool tcp_is_cwnd_limited(struct sock *sk, u32 in_flight);
static inline void tcp_check_probe_timer(struct sock *sk)
{
@@ -1324,6 +1329,13 @@ struct tcp_fastopen_context {
struct rcu_head rcu;
};
+/* TCP New CWV functions */
+void tcp_newcwv_update_pipeack(struct sock* sk);
+void tcp_newcwv_datalim_closedown(struct sock* sk);
+void tcp_newcwv_enter_recovery(struct sock* sk);
+void tcp_newcwv_end_recovery(struct sock* sk);
+void tcp_newcwv_reset(struct sock* sk);
+
/* write queue abstraction */
static inline void tcp_write_queue_purge(struct sock *sk)
{
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index f032688..f687746 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -10,7 +10,7 @@ obj-y := route.o inetpeer.o protocol.o \
tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \
tcp_offload.o datagram.o raw.o udp.o udplite.o \
udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \
- fib_frontend.o fib_semantics.o fib_trie.o \
+ fib_frontend.o fib_semantics.o fib_trie.o tcp_newcwv.o \
inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o
obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 44eba05..b7ed5b5 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -493,6 +493,13 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_dointvec
},
{
+ .procname = "tcp_newcwv",
+ .data = &sysctl_tcp_newcwv,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec
+ },
+ {
.procname = "tcp_reordering",
.data = &sysctl_tcp_reordering,
.maxlen = sizeof(int),
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index 2b9464c..6fb0719 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -93,6 +93,9 @@ void tcp_init_congestion_control(struct sock *sk)
rcu_read_unlock();
}
+ /* draft-ietf-tcpm-newcwv initial state */
+ tcp_newcwv_reset(sk);
+
if (icsk->icsk_ca_ops->init)
icsk->icsk_ca_ops->init(sk);
}
@@ -279,13 +282,17 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
/* RFC2861 Check whether we are limited by application or congestion window
* This is the inverse of cwnd check in tcp_tso_should_defer
*/
-bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
+bool tcp_is_cwnd_limited(struct sock *sk, u32 in_flight)
{
- const struct tcp_sock *tp = tcp_sk(sk);
+ struct tcp_sock *tp = tcp_sk(sk);
u32 left;
- if (in_flight >= tp->snd_cwnd)
+ /* draft-ietf-tcpm-newcwv relaxes conditions for growing cwnd */
+ if (in_flight >= tp->snd_cwnd || (sysctl_tcp_newcwv &&
+ tp->pipeack >= (tp->snd_cwnd*tp->mss_cache >> 1))) {
+ tp->cwnd_valid_ts = tcp_time_stamp;
return true;
+ }
left = tp->snd_cwnd - in_flight;
if (sk_can_gso(sk) &&
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d6b46eb..cf5675c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1927,6 +1927,7 @@ void tcp_enter_loss(struct sock *sk, int how)
tp->snd_cwnd = 1;
tp->snd_cwnd_cnt = 0;
tp->snd_cwnd_stamp = tcp_time_stamp;
+ tcp_newcwv_reset(sk);
tcp_clear_retrans_partial(tp);
@@ -2522,7 +2523,16 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk)
(tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
tp->snd_cwnd = tp->snd_ssthresh;
tp->snd_cwnd_stamp = tcp_time_stamp;
+
+ /* draft-ietf-tcpm-newcwv actions at the end of recovery */
+ if (sysctl_tcp_newcwv) {
+ if (tp->loss_flight_size)
+ tcp_newcwv_end_recovery(sk);
+ tcp_newcwv_reset(sk);
+ }
}
+ tp->loss_flight_size = 0;
+
tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR);
}
@@ -2666,6 +2676,11 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack)
tp->undo_marker = tp->snd_una;
tp->undo_retrans = tp->retrans_out;
+ /* draft-ietf-tcpm-newcwv reduction at start of recovery */
+ if (sysctl_tcp_newcwv &&
+ tp->pipeack <= (tp->snd_cwnd*tp->mss_cache >> 1))
+ tcp_newcwv_enter_recovery(sk);
+
if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) {
if (!ece_ack)
tp->prior_ssthresh = tcp_current_ssthresh(sk);
@@ -3450,6 +3465,11 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
sack_rtt_us);
acked -= tp->packets_out;
+ /* draft-ietf-tcpm-newcwv pipeack estimation */
+ if (sysctl_tcp_newcwv && icsk->icsk_ca_state <= TCP_CA_Disorder
+ && (flag & FLAG_DATA_ACKED))
+ tcp_newcwv_update_pipeack(sk);
+
/* Advance cwnd if state allows */
if (tcp_may_raise_cwnd(sk, flag))
tcp_cong_avoid(sk, ack, acked, prior_in_flight);
diff --git a/net/ipv4/tcp_newcwv.c b/net/ipv4/tcp_newcwv.c
new file mode 100644
index 0000000..e56fab0
--- /dev/null
+++ b/net/ipv4/tcp_newcwv.c
@@ -0,0 +1,142 @@
+/*
+ * New-CWV implementation for Linux: draft-ietf-tcpm-newcwv.txt
+ */
+
+#include <linux/module.h>
+#include <net/tcp.h>
+
+#define nextbin(x) (((x)+1) & 0x03)
+#define prevbin(x) (((x)-1) & 0x03)
+
+int sysctl_tcp_newcwv __read_mostly = 0;
+
+/* returns pipeack in MSSs */
+static u32 pa_pkts(struct tcp_sock* tp)
+{
+ if (tp->pipeack == NCWV_UNDEF || !tp->mss_cache)
+ return 0;
+
+ return (tp->pipeack)/tp->mss_cache;
+}
+
+/* initialies newcwv variables */
+void tcp_newcwv_reset(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ tp->prev_snd_una = tp->snd_una;
+ tp->prev_snd_nxt = tp->snd_nxt;
+ tp->cwnd_valid_ts = tcp_time_stamp;
+ tp->loss_flight_size = 0;
+
+ tp->head = 0;
+ tp->psample[0] = NCWV_UNDEF;
+ tp->pipeack = NCWV_UNDEF;
+}
+
+/* This fuction removes all the expired elements from the circular buffer
+ * and returns the maximum from the remaining elements
+ */
+static int remove_expired_element(struct tcp_sock *tp, u32 psp)
+{
+ int k = tp->head;
+ int tmp = tp->psample[tp->head];
+
+ while (tp->psample[k] != NCWV_UNDEF) {
+ /* remove expired */
+ if (tp->time_stamp[k] < tcp_time_stamp - psp) {
+ tp->psample[k] = NCWV_UNDEF;
+ continue;
+ }
+
+ /* search the maximum */
+ if (tp->psample[k] > tmp)
+ tmp = tp->psample[k];
+
+ k = prevbin(k);
+ if (k == tp->head)
+ return tmp;
+ }
+
+ return tmp;
+}
+
+/* updates pipeack when an ACK is received */
+void tcp_newcwv_update_pipeack(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ u32 tmp_pipeack, psp, h;
+
+ psp = max_t(u32, 3*usecs_to_jiffies(tp->srtt_us>>3), (u32) HZ);
+
+ if (tp->snd_una >= tp->prev_snd_nxt) {
+
+ /* now get a new pipeack sample */
+ tmp_pipeack = tp->snd_una - tp->prev_snd_una;
+ tp->prev_snd_una = tp->snd_una;
+ tp->prev_snd_nxt = tp->snd_nxt;
+
+ h = tp->head;
+
+ /* create a new element at the end of current pmp */
+ if (tp->psample[h] == NCWV_UNDEF ||
+ tcp_time_stamp > tp->time_stamp[h] + (psp >> 2)) {
+
+ /* Add an element to circular the buffer */
+ tp->head = nextbin(h);
+ tp->psample[tp->head] = tmp_pipeack;
+ tp->time_stamp[tp->head] = tcp_time_stamp;
+
+ } else if (tmp_pipeack > tp->psample[h])
+ tp->psample[h] = tmp_pipeack;
+ }
+
+ tp->pipeack = remove_expired_element(tp, psp);
+}
+
+/* newcwv actions at loss detection */
+void tcp_newcwv_enter_recovery(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ u32 pipe;
+
+ if (tp->pipeack == NCWV_UNDEF)
+ return;
+
+ tp->prior_retrans = tp->total_retrans;
+ tp->loss_flight_size = tcp_packets_in_flight(tp);
+
+ pipe = max_t(u32, pa_pkts(tp), tp->loss_flight_size);
+ tp->snd_cwnd = max_t(u32, pipe >> 1, 1UL);
+}
+
+/* newcwv actions at the end of recovery */
+void tcp_newcwv_end_recovery(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ u32 retrans, pipe;
+
+ retrans = tp->total_retrans - tp->prior_retrans;
+ pipe = max_t(u32, pa_pkts(tp), tp->loss_flight_size) - retrans;
+ tp->snd_ssthresh = max_t(u32, pipe >> 1, 1UL);
+ tp->snd_cwnd = tp->snd_ssthresh;
+}
+
+
+/* reduces the cwnd after 5mins of non-validated phase */
+void tcp_newcwv_datalim_closedown(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ while ((tcp_time_stamp - tp->cwnd_valid_ts) > NCWV_FIVEMINS &&
+ tp->snd_cwnd > TCP_INIT_CWND) {
+ tp->cwnd_valid_ts += NCWV_FIVEMINS;
+
+ tp->snd_ssthresh =
+ max_t(u32, (3 * tp->snd_cwnd) >> 2, tp->snd_ssthresh);
+ tp->snd_cwnd =
+ max_t(u32, tp->snd_cwnd >> 1, TCP_INIT_CWND);
+ }
+}
+
+
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 12d6016..e868211 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -168,6 +168,9 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
const u32 now = tcp_time_stamp;
const struct dst_entry *dst = __sk_dst_get(sk);
+ if (sysctl_tcp_newcwv)
+ tcp_newcwv_datalim_closedown(sk);
+
if (sysctl_tcp_slow_start_after_idle &&
(!tp->packets_out && (s32)(now - tp->lsndtime) > icsk->icsk_rto))
tcp_cwnd_restart(sk, __sk_dst_get(sk));