Skip to content

Commit

Permalink
fix: only rekey after timeout from first unreplied packet (#66)
Browse files Browse the repository at this point in the history
As other forks have already identified (e.g. [0]) and tracked in an
upstream issue ([1]), `boringtun` currently has a bug that makes it
perform spurious rekeys because it considers the session dead due to
"missing replies" from the remote.

The WireGuard spec forsees a passive keepalive mechanism that - in the
absence of an organic reply to a data message (i.e. an ICMP echo reply
to an ICMP echo request) - a WireGuard keepalive message is sent after
`KEEPALIVE_TIMEOUT` ([2]). Thus, we can assume that a healthy peer will
reply to every data packet, either naturally or with a keepalive. If
don't receive _any_ packet within `KEEPALIVE_TIMEOUT + REKEY_TIMEOUT`
after sending one, the session should be considered dead and we want to
initiate a new handshake.

At present, `boringtun` tracks this timer from when it last received a
packet. This is wrong because the last received packet may be quite old
and thus, as soon as we send a new packet, the "grace period" is
immediately reached not even giving the remote time to answer. This
results in the spurious rekeys mentioned in other forks and also
upstream.

To fix this, instead of just tracking whether we have sent unreplied
packets, we track when we have sent the first packet that hasn't seen a
reply yet. This allows us to correctly apply the grace period of
`KEEPALIVE_TIMEOUT + REKEY_TIMEOUT` and only invalidate the session if
the remote doesn't reply within that period.

[0]: NordSecurity#8
[1]: cloudflare#363
[2]: https://www.wireguard.com/papers/wireguard.pdf#subsection.6.5
  • Loading branch information
thomaseizinger authored Jan 18, 2025
1 parent a0a4ab0 commit 2f0467f
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 11 deletions.
83 changes: 82 additions & 1 deletion boringtun/src/noise/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -713,7 +713,8 @@ mod tests {

use super::*;
use rand::{rngs::OsRng, RngCore};
use timers::{MAX_JITTER, REJECT_AFTER_TIME};
use timers::{KEEPALIVE_TIMEOUT, MAX_JITTER, REJECT_AFTER_TIME};
use tracing::Level;

fn create_two_tuns(now: Instant) -> (Tunn, Tunn) {
let my_secret_key = x25519_dalek::StaticSecret::random_from_rng(OsRng);
Expand Down Expand Up @@ -997,4 +998,84 @@ mod tests {
};
assert_eq!(sent_packet_buf, recv_packet_buf);
}

#[test]
fn rekey_without_response() {
let _guard = tracing_subscriber::fmt()
.with_test_writer()
.with_max_level(Level::DEBUG)
.try_init();

let mut now = Instant::now();
let sent_packet_buf = create_ipv4_udp_packet();

let (mut my_tun, mut their_tun) = create_two_tuns_and_handshake(now);
let mut my_dst = [0u8; 1024];
let mut their_dst = [0u8; 1024];

now += Duration::from_secs(1);

// Simulate an application-level handshake.
let req = my_tun
.encapsulate_at(&sent_packet_buf, &mut my_dst, now)
.unwrap_network();
their_tun.decapsulate_at(None, req, &mut their_dst, now);
let res = their_tun
.encapsulate_at(&sent_packet_buf, &mut their_dst, now)
.unwrap_network();
my_tun.decapsulate_at(None, res, &mut my_dst, now);

// Idle the connection for 10s.
now += Duration::from_secs(10);

let first_unreplied_packet_sent = now;

// Start sending more traffic each second, this time without a reply.
for _ in 0..10 {
my_tun.encapsulate_at(&sent_packet_buf, &mut my_dst, now);
now += Duration::from_secs(1);

assert!(
matches!(my_tun.update_timers_at(&mut [], now), TunnResult::Done),
"No time based action should be necessary yet"
)
}

// Timeout should be from the first unreplied packet.
let rekey_at = first_unreplied_packet_sent + KEEPALIVE_TIMEOUT + REKEY_TIMEOUT;

// Trigger the creation of a handshake.
// Will be scheduled with 0..333ms
assert!(matches!(
my_tun.update_timers_at(&mut [], rekey_at),
TunnResult::Done
));

let TunnResult::WriteToNetwork(handshake) =
my_tun.update_timers_at(&mut my_dst, rekey_at + MAX_JITTER)
else {
panic!("Expected handshake")
};

assert!(matches!(
Tunn::parse_incoming_packet(handshake).unwrap(),
Packet::HandshakeInit(_)
));
}

impl<'a> TunnResult<'a> {
fn unwrap_network(self) -> &'a [u8] {
match self {
TunnResult::Done => panic!("Expected `WriteToNetwork` but was `Done`"),
TunnResult::Err(e) => panic!("Expected `WriteToNetwork` but was `Err({e:?})`"),
TunnResult::WriteToNetwork(d) => d,
TunnResult::WriteToTunnelV4(_, _) => {
panic!("Expected `WriteToNetwork` but was `WriteToTunnelV4`")
}
TunnResult::WriteToTunnelV6(_, _) => {
panic!("Expected `WriteToNetwork` but was `WriteToTunnelV6`")
}
}
}
}
}
35 changes: 25 additions & 10 deletions boringtun/src/noise/timers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ pub(crate) const REKEY_AFTER_TIME: Duration = Duration::from_secs(120);
pub(crate) const REJECT_AFTER_TIME: Duration = Duration::from_secs(180);
const REKEY_ATTEMPT_TIME: Duration = Duration::from_secs(90);
pub(crate) const REKEY_TIMEOUT: Duration = Duration::from_secs(5);
const KEEPALIVE_TIMEOUT: Duration = Duration::from_secs(10);
pub(crate) const KEEPALIVE_TIMEOUT: Duration = Duration::from_secs(10);
pub(crate) const COOKIE_EXPIRATION_TIME: Duration = Duration::from_secs(120);
pub(crate) const MAX_JITTER: Duration = Duration::from_millis(333);

Expand Down Expand Up @@ -49,7 +49,9 @@ pub struct Timers {
/// Did we receive data without sending anything back?
want_keepalive: bool,
/// Did we send data without hearing back?
want_handshake: bool,
///
/// If `Some`, tracks the timestamp of the _first_ packet without a reply.
want_handshake_since: Option<Instant>,
persistent_keepalive: usize,
/// Should this timer call reset rr function (if not a shared rr instance)
pub(super) should_reset_rr: bool,
Expand All @@ -70,7 +72,7 @@ impl Timers {
is_initiator: false,
timers: [now; TimerName::Top as usize],
want_keepalive: Default::default(),
want_handshake: Default::default(),
want_handshake_since: Default::default(),
persistent_keepalive: usize::from(persistent_keepalive.unwrap_or(0)),
should_reset_rr: reset_rr,
send_handshake_at: None,
Expand All @@ -88,7 +90,7 @@ impl Timers {
for t in &mut self.timers[..] {
*t = now;
}
self.want_handshake = false;
self.want_handshake_since = None;
self.want_keepalive = false;
}
}
Expand All @@ -111,12 +113,25 @@ impl Tunn {
match timer_name {
TimeLastPacketReceived => {
self.timers.want_keepalive = true;
self.timers.want_handshake = false;
self.timers.want_handshake_since = None;
}
TimeLastPacketSent => {
self.timers.want_handshake = true;
self.timers.want_keepalive = false;
}
TimeLastDataPacketSent => {
match self.timers.want_handshake_since {
Some(_) => {
// This isn't the first timer tick (i.e. not the first packet)
// we haven't received a response to.
}
None => {
// We sent a packet and haven't heard back yet.
// Track the current time so we know when to expire
// the session.
self.timers.want_handshake_since = Some(now)
}
}
}
_ => {}
}

Expand Down Expand Up @@ -202,7 +217,6 @@ impl Tunn {
// Load timers only once:
let session_established = self.timers[TimeSessionEstablished];
let handshake_started = self.timers[TimeLastHandshakeStarted];
let aut_packet_received = self.timers[TimeLastPacketReceived];
let aut_packet_sent = self.timers[TimeLastPacketSent];
let data_packet_received = self.timers[TimeLastDataPacketReceived];
let data_packet_sent = self.timers[TimeLastDataPacketSent];
Expand Down Expand Up @@ -285,9 +299,10 @@ impl Tunn {
// If we have sent a packet to a given peer but have not received a
// packet after from that peer for (KEEPALIVE + REKEY_TIMEOUT) ms,
// we initiate a new handshake.
if data_packet_sent > aut_packet_received
&& now - aut_packet_received >= KEEPALIVE_TIMEOUT + REKEY_TIMEOUT
&& mem::replace(&mut self.timers.want_handshake, false)
if self
.timers
.want_handshake_since
.is_some_and(|sent_at| now >= sent_at + KEEPALIVE_TIMEOUT + REKEY_TIMEOUT)
{
tracing::debug!("HANDSHAKE(KEEPALIVE + REKEY_TIMEOUT)");
handshake_initiation_required = true;
Expand Down

0 comments on commit 2f0467f

Please sign in to comment.