From 3357848354a335fc27ef5c791fc4da82bb9539b3 Mon Sep 17 00:00:00 2001 From: Christian Mazakas Date: Thu, 12 Sep 2024 15:51:54 -0700 Subject: [PATCH 01/27] add drop_only tests --- libsafecxx/single-header/std2.h | 5 ++--- libsafecxx/test/arc_test.cxx | 24 ++++++++++++++++++++++++ libsafecxx/test/box_test.cxx | 14 ++++++++++++++ libsafecxx/test/helpers.h | 2 ++ libsafecxx/test/rc_test.cxx | 15 +++++++++++++++ libsafecxx/test/vector_test.cxx | 14 ++++++++++++++ 6 files changed, 71 insertions(+), 3 deletions(-) create mode 100644 libsafecxx/test/arc_test.cxx diff --git a/libsafecxx/single-header/std2.h b/libsafecxx/single-header/std2.h index 1f4f6e4..9d404ca 100644 --- a/libsafecxx/single-header/std2.h +++ b/libsafecxx/single-header/std2.h @@ -1684,8 +1684,7 @@ class vector auto* pos = self^.data(); while (pos < end) { - auto t = __rel_read(pos); - drp t; + std::destroy_at(pos); ++pos; } @@ -1785,7 +1784,7 @@ class vector value_type* unsafe p_; size_type capacity_; size_type size_; - // value_type __phantom_data; + value_type __phantom_data; }; template diff --git a/libsafecxx/test/arc_test.cxx b/libsafecxx/test/arc_test.cxx new file mode 100644 index 0000000..0088721 --- /dev/null +++ b/libsafecxx/test/arc_test.cxx @@ -0,0 +1,24 @@ +#feature on safety + +#include +#include "helpers.h" + +void drop_only() safe +{ + { + std2::arc p; + { + std2::string s("hello, world!"); + + // TODO: re-enable this test once we get pointer variance working + // p = std2::arc(s.str()); + // assert_true(*p == "hello, world!"sv2); + } + } +} + + +int main() safe +{ + drop_only(); +} diff --git a/libsafecxx/test/box_test.cxx b/libsafecxx/test/box_test.cxx index b0df3d3..25833b1 100644 --- a/libsafecxx/test/box_test.cxx +++ b/libsafecxx/test/box_test.cxx @@ -43,8 +43,22 @@ void unique_ptr_constructor() safe } +void drop_only() safe +{ + { + std2::box p; + + { + std2::string s("hello, world!"); + p = std2::box(s.str()); + assert_true(*p == "hello, world!"sv2); + } + } +} + int main() { 
box_constructor(); unique_ptr_constructor(); + drop_only(); } diff --git a/libsafecxx/test/helpers.h b/libsafecxx/test/helpers.h index dcd26df..92ac4cb 100644 --- a/libsafecxx/test/helpers.h +++ b/libsafecxx/test/helpers.h @@ -27,3 +27,5 @@ void assert_throws(F f) safe } assert_true(threw); } + +using namespace std2::literals::string_literals; diff --git a/libsafecxx/test/rc_test.cxx b/libsafecxx/test/rc_test.cxx index 0fd146e..6b835e8 100644 --- a/libsafecxx/test/rc_test.cxx +++ b/libsafecxx/test/rc_test.cxx @@ -30,7 +30,22 @@ void rc_constructor() safe } } +void drop_only() safe +{ + { + std2::rc p; + { + std2::string s("hello, world!"); + + // TODO: re-enable this test once we get pointer variance working + // p = std2::rc(s.str()); + // assert_true(*p == "hello, world!"sv2); + } + } +} + int main() safe { rc_constructor(); + drop_only(); } diff --git a/libsafecxx/test/vector_test.cxx b/libsafecxx/test/vector_test.cxx index c98e04d..f2100d4 100644 --- a/libsafecxx/test/vector_test.cxx +++ b/libsafecxx/test/vector_test.cxx @@ -175,10 +175,24 @@ void vector_box() safe assert_eq(xs.size(), 16u); } +void drop_only() safe +{ + { + std2::vector p; + { + std2::string s("hello, world!"); + p = {s.str()}; + assert_true(p[0] == "hello, world!"sv2); + } + } +} + + int main() { vector_constructor(); vector_iterator(); vector_string_view(); vector_box(); + drop_only(); } From 2861eb42a6b139b70e0f9b09ba4c83fbc7dfd5c6 Mon Sep 17 00:00:00 2001 From: Christian Mazakas Date: Fri, 13 Sep 2024 13:05:23 -0700 Subject: [PATCH 02/27] add drop_only initializer_list tests --- libsafecxx/test/initializer_list_test.cxx | 56 +++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 libsafecxx/test/initializer_list_test.cxx diff --git a/libsafecxx/test/initializer_list_test.cxx b/libsafecxx/test/initializer_list_test.cxx new file mode 100644 index 0000000..c6080d3 --- /dev/null +++ b/libsafecxx/test/initializer_list_test.cxx @@ -0,0 +1,56 @@ +#feature on safety + +#include 
+#include "helpers.h" + +struct borrow_with_drop/(a) +{ + int x_; + int^/a p_; + + borrow_with_drop(int x, int^/a p) safe + : x_{x} + , p_(p) + { + } + + [[unsafe::drop_only(a)]] + ~borrow_with_drop() safe {} +}; + +void drop_only() safe +{ + { + std2::string s("hello, world!"); + std2::initializer_list list = { s.str() }; + { + assert_true(list.slice()[0] == "hello, world!"sv2); + std2::string s2("rawr"); + mut list.slice()[0] = s2.str(); + } + } + + { + std2::string s("hello, world!"); + std2::initializer_list list = { rel s }; + { + assert_true(list.slice()[0] == "hello, world!"sv2); + std2::string s2("rawr"); + mut list.slice()[0] = rel s2; + } + } + + { + int x = 4321; + std2::initializer_list list = { {1234, ^x} }; + { + int y = 1234; + mut list.slice()[0] = borrow_with_drop{4321, ^y}; + } + } +} + +int main() safe +{ + drop_only(); +} From b830c1f35e24126697c8fe4ee527e2a6a9543d56 Mon Sep 17 00:00:00 2001 From: Christian Mazakas Date: Mon, 16 Sep 2024 15:12:58 -0700 Subject: [PATCH 03/27] add missing pragma once, licensing --- libsafecxx/single-header/std2.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/libsafecxx/single-header/std2.h b/libsafecxx/single-header/std2.h index 9d404ca..cba4ea8 100644 --- a/libsafecxx/single-header/std2.h +++ b/libsafecxx/single-header/std2.h @@ -1,3 +1,9 @@ +// Copyright 2024 Sean Baxter +// Copyright 2024 Christian Mazakas +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#pragma once #feature on safety #include From 8ee58bb700b7b6548f912d3e5b10794897d535a1 Mon Sep 17 00:00:00 2001 From: Christian Mazakas Date: Mon, 16 Sep 2024 15:13:13 -0700 Subject: [PATCH 04/27] add test framework impl --- libsafecxx/test/arc_test.cxx | 8 +- libsafecxx/test/box_test.cxx | 25 ++- libsafecxx/test/helpers.h | 31 --- libsafecxx/test/initializer_list_test.cxx | 13 +- libsafecxx/test/lightweight_test.h | 126 ++++++++++++ libsafecxx/test/match_test.cxx | 24 +-- libsafecxx/test/optional_test.cxx | 58 +++--- libsafecxx/test/rc_test.cxx | 13 +- libsafecxx/test/ref_cell_test.cxx | 47 +++-- libsafecxx/test/source_location_test.cxx | 20 +- libsafecxx/test/string_test.cxx | 83 ++++---- libsafecxx/test/string_view_test.cxx | 227 +++++++++++----------- libsafecxx/test/thread_test.cxx | 19 +- libsafecxx/test/vector_test.cxx | 74 ++++--- 14 files changed, 413 insertions(+), 355 deletions(-) delete mode 100644 libsafecxx/test/helpers.h create mode 100644 libsafecxx/test/lightweight_test.h diff --git a/libsafecxx/test/arc_test.cxx b/libsafecxx/test/arc_test.cxx index 0088721..4ad0fd6 100644 --- a/libsafecxx/test/arc_test.cxx +++ b/libsafecxx/test/arc_test.cxx @@ -1,7 +1,7 @@ #feature on safety #include -#include "helpers.h" +#include "lightweight_test.h" void drop_only() safe { @@ -17,8 +17,4 @@ void drop_only() safe } } - -int main() safe -{ - drop_only(); -} +TEST_MAIN(drop_only) diff --git a/libsafecxx/test/box_test.cxx b/libsafecxx/test/box_test.cxx index 25833b1..7626a5c 100644 --- a/libsafecxx/test/box_test.cxx +++ b/libsafecxx/test/box_test.cxx @@ -5,26 +5,26 @@ #feature on safety #include -#include "helpers.h" +#include "lightweight_test.h" void box_constructor() safe { { std2::box p(1337); - assert_eq(mut *p, 1337); - assert_eq(*p, 1337); + REQUIRE_EQ(mut *p, 1337); + REQUIRE_EQ(*p, 1337); // Bind a mutable borrow. 
int^ x = mut *p; *x = 7331; - assert_eq(*p, 7331); + REQUIRE_EQ(*p, 7331); } { std2::box> p(std2::box(1337)); - assert_eq(**p, 1337); + REQUIRE_EQ(**p, 1337); } } @@ -39,7 +39,7 @@ void unique_ptr_constructor() safe .none => 7331; }; - assert_eq(x, 1337); + REQUIRE_EQ(x, 1337); } @@ -51,14 +51,13 @@ void drop_only() safe { std2::string s("hello, world!"); p = std2::box(s.str()); - assert_true(*p == "hello, world!"sv2); + REQUIRE_EQ(*p, "hello, world!"sv2); } } } -int main() -{ - box_constructor(); - unique_ptr_constructor(); - drop_only(); -} +TEST_MAIN( + box_constructor, + unique_ptr_constructor, + drop_only +); diff --git a/libsafecxx/test/helpers.h b/libsafecxx/test/helpers.h deleted file mode 100644 index 92ac4cb..0000000 --- a/libsafecxx/test/helpers.h +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2024 Christian Mazakas -// Distributed under the Boost Software License, Version 1.0. (See accompanying -// file LICENSE.txt or copy at http://www.boost.org/LICENSE_1_0.txt) - -#pragma once -#feature on safety - -template -void assert_eq(const T^ t, const U^ u) safe -{ - if (*t != *u) throw "unequal values"; -} - -void assert_true(bool b) safe -{ - if (!b) throw "failed boolean assertion"; -} - -template -void assert_throws(F f) safe -{ - bool threw = false; - try { - f(); - } catch(...) 
{ - threw = true; - } - assert_true(threw); -} - -using namespace std2::literals::string_literals; diff --git a/libsafecxx/test/initializer_list_test.cxx b/libsafecxx/test/initializer_list_test.cxx index c6080d3..3186d8e 100644 --- a/libsafecxx/test/initializer_list_test.cxx +++ b/libsafecxx/test/initializer_list_test.cxx @@ -1,7 +1,6 @@ #feature on safety -#include -#include "helpers.h" +#include "lightweight_test.h" struct borrow_with_drop/(a) { @@ -24,7 +23,8 @@ void drop_only() safe std2::string s("hello, world!"); std2::initializer_list list = { s.str() }; { - assert_true(list.slice()[0] == "hello, world!"sv2); + REQUIRE_EQ(list.slice()[0], "hello, world!"sv2); + std2::string s2("rawr"); mut list.slice()[0] = s2.str(); } @@ -34,7 +34,7 @@ void drop_only() safe std2::string s("hello, world!"); std2::initializer_list list = { rel s }; { - assert_true(list.slice()[0] == "hello, world!"sv2); + REQUIRE_EQ(list.slice()[0], "hello, world!"sv2); std2::string s2("rawr"); mut list.slice()[0] = rel s2; } @@ -50,7 +50,4 @@ void drop_only() safe } } -int main() safe -{ - drop_only(); -} +TEST_MAIN(drop_only) diff --git a/libsafecxx/test/lightweight_test.h b/libsafecxx/test/lightweight_test.h new file mode 100644 index 0000000..2f79796 --- /dev/null +++ b/libsafecxx/test/lightweight_test.h @@ -0,0 +1,126 @@ +// Copyright 2024 Christian Mazakas +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#pragma once +#feature on safety + +#include + +using namespace std2::literals::string_literals; + +struct failed_assert { + std2::string msg_; + std2::source_location loc_; + + failed_assert(std2::string msg, std2::source_location loc) noexcept safe + : msg_(rel msg) + , loc_(rel loc) + { + } + + // TODO: should this be unsafe? 
+ failed_assert(failed_assert const& rhs) safe + : msg_(cpy rhs->msg_) + , loc_(cpy rhs->loc_) + { + } +}; + +failed_assert +make_failed_assert( + std2::string msg, + std2::source_location loc) safe +{ + return failed_assert(rel msg, rel loc); +} + +template +void require_eq_impl( + T const^ t, U const^ u, + std2::source_location loc = std2::source_location::current()) safe +{ + if (!(*t == *u)) + throw make_failed_assert(std2::string("unequal arguments"), loc); +} + +void require_impl( + bool b, std2::source_location loc = std2::source_location::current()) safe +{ + if (!b) + throw make_failed_assert(std2::string("expected boolean expression was false"), loc); +} + +template +void require_throws_impl(F f, std2::source_location loc = std2::source_location::current()) safe +{ + bool threw = false; + try { + f(); + } catch(...) { + threw = true; + } + + if (!threw) + throw make_failed_assert(std2::string("function didn't throw as expected"), loc); +} + +#define REQUIRE_EQ(x, y) require_eq_impl((x), (y)) +#define REQUIRE(x) require_impl((x)) +#define REQUIRE_THROWS(x) require_throws_impl((x)) + +struct test_runner { + using fp_type = void(*)() safe; + + std2::vector test_fns_; + std2::vector fails_; + + test_runner() safe = default; + + void add_test(self^, fp_type fp) safe { + mut self->test_fns_.push_back(fp); + } + + int run(self) safe { + for (auto fn : self.test_fns_) { + try { + fn(); + } catch(failed_assert const& fa) { + mut self.fails_.push_back(cpy fa); + } + } + + if (self.fails_.size() > 0) { + for (failed_assert const^ fail : self.fails_) { + std2::println("tests failed!"); + unsafe { + printf( + "%.*s at %s(), line %d:%d in %s\n", + fail->msg_.str().size(), fail->msg_.str().data(), + fail->loc_.function_name(), fail->loc_.line(), fail->loc_.column(), + fail->loc_.file_name()); + } + } + unsafe { printf("%llu of %llu tests failed\n", self.fails_.size(), self.test_fns_.size()); }; + unsafe { fflush(stdout); } + drp self; + return 1; + } + + unsafe { 
printf("%d of %d tests passed\n", self.test_fns_.size(), self.test_fns_.size()); } + drp self; + return 0; + } +}; + +#define TEST_MAIN(...) \ +int main() safe \ +{ \ + using fp_type = void(*)() safe; \ + std2::vector test_fns = { __VA_ARGS__ }; \ + test_runner runner{}; \ + for (auto fn : test_fns) { \ + mut runner.add_test(fn); \ + } \ + return runner rel.run(); \ +} diff --git a/libsafecxx/test/match_test.cxx b/libsafecxx/test/match_test.cxx index ccb1361..dab9fe8 100644 --- a/libsafecxx/test/match_test.cxx +++ b/libsafecxx/test/match_test.cxx @@ -6,7 +6,7 @@ #include -#include "helpers.h" +#include "lightweight_test.h" void simple() safe { @@ -16,7 +16,7 @@ void simple() safe y if y >= 0 => 1337; _ => -1; }; - assert_eq(z, 1337); + REQUIRE_EQ(z, 1337); } @@ -92,28 +92,28 @@ void use_cow() safe }; std2::string_view s = *borrow; - assert_eq(s, std2::string_view("rawr")); + REQUIRE_EQ(s, std2::string_view("rawr")); } { cow str = .owned(std2::string("rawr")); - assert_true(str.is_owned()); - assert_true(!str.is_borrowed()); + REQUIRE(str.is_owned()); + REQUIRE(!str.is_borrowed()); std2::string s = str rel.into_owned(); - assert_eq(s, std2::string_view("rawr")); + REQUIRE_EQ(s, std2::string_view("rawr")); } { std2::string base("rawr"); cow str = .borrowed(^const base); - assert_true(str.is_borrowed()); - assert_true(!str.is_owned()); + REQUIRE(str.is_borrowed()); + REQUIRE(!str.is_owned()); // std2::string^ b = mut str.to_mut(); std2::string s = str rel.into_owned(); - assert_eq(s, std2::string_view("rawr")); + REQUIRE_EQ(s, std2::string_view("rawr")); } { @@ -122,8 +122,4 @@ void use_cow() safe } } -int main() safe -{ - simple(); - use_cow(); -} +TEST_MAIN(simple, use_cow) diff --git a/libsafecxx/test/optional_test.cxx b/libsafecxx/test/optional_test.cxx index b75fd64..0b92f96 100644 --- a/libsafecxx/test/optional_test.cxx +++ b/libsafecxx/test/optional_test.cxx @@ -6,7 +6,7 @@ #include -#include "helpers.h" +#include "lightweight_test.h" class error_code { @@ 
-21,17 +21,17 @@ void optional_accessors() safe std2::optional mx = .some(-1); std2::expected mx2 = mx.ok_or(error_code{}); - assert_eq(mx2.unwrap(), -1); + REQUIRE_EQ(mx2.unwrap(), -1); } { std2::optional mx = .some(-1); - assert_eq(mx.expect("invalid optional used"), -1); + REQUIRE_EQ(mx.expect("invalid optional used"), -1); } { std2::optional mx = .some(-1); - assert_eq(mx.unwrap(), -1); + REQUIRE_EQ(mx.unwrap(), -1); } { @@ -41,14 +41,14 @@ void optional_accessors() safe std2::vector ys{4, 3, 2, 1, 1, 2, 3, 4}; mp = .some(rel ys); - assert_eq((mp rel.unwrap()).size(), 8u); + REQUIRE_EQ((mp rel.unwrap()).size(), 8u); } { std2::optional> mp = .some(std2::box{1234}); mp = .some(std2::box{4321}); - assert_eq(*(mp rel.unwrap()), 4321); + REQUIRE_EQ(*(mp rel.unwrap()), 4321); } } @@ -58,22 +58,22 @@ void take() safe std2::optional> opt = .some(std2::box{1234}); auto m_p = mut opt.take(); - assert_true(m_p.is_some()); - assert_true(!m_p.is_none()); + REQUIRE(m_p.is_some()); + REQUIRE(!m_p.is_none()); - assert_true(opt.is_none()); - assert_true(!opt.is_some()); + REQUIRE(opt.is_none()); + REQUIRE(!opt.is_some()); } { std2::optional> opt = .none; auto m_p = mut opt.take(); - assert_true(m_p.is_none()); - assert_true(!m_p.is_some()); + REQUIRE(m_p.is_none()); + REQUIRE(!m_p.is_some()); - assert_true(opt.is_none()); - assert_true(!opt.is_some()); + REQUIRE(opt.is_none()); + REQUIRE(!opt.is_some()); } struct C @@ -89,38 +89,34 @@ void take() safe std2::optional opt = .some(1234); auto m_p = mut opt.take_if(addr C::invoke); - assert_true(m_p.is_some()); - assert_true(!m_p.is_none()); + REQUIRE(m_p.is_some()); + REQUIRE(!m_p.is_none()); - assert_true(opt.is_none()); - assert_true(!opt.is_some()); + REQUIRE(opt.is_none()); + REQUIRE(!opt.is_some()); } { std2::optional opt = .some(43211234); auto m_p = mut opt.take_if(addr C::invoke); - assert_true(!m_p.is_some()); - assert_true(m_p.is_none()); + REQUIRE(!m_p.is_some()); + REQUIRE(m_p.is_none()); - assert_true(!opt.is_none()); 
- assert_true(opt.is_some()); + REQUIRE(!opt.is_none()); + REQUIRE(opt.is_some()); } { std2::optional opt = .none; auto m_p = mut opt.take_if(addr C::invoke); - assert_true(!m_p.is_some()); - assert_true(m_p.is_none()); + REQUIRE(!m_p.is_some()); + REQUIRE(m_p.is_none()); - assert_true(opt.is_none()); - assert_true(!opt.is_some()); + REQUIRE(opt.is_none()); + REQUIRE(!opt.is_some()); } } -int main() safe -{ - // optional_accessors(); - take(); -} +TEST_MAIN(optional_accessors, take) diff --git a/libsafecxx/test/rc_test.cxx b/libsafecxx/test/rc_test.cxx index 6b835e8..a2db5ca 100644 --- a/libsafecxx/test/rc_test.cxx +++ b/libsafecxx/test/rc_test.cxx @@ -5,14 +5,13 @@ #feature on safety #include - -#include "helpers.h" +#include "lightweight_test.h" void rc_constructor() safe { { std2::rc p{-1}; - assert_eq(*p, -1); + REQUIRE_EQ(*p, -1); } { @@ -20,13 +19,13 @@ void rc_constructor() safe std2::rc p{cell_type{1234}}; auto b = p->borrow(); - assert_eq(*b, 1234); + REQUIRE_EQ(*b, 1234); std2::rc p2 = cpy p; auto b2 = p2->borrow(); - assert_eq(*b2, 1234); + REQUIRE_EQ(*b2, 1234); - assert_eq(addr *b, addr *b2); + REQUIRE_EQ(addr *b, addr *b2); } } @@ -39,7 +38,7 @@ void drop_only() safe // TODO: re-enable this test once we get pointer variance working // p = std2::rc(s.str()); - // assert_true(*p == "hello, world!"sv2); + // REQUIRE(*p == "hello, world!"sv2); } } } diff --git a/libsafecxx/test/ref_cell_test.cxx b/libsafecxx/test/ref_cell_test.cxx index e8daa19..ef97653 100644 --- a/libsafecxx/test/ref_cell_test.cxx +++ b/libsafecxx/test/ref_cell_test.cxx @@ -6,7 +6,7 @@ #include -#include "helpers.h" +#include "lightweight_test.h" struct copyable { @@ -22,12 +22,12 @@ void cell_constructor() safe { { std2::cell x{-1}; - assert_eq(x.get(), -1); + REQUIRE_EQ(x.get(), -1); } { std2::cell x{42}; - assert_eq(x.get().x_, 42); + REQUIRE_EQ(x.get().x_, 42); } } @@ -36,11 +36,11 @@ void cell_mutate() safe { std2::cell x{42}; x.set(copyable{24}); - assert_eq(x.get().x_, 24); + 
REQUIRE_EQ(x.get().x_, 24); auto old = x.replace(copyable{1337}); - assert_eq(old.x_, 24); - assert_eq(x.get().x_, 1337); + REQUIRE_EQ(old.x_, 24); + REQUIRE_EQ(x.get().x_, 1337); } } @@ -49,7 +49,7 @@ void verify_ref(std2::ref_cell::ref h) safe int const^ b1 = *h; int const^ b2 = *h; - assert_eq(b1, b2); + REQUIRE_EQ(b1, b2); // TODO: manual drop here seems to cause a double-free with the match block // drp h; @@ -61,21 +61,21 @@ void ref_cell_constructor() safe { auto m_x = rc.try_borrow(); match (m_x) { - .some(x) => assert_eq(*x, -1); - .none => assert_true(false); + .some(x) => REQUIRE_EQ(*x, -1); + .none => REQUIRE(false); }; auto rc1 = ^const rc; auto m_x1 = rc1.try_borrow(); match (m_x1) { - .some(x) => assert_eq(*x, -1); - .none => assert_true(false); + .some(x) => REQUIRE_EQ(*x, -1); + .none => REQUIRE(false); }; auto rc2 = ^const rc; auto m_x2 = rc2.try_borrow_mut(); match (m_x2) { - .some(x) => assert_true(false); + .some(x) => REQUIRE(false); .none => void(); }; @@ -83,7 +83,7 @@ void ref_cell_constructor() safe auto m_x3 = rc3.try_borrow(); match (m_x3) { .some(x) => verify_ref(rel x); - .none => assert_true(false); + .none => REQUIRE(false); }; } @@ -91,26 +91,26 @@ void ref_cell_constructor() safe auto m_x = rc.try_borrow_mut(); match (m_x) { .some(x) => void(mut *x = 1337); - .none => assert_true(false); + .none => REQUIRE(false); }; auto rc1 = ^const rc; auto m_x1 = rc1.try_borrow(); match (m_x1) { - .some(x) => assert_true(false); + .some(x) => REQUIRE(false); .none => void(); }; auto rc2 = ^const rc; auto m_x2 = rc2.try_borrow_mut(); match (m_x2) { - .some(x) => assert_true(false); + .some(x) => REQUIRE(false); .none => void(); }; } auto^ p = mut rc.get_mut(); - assert_eq(*p, 1337); + REQUIRE_EQ(*p, 1337); } void borrowing() safe @@ -131,10 +131,9 @@ void borrowing() safe } -int main() safe -{ - cell_constructor(); - cell_mutate(); - ref_cell_constructor(); - borrowing(); -} +TEST_MAIN( + cell_constructor, + cell_mutate, + ref_cell_constructor, 
+ borrowing +) diff --git a/libsafecxx/test/source_location_test.cxx b/libsafecxx/test/source_location_test.cxx index 89fce64..95c6c2d 100644 --- a/libsafecxx/test/source_location_test.cxx +++ b/libsafecxx/test/source_location_test.cxx @@ -5,26 +5,12 @@ #feature on safety #include -#include - -template -void assert_eq(const T^ t, const U^ u) safe -{ - if (*t != *u) throw "unequal values"; -} - -void assert_true(bool b) safe -{ - if (!b) throw "failed boolean assertion"; -} +#include "lightweight_test.h" void source_location() safe { - char buf[] = {'l','m','a','o'}; - auto loc = std2::source_location::current(buf); + auto loc = std2::source_location::current(); unsafe { printf("%s\n", loc.file_name()); } } -int main() { - source_location(); -} +TEST_MAIN(source_location) diff --git a/libsafecxx/test/string_test.cxx b/libsafecxx/test/string_test.cxx index ff9db3a..3afa3b2 100644 --- a/libsafecxx/test/string_test.cxx +++ b/libsafecxx/test/string_test.cxx @@ -6,58 +6,58 @@ #include -#include "helpers.h" +#include "lightweight_test.h" void string_constructor() safe { { std2::string s = {}; - assert_eq(s.size(), 0u); - assert_eq(s.capacity(), 0u); + REQUIRE_EQ(s.size(), 0u); + REQUIRE_EQ(s.capacity(), 0u); } { std2::string s("hello, world!"); - assert_eq(s.size(), 13u); - assert_eq(s.capacity(), 13u); - assert_true(s == std2::string_view("hello, world!")); - assert_true(s != std2::string_view("")); + REQUIRE_EQ(s.size(), 13u); + REQUIRE_EQ(s.capacity(), 13u); + REQUIRE(s == std2::string_view("hello, world!")); + REQUIRE(s != std2::string_view("")); } { std2::string s{"hello, world!"}; - assert_eq(s.size(), 13u); - assert_eq(s.capacity(), 13u); - assert_true(s == std2::string_view("hello, world!")); - assert_true(s != std2::string_view("")); + REQUIRE_EQ(s.size(), 13u); + REQUIRE_EQ(s.capacity(), 13u); + REQUIRE(s == std2::string_view("hello, world!")); + REQUIRE(s != std2::string_view("")); } { char const buf[] = "hello, world!"; std2::string s{buf}; - 
assert_eq(s.size(), 14u); // null terminator - assert_eq(s.capacity(), 14u); - assert_true(s == std2::string_view(buf)); - assert_true(s != std2::string_view("")); + REQUIRE_EQ(s.size(), 14u); // null terminator + REQUIRE_EQ(s.capacity(), 14u); + REQUIRE(s == std2::string_view(buf)); + REQUIRE(s != std2::string_view("")); } { char const buf[] = "hello, world!"; const [char; dyn]^ p_buf = buf; std2::string s{p_buf}; - assert_eq(s.size(), 14u); // null terminator - assert_eq(s.capacity(), 14u); - assert_true(s == std2::string_view(p_buf)); - assert_true(s != std2::string_view("")); + REQUIRE_EQ(s.size(), 14u); // null terminator + REQUIRE_EQ(s.capacity(), 14u); + REQUIRE(s == std2::string_view(p_buf)); + REQUIRE(s != std2::string_view("")); } { std2::string_view sv = "hello, world!"; std2::string s(sv); - assert_eq(s.size(), 13u); - assert_eq(s.capacity(), 13u); - assert_true(s == sv); - assert_true(s != std2::string_view("")); + REQUIRE_EQ(s.size(), 13u); + REQUIRE_EQ(s.capacity(), 13u); + REQUIRE(s == sv); + REQUIRE(s != std2::string_view("")); } } @@ -70,10 +70,10 @@ void string_append() safe std2::string s(sv1); mut s.append(sv2); - assert_eq(s.size(), sv1.size() + sv2.size()); - assert_eq(s.capacity(), s.size()); - assert_eq(s, std2::string_view("if I only had the heart to find out exactly who you are")); - assert_true(s != std2::string_view("")); + REQUIRE_EQ(s.size(), sv1.size() + sv2.size()); + REQUIRE_EQ(s.capacity(), s.size()); + REQUIRE_EQ(s, std2::string_view("if I only had the heart to find out exactly who you are")); + REQUIRE(s != std2::string_view("")); } { @@ -85,10 +85,10 @@ void string_append() safe std2::string s(s1 + s2); - assert_eq(s.size(), sv1.size() + sv2.size()); - assert_eq(s.capacity(), s.size()); - assert_eq(s, std2::string_view("if I only had the heart to find out exactly who you are")); - assert_true(s != std2::string_view("")); + REQUIRE_EQ(s.size(), sv1.size() + sv2.size()); + REQUIRE_EQ(s.capacity(), s.size()); + REQUIRE_EQ(s, 
std2::string_view("if I only had the heart to find out exactly who you are")); + REQUIRE(s != std2::string_view("")); } } @@ -98,31 +98,30 @@ void literal_test() safe { std2::string s = "hello, world!"s2; - assert_true(s == std2::string_view("hello, world!")); + REQUIRE(s == std2::string_view("hello, world!")); } { std2::u8string s = u8"hello, world!"s2; - assert_true(s == std2::u8string_view(u8"hello, world!")); + REQUIRE(s == std2::u8string_view(u8"hello, world!")); } { std2::u16string s = u"hello, world!"s2; - assert_true(s == std2::u16string_view(u"hello, world!")); + REQUIRE(s == std2::u16string_view(u"hello, world!")); } { std2::u32string s = U"hello, world!"s2; - assert_true(s == std2::u32string_view(U"hello, world!")); + REQUIRE(s == std2::u32string_view(U"hello, world!")); } { std2::wstring s = L"hello, world!"s2; - assert_true(s == std2::wstring_view(L"hello, world!")); + REQUIRE(s == std2::wstring_view(L"hello, world!")); } } -int main() safe -{ - string_constructor(); - string_append(); - literal_test(); -} +TEST_MAIN( + string_constructor, + string_append, + literal_test +) diff --git a/libsafecxx/test/string_view_test.cxx b/libsafecxx/test/string_view_test.cxx index b30ce9d..883ff73 100644 --- a/libsafecxx/test/string_view_test.cxx +++ b/libsafecxx/test/string_view_test.cxx @@ -5,16 +5,16 @@ #feature on safety #include -#include "helpers.h" +#include "lightweight_test.h" void string_view_constructor() safe { std2::string_constant sc = "hello, world!"; std2::string_view sv = sc; - assert_eq(sv.size(), (*sc.text())~length); - assert_eq(sv.data(), (*sc.text())~as_pointer); - assert_true(sv == sc); - assert_true(!(sv != sc)); + REQUIRE_EQ(sv.size(), (*sc.text())~length); + REQUIRE_EQ(sv.data(), (*sc.text())~as_pointer); + REQUIRE(sv == sc); + REQUIRE(!(sv != sc)); } // Encodes ucs into the UTF-8 buffer at s. 
Returns the number of characters @@ -99,161 +99,161 @@ void string_view_slice_ordinary_utf8_constructor() safe { const [char; dyn]^ str = "rawr"; std2::string_view sv = str; - assert_eq(sv.size(), 5u); - assert_eq(sv.data(), (*str)~as_pointer); + REQUIRE_EQ(sv.size(), 5u); + REQUIRE_EQ(sv.data(), (*str)~as_pointer); } { // outside valid range - assert_throws([]() safe { + REQUIRE_THROWS(([]() safe { char const str[] = { (char)0xff }; std2::string_view sv = str; (void)sv; - }); + })); } // 2 byte code points { const [char; dyn]^ str = "£"; std2::string_view sv = str; - assert_eq(sv.size(), 3u); - assert_eq(sv.data(), (*str)~as_pointer); + REQUIRE_EQ(sv.size(), 3u); + REQUIRE_EQ(sv.data(), (*str)~as_pointer); } { char const str[] = { (char)0xcf, (char)0xbf }; std2::string_view sv = str; - assert_eq(sv.size(), 2u); - assert_eq(sv.data(), str); + REQUIRE_EQ(sv.size(), 2u); + REQUIRE_EQ(sv.data(), str); } { // invalid lengths - assert_throws([]() safe { + REQUIRE_THROWS(([]() safe { char const str[] = { (char)0xcf }; std2::string_view sv = str; (void)sv; - }); + })); } { // invalid continuation - assert_throws([]() safe { + REQUIRE_THROWS(([]() safe { char const str[] = { (char)0xcf, (char)0xcf }; std2::string_view sv = str; (void)sv; - }); + })); } // 3 byte code points { const [char; dyn]^ str = "한"; std2::string_view sv = str; - assert_eq(sv.size(), 4u); - assert_eq(sv.data(), (*str)~as_pointer); + REQUIRE_EQ(sv.size(), 4u); + REQUIRE_EQ(sv.data(), (*str)~as_pointer); } { char const str[] = { (char)0xed, (char)0x95, (char)0x9c }; std2::string_view sv = str; - assert_eq(sv.size(), 3u); - assert_eq(sv.data(), str); + REQUIRE_EQ(sv.size(), 3u); + REQUIRE_EQ(sv.data(), str); } { // invalid length - assert_throws([]() safe { + REQUIRE_THROWS(([]() safe { char const str[] = { (char)0xed }; std2::string_view sv = str; (void)sv; - }); + })); } { // invalid length - assert_throws([]() safe { + REQUIRE_THROWS(([]() safe { char const str[] = { (char)0xed, (char)0x95 }; 
std2::string_view sv = str; (void)sv; - }); + })); } { // invalid continuation - assert_throws([]() safe { + REQUIRE_THROWS(([]() safe { char const str[] = { (char)0xed, (char)0x95, (char)0xcc }; std2::string_view sv = str; (void)sv; - }); + })); } { // invalid continuation - assert_throws([]() safe { + REQUIRE_THROWS(([]() safe { char const str[] = { (char)0xed, (char)0xc5, (char)0x9c }; std2::string_view sv = str; (void)sv; - }); + })); } // 4 byte code points { const [char; dyn]^ str = "𐍈"; std2::string_view sv = str; - assert_eq(sv.size(), 5u); - assert_eq(sv.data(), (*str)~as_pointer); + REQUIRE_EQ(sv.size(), 5u); + REQUIRE_EQ(sv.data(), (*str)~as_pointer); } { char const str[] = { (char)0xf0, (char)0x90, (char)0x8d, (char)0x88, }; std2::string_view sv = str; - assert_eq(sv.size(), 4u); - assert_eq(sv.data(), str); + REQUIRE_EQ(sv.size(), 4u); + REQUIRE_EQ(sv.data(), str); } { // invalid length - assert_throws([]() safe { + REQUIRE_THROWS(([]() safe { char const str[] = { (char)0xf0}; std2::string_view sv = str; (void)sv; - }); + })); } { // invalid length - assert_throws([]() safe { + REQUIRE_THROWS(([]() safe { char const str[] = { (char)0xf0, (char)0x90, }; std2::string_view sv = str; (void)sv; - }); + })); } { // invalid continuation - assert_throws([]() safe { + REQUIRE_THROWS(([]() safe { char const str[] = { (char)0xf0, (char)0xc0, (char)0x8d, (char)0x88, }; std2::string_view sv = str; (void)sv; - }); + })); } { // invalid continuation - assert_throws([]() safe { + REQUIRE_THROWS(([]() safe { char const str[] = { (char)0xf0, (char)0x90, (char)0xcd, (char)0x88, }; std2::string_view sv = str; (void)sv; - }); + })); } { // invalid continuation - assert_throws([]() safe { + REQUIRE_THROWS(([]() safe { char const str[] = { (char)0xf0, (char)0x90, (char)0x8d, (char)0xc8, }; std2::string_view sv = str; (void)sv; - }); + })); } // prove we can parse the entire utf space @@ -261,11 +261,11 @@ void string_view_slice_ordinary_utf8_constructor() safe for (char32_t 
i = 0; i <= 0x10ffff; ++i) { [char; 4] buf = {}; auto str = to_utf8(^buf, i); - assert_true((*str)~length > 0); + REQUIRE((*str)~length > 0); std2::string_view sv = str; - assert_true(sv.size() > 0u); - assert_eq(sv.data(), (*str)~as_pointer); + REQUIRE(sv.size() > 0u); + REQUIRE_EQ(sv.data(), (*str)~as_pointer); } } @@ -273,8 +273,8 @@ void string_view_slice_ordinary_utf8_constructor() safe const [char; dyn]^ str = "$£Иह€한𐍈"; std2::string_view sv = str; - assert_eq(sv.size(), 19u); - assert_eq(sv.data(), (*str)~as_pointer); + REQUIRE_EQ(sv.size(), 19u); + REQUIRE_EQ(sv.data(), (*str)~as_pointer); } } @@ -284,8 +284,8 @@ void string_view_slice_utf8_constructor() safe { const [char8_t; dyn]^ str = u8"rawr"; std2::u8string_view sv = str; - assert_eq(sv.size(), 5u); - assert_eq(sv.data(), (*str)~as_pointer); + REQUIRE_EQ(sv.size(), 5u); + REQUIRE_EQ(sv.data(), (*str)~as_pointer); } } @@ -295,87 +295,87 @@ void string_view_slice_utf16_constructor() safe { const [char16_t; dyn]^ str = u"rawr"; std2::u16string_view sv = str; - assert_eq(sv.size(), 5u); - assert_eq(sv.data(), (*str)~as_pointer); + REQUIRE_EQ(sv.size(), 5u); + REQUIRE_EQ(sv.data(), (*str)~as_pointer); } { const char16_t str[] = { (char16_t)0xffff }; std2::u16string_view sv = str; - assert_eq(sv.size(), 1u); - assert_eq(sv.data(), str); + REQUIRE_EQ(sv.size(), 1u); + REQUIRE_EQ(sv.data(), str); } { const char16_t str[] = { (char16_t)0xfffe }; std2::u16string_view sv = str; - assert_eq(sv.size(), 1u); - assert_eq(sv.data(), str); + REQUIRE_EQ(sv.size(), 1u); + REQUIRE_EQ(sv.data(), str); } { const char16_t str[] = { (char16_t)0xfeff }; std2::u16string_view sv = str; - assert_eq(sv.size(), 1u); - assert_eq(sv.data(), str); + REQUIRE_EQ(sv.size(), 1u); + REQUIRE_EQ(sv.data(), str); } { const [char16_t; dyn]^ str = u"€"; std2::u16string_view sv = str; - assert_eq(sv.size(), 2u); - assert_eq(sv.data(), (*str)~as_pointer); + REQUIRE_EQ(sv.size(), 2u); + REQUIRE_EQ(sv.data(), (*str)~as_pointer); } { const 
[char16_t; dyn]^ str = u"𐐷"; std2::u16string_view sv = str; - assert_eq(sv.size(), 3u); - assert_eq(sv.data(), (*str)~as_pointer); + REQUIRE_EQ(sv.size(), 3u); + REQUIRE_EQ(sv.data(), (*str)~as_pointer); } { const char16_t str[] = { (char16_t)0xd801, (char16_t)0xdc37 }; std2::u16string_view sv = str; - assert_eq(sv.size(), 2u); - assert_eq(sv.data(), str); + REQUIRE_EQ(sv.size(), 2u); + REQUIRE_EQ(sv.data(), str); } { // length error - assert_throws([]() safe { + REQUIRE_THROWS(([]() safe { const char16_t str[] = { (char16_t)0xd801 }; std2::u16string_view sv = str; (void)sv; - }); + })); } { // invalid leading surrogate - assert_throws([]() safe { + REQUIRE_THROWS(([]() safe { const char16_t str[] = { (char16_t)0xf801, (char16_t)0xdc37 }; std2::u16string_view sv = str; (void)sv; - }); + })); } { // invalid trailing surrogate - assert_throws([]() safe { + REQUIRE_THROWS(([]() safe { const char16_t str[] = { (char16_t)0xd801, (char16_t)0xfc37 }; std2::u16string_view sv = str; (void)sv; - }); + })); } { const [char16_t; dyn]^ str = u"𤭢"; std2::u16string_view sv = str; - assert_eq(sv.size(), 3u); - assert_eq(sv.data(), (*str)~as_pointer); + REQUIRE_EQ(sv.size(), 3u); + REQUIRE_EQ(sv.data(), (*str)~as_pointer); } // prove we can parse the entire utf space @@ -385,11 +385,11 @@ void string_view_slice_utf16_constructor() safe [char16_t; 2] buf = {}; auto str = to_utf16(^buf, i); - assert_true((*str)~length > 0); + REQUIRE((*str)~length > 0); std2::u16string_view sv = str; - assert_true(sv.size() > 0u); - assert_eq(sv.data(), (*str)~as_pointer); + REQUIRE(sv.size() > 0u); + REQUIRE_EQ(sv.data(), (*str)~as_pointer); } } @@ -397,24 +397,24 @@ void string_view_slice_utf16_constructor() safe const [char16_t; dyn]^ str = u"$€𐐷𤭢"; std2::u16string_view sv = str; - assert_eq(sv.size(), 7u); - assert_eq(sv.data(), (*str)~as_pointer); + REQUIRE_EQ(sv.size(), 7u); + REQUIRE_EQ(sv.data(), (*str)~as_pointer); } { - assert_throws([]() safe { + REQUIRE_THROWS(([]() safe { const char16_t 
str[] = { (char16_t)0xd800 }; std2::u16string_view sv = str; (void)sv; - }); + })); } { - assert_throws([]() safe { + REQUIRE_THROWS(([]() safe { const char16_t str[] = { (char16_t)0xdfff }; std2::u16string_view sv = str; (void)sv; - }); + })); } } @@ -428,25 +428,25 @@ void string_view_slice_utf32_constructor() safe const char32_t str[] = { i }; std2::u32string_view sv = str; - assert_true(sv.size() > 0u); - assert_eq(sv.data(), str); + REQUIRE(sv.size() > 0u); + REQUIRE_EQ(sv.data(), str); } } { - assert_throws([]() safe { + REQUIRE_THROWS(([]() safe { const char32_t str[] = { (char32_t)0xd800 }; std2::u32string_view sv = str; (void)sv; - }); + })); } { - assert_throws([]() safe { + REQUIRE_THROWS(([]() safe { const char32_t str[] = { (char32_t)0xdfff }; std2::u32string_view sv = str; (void)sv; - }); + })); } } @@ -456,15 +456,15 @@ void string_view_slice_wstring_constructor() safe { const [wchar_t; dyn]^ str = L"rawr"; std2::wstring_view sv = str; - assert_eq(sv.size(), 5u); - assert_eq(sv.data(), (*str)~as_pointer); + REQUIRE_EQ(sv.size(), 5u); + REQUIRE_EQ(sv.data(), (*str)~as_pointer); } { const [wchar_t; dyn]^ str = L"한"; std2::wstring_view sv = str; - assert_true(sv.size() > 0); - assert_eq(sv.data(), (*str)~as_pointer); + REQUIRE(sv.size() > 0); + REQUIRE_EQ(sv.data(), (*str)~as_pointer); } } @@ -476,8 +476,8 @@ void string_view_compare() safe std2::string_view sv1 = str; std2::string_view sv2 = str; - assert_true(sv1 == sv2); - assert_true(!(sv1 != sv2)); + REQUIRE(sv1 == sv2); + REQUIRE(!(sv1 != sv2)); } { @@ -487,8 +487,8 @@ void string_view_compare() safe std2::string_view sv1 = str1; std2::string_view sv2 = str2; - assert_true(sv1 != sv2); - assert_true(!(sv1 == sv2)); + REQUIRE(sv1 != sv2); + REQUIRE(!(sv1 == sv2)); } } @@ -500,8 +500,8 @@ void string_view_slice() safe auto s = sv.slice(); - assert_eq((*s)~length, sv.size()); - assert_eq((*s)~as_pointer, sv.data()); + REQUIRE_EQ((*s)~length, sv.size()); + REQUIRE_EQ((*s)~as_pointer, sv.data()); } } 
@@ -511,39 +511,38 @@ void literal_test() safe { std2::string_view sv = "hello, world!"sv2; - assert_true(sv == std2::string_view("hello, world!")); + REQUIRE(sv == std2::string_view("hello, world!")); } { std2::u8string_view sv = u8"hello, world!"sv2; - assert_true(sv == std2::u8string_view(u8"hello, world!")); + REQUIRE(sv == std2::u8string_view(u8"hello, world!")); } { std2::u16string_view sv = u"hello, world!"sv2; - assert_true(sv == std2::u16string_view(u"hello, world!")); + REQUIRE(sv == std2::u16string_view(u"hello, world!")); } { std2::u32string_view sv = U"hello, world!"sv2; - assert_true(sv == std2::u32string_view(U"hello, world!")); + REQUIRE(sv == std2::u32string_view(U"hello, world!")); } { std2::wstring_view sv = L"hello, world!"sv2; - assert_true(sv == std2::wstring_view(L"hello, world!")); + REQUIRE(sv == std2::wstring_view(L"hello, world!")); } } -int main() safe -{ - string_view_constructor(); - string_view_slice_ordinary_utf8_constructor(); - string_view_slice_utf8_constructor(); - string_view_slice_utf16_constructor(); - string_view_slice_utf32_constructor(); - string_view_slice_wstring_constructor(); - string_view_compare(); - string_view_slice(); - literal_test(); -} +TEST_MAIN( + string_view_constructor, + string_view_slice_ordinary_utf8_constructor, + string_view_slice_utf8_constructor, + string_view_slice_utf16_constructor, + string_view_slice_utf32_constructor, + string_view_slice_wstring_constructor, + string_view_compare, + string_view_slice, + literal_test +) diff --git a/libsafecxx/test/thread_test.cxx b/libsafecxx/test/thread_test.cxx index 68c2288..35aa5a6 100644 --- a/libsafecxx/test/thread_test.cxx +++ b/libsafecxx/test/thread_test.cxx @@ -8,7 +8,7 @@ #include -#include "helpers.h" +#include "lightweight_test.h" int add(std2::arc> mtx, int x, int y) safe { @@ -67,7 +67,7 @@ void thread_constructor() safe std2::thread t(add, cpy mtx, 1, 2); int r = *mtx->lock(); - if (r != 1337) assert_eq(r, 1 + 2); + if (r != 1337) REQUIRE_EQ(r, 1 
+ 2); t rel.join(); } @@ -124,7 +124,7 @@ void mutex_test() safe int const val = *sp->lock()^.borrow(); auto const expected = num_threads * 10'000; - assert_eq(val, expected); + REQUIRE_EQ(val, expected); } void shared_mutex_test() safe @@ -189,12 +189,11 @@ void shared_mutex_test() safe t rel.join(); } - assert_eq(**sp->lock_shared(), value); + REQUIRE_EQ(**sp->lock_shared(), value); } -int main() safe -{ - thread_constructor(); - mutex_test(); - shared_mutex_test(); -} +TEST_MAIN( + thread_constructor, + mutex_test, + shared_mutex_test +) diff --git a/libsafecxx/test/vector_test.cxx b/libsafecxx/test/vector_test.cxx index f2100d4..2936e22 100644 --- a/libsafecxx/test/vector_test.cxx +++ b/libsafecxx/test/vector_test.cxx @@ -7,44 +7,44 @@ #include #include -#include "helpers.h" +#include "lightweight_test.h" void vector_constructor() safe { { std2::vector vec{}; - assert_eq(vec.size(), 0u); + REQUIRE_EQ(vec.size(), 0u); vec^.push_back(1); vec^.push_back(2); vec^.push_back(3); - assert_eq(vec.size(), 3u); + REQUIRE_EQ(vec.size(), 3u); { auto s = vec^.slice(); - assert_eq(s[0], 1); - assert_eq(s[1], 2); - assert_eq(s[2], 3); + REQUIRE_EQ(s[0], 1); + REQUIRE_EQ(s[1], 2); + REQUIRE_EQ(s[2], 3); s[0] = 17; - assert_eq((^vec)[0], 17); + REQUIRE_EQ((^vec)[0], 17); (^vec)[0] = 4; - assert_eq(vec[0], 4); + REQUIRE_EQ(vec[0], 4); } { auto s = vec.slice(); - assert_eq(s[0], 4); - assert_eq(s[1], 2); - assert_eq(s[2], 3); + REQUIRE_EQ(s[0], 4); + REQUIRE_EQ(s[1], 2); + REQUIRE_EQ(s[2], 3); } { const std2::vector^ v = ^vec; const int^ x = v[0]; - assert_eq(*x, 4); + REQUIRE_EQ(*x, 4); } } @@ -53,17 +53,17 @@ void vector_constructor() safe { std2::vector vec = {}; vec^.push_back(^x); - assert_eq(vec.size(), 1u); + REQUIRE_EQ(vec.size(), 1u); { const [int^; dyn]^ elems = vec.slice(); - assert_eq(*elems[0], 1); + REQUIRE_EQ(*elems[0], 1); } [int^; dyn]^ elems = (^vec).slice(); *elems[0] = 20; } - assert_eq(x, 20); + REQUIRE_EQ(x, 20); } { @@ -72,31 +72,31 @@ void 
vector_constructor() safe int^ p = ^x; std2::vector vec = {}; vec^.push_back(^p); - assert_eq(vec.size(), 1u); + REQUIRE_EQ(vec.size(), 1u); int^ const^ q = vec.slice()[0]; (void)q; - assert_eq(**q, 1); + REQUIRE_EQ(**q, 1); } } { std2::vector xs = { 1, 2, 3, 4, 5 }; - assert_eq(xs.size(), 5u); + REQUIRE_EQ(xs.size(), 5u); for (int i = 0; i < 5; ++i) { auto idx = static_cast(i); - assert_eq(xs[idx], i + 1); + REQUIRE_EQ(xs[idx], i + 1); } } { std2::vector> xs = { std2::box(1), std2::box(2), std2::box(3), std2::box(4), std2::box(5) }; - assert_eq(xs.size(), 5u); + REQUIRE_EQ(xs.size(), 5u); for (int i = 0; i < 5; ++i) { auto idx = static_cast(i); - assert_eq(*xs[idx], i + 1); + REQUIRE_EQ(*xs[idx], i + 1); } } } @@ -122,8 +122,8 @@ void vector_iterator() safe .none => true; .some(x) => false; }; - assert_true(v.empty()); - assert_true(b); + REQUIRE(v.empty()); + REQUIRE(b); v^.push_back(1); v^.push_back(2); @@ -131,14 +131,14 @@ void vector_iterator() safe v^.push_back(4); v^.push_back(5); - assert_eq(v.size(), 5u); + REQUIRE_EQ(v.size(), 5u); int sum = 0; for (int x : v.iter()) { sum += x; } - assert_eq(sum, 1 + 2 + 3 + 4 + 5); + REQUIRE_EQ(sum, 1 + 2 + 3 + 4 + 5); } } @@ -157,11 +157,11 @@ void vector_string_view() safe strs^.push_back(sv2); strs^.push_back(sv3); - assert_eq(strs.size(), 3u); + REQUIRE_EQ(strs.size(), 3u); const std2::vector^ v = ^strs; const std2::string_view^ sv = v[0]; - assert_eq(sv, sv1); + REQUIRE_EQ(sv, sv1); } void vector_box() safe @@ -172,7 +172,7 @@ void vector_box() safe xs^.push_back(std2::box(1)); } - assert_eq(xs.size(), 16u); + REQUIRE_EQ(xs.size(), 16u); } void drop_only() safe @@ -182,17 +182,15 @@ void drop_only() safe { std2::string s("hello, world!"); p = {s.str()}; - assert_true(p[0] == "hello, world!"sv2); + REQUIRE(p[0] == "hello, world!"sv2); } } } - -int main() -{ - vector_constructor(); - vector_iterator(); - vector_string_view(); - vector_box(); - drop_only(); -} +TEST_MAIN( + vector_constructor, + vector_iterator, + 
vector_string_view, + vector_box, + drop_only +) From baeeb24bc7c1c67b599fe1d6f5d0060815b47847 Mon Sep 17 00:00:00 2001 From: Christian Mazakas Date: Tue, 17 Sep 2024 15:07:15 -0700 Subject: [PATCH 05/27] add drop_only tests for arc/rc --- libsafecxx/single-header/std2.h | 4 +++ libsafecxx/test/arc_test.cxx | 44 ++++++++++++++++++++++++++++++--- libsafecxx/test/rc_test.cxx | 13 +++++----- 3 files changed, 51 insertions(+), 10 deletions(-) diff --git a/libsafecxx/single-header/std2.h b/libsafecxx/single-header/std2.h index cba4ea8..485d2d8 100644 --- a/libsafecxx/single-header/std2.h +++ b/libsafecxx/single-header/std2.h @@ -798,6 +798,10 @@ arc T const^ operator->(self const^) noexcept safe { return ^*self->p_->data_.get(); } + + T const^ operator*(self const^) noexcept safe { + return ^*self->p_->data_.get(); + } }; //////////////////////////////////////////////////////////////////////////////// diff --git a/libsafecxx/test/arc_test.cxx b/libsafecxx/test/arc_test.cxx index 4ad0fd6..f59a560 100644 --- a/libsafecxx/test/arc_test.cxx +++ b/libsafecxx/test/arc_test.cxx @@ -3,6 +3,44 @@ #include #include "lightweight_test.h" +struct anon_callable/(a, b, c) +{ + std2::string_view/c sv_; + int^/a x_; + int const^/b y_; + + anon_callable( + std2::string_view/c sv, + int^/a x, + int const^/b y) safe + : sv_(sv) + , x_(x) + , y_(y) + { + } + + [[unsafe::drop_only(a, b)]] + ~anon_callable() = default; + + void operator()(self const^) safe + { + self->sv_; + self->x_; + self->y_; + } +}; + +void functor_test() safe +{ + std2::string_view sv = "hello, world!"sv2; + + int a = 1234; + int const b = 4321; + + anon_callable f(sv, ^a, b); + f(); +} + void drop_only() safe { { @@ -11,10 +49,10 @@ void drop_only() safe std2::string s("hello, world!"); // TODO: re-enable this test once we get pointer variance working - // p = std2::arc(s.str()); - // assert_true(*p == "hello, world!"sv2); + p = std2::arc(s.str()); + REQUIRE_EQ(*p, "hello, world!"sv2); } } } -TEST_MAIN(drop_only) 
+TEST_MAIN(functor_test, drop_only) diff --git a/libsafecxx/test/rc_test.cxx b/libsafecxx/test/rc_test.cxx index a2db5ca..55b864b 100644 --- a/libsafecxx/test/rc_test.cxx +++ b/libsafecxx/test/rc_test.cxx @@ -37,14 +37,13 @@ void drop_only() safe std2::string s("hello, world!"); // TODO: re-enable this test once we get pointer variance working - // p = std2::rc(s.str()); - // REQUIRE(*p == "hello, world!"sv2); + p = std2::rc(s.str()); + REQUIRE(*p == "hello, world!"sv2); } } } -int main() safe -{ - rc_constructor(); - drop_only(); -} +TEST_MAIN( + rc_constructor, + drop_only +) From 3a36b9788e2fcb85a5dea917ad0395a2c17dce8f Mon Sep 17 00:00:00 2001 From: Christian Mazakas Date: Tue, 17 Sep 2024 15:14:00 -0700 Subject: [PATCH 06/27] add missing noinline to panic functions --- libsafecxx/single-header/std2.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libsafecxx/single-header/std2.h b/libsafecxx/single-header/std2.h index 485d2d8..815e324 100644 --- a/libsafecxx/single-header/std2.h +++ b/libsafecxx/single-header/std2.h @@ -168,7 +168,7 @@ basic_string_view/(a) static constexpr size_type npos = size_type(-1); private: - [[noreturn, safety::panic(panic_code::generic)]] + [[noreturn, noinline, safety::panic(panic_code::generic)]] static void panic_impl(string_constant msg, source_location loc = source_location::current()) safe { @@ -437,7 +437,7 @@ auto operator""sv2(wchar_t const* p, std::size_t len) noexcept safe -> wstring_v // Panic functions are categorized and marked with an safety::panic(N) attribute. // This makes it easy for the frontend to toggle on or off panic calls on a // per-file basis. 
-[[noreturn, safety::panic(panic_code::generic)]] +[[noreturn, noinline, safety::panic(panic_code::generic)]] inline void panic( str msg, source_location loc = source_location::current()) noexcept safe { @@ -455,7 +455,7 @@ inline void panic( } } -[[noreturn, safety::panic(panic_code::bounds)]] +[[noreturn, noinline, safety::panic(panic_code::bounds)]] inline void panic_bounds( str msg, source_location loc = source_location::current()) noexcept safe { From ef05a59c76af1eaa03acde9fbacdf55754404c84 Mon Sep 17 00:00:00 2001 From: sdarwin Date: Tue, 17 Sep 2024 06:36:52 -0600 Subject: [PATCH 07/27] Docs: github actions to render .md markdown files --- .github/workflows/build-docs.yml | 54 ++++++++++++++++++++++++++++++++ .github/workflows/static.yml | 3 ++ 2 files changed, 57 insertions(+) create mode 100644 .github/workflows/build-docs.yml diff --git a/.github/workflows/build-docs.yml b/.github/workflows/build-docs.yml new file mode 100644 index 0000000..dd03a33 --- /dev/null +++ b/.github/workflows/build-docs.yml @@ -0,0 +1,54 @@ +--- +name: Convert markdown docs to html + +on: + push: + branches: ["master", "develop"] + paths: + - proposal/** + - .github/workflows/build-docs.yml + + workflow_dispatch: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: false + +jobs: + deploy: + runs-on: ubuntu-latest + container: + image: cppalliance/wg21:latest + options: --user 1001 + + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Build docs + run: | + set -xe + # list_of_files="draft P3390R0" + list_of_files="draft" + cd proposal + git clone https://github.com/mpark/wg21.git + echo "include wg21/Makefile" > Makefile + for file in $list_of_files; do + make ${file}.html + done + cp generated/* ../docs/ + cd .. 
+ git config --global user.name 'commitbot' + git config --global user.email 'commitbot@boost.org' + git add docs/* || true + git commit -m "Docs: update from proposal/ md files" || true + git push + + - name: Trigger Publish Workflow + uses: actions/github-script@v7 + with: + script: | + github.rest.repos.createDispatchEvent({ + owner: context.repo.owner, + repo: context.repo.repo, + event_type: 'publish-trigger', + }); diff --git a/.github/workflows/static.yml b/.github/workflows/static.yml index b816dbe..1b39eb0 100644 --- a/.github/workflows/static.yml +++ b/.github/workflows/static.yml @@ -11,6 +11,9 @@ on: # Allows you to run this workflow manually from the Actions tab workflow_dispatch: + repository_dispatch: + types: [publish-trigger] + # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages permissions: contents: read From 7f708ae0a92c0c11bb02ab5750b7968fa9e08597 Mon Sep 17 00:00:00 2001 From: sdarwin Date: Tue, 17 Sep 2024 18:07:45 -0600 Subject: [PATCH 08/27] Modify email address of commitbot in github actions --- .github/workflows/build-docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-docs.yml b/.github/workflows/build-docs.yml index dd03a33..1d2471c 100644 --- a/.github/workflows/build-docs.yml +++ b/.github/workflows/build-docs.yml @@ -38,7 +38,7 @@ jobs: cp generated/* ../docs/ cd .. 
git config --global user.name 'commitbot' - git config --global user.email 'commitbot@boost.org' + git config --global user.email 'commitbot@example.com' git add docs/* || true git commit -m "Docs: update from proposal/ md files" || true git push From 484dcc0b38e6c2f5bb44aa420d1b7e69fbcc2737 Mon Sep 17 00:00:00 2001 From: Gabriel Aubut-Lussier Date: Wed, 18 Sep 2024 22:53:25 -0400 Subject: [PATCH 09/27] Fixing typos and making one small improvement --- proposal/draft.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/proposal/draft.md b/proposal/draft.md index 396c05b..cf10c53 100644 --- a/proposal/draft.md +++ b/proposal/draft.md @@ -137,7 +137,7 @@ It's instructive to break the memory safety problem down into four categories. E ### Lifetime safety -How do we ensure that dangling references are never used? There are two mainstream lifetime safety technologies: garbage collection and borrow checking. Garbage collection is simple to implement and use, but moves object allocations to the heap, making it incompatible with manual memory manegement. It keeps objects initialized as long as there are live references to them, making it incompatible with C++'s RAII[@raii] object model. +How do we ensure that dangling references are never used? There are two proven and production-ready lifetime safety technologies: garbage collection and borrow checking. Garbage collection is simple to implement and use, but moves object allocations to the heap, making it incompatible with manual memory management. It keeps objects initialized as long as there are live references to them, making it incompatible with C++'s RAII[@raii] object model. Borrow checking is an advanced form of live analysis. It keeps track of the _live references_ at every point in the function, and errors when there's a _conflicting action_ on a place associated with a live reference. 
For example, writing to, moving or dropping an object with a live shared borrow will raise a borrow check error. Pushing to a vector with a live iterator will raise an iterator invalidation error. This technology is compatible with manual memory management and RAII, making it a good fit for C++. @@ -813,7 +813,7 @@ cannot call unsafe constructor String::String(const char*) in safe context see declaration at unsafe4.cxx:10:3 ``` -This code is ill-formed. It's permissible to invoke an unsafe constructor when copy-initializing into the `push_back` call, since its function parameter is `unsafe String`. Dut direct initialization of `String` is not allowed. The constructor chosen for direct initialization is unsafe, but the type it's initializing is not. The type is just `String`. The compiler is right to reject this program because the user is plainly calling an unsafe constructor in a safe context, without a mitigating _unsafe-block_ or unsafe qualifier. +This code is ill-formed. It's permissible to invoke an unsafe constructor when copy-initializing into the `push_back` call, since its function parameter is `unsafe String`. But direct initialization of `String` is not allowed. The constructor chosen for direct initialization is unsafe, but the type it's initializing is not. The type is just `String`. The compiler is right to reject this program because the user is plainly calling an unsafe constructor in a safe context, without a mitigating _unsafe-block_ or unsafe qualifier. [**unsafe5.cxx**](https://github.com/cppalliance/safe-cpp/blob/master/proposal/unsafe5.cxx) ```cpp @@ -1390,7 +1390,7 @@ I've relabelled the example to show function points and region names of variable `'R1 : 'R0 @ P3` means that starting at P3, the 'R1 contains all points 'R0 does, along all control flow paths, as long as 'R0 is live. 'R1 = { 3, 4 }. Grow 'R2 the same way: 'R2 = { 7, 8, 9, 10, 11 }. -Now we can hunt for contradictions. 
Visit each point in the function and consider, "is there a read, write, move, drop or other invalidating action on any of the loans in scope?" The only potential invalidating actions are the drops of `x` and `y` where they go out of scope. At P9, the loan `^y` is in scope, because P9 is an element of its region 'R2. This is a conflicting action, because the loan is also on the variable `y`. That raises a borrow checker error. There's also a drop at P10. P10 is in the region for `^y`, but that is not an invalidating action, because the loan is not on a place that overlaps with with `x`, the operand of the drop. +Now we can hunt for contradictions. Visit each point in the function and consider, "is there a read, write, move, drop or other invalidating action on any of the loans in scope?" The only potential invalidating actions are the drops of `x` and `y` where they go out of scope. At P9, the loan `^y` is in scope, because P9 is an element of its region 'R2. This is a conflicting action, because the loan is also on the variable `y`. That raises a borrow checker error. There's also a drop at P10. P10 is in the region for `^y`, but that is not an invalidating action, because the loan is not on a place that overlaps with `x`, the operand of the drop. The law of exclusivity is enforced at this point. A new mutable loan is an invalidating action on loans that are live at an overlapping place. A new shared loan is an invalidating action on mutable loans that are live at an overlapping place. Additionally, storing to variables is always an invalidating action when there is any loan, shared or mutable, on an overlapping place. @@ -1440,7 +1440,7 @@ Circle tries to identify all three of these points when forming borrow checker e The invariants that are tested are established with a network of lifetime constraints. It might not be the case that the invalidating action is obviously related to either the place of the loan or the use that extends the loan. 
More completely describing the chain of constraints could help users diagnose borrow checker errors. But there's a fine line between presenting an error like the one above, which is already pretty wordy, and overwhelming programmers with information. -### Lifetime constraints on called functinos +### Lifetime constraints on called functions Borrow checking is easiest to understand when applied to a single function. The function is lowered to a control flow graph, the compiler assigns regions to loans and borrow variables, emits lifetime constraints where there are assignments, iteratively grows regions until the constraints are solved, and walks the instructions, checking for invalidating actions on loans in scope. Within the definition of the function, there's nothing it can't analyze. The complexity arises when passing and receiving borrows through function calls. @@ -2064,7 +2064,7 @@ In Rust, objects are _relocated by default_. Implicit relocation is too surprisi * `rel x` - relocate `x` into a new value. `x` is set as uninitialized. * `cpy x` - copy construct `x` into a new value. `x` remains initialized. -In line with C++'s goals of _zero-cost abstractions_, we want to make it easy for users to choose the more efficient option between relocaton and copy. If the expression's type is trivially copyable and trivially destructible, it'll initialize a copy from an lvalue. Otherwise, the compiler prompts for a `rel` or `cpy` token to resolve the copy initialization. You're not going to accidentally hit the slow path or the mutable path. Opt into mutation. Opt into non-trivial copies. +In line with C++'s goals of _zero-cost abstractions_, we want to make it easy for users to choose the more efficient option between relocation and copy. If the expression's type is trivially copyable and trivially destructible, it'll initialize a copy from an lvalue. Otherwise, the compiler prompts for a `rel` or `cpy` token to resolve the copy initialization. 
You're not going to accidentally hit the slow path or the mutable path. Opt into mutation. Opt into non-trivial copies. opy, or do you want to relocate? You've noticed the nonsense spellings for some of these keywords. Why not call them `relocate`, `copy` and `drop`? Alternative token spelling avoids shadowing these common identifiers and improves results when searching code or the web. @@ -2235,7 +2235,7 @@ Safe C++ introduces a new special member function, the _relocation constructor_, * User defined - manually relocate the operand into the new object. This can be used for fixing internal addresses, like those used to implement sentinels in standard linked lists and maps. * `= trivial` - Trivially copyable types are already trivially relocatable. But other types may be trivially relocatable as well, like `box`, `unique_ptr`, `rc`, `arc` and `shared_ptr`. * `= default` - A defaulted or implicitly declared relocation constructor is implemented by the compiler with one of three strategies: types with safe destructors are trivially relocated; aggregate types use member-wise relocation; and other types are move-constructed into the new data, and the old operand is destroyed. -* `= delete` - A deleted relocation constructor _pins_ a type. Objects of that type can't be relocated. A `rel-expression` is a SFINAE failure. Rust uses its `std::Pin`[@pin] pin type as a container for structs with with address-sensitive states. That's an option with Safe C++'s deleted relocation constructors. Or, users can write user-defined relocation constructors to update address-sensitive states. +* `= delete` - A deleted relocation constructor _pins_ a type. Objects of that type can't be relocated. A `rel-expression` is a SFINAE failure. Rust uses its `std::Pin`[@pin] pin type as a container for structs with address-sensitive states. That's an option with Safe C++'s deleted relocation constructors. Or, users can write user-defined relocation constructors to update address-sensitive states. 
Relocation constructors are always noexcept. It's used to implement the drop-and-replace semantics of assignment expressions. If a relocation constructor was throwing, it might leave objects involved in drop-and-replace in illegal uninitialized states. An uncaught exception in a user-defined or defaulted relocation constructor will panic and terminate. @@ -2643,7 +2643,7 @@ It's the responsibility of a safe library to think through all possible scenario C++ variadics don't convey lifetime constraints from a function's return type to its parameters. Calls like `make_unique` and `emplace_back` take parameters `Ts... args` and return an unrelated type `T`. This may trigger the borrow checker, because the implementation of the function will produce free regions with unrelated endpoints. It's not a soundness issue, but it is a serious usability issue. -We need an _expression-outlives-constraint_, a programmatic version of _outlives-constrant_ `/(where a : b)`. It would consist of an _expression_ in an unevaluated context, which names the actual function parameters and harvests the lifetime constraints implied by those expressions. We should name function parameters rather than declvals of their types, because they may be borrow parameters with additional constraints than their template lifetime parameters have. +We need an _expression-outlives-constraint_, a programmatic version of _outlives-constraint_ `/(where a : b)`. It would consist of an _expression_ in an unevaluated context, which names the actual function parameters and harvests the lifetime constraints implied by those expressions. We should name function parameters rather than declvals of their types, because they may be borrow parameters with additional constraints than their template lifetime parameters have. In order to name the function parameters, we'll need a trailing _expression-lifetime-constraint_ syntax. 
Something like, @@ -2680,7 +2680,7 @@ Surprisingly, we can also support standard conversions from a `__unified` functi ### Non-static member functions with lifetimes -At this point in development, lifetime parameters are not supported for non-static member functions where the enclosing class has lifetime parameters, including including template lifetime parameters. Use the `self` parameter to declare an explicit object parameter. Non-static member functions don't have full object parameter types, which makes it challenging for the compiler to attach lifetime arguments. As the project matures it's likely that this capability will be included. +At this point in development, lifetime parameters are not supported for non-static member functions where the enclosing class has lifetime parameters, including template lifetime parameters. Use the `self` parameter to declare an explicit object parameter. Non-static member functions don't have full object parameter types, which makes it challenging for the compiler to attach lifetime arguments. As the project matures it's likely that this capability will be included. Constructors, destructors and the relocation constructor don't take explicit `self` parameters. But that's less problematic because the language won't form function pointers. 
From 923d200fe40ddcd05a0b1bde5fccd9de7d5f7314 Mon Sep 17 00:00:00 2001 From: Gabriel Aubut-Lussier Date: Wed, 18 Sep 2024 23:12:13 -0400 Subject: [PATCH 10/27] =?UTF-8?q?Spellchecking=20=C2=A71=20with=20Antidote?= =?UTF-8?q?.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- proposal/draft.md | 62 +++++++++++++++++++++++------------------------ 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/proposal/draft.md b/proposal/draft.md index cf10c53..3843ec6 100644 --- a/proposal/draft.md +++ b/proposal/draft.md @@ -28,11 +28,11 @@ Over the past two years, the United States Government has been issuing warnings * May 7, 2024 - **National Cybersecurity Strategy Implementation Plan**[@ncsi-plan] -The government papers are backed by industry research. Microsoft's bug telemetry reveals that 70% of its vulnerabilities would be stopped by memory safe languages.[@ms-vulnerabilities] Google's research finds 68% of 0day exploits are related to memory corruption.[@google-0day] +The government papers are backed by industry research. Microsoft's bug telemetry reveals that 70% of its vulnerabilities would be stopped by memory safe languages.[@ms-vulnerabilities] Google's research finds 68% of 0-day exploits are related to memory corruption.[@google-0day] * Mar. 4, 2024 - **Secure by Design: Google's Perspective on Memory Safety**[@secure-by-design] -Security professionals urge projects to migrate away from C++ and adopt memory safe languages. But the scale of the problem is daunting. C++ powers software that has generated trillions of dollars of value. There are many veteran C++ programmers and lots of C++ code. Given how wide-spread C++ is, what can industry really do to improve software quality and reduce vulnerabilities? What are the options for introducing new memory safe code into existing projects and hardening software that already exists? 
+Security professionals urge projects to migrate away from C++ and adopt memory safe languages. But the scale of the problem is daunting. C++ powers software that has generated trillions of dollars of value. There are many veteran C++ programmers and lots of C++ code. Given how widespread C++ is, what can industry really do to improve software quality and reduce vulnerabilities? What are the options for introducing new memory safe code into existing projects and hardening software that already exists? > Decades of vulnerabilities have proven how difficult it is to prevent memory-corrupting bugs when using C/C++. While garbage-collected languages like C# or Java have proven more resilient to these issues, there are scenarios where they cannot be used. For such cases, we’re betting on Rust as the alternative to C/C++. Rust is a modern language designed to compete with the performance C/C++, but with memory safety and thread safety guarantees built into the language. While we are not able to rewrite everything in Rust overnight, we’ve already adopted Rust in some of the most critical components of Azure’s infrastructure. We expect our adoption of Rust to expand substantially over time. > @@ -40,15 +40,15 @@ Security professionals urge projects to migrate away from C++ and adopt memory s There's only one mainstream systems level/non-garbage collected language that provides rigorous memory safety. That's the Rust language.[@rust-language] Although they play in the same space, C++ and Rust have different designs with limited interop capability, making incremental migration from C++ to Rust a painstaking process. -Rust lacks function overloading, templates, inheritance and exceptions. C++ lacks traits, relocation and borrow checking. These discrepancies are responsible for an impedence mismatch when interfacing the two languages. Most code generators for inter-language bindings aren't able to represent features of one language in terms of the features of another. 
They typically identify a small number of special vocabulary types,[@vocabulary-types] which have first-class ergonomics, and limit functionality of other constructs. +Rust lacks function overloading, templates, inheritance and exceptions. C++ lacks traits, relocation and borrow checking. These discrepancies are responsible for an impedance mismatch when interfacing the two languages. Most code generators for interlanguage bindings aren't able to represent features of one language in terms of the features of another. They typically identify a small number of special vocabulary types,[@vocabulary-types] which have first-class ergonomics, and limit functionality of other constructs. -The foreignness of Rust for career C++ developers combined with the the friction of interop tools makes hardening C++ applications by rewriting critical sections in Rust difficult. Why is there no in-language solution to memory safety? _Why not a Safe C++?_ +The foreignness of Rust for career C++ developers combined with the friction of interop tools makes hardening C++ applications by rewriting critical sections in Rust difficult. Why is there no in-language solution to memory safety? _Why not a Safe C++?_ ## Extend C++ for safety The goal of this proposal is to advance a superset of C++ with a _rigorously safe subset_. Begin a new project, or take an existing one, and start writing safe code in C++. Code in the safe context exhibits the same strong safety guarantees as code written in Rust. -Rigorous safety is a carrot-and-stick approach. The stick comes first. The stick is what security researchers and regulators care about. Safe C++ developers are prohibited from writing operations that may result in lifetime safety, type safety or thread safety undefined behaviors. Sometimes these operations are prohibited by the compiler frontend, as is the case with pointer arithmetic. 
Sometimes the operations are prohibited by static analysis in the compiler's middle-end; that stops use of uninitialized variables and use-after-free bugs, and it's the enabling technology of the _ownership and borrowing_ safety model. The remainder of issues, like out-of-bounds array subscripts, are addressed with runtime panic and aborts. +Rigorous safety is a carrot-and-stick approach. The stick comes first. The stick is what security researchers and regulators care about. Safe C++ developers are prohibited from writing operations that may result in lifetime safety, type safety or thread safety undefined behaviors. Sometimes these operations are prohibited by the compiler frontend, as is the case with pointer arithmetic. Sometimes the operations are prohibited by static analysis in the compiler's middle-end; that stops use of uninitialized variables and use-after-free bugs, and it's the enabling technology of the _ownership and borrowing_ safety model. The remaining issues, like out-of-bounds array subscripts, are addressed with runtime panics and aborts. The carrot is a suite of new capabilities which improve on the unsafe ones denied to users. The affine type system makes it easier to relocate objects without breaking type safety. Pattern matching, which is safe and expressive, interfaces with the extension's new choice types. Borrow checking,[@borrow-checking] the most sophisticated part of the Safe C++, provides a new reference type that flags use-after-free and iterator invalidation defects at compile time. @@ -104,7 +104,7 @@ Line 7: `for(int x : vec)` - Ranged-for on the vector. The standard mechanism re Line 10: `mut vec.push_back(x);` - Push a value onto the vector. The `mut` token establishes a [_mutable context_](#the-mutable-context) which enables standard conversions from lvalues to mutable borrows and references. When `[safety]` is enabled, _all mutations are explicit_.
Explicit mutation lends precision when choosing between shared borrows and mutable borrows of an object. Rust doesn't feature function overloading, so it will bind whatever kind of reference it needs to a member function's object. C++, by contrast, has function overloading, so we'll need to be explicit in order to get the overload we want. Use `mut` to bind mutable borrows. Or don't use it and bind shared borrows. -If `main` checks out syntatically, its AST is lowered to MIR, where initialization and borrow checking takes place. The hidden `slice_iterator` that powers the ranged-for loop stays initialized over the duration of the loop. The `push_back` call _invalidates_ that iterator, by mutating a place (the vector) that the iterator has a constraint on. When the value `x` is next loaded out of the iterator, the borrow checker raises an error: `mutable borrow of vec between its shared borrow and its use`. The borrow checker prevents Safe C++ from compiling a program that may exhibit undefined behavior. This analysis is done at compile time. It has no impact on your binary's size or execution speed. +If `main` checks out syntactically, its AST is lowered to MIR, where initialization and borrow checking take place. The hidden `slice_iterator` that powers the ranged-for loop stays initialized over the duration of the loop. The `push_back` call _invalidates_ that iterator, by mutating a place (the vector) that the iterator has a constraint on. When the value `x` is next loaded out of the iterator, the borrow checker raises an error: `mutable borrow of vec between its shared borrow and its use`. The borrow checker prevents Safe C++ from compiling a program that may exhibit undefined behavior. This analysis is done at compile time. It has no impact on your binary's size or execution speed. This sample is only a few lines, but it introduces several new mechanisms and types.
A comprehensive effort is needed to supply a superset of the language with a safe subset that has enough flexibility to remain expressive. @@ -116,7 +116,7 @@ Many C++ functions have preconditions that you'd have to read the docs to unders Here's the memory safety value proposition: language and library vendors make an extra effort to provide a robust environment so that users _don't have to read the docs_. No matter how they use the tooling, their actions will not raise undefined behavior and compromise their software to safety-related exploits. No system can guard against all misuse, and hastily written code may have plenty of logic bugs. But those logic bugs won't lead to memory-safety vulnerabilities. -Consider an old libc function, `std::isprint`,[@isprint] that exhibits unsafe design. This function takes an `int` parameter. _But it's not valid to call `std::isprint` for all int arguments_. The preconditions state the function be called only with arguments between -1 and 255: +Consider an old libc function, `std::isprint`,[@isprint] that exhibits unsafe design. This function takes an `int` parameter. _But it's not valid to call `std::isprint` for all int arguments_. The preconditions state the function can be called only with arguments between -1 and 255: > Like all other functions from `<cctype>`, the behavior of `std::isprint` is undefined if the argument's value is neither representable as unsigned char nor equal to EOF. To use these functions safely with plain chars (or signed chars), the argument should first be converted to unsigned char. > Similarly, they should not be directly used with standard algorithms when the iterator's value type is char or signed char. Instead, convert the value to unsigned char first. @@ -210,7 +210,7 @@ The "billion-dollar mistake" is a type safety problem.
Consider `std::unique_ptr As Hoare observes, the problem comes from conflating two different things, a pointer to an object and an empty state, into the same type and giving them the same interface. Smart pointers should only hold valid pointers. Denying the null state eliminates undefined behavior. -We address the type safety problem by overhauling the object model. Safe C++ features a new kind of move: [_relocation_](#relocation-object-model), also called _destructive move_. The object model is called an _affine_ or a _linear_ type system. Unless explicitly initialized, objects start out _uninitialized_. They can't be used in this state. When you assign to an object, it becomes initialized. When you relocate from an object, it's value is moved and it's reset to uninitialized. If you relocate from an object inside control flow, it becomes _potentially uninitialized_, and its destructor is conditionally executed after reading a compiler-generated drop flag. +We address the type safety problem by overhauling the object model. Safe C++ features a new kind of move: [_relocation_](#relocation-object-model), also called _destructive move_. The object model is called an _affine_ or a _linear_ type system. Unless explicitly initialized, objects start out _uninitialized_. They can't be used in this state. When you assign to an object, it becomes initialized. When you relocate from an object, its value is moved and it's reset to uninitialized. If you relocate from an object inside control flow, it becomes _potentially uninitialized_, and its destructor is conditionally executed after reading a compiler-generated drop flag. `std2::box` is our version of `unique_ptr`. It has no null state. There's no default constructor. Dereference it without risk of undefined behavior. If this design is so much safer, why doesn't C++ simply introduce its own fixed `unique_ptr` without a null state? Blame C++11 move semantics. 
@@ -263,7 +263,7 @@ The compiler can only relocate local variables. How do we move objects that live The C++ Standard Library has an optional type, but it's not safe to use. The optional API is full of undefined behaviors: using `operator*` or `operator->` while the value is disengaged raises undefined behavior. `std::expected`, new to C++23, exhibits the same undefined behaviors for out-of-contract uses of its `operator*`, `operator->` and `error` APIs. -If we were to represent the null state by wrapping the safe `std2::box` in an `std::optional`, that would be just as unsafe as using `std::unique_ptr`. The `operator->` is unsafe either way. We need a new _sum type_ that doesn't exhibit the union-like safety defects of `std::optional` and `std::expected`. +If we were to represent the null state by wrapping the safe `std2::box` in a `std::optional`, that would be just as unsafe as using `std::unique_ptr`. The `operator->` is unsafe either way. We need a new _sum type_ that doesn't exhibit the union-like safety defects of `std::optional` and `std::expected`. ```cpp template @@ -337,7 +337,7 @@ Pattern matching and choice types aren't just a qualify-of-life improvement. The ### Thread safety -A memory safe language should be robust against data races to shared mutable state. If one thread is writing to shared state, no other thread may access it. C++ is not a thread safe language. Its synchronization objects, such as `std::mutex`, are opt-in. If a user reads shared mutable state from outside of a mutex, that's a potential data race. It's up to users to coordinate that the same synchronization objects are locked before accessing the same shared mutable state. +A memory safe language should be robust against data races to shared mutable state. If one thread is writing to shared state, no other thread may access it. C++ is not a thread-safe language. Its synchronization objects, such as `std::mutex`, are opt-in. 
If a user reads shared mutable state from outside of a mutex, that's a potential data race. It's up to users to coordinate that the same synchronization objects are locked before accessing the same shared mutable state. Due to their non-deterministic nature, data race defects are notoriously difficult to debug. Safe C++ prevents them from occurring in the first place. Programs with potential data race bugs in the safe context are ill-formed at compile time. @@ -414,13 +414,13 @@ Hello world - 🔥🔥🔥🔥🔥🔥🔥🔥🔥 Hello world - 🔥🔥🔥🔥🔥🔥🔥🔥🔥🔥 ``` -We spawn ten threads which append a fire emoji to a shared string. The string is stored in an `std2::mutex` which is owned by an `arc`, which stands for "atomic reference count." The `arc` provides _shared ownership_ of the data. The `mutex` provides _shared access_ to it. C++ programmers often think that `std::shared_ptr` pointer provides safe shared access to objects. It does not. It only provides shared ownership. +We spawn ten threads which append a fire emoji to a shared string. The string is stored in a `std2::mutex` which is owned by an `arc`, which stands for "atomic reference count." The `arc` provides _shared ownership_ of the data. The `mutex` provides _shared access_ to it. C++ programmers often think that `std::shared_ptr` pointer provides safe shared access to objects. It does not. It only provides shared ownership. `arc`'s accessor `operator->` returns const-qualified borrows to the owned data. You can't mutate through most const-qualified types. You can only mutate through const-qualified types that encapsulate `unsafe_cell`, such as `cell`, `ref_cell`, `mutex` and `shared_mutex`. This is how [interior mutability](#interior-mutability) implements shared mutable access. The safe standard library provides `mutex` and `shared_mutex` which satisfy the [`send` and `sync`](#send-and-sync) interfaces. Only types satisfying `send` may be copied through the `std2::thread` constructor. 
-Inside the worker thread, we `lock` the mutex to initialize a lock guard object. The lock guard is an RAII type: on its construction the mutex is locked and on its destruction the mutex is unlocked. We call `borrow` on the lock guard to gain a mutable borrow to the string it contains. It's only correct to use the reference while the lock guard is in scope, that is, while the thread has the mutex locked. Now we have exclusive access to the string inside the mutex and append the fire emoji without risking a data race. +Inside the worker thread, we `lock` the mutex to initialize a lock guard object. The lock guard is a RAII type: on its construction the mutex is locked and on its destruction the mutex is unlocked. We call `borrow` on the lock guard to gain a mutable borrow to the string it contains. It's only correct to use the reference while the lock guard is in scope, that is, while the thread has the mutex locked. Now we have exclusive access to the string inside the mutex and append the fire emoji without risking a data race. -But the thread safety isn't yet demonstrated: the claim isn't that we _can_ write thread safe software; the claim is that it's _ill-formed_ to write thread unsafe software. +But the thread safety isn't yet demonstrated: the claim isn't that we _can_ write thread-safe software; the claim is that it's _ill-formed_ to write thread unsafe software. Let's sabotage our own design. Uncomment the `drp lock_guard` line. The lock guard is destroyed and unlocks the mutex. The next statement prints the string outside of the mutex, which is a data race, because one of the other nine threads may at that instant be appending to the string. @@ -439,7 +439,7 @@ safety: during safety checking of void entry_point(std2::arc @@ -732,7 +732,7 @@ int main() safe { // Requires unsafe type specifier because std::string's dtor is unsafe. 
std2::vector vec { }; - // Construct an std::string from a const char* (unsafe) + // Construct a std::string from a const char* (unsafe) // Pass by relocation (unsafe) mut vec.push_back("Hello unsafe type qualifier!"); @@ -774,7 +774,7 @@ int main() safe { } ``` -In this example, the unsafe `String` constructor is called in the safe `main` function. That's permitted because substitution of `unsafe String` into Vec's template parameter creates a `push_back` specialization with an `unsafe String` function parameter. Safe C++ allows unsafe constructors to initialize unsafe-qualified types in an safe context. +In this example, the unsafe `String` constructor is called in the safe `main` function. That's permitted because substitution of `unsafe String` into Vec's template parameter creates a `push_back` specialization with an `unsafe String` function parameter. Safe C++ allows unsafe constructors to initialize unsafe-qualified types in a safe context. Permitting unsafe operations with unsafe qualifier specialization is less noisy and exposes less of the implementation than using conditional _unsafe-specifiers_ on the class template's member functions. More importantly, we want to keep the new vector's interface safe, even when it's specialized with unsafe types. This device allows member functions to remain safe without resorting to _unsafe-blocks_ in the implementations. There's a single use of the `unsafe` token, which makes for simple audits during code review. @@ -853,7 +853,7 @@ To be more accommodating when mixing unsafe with safe code, the `unsafe` qualifi ### unsafe subscripts -There's one more prominent use of the `unsafe` token: it suppresses runtime bounds checks in subscript operations on both builtin and user-defined types. For applications where nanoseconds matter, developers may want to forego runtime bounds checking. In Safe C++, this is straight forward. Just write `; unsafe` in your array, slice or vector subscript. 
+There's one more prominent use of the `unsafe` token: it suppresses runtime bounds checks in subscript operations on both built-in and user-defined types. For applications where nanoseconds matter, developers may want to forego runtime bounds checking. In Safe C++, this is straight forward. Just write `; unsafe` in your array, slice or vector subscript. [**unsafe_bounds.cxx**](https://github.com/cppalliance/safe-cpp/blob/master/proposal/unsafe_bounds.cxx) -- [(Compiler Explorer)](https://godbolt.org/z/9xajqhrvc) ```cpp @@ -1095,7 +1095,7 @@ impl vector : make_iter { To opt into safe ranged-for iteration, containers implement the `std2::make_iter` interface. They can provide const iterators, mutable iterators or _consuming iterators_ which take ownership of the operand's data and destroy its container in the process. -Because the _range-initializer_ of the loop is an lvalue of `vector` that's outside the mutable context, the const iterator overload of `vector::iter` gets chosen. That returns an `std2::slice_iterator` into the vector's contents. Here's the first borrow: lifetime elision invents a lifetime parameter for the `self const^` parameter, which is also used for the named lifetime argument `/a` of the `slice_iterator` return type. As with the use-after-free example, the `vector::iter` function declaration establishes an _outlives-constraint_: the lifetime on the `self const^` operand must outlive the lifetime on the result object. +Because the _range-initializer_ of the loop is an lvalue of `vector` that's outside the mutable context, the const iterator overload of `vector::iter` gets chosen. That returns a `std2::slice_iterator` into the vector's contents. Here's the first borrow: lifetime elision invents a lifetime parameter for the `self const^` parameter, which is also used for the named lifetime argument `/a` of the `slice_iterator` return type. 
As with the use-after-free example, the `vector::iter` function declaration establishes an _outlives-constraint_: the lifetime on the `self const^` operand must outlive the lifetime on the result object. ```cpp template @@ -2123,7 +2123,7 @@ struct Pair { Pair g { 10, 20 }; int main() { - // Relocate from an std::tuple element. + // Relocate from a std::tuple element. auto tup = std::make_tuple(5, 1.619); int x = rel *get<0>(&tup); @@ -2216,7 +2216,7 @@ Use `circle -print-mir` to dump the MIR of this program. 20 InstEnd _4 ``` -The assignment `t3.0.0.1` lowers to `_4.0.0.1`. This is a place name of a local variable. Importantly, it doesn't involve dereferences, unlike the result of an `std::get` call. It's an _owned place_ which the compiler is able to relocate out of. +The assignment `t3.0.0.1` lowers to `_4.0.0.1`. This is a place name of a local variable. Importantly, it doesn't involve dereferences, unlike the result of a `std::get` call. It's an _owned place_ which the compiler is able to relocate out of. C++'s native array decays to pointers and doesn't support pass-by-value semantics. `std::array` encapsulates arrays to fix these problems and provides an `operator[]` API for consistent subscripting syntax. But `std::array` is broken for our purposes. Since `operator[]` returns a reference, the `std::array`'s elements are not _owned places_ and can't be relocated out of. @@ -2224,7 +2224,7 @@ Safe C++ introduces a first-class pass-by-value array type `[T; N]` and a first- Slices have dynamic length and are _incomplete types_. You may form borrows, references or pointers to slices and access through those. These are called _fat pointers_ and are 16 bytes on 64-bit platforms. The data pointer is accompanied by a length field. -The new array type, the slice type and the legacy builtin array type panic on out-of-bounds subscripts. They exhibit bounds safety in the new object model. Use [unsafe subscripts](#unsafe-subscripts) to suppress the runtime bounds check. 
+The new array type, the slice type and the legacy built-in array type panic on out-of-bounds subscripts. They exhibit bounds safety in the new object model. Use [unsafe subscripts](#unsafe-subscripts) to suppress the runtime bounds check. Making `std::pair`, `std::tuple` and `std::array` magic types with native support for relocation is on the short list of language improvements. We hope to incorporate this functionality for the next revision of this proposal. In the meantime, the first-class replacement types provide us with a convenient path forward for developing the safe standard library. @@ -2401,7 +2401,7 @@ int test(Primitive obj) noexcept safe { } ``` -A _match-expression_'s operand is an expression of class, choice, array, slice, arithmetic, builtin vector or builtin matrix type. The _match-specifier_ is populated with a set of _match-clauses_. Each _match-clause_ has a _pattern_ on the left, an optional _match-guard_ in the middle, and a _match-body_ after the `=>` token. +A _match-expression_'s operand is an expression of class, choice, array, slice, arithmetic, built-in vector or built-in matrix type. The _match-specifier_ is populated with a set of _match-clauses_. Each _match-clause_ has a _pattern_ on the left, an optional _match-guard_ in the middle, and a _match-body_ after the `=>` token. A match is rich in the kind of patterns it supports: @@ -2587,7 +2587,7 @@ class [[ ]] arc; ``` -`std2::arc` is the atomic reference-counted pointer. If its inner type is both `send` and `sync`, then the `arc` specialization is also `send` and `sync`. Most types with _value semantics_, including builtin types, are `send` and `sync`. By the rules of _inherited mutability_, so are aggregate types built from `send` and `sync` subobjects. `std2::arc` is `send`, permitting copy to other threads. +`std2::arc` is the atomic reference-counted pointer. If its inner type is both `send` and `sync`, then the `arc` specialization is also `send` and `sync`. 
Most types with _value semantics_, including built-in types, are `send` and `sync`. By the rules of _inherited mutability_, so are aggregate types built from `send` and `sync` subobjects. `std2::arc` is `send`, permitting copy to other threads. But `std2;:arc` isn't an interesting case. `arc`'s interface only produces _const_ borrows to the owned value: you can't have a data race if you're only reading from something. `arc` is intended to be used with types that implement [_interior mutability_](#interior-mutability), permitting mutation through const references. `sync` characterizes the thread safety of hde deconfliction mechanisms of types with interior mutability. Is that deconfliction mechanism _single threaded_ (`sync`=false) or _multi-threaded_ (`sync`=true)? @@ -2606,9 +2606,9 @@ class [[ ]] mutex; ``` -`std2::mutex` is another candidate for use with `std2::arc`. This type is thread safe. As shown in the [thread safety](#thread-safety) example, it provides threads with exclusive access to its interior data using a synchronization object. The borrow checker prevents the reference to the inner data from being used outside of the mutex's lock. Therefore, `std2::mutex` is `sync` if its inner type is `send`. Why make it conditional on `send` when the mutex is already providing threads with exclusive access to the inner value? This provides protection for the rare type with thread affinity. A type is `send` if it can both be copied to a different thread _and used_ by a different thread. +`std2::mutex` is another candidate for use with `std2::arc`. This type is thread-safe. As shown in the [thread safety](#thread-safety) example, it provides threads with exclusive access to its interior data using a synchronization object. The borrow checker prevents the reference to the inner data from being used outside of the mutex's lock. Therefore, `std2::mutex` is `sync` if its inner type is `send`. 
Why make it conditional on `send` when the mutex is already providing threads with exclusive access to the inner value? This provides protection for the rare type with thread affinity. A type is `send` if it can both be copied to a different thread _and used_ by a different thread. -`std2::arc>` is `send` if `std2::mutex` is `send` and `sync`. `std2::mutex` is `send` and `sync` if `T` is `send`. Since most types are `send` by construction, we can safely mutate shared state over multiple threads as long as its wrapped in a `std2::mutex` and that's owned by an `std2::arc`. The `arc` provides shared ownership. The `mutex` provides shared mutation. +`std2::arc>` is `send` if `std2::mutex` is `send` and `sync`. `std2::mutex` is `send` and `sync` if `T` is `send`. Since most types are `send` by construction, we can safely mutate shared state over multiple threads as long as its wrapped in a `std2::mutex` and that's owned by a `std2::arc`. The `arc` provides shared ownership. The `mutex` provides shared mutation. ```cpp class thread { @@ -2631,11 +2631,11 @@ The `send` constraint against demonstrates the safety model's [theme of responsi `std2::thread` is designed defensively with the safety promise that it won't produce undefined behavior no matter how it's used. Can we fool `thread` into producing a data race? -**Pass a borrow to a value on the stack.** There's no guarantee that the thread will join before the stack object is destroyed. Is that a potential use-after-free? No, because the thread has an _outlives-constraint_ which checks that all function arguments outlive `/static`. An `std2::arc` doesn't have lifetime arguments (unless its inner type is a lifetime binder), so that checks out. But a shared or mutable borrow does have a lifetime argument, and if it refers to an object on the stack, it's not `/static`. Those are arguments are accepted by the _requires-clause_ but are rejected by the borrow checker. 
+**Pass a borrow to a value on the stack.** There's no guarantee that the thread will join before the stack object is destroyed. Is that a potential use-after-free? No, because the thread has an _outlives-constraint_ which checks that all function arguments outlive `/static`. A `std2::arc` doesn't have lifetime arguments (unless its inner type is a lifetime binder), so that checks out. But a shared or mutable borrow does have a lifetime argument, and if it refers to an object on the stack, it's not `/static`. Those arguments are accepted by the _requires-clause_ but are rejected by the borrow checker. -**Pass a borrow to a global variable.** If the global's type is not `sync`, then a borrow to it is not `send`, and that's a constraint violation. If the global variable is mutable, that could cause a data race. Fortunately it's ill-formed to name mutable global objects in a [safe context](#the-safe-context). Otherwise, it's safe to share const global objects between threads. +**Pass a borrow to a global variable.** If the global's type is not `sync`, then a borrow to it is not `send`, and that's a constraint violation. If the global variable is mutable, that could cause a data race. Fortunately, it's ill-formed to name mutable global objects in a [safe context](#the-safe-context). Otherwise, it's safe to share const global objects between threads. -It's the responsibility of a safe library to think through all possible scenarios of use and prevent execution that could result in soundness defects. After all, the library author is a specialist in that domain. This is a friendlier system than Standard C++, which places the all the weight of writing thread safe code on the shoulders of users. +It's the responsibility of a safe library to think through all possible scenarios of use and prevent execution that could result in soundness defects. After all, the library author is a specialist in that domain. 
This is a friendlier system than Standard C++, which places the all the weight of writing thread-safe code on the shoulders of users. ## Unresolved design issues From 9c39fc3c6c38c7c559bf8f791af9a206814cdc62 Mon Sep 17 00:00:00 2001 From: Sean Baxter Date: Mon, 30 Sep 2024 16:06:42 -0400 Subject: [PATCH 11/27] std2::box has deleted copy/borrow ctor --- libsafecxx/single-header/std2.h | 4 ++++ libsafecxx/test/CMakeLists.txt | 2 ++ libsafecxx/test/compile-fail/box_thread.cxx | 22 +++++++++++++++++++++ 3 files changed, 28 insertions(+) create mode 100644 libsafecxx/test/compile-fail/box_thread.cxx diff --git a/libsafecxx/single-header/std2.h b/libsafecxx/single-header/std2.h index 1f4f6e4..2b21a54 100644 --- a/libsafecxx/single-header/std2.h +++ b/libsafecxx/single-header/std2.h @@ -822,6 +822,10 @@ box delete p_; } + // Delete the copy constructors. + box(const box^) = delete; + box(const box&) = delete; + static box make_default() safe requires(safe(T())) { diff --git a/libsafecxx/test/CMakeLists.txt b/libsafecxx/test/CMakeLists.txt index ddcd5f5..024bf6f 100644 --- a/libsafecxx/test/CMakeLists.txt +++ b/libsafecxx/test/CMakeLists.txt @@ -121,6 +121,8 @@ safe_cxx_compile_fail_test(cell_box_uaf "drop of s2 between its shared borrow an safe_cxx_compile_fail_test(manually_drop1 "cannot convert prvalue std2::box to std2::manually_drop>") safe_cxx_compile_fail_test(throw1 "s constrained to live as long as static") +safe_cxx_compile_fail_test(box_thread "call to deleted borrow constructor") + safe_cxx_compile_fail_test(vector1 "use of strs depends on expired loan") safe_cxx_compile_fail_test(vector2 "use of it depends on expired loan") safe_cxx_compile_fail_test(mutex1 "cannot convert prvalue int to std2::mutex") diff --git a/libsafecxx/test/compile-fail/box_thread.cxx b/libsafecxx/test/compile-fail/box_thread.cxx new file mode 100644 index 0000000..46ed553 --- /dev/null +++ b/libsafecxx/test/compile-fail/box_thread.cxx @@ -0,0 +1,22 @@ +#feature on safety +#include 
+#include + +using namespace std2; + +void entry_point(box data, int thread_id) safe { + mut data->append("🔥"); + println(*data); + unsafe { std::this_thread::sleep_for(std::chrono::milliseconds(10)); } +} + +int main() safe { + box shared_data(string("Hello world - ")); + + vector threads { }; + for(int i : 10) + mut threads.push_back(thread(entry_point, cpy shared_data, i)); + + for(thread t : rel threads) + t rel.join(); +} \ No newline at end of file From 4c599c14ad60d971f8b0ddf8562a5cf7c0e33300 Mon Sep 17 00:00:00 2001 From: Sean Baxter Date: Wed, 2 Oct 2024 19:43:27 -0400 Subject: [PATCH 12/27] Better wording in interior mutability section --- docs/draft.html | 8 ++++---- proposal/draft.md | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/draft.html b/docs/draft.html index b643e79..650970e 100644 --- a/docs/draft.html +++ b/docs/draft.html @@ -4338,13 +4338,13 @@

unsafe_cell to safely strip the const off shared borrows, allowing users to mutate the protected resource.

-

Safe C++ and Rust and conflate exclusive access with mutable borrows -and shared access with const borrows. It’s is an economical choice, -because one type qualifier, +

Safe C++ and Rust conflate exclusive access with mutable borrows and +shared access with const borrows. It’s is an economical choice, because +one type qualifier, const or mut, also determines exclusivity. But the cast-away-const model of interior mutability is an awkward -consequence. But this design is not the only way: The Ante language[ante] experiments with separate +consequence. This design may not be the only way: The Ante language[ante] experiments with separate own mut and shared mut qualifiers. That’s really attractive, because you’re never mutating something through a const diff --git a/proposal/draft.md b/proposal/draft.md index 396c05b..27bc0a3 100644 --- a/proposal/draft.md +++ b/proposal/draft.md @@ -2553,7 +2553,7 @@ Lifetime safety also guarantees that the `lock_guard` is in scope (meaning the m Interior mutability is a legal loophole around exclusivity. You're still limited to one mutable borrow or any number of shared borrows to an object. Types with a deconfliction strategy use `unsafe_cell` to safely strip the const off shared borrows, allowing users to mutate the protected resource. -Safe C++ and Rust and conflate exclusive access with mutable borrows and shared access with const borrows. It's is an economical choice, because one type qualifier, `const` or `mut`, also determines exclusivity. But the cast-away-const model of interior mutability is an awkward consequence. But this design is not the only way: The Ante language[@ante] experiments with separate `own mut` and `shared mut` qualifiers. That's really attractive, because you're never mutating something through a const reference. This three-state system doesn't map onto C++'s existing type system as easily, but that doesn't mean the const/mutable borrow treatment, which does integrate elegantly, is the most expressive. A `shared` type qualifier merits investigation during the course of this project. 
+Safe C++ and Rust conflate exclusive access with mutable borrows and shared access with const borrows. It's an economical choice, because one type qualifier, `const` or `mut`, also determines exclusivity. But the cast-away-const model of interior mutability is an awkward consequence. This design may not be the only way: The Ante language[@ante] experiments with separate `own mut` and `shared mut` qualifiers. That's really attractive, because you're never mutating something through a const reference. This three-state system doesn't map onto C++'s existing type system as easily, but that doesn't mean the const/mutable borrow treatment, which does integrate elegantly, is the most expressive. A `shared` type qualifier merits investigation during the course of this project. * `T^` - Exclusive mutable access. Permits standard conversion to `shared T^` and `const T^`. * `shared T^` - Shared mutable access. Permits standard conversion to `const T^`. Only types that enforce interior mutability have overloads with shared mutable access. 
From 5a6a18f8c86c13283138d8fdaa499af3375d3e6a Mon Sep 17 00:00:00 2001 From: Sean Baxter Date: Mon, 14 Oct 2024 11:07:43 -0400 Subject: [PATCH 13/27] Added draft-lifetimes document --- docs/draft-lifetimes.html | 1323 ++++++++++++++++++++++++++++++++++ docs/index.html | 4 +- lifetimes/draft-lifetimes.md | 571 +++++++++++++++ lifetimes/lifetimes1.cxx | 21 + lifetimes/lifetimes2.cxx | 10 + lifetimes/lifetimes3.cxx | 37 + lifetimes/lifetimes4.cxx | 1 + lifetimes/lifetimes5.cxx | 27 + lifetimes/vector1.cxx | 17 + lifetimes/vector2.cxx | 28 + lifetimes/vector3.cxx | 31 + lifetimes/vector4.cxx | 30 + 12 files changed, 2098 insertions(+), 2 deletions(-) create mode 100644 docs/draft-lifetimes.html create mode 100644 lifetimes/draft-lifetimes.md create mode 100644 lifetimes/lifetimes1.cxx create mode 100644 lifetimes/lifetimes2.cxx create mode 100644 lifetimes/lifetimes3.cxx create mode 100644 lifetimes/lifetimes4.cxx create mode 100644 lifetimes/lifetimes5.cxx create mode 100644 lifetimes/vector1.cxx create mode 100644 lifetimes/vector2.cxx create mode 100644 lifetimes/vector3.cxx create mode 100644 lifetimes/vector4.cxx diff --git a/docs/draft-lifetimes.html b/docs/draft-lifetimes.html new file mode 100644 index 0000000..fe6eaca --- /dev/null +++ b/docs/draft-lifetimes.html @@ -0,0 +1,1323 @@ + + + + + + + + Memory safety without lifetime parameters + + + + + + + +

+
+

Memory safety without +lifetime parameters

+ + + + + + + + + + + + + + + + + + + + + +
Document #:DXXXX
Date:2024-10-15
Project:Programming Language C++
Audience: + SG23
+
Reply-to: + Sean Baxter
<>
+
+
+
+

1 Safe references

+

“Safe C++”[safecpp] introduced a comprehensive +design for compile-time memory safety in C++. The borrow checking model +in Safe C++ requires lifetime parameters, a feature that increases +expressiveness but complicates the language’s type system. This proposal +describes an alternative style of borrow checking, guaranteeing lifetime +safety without the involvement of lifetime annotations.

+

First let’s recap how lifetime parameters are declared and used.

+

lifetimes1.cxx +– (Compiler Explorer)

+
#feature on safety
+
+// Function parameters have different lifetime parameters. 
+// Return type is constrained by x's lifetime.
+auto f1/(a, b)(int^/a x, int^/b y, bool pred) safe -> int^/a {
+  // Error:
+  // function auto f1/(a, b)(int^/a, int^/b) -> int^/a returns
+  // object with lifetime b, but b doesn't outlive a
+  // return y;
+  return pred ? x : y;
+}
+
+// Function parameters have a common lifetime parameter.
+auto f2/(a)(int^/a x, int^/a y, bool pred) safe -> int^/a {
+  // Ok
+  return pred ? x : y;
+}
+
+// Error:
+// cannot use lifetime elision for return type int^ 
+auto f3(int^ x, int^ y) safe -> int^;
+

In Safe C++, occurrences of the borrow type +T^ in +function declarations and in data members require specialization with +lifetime arguments. Lifetime arguments name +lifetime-parameters declared as part of the function +declaration. Borrow types without lifetime arguments have unbound +lifetimes and borrows with lifetime arguments have bound +lifetimes. These are treated as different entities by the +language’s type system, and there are subtle rules on how bound +lifetimes decay to unbound lifetimes and how unbound lifetimes become +bound. Lifetime annotations greatly improve the capability of safe +references, but extend an already complicated type system.

+

The above code declares functions +f1, +f2 and +f3 with +lifetime-parameter-lists. Borrows in function return types must +be constrained by the lifetimes of one or more function parameters. +Failure to match lifetime arguments between function parameters and +return types will cause a borrow checker failure. +f1 fails to borrow check because the +returned parameter y does not +outlive the lifetime +/a on the +return type.

+

Elision rules make lifetime annotations implicit in some cases. But +elision can fail, requiring users to intervene with annotations. In the +example above, the declaration of f3 +fails because the elision rules cannot determine the lifetime argument +on the returned borrow.

+

lifetimes2.cxx +– (Compiler Explorer)

+
#feature on safety
+
+// New elision rules:
+// All parameters are constrained by a common lifetime.
+// The common lifetime constrains the return type.
+int% f4(int% x, int% y, bool pred) safe {
+  // Can return either x or y, because they outlive the common lifetime
+  // and the common lifetime outlives the result object.
+  return pred ? x : y;
+}
+

This proposal introduces a new safe reference marked by the +reference declarator +T%. Safe +references do not take lifetime arguments and there is no notion of +bound or unbound lifetimes. The lifetime +parameterization is determined by the formation of the function type. +For a free function, all function parameters outlive a single invented +lifetime that extends through the duration of the function call. For a +non-static member function with the +% +ref-qualifier, the implicit object parameter outlives the +invented lifetime. In turn, this invented lifetime outlives the returned +safe reference.

+

1.1 Exclusivity

+
    +
  • T% is a +mutable safe reference. It cannot alias other references to +overlapping places.
  • +
  • const T% +is a shared safe reference. It may alias shared safe references +to overlapping places, but may never overlap a mutable reference.
  • +
+

If lifetime safety can be guaranteed without lifetime parameters, why +involve a new reference type +T% at all? +Why not perform this form of borrow checking on the existing lvalue- and +rvalue-references +T& and +T&&? +The answer is that safe references enforce exclusivity and +legacy references do not. There may be one mutable reference to a place, +or any number of shared (constant) references, but not both at the same +time. This is the universal invariant of borrow checking. Borrow +checking legacy reference types would break all existing code, because +that code was written without upholding the exclusivity invariant.

+

Exclusivity is a program-wide invariant. It doesn’t hinge on the +safeness of a function.

+
    +
  • A safe function is sound for all valid inputs.
  • +
  • An unsafe function has preconditions and may be unsound for some +valid inputs.
  • +
+

“Valid” borrow and safe reference inputs don’t mutably alias. This is +something a function can just assume; it doesn’t need to check +and there’s no way to check. Borrow checking upholds exclusivity even +for unsafe functions (when compiled under the [safety] +feature). There are other assumptions C++ programmers already make about +the validity of inputs: for instance, references never hold null +addresses. Non-valid inputs are implicated in undefined behavior.

+

By the parsimony principle you may suggest “rather than adding a new +safe reference type, just enforce exclusivity on lvalue- and +rvalue-references when compiled under the [safety] +feature.” But that makes the soundness problem worse. New code will +assume legacy references don’t mutably alias, but existing code +doesn’t uphold that invariant because it was written without even +knowing about it.

+

If safe code calls legacy code that returns a struct with a pair of +references, do those references alias? Of course they may alias, but the +parsimonious treatment claims that mutable references don’t alias under +the [safety] +feature. We’ve already stumbled on a soundness bug.

+

Coming from the other direction, it may be necessary to form aliasing +references just to use the APIs for existing code. Consider a function +that takes an lvalue reference to a container and an lvalue reference to +one of its elements. If safe code can’t even form aliased lvalue +references, it wouldn’t be able to use that API at all.

+

Exclusivity is a program-wide invariant on safe references. We need +separate safe and unsafe reference types for both soundness and +expressiveness.

+

vector1.cxx +– (Compiler Explorer)

+
#include <vector>
+
+void f1(std::vector<float>& vec, float& x) {
+  // Do vec and x alias? If so, the push_back may invalidate x.
+  vec.push_back(6);
+
+  // Potential UB: x may have been invalidated by the push_back.
+  x = 6;
+}
+
+int main() {
+  std::vector<float> vec { 1.0f };
+
+  // Legacy references permit aliasing.
+  f1(vec, vec[0]);
+}
+

This example demonstrates how perilous mutable aliasing in C++ is. In +f1, the compiler doesn’t know if +vec and +x alias. Pushing to the vector may +cause a buffer resize and copy its data into a new allocation, +invalidating existing references or pointers into the container. As C++ +doesn’t enforce exclusivity on legacy references, the code in +main is legal, even though it leads +to a use-after-free defect.

+

vector2.cxx +– (Compiler Explorer)

+
#feature on safety
+#include <cstdint>
+
+template<typename T>
+class Vec {
+public:
+  void push_back(T value) % safe;
+
+  const T% operator[](size_t idx) const % safe;
+        T% operator[](size_t idx)       % safe;
+};
+
+void f2(Vec<float>% vec, float% x) safe {
+  // Does push_back potentially invalidate x? 
+  // No! Exclusivity prevents vec and x from aliasing.
+  vec.push_back(7);
+
+  // Okay to store to x, because it doesn't point into vec's data.
+  *x = 7;
+}
+
+int main() safe {
+  Vec<float> vec { };
+  mut vec.push_back(1);
+
+  // Ill-formed: mutable borrow of vec between its mutable borrow and its use
+  f2(mut vec, mut vec[0]);
+}
+
$ circle vector2.cxx
+safety: during safety checking of int main() safe
+  borrow checking: vector2.cxx:27:19
+    f2(mut vec, mut vec[0]); 
+                    ^
+  mutable borrow of vec between its mutable borrow and its use
+  loan created at vector2.cxx:27:10
+    f2(mut vec, mut vec[0]); 
+           ^
+

Rewrite the example using our simplified safe references. In +main, the user attempts to pass a +safe reference to vec and a safe +reference to one of its elements. This violates exclusivity, causing the +program to be ill-formed.

+

Mutable safe references are prohibited from aliasing. Exclusivity is +enforced by the same MIR analysis that polices Safe C++’s more general +borrow type +T^. While +enforcing exclusivity involves more complicated tooling, it simplifies +reasoning about your functions. Since safe reference parameters don’t +alias, users don’t even have to think about aliasing bugs. You’re free +to store to references without worrying about iterator invalidation or +other side effects leading to use-after-free defects.

+

1.2 Constraint rules

+

This proposal implements two sets of constraint rules. Free functions +constrain return references by the shortest of the argument lifetimes. +Non-static member functions constrain return references by the implicit +object lifetime.

+

lifetimes3.cxx +– (Compiler Explorer)

+
#feature on safety
+
+const int% f1(const int% x, const int% y, bool pred) safe {
+  // The return reference is constrained by all reference parameters: x and y.
+  return pred ? x : y;
+}
+
+struct Obj {
+  const int% f2(const int% arg) const % safe {
+    // Non-static member functions are constrained by the implicit 
+    // object lifetime.
+    // It's OK to return `x`, because self outlives the return.
+    return %x;
+  }
+
+  const int% f3(const int% arg) const % safe {
+    // Error: arg does not outlive the return reference.
+    return arg;
+  }
+
+  const int% f4(const self%, const int% arg) safe {
+    // OK - f4 is a free function with an explicit self parameter.
+    return arg;
+  }
+
+  int x;
+};
+
+int main() {
+  int x = 1, y = 2;
+  f1(x, y, true); // OK
+
+  Obj obj { };
+  obj.f2(x);  // OK
+  obj.f3(x);  // Error
+  obj.f4(x);  // OK.
+}
+
$ circle lifetimes3.cxx 
+safety: during safety checking of const int% Obj::f3(const int%) const % safe
+  error: lifetimes3.cxx:18:12
+      return arg; 
+             ^
+  function const int% Obj::f3(const int%) const % safe returns object with lifetime SCC-ref-1, but SCC-ref-1 doesn't outlive SCC-ref-0
+

The definitions of free function +f1 and non-static member function +f2 compile, because they return +function parameters that constrain the return type: the returned +parameter outlives the returned reference. The non-static +member function f3 fails to compile, +because the returned parameter does not outlive the return +type. In a non-static member function, only the implicit object +parameter outlives the return type. +f4 returns a function parameter but +compiles; it uses the explicit object syntax to gain the ergonomics of a +non-static member function, but retains the constraint rules of a free +function.

+

vector3.cxx +– (Compiler Explorer)

+
#feature on safety
+
+template<typename Key, typename Value>
+class Map {
+public:
+  // Non-static member functions do not constrain the result object to
+  // the function parameters.
+  auto get1(const Key% key) % safe -> Value%;
+
+  // Free functions do constrain the result object to the function parameters.
+  auto get2(self%, const Key% key) safe -> Value%;
+};
+
+int main() safe {
+  Map<float, long> map { };
+
+  // Bind the key reference to a materialized temporary.
+  // The temporary expires at the end of this statement.
+  long% value1 = mut map.get1(3.14f);
+
+  // We can still access value, because it's not constrained on the 
+  // key argument.
+  *value1 = 1001;
+
+  // The call to get2 constrains the returned reference to the lifetime
+  // of the key temporary.
+  long% value2 = mut map.get2(1.6186f);
+
+  // This is ill-formed, because get2's key argument is out of scope.
+  *value2 = 1002;
+}
+
$ circle vector3.cxx 
+safety: during safety checking of int main() safe
+  borrow checking: vector3.cxx:30:4
+    *value2 = 1002; 
+     ^
+  use of value2 depends on expired loan
+  drop of temporary object float between its shared borrow and its use
+  loan created at vector3.cxx:27:31
+    long% value2 = mut map.get2(1.6186f); 
+                                ^
+

The constraint rules for non-static member functions reflect the idea +that resources are owned by class objects. Consider a map data structure +that associates values with keys. The map may be specialized with a key type +that’s expensive to copy, such as a string or another map. We don’t want +to compel the user to pass the key by value, because that may require +copying this expensive type. Naturally, we pass by const reference.

+

However, the accessor only needs the key inside the body of the +function. Once it locates the value, it should return a reference to +that, unconstrained by the lifetime of the key argument. Consider +passing a materialized temporary for a key: it goes out of scope at the +end of the full expression. get1 +uses the non-static member function constraint rules. The caller can use +the returned reference even after the key temporary goes out of scope. +get2 uses the free function +constraint rules, which constrains the return type to all of its +function parameters. This leaves the program ill-formed when the +returned reference is used after the expiration of the key +temporary.

+

In this model, lifetime constraints are not generally programmable, +but that design still provides a degree of freedom in the form of +non-static member functions.

+

vector4.cxx +– (Compiler Explorer)

+
#feature on safety
+
+template<typename Key, typename Value>
+class Map {
+public:
+  // Lifetime elision rules constrain the return by self.
+  auto get1(self^, const Key^ key) safe -> Value^;
+
+  // Use explicit parameterizations for alternate constraints.
+  auto get2/(a)(self^/a, const Key^/a key) safe -> Value^/a;
+};
+
+int main() safe {
+  Map<float, long> map { };
+
+  // Bind the key reference to a materialized temporary.
+  // The temporary expires at the end of this statement.
+  long^ value1 = mut map.get1(3.14f);
+
+  // We can still access value, because it's not constrained on the 
+  // key argument.
+  *value1 = 1001;
+
+  // The call to get2 constrains the returned reference to the lifetime
+  // of the key temporary.
+  long^ value2 = mut map.get2(1.6186f);
+
+  // This is ill-formed, because get2's key argument is out of scope.
+  *value2 = 1002;
+}
+
$ circle vector4.cxx 
+safety: during safety checking of int main() safe
+  borrow checking: vector4.cxx:29:4
+    *value2 = 1002; 
+     ^
+  use of value2 depends on expired loan
+  drop of temporary object float between its shared borrow and its use
+  loan created at vector4.cxx:26:31
+    long^ value2 = mut map.get2(1.6186f); 
+                                ^
+

The general borrow type +T^ has +programmable constraints. The map above declares accessor functions. +get1 relies on lifetime elision to +constrain the result object by the +self parameter. This is equivalent +to the non-static member function constraint rule. We can call +get1 and use the returned reference +even after the key temporary goes out of scope.

+

get2 includes lifetime +annotations to constrain the returned reference by both the +self and +key parameters. This is like the +free function constraint rules. The program fails borrow checking when +the returned reference value2 is +used after the expiration of its key temporary.

+

2 Second-class references

+

References can be taxonomized into two classes:[second-class]

+
    +
  • First-class references can pass data into functions, be returned +from functions, made into objects and be stored in structures.
  • +
  • Second-class references can pass data into functions but cannot be +returned from functions, made into objects or stored in structures.
  • +
+

Parameter-passing directives like +in and +inout are equivalent to second-class +references. The mutable value semantics[mutable-value-semantics] model uses +parameter-passing directives to pass objects to functions by reference +without involving the complexity of a borrow checker.

+
void func(Vec<float>% vec, float% x) safe;
+

In this fragment, the reference parameters +vec and +x serve as second-class +references. The compiler can achieve memory safety without +involving the complexity of borrow checking. Both references point at +data that outlives the duration of the call to +func. Exclusivity is enforced at the +point of the call, which prevents +vec and +x from aliasing. Since +vec and +x don’t alias, resizing or clearing +vec cannot invalidate the +x reference.

+

The safe references presented here are more powerful than +second-class references. While they don’t support all the capabilities +of borrows, they can be returned from functions and made into objects. +The compiler must implement borrow checking to support this additional +capability.

+

Borrow checking operates on a function lowering called mid-level IR +(MIR). A fresh region variable is provisioned for each local variable +with a safe reference type. Dataflow analysis populates each region +variable with the liveness of its reference. Assignments and function +calls involving references generate lifetime constraints. The +compiler solves the constraint equation to find the liveness of +each loan. All instructions in the MIR are scanned for +conflicting actions with any of the loans in scope at that +point. Conflicting actions raise borrow checker errors.

+

The Hylo[hylo] model is largely equivalent to +this model and it requires borrow checking technology. +let and +inout parameter directives use +mutable value semantics to ensure memory safety for objects passed by +reference into functions. But Hylo also supports returning references in +the form of subscripts:

+

Array.hylo

+
public conformance Array: Collection {
+  ...
+  public subscript(_ position: Int): Element {
+    let {
+      precondition((position >= 0) && (position < count()), "position is out of bounds")
+      yield pointer_to_element(at: position).unsafe[]
+    }
+    inout {
+      precondition((position >= 0) && (position < count()), "position is out of bounds")
+      yield &(pointer_to_element(at: position).unsafe[])
+    }
+  }
+}
+

Subscripts are reference-returning coroutines. Coroutines +with a single yield point are split into two normal functions: a ramp +function that starts at the top and returns the expression of the yield +statement, and a continuation function which resumes after the yield and +runs to the end. Local state that’s live over the yield point must live +in a coroutine frame so that it’s available to the continuation +function. These Array subscripts +don’t have instructions after the yield, so the continuation function is +empty and hopefully elided by the optimizer.

+
template<typename T>
+struct Vec {
+  const T% operator[](size_t idx) const % safe;
+        T% operator[](size_t idx)       % safe;
+};
+

The Hylo Array subscripts are +lowered to reference-returning ramp functions exactly like their C++ +Vec counterparts. For both +languages, the borrow checker relates lifetimes through the function +arguments and out the result object. This isn’t the simple safety of +second-class references/mutable value semantics. This is full-fat live +analysis.

+

Safe references without lifetime annotations shield users from +dealing with a new degree of freedom, but they don’t simplify the static +analysis that upholds lifetime safety. To prevent use-after-free +defects, compilers must still lower functions to mid-level IR, compute +non-lexical lifetimes[nll] and solve the constraint equation. +When it comes to returning references, in for a penny, in for a +pound.

+

Since Circle has already made the investment in borrow checking, +adding simplified safe references was an easy extension. If the +community is able to fill in our gaps in knowledge around this sort of +reference, the compiler could accommodate those advances as well.

+

3 Other aspects of safety

+

As detailed in the Safe C++[safecpp] proposal, there are four +categories of memory safety:

+
    +
  1. Lifetime safety - This proposal advances a simpler +form of safe references that provides safety against use-after-free +defects. The feature is complementary with borrow types +T^ that take +lifetime arguments. Both types can be used in the same translation unit, +and even the same function, without conflict.
  2. +
  3. Type safety - Relocation must replace move +semantics to eliminate unsafe null pointer exposure. Choice types and +pattern matching must be included for safe modeling of optional +types.
  4. +
  5. Thread safety - The +send and +sync interfaces account for which +types can be copied and shared between threads.
  6. +
  7. Runtime checks - The compiler automatically emits +runtime bounds checks on array and slice subscripts. It emits checks for +integer divide-by-zero and INT_MIN / -1, which are undefined behavior. +Conforming safe library functions must also implement panics to prevent +out-of-bounds access to heap allocations.
  8. +
+

Most critically, the safe-specifier is added to a function’s +type. Inside a safe function, only safe operations may be used, unless +escaped by an unsafe-block.

+

C++ must adopt a new standard library with a safe API, which observes +all four categories of safety. We need new tooling. But it’s not the +case that we have to rewrite all C++ code. Time has already shaken +out most of the vulnerabilities in old code. As demonstrated by the +recent Android study on memory safety[android], the benefits of rewriting are +often not worth the costs. What we have to prioritize is the transition +to safe coding practices[safe-coding] for new code.

+

4 Achieving first-class +references

+

The presented design is as far as I could go to address the goal of +“memory safety without lifetime parameters.” But safe references aren’t +yet powerful enough to replace all the unsafe mechanisms necessary for +productivity in C++. We need support for safe versions of idioms that +are central to the C++ experience, such as:

+
    +
  • Iterators.
  • +
  • Views like string_view and +span.
  • +
  • RAII types with reference semantics.
  • +
+

Let’s consider RAII types with reference semantics. An example is +std::lock_guard, +which keeps a reference to a mutex. When the +lock_guard goes out of scope its +destructor calls unlock on the +mutex. This is a challenge for safe references, because safe reference +data members aren’t supported. Normally those would require lifetime +parameters on the containing class.

+

What are some options for RAII reference semantics?

+
    +
  • Coroutines. This is the Hylo strategy. The ramp function locks a +mutex and returns a safe reference to the data within. The continuation +unlocks the mutex. The reference to the mutex is kept in the coroutine +frame. But this still reduces to supporting structs with reference data +members. In this case it’s not a user-defined type, but a +compiler-defined coroutine frame. I feel that the coroutine solution is +an unidiomatic fit for C++ for several reasons: static allocation of the +coroutine frame requires exposing the definition of the coroutine to the +caller, which breaks C++’s approach to modularity; the continuation is +called immediately after the last use of the yielded reference, which +runs counter to expectation that cleanup runs at the end of the +enclosing scope; and since the continuation is called implicitly, +there’s nothing textual on the caller side to indicate an unlock.
  • +
  • Defer expressions. Some garbage-collected languages include +defer expressions, which run after some condition is met. We +could defer a call to the mutex unlock until the end of the enclosing +lexical scope. This has the benefit of being explicit to the caller and +not requiring computation of a coroutine frame. But it introduces a +fundamental new control flow mechanism to the language with +applicability that almost perfectly overlaps with destructors.
  • +
  • Destructors. This is the idiomatic C++ choice. A local object is +destroyed when it goes out of scope (or is dropped, with the Safe C++ +drop keyword). The destructor calls +the mutex unlock.
  • +
+

It makes sense to strengthen safe references to support current RAII +practice. How do we support safe references as data members? A +reasonable starting point is to declare a class as having safe +reference semantics. class name %; +is a possible syntax. Inside these classes, you can declare data members +and base classes with safe reference semantics: that includes both safe +references and other classes with safe reference semantics.

+
class lock_guard % {
+  // Permitted because the containing class has safe reference semantics.
+  std2::mutex% mutex;
+public:
+  ~lock_guard() safe {
+    mutex.unlock();
+  }
+};
+

The constraint rules can apply to the new +lock_guard class exactly as they +apply to safe references. Returning a +lock_guard constrains its lifetime +by the lifetimes of the function arguments. Transitively, the lifetimes +of the data members are constrained by the lifetime of the containing +class.

+

Unfortunately, we run into problems immediately upon declaring member +functions that take safe reference objects or safe reference parameter +types.

+
class string_view %;
+
+template<typename T>
+class info % {
+  // Has reference semantics, but that's okay because the containing class does.
+  string_view sv;
+public:
+  void swap(info% rhs) % safe;
+};
+

Consider an info class that has +safe reference semantics and keeps a +string_view as a data member. The +string_view also has reference +semantics, so it constrains the underlying string that owns the data. +Declare a non-static member function that binds the implicit object with +the % +ref-qualifier and also takes an +info by safe reference. This is +uncharted water. The implicit object type +info has reference semantics, yet +we’re taking a reference to that with the +swap call. We’re also taking a +reference to info in the function +parameter. How do we deal with references to references? The existing +constraint rules only invent a single lifetime: if we used those, we’d +be clobbering the lifetime of the inner +string_view member.

+

There’s a big weakness with the safe reference type +T%: it’s +under-specified when dealing with references to references. We need a +fix that respects the lifetimes on the class’s data members.

+

lifetimes5.cxx +– (Compiler Explorer)

+
#feature on safety
+
+class string_view/(a) {
+  // Keep a borrow to a slice over the string data.
+  const [char; dyn]^/a p_;
+public:
+};
+
+class info/(a) {
+  // The handle has lifetime /a.
+  string_view/a sv;
+
+public:
+  void swap(self^, info^ rhs) safe {
+    string_view temp = self->sv;
+    self->sv = rhs->sv;
+    rhs->sv = temp;
+  }
+};
+
+void func/(a)(info/a^ lhs, info/a^ rhs) safe {
+  lhs.swap(rhs);
+}
+
+void func2(info^ lhs, info^ rhs) safe {
+  lhs.swap(rhs);
+}
+

Rust and Safe C++ have a way to keep the lifetime of the +string_view member distinct from the +lifetimes of the self and +rhs references: lifetime parameters. +func assumes that the +string_views of its parameters come +from sources with overlapping lifetimes, so it declares a lifetime +parameter /a +that’s common to both parameters. The lifetimes on the two references +are created implicitly by elision, as they don’t have to be related in +the swap call. +func compiles and doesn’t clobber +the lifetimes of the contained +string_views.

+
safety: during safety checking of void func2(info^, info^) safe
+  error: lifetimes5.cxx:26:12
+    lhs.swap(rhs); 
+             ^
+  function void func2(info^, info^) safe returns object with lifetime #0, but #0 doesn't outlive #2
+
+  error: lifetimes5.cxx:26:3
+    lhs.swap(rhs); 
+    ^
+  function void func2(info^, info^) safe returns object with lifetime #2, but #2 doesn't outlive #0
+

Compiling func2 raises borrow +checker errors. Instead of providing explicit lifetime annotations that +relate the lifetimes of the lhs and +rhs +info types, lifetime elision creates +four distinct lifetimes: +#0 for the +lhs +info, +#1 for the +lhs +info^, +#2 for the +rhs +info and +#3 for the +rhs +info^. The +lhs.swap(rhs) +call relates the lifetimes of the operands through the common lifetime +/a. But +these lifetimes aren’t related! The compiler has no information whether +#0 outlives +#2 or vice +versa. Since the lifetimes aren’t related in +func2’s declaration, the program is +rejected as ill-formed.

+

This contrasts with the safe reference constraint rules, which would +assign the same lifetime to all four lifetime binders and clobber the +string_view lifetimes, causing a +borrow checker failure further from the source and leaving the developer +without the possibility of a fix.

+

5 Lifetime parameters

+

If there’s a community-wide research effort among compiler experts to +evolve safe references it may be possible to get them into a state to +support the abstractions most important for C++. But soundness reasoning +is very subtle work. As you increase the indirection capability of safe +references, you invite networks of dependencies of implied constraints +and variances. This increases complexity for the compiler implementation +and puts a mental burden on the authors of unsafe code to properly +uphold the invariants assumed by safe references. A research project +must produce soundness doctrine, which is essential guidance on +how to interface safe and unsafe systems while upholding the soundness +invariants of the program.

+

But we don’t have to do the research. There’s already a solution +that’s been deployed in a successful production toolchain for a decade: +lifetime parameters as used in Rust. The soundness doctrine for +writing unsafe code that upholds the invariants established by lifetime +parameters is described in the Rustnomicon[rustnomicon].

+

This is the only known viable solution for first-class safe +references without garbage collection. It’s a critical lifeline that +addresses an existential problem facing C++. By adopting lifetime +parameters, C++ can achieve safety parity with the security community’s +favored languages.

+

Consider common objections to Rust’s lifetime-annotation flavor of +borrow checking:

+
    +
  1. You need heavy annotations. This concern is +misplaced. Are you intrigued by mutable value semantics, +parameter-passing directives or second-class references? Borrow checking +gives you those, without ever having to write lifetime arguments. If +your function only uses references as parameters, elision implicitly +annotates them in a way that can’t fail. You only have to involve +lifetime arguments when going beyond the capabilities of second-class +references or mutable value semantics. More advanced usages such as the +implementation of iterators, views and RAII wrappers with reference +semantics are where annotations most often appear, because those designs +deal with multiple levels of references.
  2. +
  3. Borrow checking doesn’t permit patterns such as +self-references. It’s true that checked references are less +flexible than unsafe references or pointers, but this objection is at +odds with the claim that lifetime parameters are too burdensome. +Lifetime parameters increase the expressiveness of safe +references. Additionally, they can reference things important to C++ +users that garbage collection can’t, such as variables on the stack. +Do we want more expressive references at the cost of annotations, or do +we want to get rid of lifetime parameters to make a simpler language? +Those are opposite goals.
  4. +
  5. Borrow checking with lifetimes is too different from normal +C++. Borrow checking is the safety technology most similar to +current C++ practice. This model replaces unchecked references with +checked references. Other safety models get rid of reference types +entirely or replace them with garbage collection which is incompatible +with C++’s manual memory management and RAII. The design philosophy of +borrow checking is to take normal references but constrain them to uses +that can be checked for soundness by the compiler.
  6. +
+

It’s not surprising that the C++ community hasn’t discovered a better +way to approach safe references than the lifetime parameter model. After +all, there isn’t a well-funded effort to advance C++ language-level +lifetime safety. But there is in the Rust community. Rust has made +valuable improvements to its lifetime safety design. Lots of effort goes +into making borrow checking more permissive: The integration of +mid-level IR and non-lexical lifetimes in 2016 revitalized the +toolchain. Polonius[polonius] approaches dataflow analysis +from the opposite direction, hoping to shake loose more improvements. +Ideas like view types[view-types] and the sentinel +pattern[sentinel-pattern] are being +investigated. But all this activity has not discovered a mechanism +that’s superior to lifetime parameters for specifying constraints. If +something had been discovered, it would be integrated into the Rust +language and I’d be proposing to adopt that into C++. For now, +lifetime parameters are the best solution that the world has to +offer.

+

The US government and major players in tech including Google[secure-by-design] and Microsoft[ms-vulnerabilities] are telling industry +to transition to memory-safe languages because C++ is too unsafe to use. +There’s already a proven safety technology compatible with C++’s goals +of performance and manual memory management. If the C++ community +rejects this robust safety solution on the grounds of slightly +inconvenient lifetime annotations, and allows C++ to limp forward as a +memory-unsafe language, can it still claim to care about software +quality? If the lifetime model is good enough for Rust, a safe +language that is enjoying snowballing investment in industry, why is it +not good enough for C++?

+

Finally, adoption of this feature brings a major benefit even if you +personally want to get off C++: It’s critical for improving +C++/Rust interop. Your C++ project is generating revenue and +there’s scant economic incentive to rewrite it. But there is an +incentive to pivot to a memory-safe language for new development, +because new code is how vulnerabilities get introduced.[android] Bringing C++ closer to Rust +with the inclusion of safe-specifier, relocation, choice types, +and, importantly, lifetime parameters, reduces the friction of +interfacing the two languages. The easier it is to interoperate with +Rust, the more options and freedom companies have to fulfill their +security mandate.[rust-interop]

+

6 References

+
+
+[android] Eliminating Memory Safety Vulnerabilities at the Source.
https://security.googleblog.com/2024/09/eliminating-memory-safety-vulnerabilities-Android.html?m=1
+
+
+[hylo] Borrow checking Hylo.
https://2023.splashcon.org/details/iwaco-2023-papers/5/Borrow-checking-Hylo
+
+
+[ms-vulnerabilities] We need a safer systems programming language.
https://msrc.microsoft.com/blog/2019/07/we-need-a-safer-systems-programming-language
+
+
+[mutable-value-semantics] Implementation Strategies for Mutable Value +Semantics.
https://www.jot.fm/issues/issue_2022_02/article2.pdf
+
+
+[nll] The Rust RFC Book - Non-lexical lifetimes.
https://rust-lang.github.io/rfcs/2094-nll.html
+
+
+[polonius] Polonius revisited.
https://smallcultfollowing.com/babysteps/blog/2023/09/22/polonius-part-1/
+
+
+[rust-interop] Improving Interoperability Between Rust and C++.
https://security.googleblog.com/2024/02/improving-interoperability-between-rust-and-c.html
+
+
+[rustnomicon] Rustnomicon – The Dark Arts of Unsafe Rust.
https://doc.rust-lang.org/nomicon/intro.html
+
+
+[safe-coding] Tackling cybersecurity vulnerabilities through Secure by +Design.
https://blog.google/technology/safety-security/tackling-cybersecurity-vulnerabilities-through-secure-by-design/
+
+
+[safecpp] Safe C++.
https://safecpp.org/draft.html
+
+
+[second-class] Second-Class References.
https://borretti.me/article/second-class-references
+
+
+[secure-by-design] Secure by Design : Google’s Perspective on Memory +Safety.
https://research.google/pubs/secure-by-design-googles-perspective-on-memory-safety/
+
+
+[sentinel-pattern] After NLL: Moving from borrowed data and the sentinel +pattern.
https://smallcultfollowing.com/babysteps/blog/2018/11/10/after-nll-moving-from-borrowed-data-and-the-sentinel-pattern/
+
+
+[view-types] View types for Rust.
https://smallcultfollowing.com/babysteps/blog/2021/11/05/view-types/
+
+
+
+
+ + diff --git a/docs/index.html b/docs/index.html index cd93321..6f2d7e1 100644 --- a/docs/index.html +++ b/docs/index.html @@ -1,9 +1,9 @@ - + Automatic redirection failed, please go to -P3390R0.html. +draft.html. diff --git a/lifetimes/draft-lifetimes.md b/lifetimes/draft-lifetimes.md new file mode 100644 index 0000000..125f0b3 --- /dev/null +++ b/lifetimes/draft-lifetimes.md @@ -0,0 +1,571 @@ +--- +title: "Memory safety without lifetime parameters" +document: DXXXX +date: 2024-10-15 +audience: SG23 +author: + - name: Sean Baxter + email: +toc: false +--- + +# Safe references + +"Safe C++"[@safecpp] introduced a comprehensive design for compile-time memory safety in C++. The borrow checking model in Safe C++ requires lifetime parameters, a feature that increases expressiveness but complicates the language's type system. This proposal describes an alternative style of borrow checking, guaranteeing lifetime safety without the involvement of lifetime annotations. + +First let's recap how lifetime parameters are declared and used. + +[**lifetimes1.cxx**](https://github.com/cppalliance/safe-cpp/blob/master/lifetimes/lifetimes1.cxx) -- [(Compiler Explorer)](https://godbolt.org/z/5s9qG1h4E) +```cpp +#feature on safety + +// Function parameters have different lifetime parameters. +// Return type is constrained by x's lifetime. +auto f1/(a, b)(int^/a x, int^/b y, bool pred) safe -> int^/a { + // Error: + // function auto f1/(a, b)(int^/a, int^/b) -> int^/a returns + // object with lifetime b, but b doesn't outlive a + // return y; + return pred ? x : y; +} + +// Function parameters have a common lifetime parameter. +auto f2/(a)(int^/a x, int^/a y, bool pred) safe -> int^/a { + // Ok + return pred ? x : y; +} + +// Error: +// cannot use lifetime elision for return type int^ +auto f3(int^ x, int^ y) safe -> int^; +``` + +In Safe C++, occurrences of the borrow type `T^` in function declarations and in data members require specialization with _lifetime arguments_. 
Lifetime arguments name _lifetime-parameters_ declared as part of the function declaration. Borrow types without lifetime arguments have _unbound lifetimes_ and borrows with lifetime arguments have _bound lifetimes_. These are treated as different entities by the language's type system, and there are subtle rules on how bound lifetimes decay to unbound lifetimes and how unbound lifetimes become bound. Lifetime annotations greatly improve the capability of safe references, but extend an already complicated type system. + +The above code declares functions `f1`, `f2` and `f3` with _lifetime-parameter-lists_. Borrows in function return types must be constrained by the lifetimes of one or more function parameters. Failure to match lifetime arguments between function parameters and return types will cause a borrow checker failure. `f1` fails to borrow check because the returned parameter `y` does not outlive the lifetime `/a` on the return type. + +Elision rules make lifetime annotations implicit in some cases. But elision can fail, requiring users to intervene with annotations. In the example above, the declaration of `f3` fails because the elision rules cannot determine the lifetime argument on the returned borrow. + +[**lifetimes2.cxx**](https://github.com/cppalliance/safe-cpp/blob/master/lifetimes/lifetimes2.cxx) -- [(Compiler Explorer)](https://godbolt.org/z/G6TWx83M9) +```cpp +#feature on safety + +// New elision rules: +// All parameters are constrained by a common lifetime. +// The common lifetime constrains the return type. +int% f4(int% x, int% y, bool pred) safe { + // Can return either x or y, because they outlive the common lifetime + // and the common lifetime outlives the result object. + return pred ? x : y; +} +``` + +This proposal introduces a new _safe reference_ marked by the reference declarator `T%`. Safe references do not take lifetime arguments and there is no notion of _bound_ or _unbound_ lifetimes. 
The lifetime parameterization is determined by the formation of the function type. For a free function, all function parameters outlive a single invented lifetime that extends through the duration of the function call. For a non-static member function with the `%` _ref-qualifier_, the implicit object parameter outlives the invented lifetime. In turn, this invented lifetime outlives the returned safe reference. + +## Exclusivity + +* `T%` is a _mutable safe reference_. It cannot alias other references to overlapping places. +* `const T%` is a _shared safe reference_. It may alias shared safe references to overlapping places, but may never overlap a mutable reference. + +If lifetime safety can be guaranteed without lifetime parameters, why involve a new reference type `T%` at all? Why not perform this form of borrow checking on the existing lvalue- and rvalue-references `T&` and `T&&`? The answer is that safe references enforce _exclusivity_ and legacy references do not. There may be one mutable reference to a place, or any number of shared (constant) references, but not both at the same time. This is the universal invariant of borrow checking. Borrow checking legacy reference types would break all existing code, because that code was written without upholding the exclusivity invariant. + +Exclusivity is a program-wide invariant. It doesn't hinge on the safeness of a function. + +* A safe function is sound for all valid inputs. +* An unsafe function has preconditions and may be unsound for some valid inputs. + +"Valid" borrow and safe reference inputs don't mutably alias. This is something a function can just _assume_; it doesn't need to check and there's no way to check. Borrow checking upholds exclusivity even for unsafe functions (when compiled under the `[safety]` feature). There are other assumptions C++ programmers already make about the validity of inputs: for instance, references never hold null addresses. 
Non-valid inputs are implicated in undefined behavior. + +By the parsimony principle you may suggest "rather than adding a new safe reference type, just enforce exclusivity on lvalue- and rvalue-references when compiled under the `[safety]` feature." But that makes the soundness problem worse. New code will _assume_ legacy references don't mutably alias, but existing code doesn't uphold that invariant because it was written without even knowing about it. + +If safe code calls legacy code that returns a struct with a pair of references, do those references alias? Of course they may alias, but the parsimonious treatment claims that mutable references don't alias under the `[safety]` feature. We've already stumbled on a soundness bug. + +Coming from the other direction, it may be necessary to form aliasing references just to use the APIs for existing code. Consider a function that takes an lvalue reference to a container and an lvalue reference to one of its elements. If safe code can't even form aliased lvalue references, it wouldn't be able to use that API at all. + +Exclusivity is a program-wide invariant on safe references. We need separate safe and unsafe reference types for both soundness and expressiveness. + +[**vector1.cxx**](https://github.com/cppalliance/safe-cpp/blob/master/lifetimes/vector1.cxx) -- [(Compiler Explorer)](https://godbolt.org/z/KTEWEdEsM) +```cpp +#include <vector> + +void f1(std::vector<float>& vec, float& x) { + // Do vec and x alias? If so, the push_back may invalidate x. + vec.push_back(6); + + // Potential UB: x may have been invalidated by the push_back. + x = 6; +} + +int main() { + std::vector<float> vec { 1.0f }; + + // Legacy references permit aliasing. + f1(vec, vec[0]); +} +``` + +This example demonstrates how perilous mutable aliasing in C++ is. In `f1`, the compiler doesn't know if `vec` and `x` alias.
Pushing to the vector may cause a buffer resize and copy its data into a new allocation, invalidating existing references or pointers into the container. As C++ doesn't enforce exclusivity on legacy references, the code in `main` is legal, even though it leads to a use-after-free defect. + +[**vector2.cxx**](https://github.com/cppalliance/safe-cpp/blob/master/lifetimes/vector2.cxx) -- [(Compiler Explorer)](https://godbolt.org/z/ETenGYK8n) +```cpp +#feature on safety +#include + +template +class Vec { +public: + void push_back(T value) % safe; + + const T% operator[](size_t idx) const % safe; + T% operator[](size_t idx) % safe; +}; + +void f2(Vec% vec, float% x) safe { + // Does push_back potentially invalidate x? + // No! Exclusivity prevents vec and x from aliasing. + vec.push_back(7); + + // Okay to store to x, because it doesn't point into vec's data. + *x = 7; +} + +int main() safe { + Vec vec { }; + mut vec.push_back(1); + + // Ill-formed: mutable borrow of vec between its mutable borrow and its use + f2(mut vec, mut vec[0]); +} +``` +``` +$ circle vector2.cxx +safety: during safety checking of int main() safe + borrow checking: vector2.cxx:27:19 + f2(mut vec, mut vec[0]); + ^ + mutable borrow of vec between its mutable borrow and its use + loan created at vector2.cxx:27:10 + f2(mut vec, mut vec[0]); + ^ +``` + +Rewrite the example using our simplified safe references. In `main`, the user attempts to pass a safe reference to `vec` and a safe reference to one of its elements. This violates exclusivity, causing the program to be ill-formed. + +Mutable safe references are prohibited from aliasing. Exclusivity is enforced by the same MIR analysis that polices Safe C++'s more general borrow type `T^`. While enforcing exclusivity involves more complicated tooling, it simplifies reasoning about your functions. Since safe reference parameters don't alias, users don't even have to think about aliasing bugs. 
You're free to store to references without worrying about iterator invalidation or other side effects leading to use-after-free defects. + +## Constraint rules + +This proposal implements two sets of constraint rules. Free functions constrain return references by the shortest of the argument lifetimes. Non-static member functions constrain return references by the implicit object lifetime. + +[**lifetimes3.cxx**](https://github.com/cppalliance/safe-cpp/blob/master/lifetimes/lifetimes3.cxx) -- [(Compiler Explorer)](https://godbolt.org/z/Yb6EoMMb6) +```cpp +#feature on safety + +const int% f1(const int% x, const int% y, bool pred) safe { + // The return reference is constrained by all reference parameters: x and y. + return pred ? x : y; +} + +struct Obj { + const int% f2(const int% arg) const % safe { + // Non-static member functions are constrained by the implicit + // object lifetime. + // It's OK to return `x`, because self outlives the return. + return %x; + } + + const int% f3(const int% arg) const % safe { + // Error: arg does not outlive the return reference. + return arg; + } + + const int% f4(const self%, const int% arg) safe { + // OK - f4 is a free function with an explicit self parameter. + return arg; + } + + int x; +}; + +int main() { + int x = 1, y = 2; + f1(x, y, true); // OK + + Obj obj { }; + obj.f2(x); // OK + obj.f3(x); // Error + obj.f4(x); // OK. +} +``` +``` +$ circle lifetimes3.cxx +safety: during safety checking of const int% Obj::f3(const int%) const % safe + error: lifetimes3.cxx:18:12 + return arg; + ^ + function const int% Obj::f3(const int%) const % safe returns object with lifetime SCC-ref-1, but SCC-ref-1 doesn't outlive SCC-ref-0 +``` + +The definitions of free function `f1` and non-static member function `f2` compile, because they return function parameters that constrain the return type: the returned parameter _outlives_ the returned reference. 
The non-static member function `f3` fails to compile, because the returned parameter _does not outlive_ the return type. In a non-static member function, only the implicit object parameter outlives the return type. `f4` returns a function parameter but compiles; it uses the explicit object syntax to gain the ergonomics of a non-static member function, but retains the constraint rules of a free function. + +[**vector3.cxx**](https://github.com/cppalliance/safe-cpp/blob/master/lifetimes/vector3.cxx) -- [(Compiler Explorer)](https://godbolt.org/z/KEr1chMac) +```cpp +#feature on safety + +template<typename Key, typename Value> +class Map { +public: + // Non-static member functions do not constrain the result object to + // the function parameters. + auto get1(const Key% key) % safe -> Value%; + + // Free functions do constrain the result object to the function parameters. + auto get2(self%, const Key% key) safe -> Value%; +}; + +int main() safe { + Map<float, long> map { }; + + // Bind the key reference to a materialized temporary. + // The temporary expires at the end of this statement. + long% value1 = mut map.get1(3.14f); + + // We can still access value, because it's not constrained on the + // key argument. + *value1 = 1001; + + // The call to get2 constrains the returned reference to the lifetime + // of the key temporary. + long% value2 = mut map.get2(1.6186f); + + // This is ill-formed, because get2's key argument is out of scope. + *value2 = 1002; +} +``` +``` +$ circle vector3.cxx +safety: during safety checking of int main() safe + borrow checking: vector3.cxx:30:4 + *value2 = 1002; + ^ + use of value2 depends on expired loan + drop of temporary object float between its shared borrow and its use + loan created at vector3.cxx:27:31 + long% value2 = mut map.get2(1.6186f); + ^ +``` + +The constraint rules for non-static member functions reflect the idea that resources are owned by class objects. Consider a map data structure that associates values with keys.
The map may be specialized with a key type that's expensive to copy, such as a string or another map. We don't want to compel the user to pass the key by value, because that may require copying this expensive type. Naturally, we pass by const reference. + +However, the accessor only needs the key inside the body of the function. Once it locates the value, it should return a reference to that, unconstrained by the lifetime of the key argument. Consider passing a materialized temporary for a key: it goes out of scope at the end of the full expression. `get1` uses the non-static member function constraint rules. The caller can use the returned reference even after the key temporary goes out of scope. `get2` uses the free function constraint rules, which constrain the return type to all of its function parameters. This leaves the program ill-formed when the returned reference is used after the expiration of the key temporary. + +In this model, lifetime constraints are not generally programmable, but that design still provides a degree of freedom in the form of non-static member functions. + +[**vector4.cxx**](https://github.com/cppalliance/safe-cpp/blob/master/lifetimes/vector4.cxx) -- [(Compiler Explorer)](https://godbolt.org/z/hdMr5G3j1) +```cpp +#feature on safety + +template<typename Key, typename Value> +class Map { +public: + // Lifetime elision rules constrain the return by self. + auto get1(self^, const Key^ key) safe -> Value^; + + // Use explicit parameterizations for alternate constraints. + auto get2/(a)(self^/a, const Key^/a key) safe -> Value^/a; +}; + +int main() safe { + Map<float, long> map { }; + + // Bind the key reference to a materialized temporary. + // The temporary expires at the end of this statement. + long^ value1 = mut map.get1(3.14f); + + // We can still access value, because it's not constrained on the + // key argument. + *value1 = 1001; + + // The call to get2 constrains the returned reference to the lifetime + // of the key temporary.
+ long^ value2 = mut map.get2(1.6186f); + + // This is ill-formed, because get2's key argument is out of scope. + *value2 = 1002; +} +``` +``` +$ circle vector4.cxx +safety: during safety checking of int main() safe + borrow checking: vector4.cxx:29:4 + *value2 = 1002; + ^ + use of value2 depends on expired loan + drop of temporary object float between its shared borrow and its use + loan created at vector4.cxx:26:31 + long^ value2 = mut map.get2(1.6186f); + ^ +``` + +The general borrow type `T^` has programmable constraints. The map above declares accessor functions. `get1` relies on lifetime elision to constrain the result object by the `self` parameter. This is equivalent to the non-static member function constraint rule. We can call `get1` and use the returned reference even after the key temporary goes out of scope. + +`get2` includes lifetime annotations to constrain the returned reference by both the `self` and `key` parameters. This is like the free function constraint rules. The program fails borrow checking when the returned reference `value2` is used after the expiration of its key temporary. + +# Second-class references + +References can be taxonomized into two classes:[@second-class] + +* First-class references can pass data into functions, be returned from functions, made into objects and be stored in structures. +* Second-class references can pass data into functions but cannot be returned from functions, made into objects or stored in structures. + +_Parameter-passing directives_ like `in` and `inout` are equivalent to second-class references. The _mutable value semantics_[@mutable-value-semantics] model uses parameter-passing directives to pass objects to functions by reference without involving the complexity of a borrow checker. + +```cpp +void func(Vec<float>% vec, float% x) safe; +``` + +In this fragment, the reference parameters `vec` and `x` serve as _second-class references_.
The compiler can achieve memory safety without involving the complexity of borrow checking. Both references point at data that outlives the duration of the call to `func`. Exclusivity is enforced at the point of the call, which prevents `vec` and `x` from aliasing. Since `vec` and `x` don't alias, resizing or clearing `vec` cannot invalidate the `x` reference. + +The safe references presented here are more powerful than second-class references. While they don't support all the capabilities of borrows, they can be returned from functions and made into objects. The compiler must implement borrow checking to support this additional capability. + +Borrow checking operates on a function lowering called mid-level IR (MIR). A fresh region variable is provisioned for each local variable with a safe reference type. Dataflow analysis populates each region variable with the liveness of its reference. Assignments and function calls involving references generate _lifetime constraints_. The compiler _solves the constraint equation_ to find the liveness of each _loan_. All instructions in the MIR are scanned for _conflicting actions_ with any of the loans in scope at that point. Conflicting actions raise borrow checker errors. + +The Hylo[@hylo] model is largely equivalent to this model and it requires borrow checking technology. `let` and `inout` parameter directives use mutable value semantics to ensure memory safety for objects passed by reference into functions. But Hylo also supports returning references in the form of subscripts: + +[**Array.hylo**](https://github.com/hylo-lang/hylo/blob/main/StandardLibrary/Sources/Array.hylo) +```swift +public conformance Array: Collection { + ... 
+ public subscript(_ position: Int): Element { + let { + precondition((position >= 0) && (position < count()), "position is out of bounds") + yield pointer_to_element(at: position).unsafe[] + } + inout { + precondition((position >= 0) && (position < count()), "position is out of bounds") + yield &(pointer_to_element(at: position).unsafe[]) + } + } +} +``` + +Subscripts are reference-returning _coroutines_. Coroutines with a single yield point are split into two normal functions: a ramp function that starts at the top and returns the expression of the yield statement, and a continuation function which resumes after the yield and runs to the end. Local state that's live over the yield point must live in a _coroutine frame_ so that it's available to the continuation function. These `Array` subscripts don't have instructions after the yield, so the continuation function is empty and hopefully elided by the optimizer. + +```cpp +template<typename T> +struct Vec { + const T% operator[](size_t idx) const % safe; + T% operator[](size_t idx) % safe; +}; +``` + +The Hylo `Array` subscripts are lowered to reference-returning ramp functions exactly like their C++ `Vec` counterparts. For both languages, the borrow checker relates lifetimes through the function arguments and out the result object. This isn't the simple safety of second-class references/mutable value semantics. This is full-fat live analysis. + +Safe references without lifetime annotations shield users from dealing with a new degree of freedom, but they don't simplify the static analysis that upholds lifetime safety. To prevent use-after-free defects, compilers must still lower functions to mid-level IR, compute non-lexical lifetimes[@nll] and solve the constraint equation. When it comes to returning references, in for a penny, in for a pound. + +Since Circle has already made the investment in borrow checking, adding simplified safe references was an easy extension.
If the community is able to fill in our gaps in knowledge around this sort of reference, the compiler could accommodate those advances as well. + +# Other aspects of safety + +As detailed in the Safe C++[@safecpp] proposal, there are four categories of memory safety: + +1. **Lifetime safety** - This proposal advances a simpler form of safe references that provides safety against use-after-free defects. The feature is complementary with borrow types `T^` that take lifetime arguments. Both types can be used in the same translation unit, and even the same function, without conflict. +2. **Type safety** - Relocation must replace move semantics to eliminate unsafe null pointer exposure. Choice types and pattern matching must be included for safe modeling of optional types. +3. **Thread safety** - The `send` and `sync` interfaces account for which types can be copied and shared between threads. +4. **Runtime checks** - The compiler automatically emits runtime bounds checks on array and slice subscripts. It emits checks for integer divide-by-zero and INT_MIN / -1, which are undefined behavior. Conforming safe library functions must also implement panics to prevent out-of-bounds access to heap allocations. + +Most critically, the _safe-specifier_ is added to a function's type. Inside a safe function, only safe operations may be used, unless escaped by an _unsafe-block_. + +C++ must adopt a new standard library with a safe API, which observes all four categories of safety. We need new tooling. _But it's not the case that we have to rewrite all C++ code_. Time has already shaken out most of the vulnerabilities in old code. As demonstrated by the recent Android study on memory safety[@android], the benefits of rewriting are often not worth the costs. What we have to prioritize is the transition to safe coding practices[@safe-coding] for new code. 
+ +# Achieving first-class references + +The presented design is as far as I could go to address the goal of "memory safety without lifetime parameters." But safe references aren't yet powerful enough to replace all the unsafe mechanisms necessary for productivity in C++. We need support for safe versions of idioms that are central to the C++ experience, such as: + +* Iterators. +* Views like `string_view` and `span`. +* RAII types with reference semantics. + +Let's consider RAII types with reference semantics. An example is `std::lock_guard`, which keeps a reference to a mutex. When the `lock_guard` goes out of scope its destructor calls `unlock` on the mutex. This is a challenge for safe references, because safe reference data members aren't supported. Normally those would require lifetime parameters on the containing class. + +What are some options for RAII reference semantics? + +* Coroutines. This is the Hylo strategy. The ramp function locks a mutex and returns a safe reference to the data within. The continuation unlocks the mutex. The reference to the mutex is kept in the coroutine frame. But this still reduces to supporting structs with reference data members. In this case it's not a user-defined type, but a compiler-defined coroutine frame. I feel that the coroutine solution is an unidiomatic fit for C++ for several reasons: static allocation of the coroutine frame requires exposing the definition of the coroutine to the caller, which breaks C++'s approach to modularity; the continuation is called immediately after the last use of the yielded reference, which runs counter to expectation that cleanup runs at the end of the enclosing scope; and since the continuation is called implicitly, there's nothing textual on the caller side to indicate an unlock. +* Defer expressions. Some garbage-collected languages include _defer_ expressions, which run after some condition is met. 
We could defer a call to the mutex unlock until the end of the enclosing lexical scope. This has the benefit of being explicit to the caller and not requiring computation of a coroutine frame. But it introduces a fundamental new control flow mechanism to the language with applicability that almost perfectly overlaps with destructors. +* Destructors. This is the idiomatic C++ choice. A local object is destroyed when it goes out of scope (or is dropped, with the Safe C++ `drop` keyword). The destructor calls the mutex unlock. + +It makes sense to strengthen safe references to support current RAII practice. How do we support safe references as data members? A reasonable starting point is to declare a class as having _safe reference semantics_. `class name %;` is a possible syntax. Inside these classes, you can declare data members and base classes with safe reference semantics: that includes both safe references and other classes with safe reference semantics. + +```cpp +class lock_guard % { + // Permitted because the containing class has safe reference semantics. + std2::mutex% mutex; +public: + ~lock_guard() safe { + mutex.unlock(); + } +}; +``` + +The constraint rules can apply to the new `lock_guard` class exactly as they apply to safe references. Returning a `lock_guard` constrains its lifetime by the lifetimes of the function arguments. Transitively, the lifetimes of the data members are constrained by the lifetime of the containing class. + +Unfortunately, we run into problems immediately upon declaring member functions that take safe reference objects or safe reference parameter types. + +```cpp +class string_view %; + +template<typename T> +class info % { + // Has reference semantics, but that's okay because the containing class does. + string_view sv; +public: + void swap(info% rhs) % safe; +}; +``` + +Consider an `info` class that has _safe reference semantics_ and keeps a `string_view` as a data member.
The `string_view` also has reference semantics, so it constrains the underlying string that owns the data. Declare a non-static member function that binds the implicit object with the `%` _ref-qualifier_ and also takes an `info` by safe reference. This is uncharted water. The implicit object type `info` has reference semantics, yet we're taking a reference to that with the `swap` call. We're also taking a reference to `info` in the function parameter. How do we deal with references to references? The existing constraint rules only invent a single lifetime: if we used those, we'd be clobbering the lifetime of the inner `string_view` member. + +There's a big weakness with the safe reference type `T%`: it's under-specified when dealing with references to references. We need a fix that respects the lifetimes on the class's data members. + +[**lifetimes5.cxx**](https://github.com/cppalliance/safe-cpp/blob/master/lifetimes/lifetimes5.cxx) -- [(Compiler Explorer)](https://godbolt.org/z/Gj7zoq343) +```cpp +#feature on safety + +class string_view/(a) { + // Keep a borrow to a slice over the string data. + const [char; dyn]^/a p_; +public: +}; + +class info/(a) { + // The handle has lifetime /a. + string_view/a sv; + +public: + void swap(self^, info^ rhs) safe { + string_view temp = self->sv; + self->sv = rhs->sv; + rhs->sv = temp; + } +}; + +void func/(a)(info/a^ lhs, info/a^ rhs) safe { + lhs.swap(rhs); +} + +void func2(info^ lhs, info^ rhs) safe { + lhs.swap(rhs); +} +``` + +Rust and Safe C++ have a way to keep the lifetime of the `string_view` member distinct from the lifetimes of the `self` and `rhs` references: lifetime parameters. `func` assumes that the `string_view`s of its parameters come from sources with overlapping lifetimes, so it declares a lifetime parameter `/a` that's common to both parameters. The lifetimes on the two references are created implicitly by elision, as they don't have to be related in the `swap` call.
`func` compiles and doesn't clobber the lifetimes of the contained `string_view`s. + +``` +safety: during safety checking of void func2(info^, info^) safe + error: lifetimes5.cxx:26:12 + lhs.swap(rhs); + ^ + function void func2(info^, info^) safe returns object with lifetime #0, but #0 doesn't outlive #2 + + error: lifetimes5.cxx:26:3 + lhs.swap(rhs); + ^ + function void func2(info^, info^) safe returns object with lifetime #2, but #2 doesn't outlive #0 +``` + +Compiling `func2` raises borrow checker errors. Instead of providing explicit lifetime annotations that relate the lifetimes of the `lhs` and `rhs` `info` types, lifetime elision creates four distinct lifetimes: `#0` for the `lhs` `info`, `#1` for the `lhs` `info^`, `#2` for the `rhs` `info` and `#3` for the `rhs` `info^`. The `lhs.swap(rhs)` call relates the lifetimes of the operands through the common lifetime `/a`. But these lifetimes aren't related! The compiler has no information whether `#0` outlives `#2` or vice versa. Since the lifetimes aren't related in `func2`'s declaration, the program is rejected as ill-formed. + +This contrasts with the safe reference constraint rules, which would assign the same lifetime to all four lifetime binders and clobber the `string_view` lifetimes, causing a borrow checker failure further from the source and leaving the developer without the possibility of a fix. + +# Lifetime parameters + +If there's a community-wide research effort among compiler experts to evolve safe references it may be possible to get them into a state to support the abstractions most important for C++. But soundness reasoning is very subtle work. As you increase the indirection capability of safe references, you invite networks of dependencies of implied constraints and variances. This increases complexity for the compiler implementation and puts a mental burden on the authors of unsafe code to properly uphold the invariants assumed by safe references.
A research project must produce _soundness doctrine_, which is essential guidance on how to interface safe and unsafe systems while upholding the soundness invariants of the program. + +But we don't have to do the research. There's already a solution that's been deployed in a successful production toolchain for a decade: _lifetime parameters_ as used in Rust. The soundness doctrine for writing unsafe code that upholds the invariants established by lifetime parameters is described in the Rustonomicon[@rustnomicon]. + +This is the only known viable solution for first-class safe references without garbage collection. It's a critical lifeline that addresses an existential problem facing C++. By adopting lifetime parameters, C++ can achieve safety parity with the security community's favored languages. + +Consider common objections to Rust's lifetime-annotation flavor of borrow checking: + +1. **You need heavy annotations.** This concern is misplaced. Are you intrigued by mutable value semantics, parameter-passing directives or second-class references? Borrow checking gives you those, without ever having to write lifetime arguments. If your function only uses references as parameters, elision implicitly annotates them in a way that can't fail. You only have to involve lifetime arguments when going beyond the capabilities of second-class references or mutable value semantics. More advanced usages such as the implementation of iterators, views and RAII wrappers with reference semantics are where annotations most often appear, because those designs deal with multiple levels of references. +2. **Borrow checking doesn't permit patterns such as self-references.** It's true that checked references are less flexible than unsafe references or pointers, but this objection is at odds with the claim that lifetime parameters are too burdensome. Lifetime parameters _increase_ the expressiveness of safe references.
Additionally, they can reference things important to C++ users that a garbage collector can't, such as variables on the stack. Do we want more expressive references at the cost of annotations, or do we want to get rid of lifetime parameters to make a simpler language? Those are opposite goals. +3. **Borrow checking with lifetimes is too different from normal C++.** Borrow checking is the safety technology most similar to current C++ practice. This model replaces unchecked references with checked references. Other safety models get rid of reference types entirely or replace them with garbage collection, which is incompatible with C++'s manual memory management and RAII. The design philosophy of borrow checking is to take normal references but constrain them to uses that can be checked for soundness by the compiler. + +It's not surprising that the C++ community hasn't discovered a better way to approach safe references than the lifetime parameter model. After all, there isn't a well-funded effort to advance C++ language-level lifetime safety. But there is in the Rust community. Rust has made valuable improvements to its lifetime safety design. Lots of effort goes into making borrow checking more permissive: The integration of mid-level IR and non-lexical lifetimes in 2016 revitalized the toolchain. Polonius[@polonius] approaches dataflow analysis from the opposite direction, hoping to shake loose more improvements. Ideas like view types[@view-types] and the sentinel pattern[@sentinel-pattern] are being investigated. But all this activity has not discovered a mechanism that's superior to lifetime parameters for specifying constraints. If something had been discovered, it would be integrated into the Rust language and I'd be proposing to adopt _that_ into C++. For now, lifetime parameters are the best solution that the world has to offer.
+ +The US government and major players in tech including Google[@secure-by-design] and Microsoft[@ms-vulnerabilities] are telling industry to transition to memory-safe languages because C++ is too unsafe to use. There's already a proven safety technology compatible with C++'s goals of performance and manual memory management. If the C++ community rejects this robust safety solution on the grounds of slightly inconvenient lifetime annotations, and allows C++ to limp forward as a memory-unsafe language, can it still claim to care about software quality? If the lifetime model is good enough for Rust, a safe language that is enjoying snowballing investment in industry, why is it not good enough for C++? + +Finally, adoption of this feature brings a major benefit even if you personally want to get off C++: It's critical for **improving C++/Rust interop**. Your C++ project is generating revenue and there's scant economic incentive to rewrite it. But there is an incentive to pivot to a memory-safe language for new development, because new code is how vulnerabilities get introduced.[@android] Bringing C++ closer to Rust with the inclusion of _safe-specifier_, relocation, choice types, and, importantly, lifetime parameters, reduces the friction of interfacing the two languages. 
The easier it is to interoperate with Rust, the more options and freedom companies have to fulfill their security mandates.[@rust-interop] + +--- +references: + - id: safecpp + citation-label: safecpp + title: Safe C++ + URL: https://safecpp.org/draft.html + + - id: second-class + citation-label: second-class + title: Second-Class References + URL: https://borretti.me/article/second-class-references + + - id: mutable-value-semantics + citation-label: mutable-value-semantics + title: Implementation Strategies for Mutable Value Semantics + URL: https://www.jot.fm/issues/issue_2022_02/article2.pdf + + - id: hylo + citation-label: hylo + title: Borrow checking Hylo + URL: https://2023.splashcon.org/details/iwaco-2023-papers/5/Borrow-checking-Hylo + + - id: nll + citation-label: nll + title: The Rust RFC Book - Non-lexical lifetimes + URL: https://rust-lang.github.io/rfcs/2094-nll.html + + - id: android + citation-label: android + title: Eliminating Memory Safety Vulnerabilities at the Source + URL: https://security.googleblog.com/2024/09/eliminating-memory-safety-vulnerabilities-Android.html?m=1 + + - id: safe-coding + citation-label: safe-coding + title: Tackling cybersecurity vulnerabilities through Secure by Design + URL: https://blog.google/technology/safety-security/tackling-cybersecurity-vulnerabilities-through-secure-by-design/ + + - id: rustnomicon + citation-label: rustnomicon + title: Rustnomicon -- The Dark Arts of Unsafe Rust + URL: https://doc.rust-lang.org/nomicon/intro.html + + - id: polonius + citation-label: polonius + title: Polonius revisited + URL: https://smallcultfollowing.com/babysteps/blog/2023/09/22/polonius-part-1/ + + - id: view-types + citation-label: view-types + title: View types for Rust + URL: https://smallcultfollowing.com/babysteps/blog/2021/11/05/view-types/ + + - id: sentinel-pattern + citation-label: sentinel-pattern + title: After NLL: Moving from borrowed data and the sentinel pattern + URL: 
https://smallcultfollowing.com/babysteps/blog/2018/11/10/after-nll-moving-from-borrowed-data-and-the-sentinel-pattern/ + + - id: secure-by-design + citation-label: secure-by-design + title: Secure by Design : Google's Perspective on Memory Safety + URL: https://research.google/pubs/secure-by-design-googles-perspective-on-memory-safety/ + + - id: ms-vulnerabilities + citation-label: ms-vulnerabilities + title: We need a safer systems programming language + URL: https://msrc.microsoft.com/blog/2019/07/we-need-a-safer-systems-programming-language/ + + - id: rust-interop + citation-label: rust-interop + title: Improving Interoperability Between Rust and C++ + URL: https://security.googleblog.com/2024/02/improving-interoperability-between-rust-and-c.html +--- \ No newline at end of file diff --git a/lifetimes/lifetimes1.cxx b/lifetimes/lifetimes1.cxx new file mode 100644 index 0000000..b24e497 --- /dev/null +++ b/lifetimes/lifetimes1.cxx @@ -0,0 +1,21 @@ +#feature on safety + +// Function parameters have different lifetime parameters. +// Return type is constrained by x's lifetime. +auto f1/(a, b)(int^/a x, int^/b y, bool pred) safe -> int^/a { + // Error: + // function auto f1/(a, b)(int^/a, int^/b) -> int^/a returns + // object with lifetime b, but b doesn't outlive a + // return y; + return pred ? x : y; +} + +// Function parameters have a common lifetime parameter. +auto f2/(a)(int^/a x, int^/a y, bool pred) safe -> int^/a { + // Ok + return pred ? x : y; +} + +// Error: +// cannot use lifetime elision for return type int^ +auto f3(int^ x, int^ y) safe -> int^; diff --git a/lifetimes/lifetimes2.cxx b/lifetimes/lifetimes2.cxx new file mode 100644 index 0000000..5b3475d --- /dev/null +++ b/lifetimes/lifetimes2.cxx @@ -0,0 +1,10 @@ +#feature on safety + +// New elision rules: +// All parameters are constrained by a common lifetime. +// The common lifetime constrains the return type. 
+int% f4(int% x, int% y, bool pred) safe { + // Can return either x or y, because they outlive the common lifetime + // and the common lifetime outlives the result object. + return pred ? x : y; +} diff --git a/lifetimes/lifetimes3.cxx b/lifetimes/lifetimes3.cxx new file mode 100644 index 0000000..4ec0689 --- /dev/null +++ b/lifetimes/lifetimes3.cxx @@ -0,0 +1,37 @@ +#feature on safety + +const int% f1(const int% x, const int% y, bool pred) safe { + // The return reference is constrained by all reference parameters: x and y. + return pred ? x : y; +} + +struct Obj { + const int% f2(const int% arg) const % safe { + // Non-static member functions are constrained by the implicit + // object lifetime. + // It's OK to return `x`, because self outlives the return. + return %x; + } + + const int% f3(const int% arg) const % safe { + // Error: arg does not outlive the return reference. + return arg; + } + + const int% f4(const self%, const int% arg) safe { + // OK - f4 is a free function with an explicit self parameter. + return arg; + } + + int x; +}; + +int main() { + int x = 1, y = 2; + f1(x, y, true); // OK + + Obj obj { }; + obj.f2(x); // OK + obj.f3(x); // Error + obj.f4(x); // OK. +} \ No newline at end of file diff --git a/lifetimes/lifetimes4.cxx b/lifetimes/lifetimes4.cxx new file mode 100644 index 0000000..151e38c --- /dev/null +++ b/lifetimes/lifetimes4.cxx @@ -0,0 +1 @@ +#feature on safety \ No newline at end of file diff --git a/lifetimes/lifetimes5.cxx b/lifetimes/lifetimes5.cxx new file mode 100644 index 0000000..1b610f0 --- /dev/null +++ b/lifetimes/lifetimes5.cxx @@ -0,0 +1,27 @@ +#feature on safety + +class string_view/(a) { + // Keep a borrow to a slice over the string data. + const [char; dyn]^/a p_; +public: +}; + +class info/(a) { + // The handle has lifetime /a. 
+ string_view/a sv; + +public: + void swap(self^, info^ rhs) safe { + string_view temp = self->sv; + self->sv = rhs->sv; + rhs->sv = temp; + } +}; + +void func/(a)(info/a^ lhs, info/a^ rhs) safe { + lhs.swap(rhs); +} + +void func2(info^ lhs, info^ rhs) safe { + lhs.swap(rhs); +} \ No newline at end of file diff --git a/lifetimes/vector1.cxx b/lifetimes/vector1.cxx new file mode 100644 index 0000000..e4adc70 --- /dev/null +++ b/lifetimes/vector1.cxx @@ -0,0 +1,17 @@ +#include + +void f1(std::vector& vec, float& x) { + // Do vec and x alias? If so, the push_back may invalidate x. + vec.push_back(6); + + // Potential UB: x may have been invalidated by the push_back. + x = 6; +} + +int main() { + std::vector vec; + vec.push_back(1): + + // Legacy references permit aliasing. + f1(vec, vec[0]); +} \ No newline at end of file diff --git a/lifetimes/vector2.cxx b/lifetimes/vector2.cxx new file mode 100644 index 0000000..3060be4 --- /dev/null +++ b/lifetimes/vector2.cxx @@ -0,0 +1,28 @@ +#feature on safety +#include + +template +class Vec { +public: + void push_back(T value) % safe; + + const T% operator[](size_t idx) const % safe; + T% operator[](size_t idx) % safe; +}; + +void f2(Vec% vec, float% x) safe { + // Does push_back potentially invalidate x? + // No! Exclusivity prevents vec and x from aliasing. + vec.push_back(7); + + // Okay to store to x, because it doesn't point into vec's data. + *x = 7; +} + +int main() safe { + Vec vec { }; + mut vec.push_back(1); + + // Ill-formed: shared borrow of vec between its mutable borrow and its use + f2(mut vec, mut vec[0]); +} \ No newline at end of file diff --git a/lifetimes/vector3.cxx b/lifetimes/vector3.cxx new file mode 100644 index 0000000..a40444f --- /dev/null +++ b/lifetimes/vector3.cxx @@ -0,0 +1,31 @@ +#feature on safety + +template +class Map { +public: + // Non-static member functions do not constrain the result object to + // the function parameters. 
+ auto get1(const Key% key) % safe -> Value%; + + // Free functions do constrain the result object to the function parameters. + auto get2(self%, const Key% key) safe -> Value%; +}; + +int main() safe { + Map map { }; + + // Bind the key reference to a materialized temporary. + // The temporary expires at the end of this statement. + long% value1 = mut map.get1(3.14f); + + // We can still access value, because it's not constrained on the + // key argument. + *value1 = 1001; + + // The call to get2 constrains the returned reference to the lifetime + // of the key temporary. + long% value2 = mut map.get2(1.6186f); + + // This is ill-formed, because get2's key argument is out of scope. + *value2 = 1002; +} \ No newline at end of file diff --git a/lifetimes/vector4.cxx b/lifetimes/vector4.cxx new file mode 100644 index 0000000..a23c2bf --- /dev/null +++ b/lifetimes/vector4.cxx @@ -0,0 +1,30 @@ +#feature on safety + +template +class Map { +public: + // Lifetime elision rules constrain the return by self. + auto get1(self^, const Key^ key) safe -> Value^; + + // Use explicit parameterizations for alternate constraints. + auto get2/(a)(self^/a, const Key^/a key) safe -> Value^/a; +}; + +int main() safe { + Map map { }; + + // Bind the key reference to a materialized temporary. + // The temporary expires at the end of this statement. + long^ value1 = mut map.get1(3.14f); + + // We can still access value, because it's not constrained on the + // key argument. + *value1 = 1001; + + // The call to get2 constrains the returned reference to the lifetime + // of the key temporary. + long^ value2 = mut map.get2(1.6186f); + + // This is ill-formed, because get2's key argument is out of scope. 
+ *value2 = 1002; +} \ No newline at end of file From 1056736e6ff73d3aa6c92dd230bb8857d8332ea6 Mon Sep 17 00:00:00 2001 From: Sean Baxter Date: Mon, 14 Oct 2024 12:40:47 -0400 Subject: [PATCH 14/27] Added proposal number for lifetimes submission --- docs/P3444P0.html | 1324 ++++++++++++++++++++++++++++++++++ docs/draft-lifetimes.html | 2 +- lifetimes/P3444P0.md | 573 +++++++++++++++ lifetimes/draft-lifetimes.md | 2 +- 4 files changed, 1899 insertions(+), 2 deletions(-) create mode 100644 docs/P3444P0.html create mode 100644 lifetimes/P3444P0.md diff --git a/docs/P3444P0.html b/docs/P3444P0.html new file mode 100644 index 0000000..29f6815 --- /dev/null +++ b/docs/P3444P0.html @@ -0,0 +1,1324 @@ + + + + + + + + Memory safety without lifetime parameters + + + + + + + +
+
+

Memory safety without +lifetime parameters

+ + + + + + + + + + + + + + + + + + + + + +
Document #:P3444R0
Date:2024-10-15
Project:Programming Language C++
Audience: + SG23
+
Reply-to: + Sean Baxter
<>
+
+
+
+

1 Safe references

+

“Safe C++”[safecpp] introduced a comprehensive +design for compile-time memory safety in C++. The borrow checking model +in Safe C++ requires lifetime parameters, a feature that increases +expressiveness but complicates the language’s type system. This proposal +describes an alternative style of borrow checking, guaranteeing lifetime +safety without the involvement of lifetime annotations.

+

First let’s recap how lifetime parameters are declared and used.

+

lifetimes1.cxx +– (Compiler Explorer)

+
#feature on safety
+
+// Function parameters have different lifetime parameters. 
+// Return type is constrained by x's lifetime.
+auto f1/(a, b)(int^/a x, int^/b y, bool pred) safe -> int^/a {
+  // Error:
+  // function auto f1/(a, b)(int^/a, int^/b) -> int^/a returns
+  // object with lifetime b, but b doesn't outlive a
+  // return y;
+  return pred ? x : y;
+}
+
+// Function parameters have a common lifetime parameter.
+auto f2/(a)(int^/a x, int^/a y, bool pred) safe -> int^/a {
+  // Ok
+  return pred ? x : y;
+}
+
+// Error:
+// cannot use lifetime elision for return type int^ 
+auto f3(int^ x, int^ y) safe -> int^;
+

In Safe C++, occurrences of the borrow type +T^ in +function declarations and in data members require specialization with +lifetime arguments. Lifetime arguments name +lifetime-parameters declared as part of the function +declaration. Borrow types without lifetime arguments have unbound +lifetimes and borrows with lifetime arguments have bound +lifetimes. These are treated as different entities by the +language’s type system, and there are subtle rules on how bound +lifetimes decay to unbound lifetimes and how unbound lifetimes become +bound. Lifetime annotations greatly improve the capability of safe +references, but extend an already complicated type system.

+

The above code declares functions +f1, +f2 and +f3 with +lifetime-parameter-lists. Borrows in function return types must +be constrained by the lifetimes of one or more function parameters. +Failure to match lifetime arguments between function parameters and +return types will cause a borrow checker failure. +f1 fails to borrow check because the +returned parameter y does not +outlive the lifetime +/a on the +return type.

+

Elision rules make lifetime annotations implicit in some cases. But +elision can fail, requiring users to intervene with annotations. In the +example above, the declaration of f3 +fails because the elision rules cannot determine the lifetime argument +on the returned borrow.

+

lifetimes2.cxx +– (Compiler Explorer)

+
#feature on safety
+
+// New elision rules:
+// All parameters are constrained by a common lifetime.
+// The common lifetime constrains the return type.
+int% f4(int% x, int% y, bool pred) safe {
+  // Can return either x or y, because they outlive the common lifetime
+  // and the common lifetime outlives the result object.
+  return pred ? x : y;
+}
+

This proposal introduces a new safe reference marked by the +reference declarator +T%. Safe +references do not take lifetime arguments and there is no notion of +bound or unbound lifetimes. The lifetime +parameterization is determined by the formation of the function type. +For a free function, all function parameters outlive a single invented +lifetime that extends through the duration of the function call. For a +non-static member function with the +% +ref-qualifier, the implicit object parameter outlives the +invented lifetime. In turn, this invented lifetime outlives the returned +safe reference.

+

1.1 Exclusivity

+
    +
  • T% is a +mutable safe reference. It cannot alias other references to +overlapping places.
  • +
  • const T% +is a shared safe reference. It may alias shared safe references +to overlapping places, but may never overlap a mutable reference.
  • +
+

If lifetime safety can be guaranteed without lifetime parameters, why +involve a new reference type +T% at all? +Why not perform this form of borrow checking on the existing lvalue- and +rvalue-references +T& and +T&&? +The answer is that safe references enforce exclusivity and +legacy references do not. There may be one mutable reference to a place, +or any number of shared (constant) references, but not both at the same +time. This is the universal invariant of borrow checking. Borrow +checking legacy reference types would break all existing code, because +that code was written without upholding the exclusivity invariant.

+

Exclusivity is a program-wide invariant. It doesn’t hinge on the +safeness of a function.

+
    +
  • A safe function is sound for all valid inputs.
  • +
  • An unsafe function has preconditions and may be unsound for some +valid inputs.
  • +
+

“Valid” borrow and safe reference inputs don’t mutably alias. This is +something a function can just assume; it doesn’t need to check +and there’s no way to check. Borrow checking upholds exclusivity even +for unsafe functions (when compiled under the [safety] +feature). There are other assumptions C++ programmers already make about +the validity of inputs: for instance, references never hold null +addresses. Non-valid inputs are implicated in undefined behavior.

+

By the parsimony principle you may suggest “rather than adding a new +safe reference type, just enforce exclusivity on lvalue- and +rvalue-references when compiled under the [safety] +feature.” But that makes the soundness problem worse. New code will +assume legacy references don’t mutably alias, but existing code +doesn’t uphold that invariant because it was written without even +knowing about it.

+

If safe code calls legacy code that returns a struct with a pair of +references, do those references alias? Of course they may alias, but the +parsimonious treatment claims that mutable references don’t alias under +the [safety] +feature. We’ve already stumbled on a soundness bug.

+

Coming from the other direction, it may be necessary to form aliasing +references just to use the APIs for existing code. Consider a function +that takes an lvalue reference to a container and an lvalue reference to +one of its elements. If safe code can’t even form aliased lvalue +references, it wouldn’t be able to use that API at all.

+

Exclusivity is a program-wide invariant on safe references. We need +separate safe and unsafe reference types for both soundness and +expressiveness.

+

vector1.cxx +– (Compiler Explorer)

+
#include <vector>
+
+void f1(std::vector<float>& vec, float& x) {
+  // Do vec and x alias? If so, the push_back may invalidate x.
+  vec.push_back(6);
+
+  // Potential UB: x may have been invalidated by the push_back.
+  x = 6;
+}
+
+int main() {
+  std::vector<float> vec { 1.0f };
+
+  // Legacy references permit aliasing.
+  f1(vec, vec[0]);
+}
+

This example demonstrates how perilous mutable aliasing in C++ is. In +f1, the compiler doesn’t know if +vec and +x alias. Pushing to the vector may +cause a buffer resize and copy its data into a new allocation, +invalidating existing references or pointers into the container. As C++ +doesn’t enforce exclusivity on legacy references, the code in +main is legal, even though it leads +to a use-after-free defect.

+

vector2.cxx +– (Compiler Explorer)

+
#feature on safety
+#include <cstdint>
+
+template<typename T>
+class Vec {
+public:
+  void push_back(T value) % safe;
+
+  const T% operator[](size_t idx) const % safe;
+        T% operator[](size_t idx)       % safe;
+};
+
+void f2(Vec<float>% vec, float% x) safe {
+  // Does push_back potentially invalidate x? 
+  // No! Exclusivity prevents vec and x from aliasing.
+  vec.push_back(7);
+
+  // Okay to store to x, because it doesn't point into vec's data.
+  *x = 7;
+}
+
+int main() safe {
+  Vec<float> vec { };
+  mut vec.push_back(1);
+
+  // Ill-formed: mutable borrow of vec between its mutable borrow and its use
+  f2(mut vec, mut vec[0]);
+}
+
$ circle vector2.cxx
+safety: during safety checking of int main() safe
+  borrow checking: vector2.cxx:27:19
+    f2(mut vec, mut vec[0]); 
+                    ^
+  mutable borrow of vec between its mutable borrow and its use
+  loan created at vector2.cxx:27:10
+    f2(mut vec, mut vec[0]); 
+           ^
+

Rewrite the example using our simplified safe references. In +main, the user attempts to pass a +safe reference to vec and a safe +reference to one of its elements. This violates exclusivity, causing the +program to be ill-formed.

+

Mutable safe references are prohibited from aliasing. Exclusivity is +enforced by the same MIR analysis that polices Safe C++’s more general +borrow type +T^. While +enforcing exclusivity involves more complicated tooling, it simplifies +reasoning about your functions. Since safe reference parameters don’t +alias, users don’t even have to think about aliasing bugs. You’re free +to store to references without worrying about iterator invalidation or +other side effects leading to use-after-free defects.

+

1.2 Constraint rules

+

This proposal implements two sets of constraint rules. Free functions +constrain return references by the shortest of the argument lifetimes. +Non-static member functions constrain return references by the implicit +object lifetime.

+

lifetimes3.cxx +– (Compiler Explorer)

+
#feature on safety
+
+const int% f1(const int% x, const int% y, bool pred) safe {
+  // The return reference is constrained by all reference parameters: x and y.
+  return pred ? x : y;
+}
+
+struct Obj {
+  const int% f2(const int% arg) const % safe {
+    // Non-static member functions are constrained by the implicit 
+    // object lifetime.
+    // It's OK to return `x`, because self outlives the return.
+    return %x;
+  }
+
+  const int% f3(const int% arg) const % safe {
+    // Error: arg does not outlive the return reference.
+    return arg;
+  }
+
+  const int% f4(const self%, const int% arg) safe {
+    // OK - f4 is a free function with an explicit self parameter.
+    return arg;
+  }
+
+  int x;
+};
+
+int main() {
+  int x = 1, y = 2;
+  f1(x, y, true); // OK
+
+  Obj obj { };
+  obj.f2(x);  // OK
+  obj.f3(x);  // Error
+  obj.f4(x);  // OK.
+}
+
$ circle lifetimes3.cxx 
+safety: during safety checking of const int% Obj::f3(const int%) const % safe
+  error: lifetimes3.cxx:18:12
+      return arg; 
+             ^
+  function const int% Obj::f3(const int%) const % safe returns object with lifetime SCC-ref-1, but SCC-ref-1 doesn't outlive SCC-ref-0
+

The definitions of free function +f1 and non-static member function +f2 compile, because they return +function parameters that constrain the return type: the returned +parameter outlives the returned reference. The non-static +member function f3 fails to compile, +because the returned parameter does not outlive the return +type. In a non-static member function, only the implicit object +parameter outlives the return type. +f4 returns a function parameter but +compiles; it uses the explicit object syntax to gain the ergonomics of a +non-static member function, but retains the constraint rules of a free +function.

+

vector3.cxx +– (Compiler Explorer)

+
#feature on safety
+
+template<typename Key, typename Value>
+class Map {
+public:
+  // Non-static member functions do not constrain the result object to
+  // the function parameters.
+  auto get1(const Key% key) % safe -> Value%;
+
+  // Free functions do constrain the result object to the function parameters.
+  auto get2(self%, const Key% key) safe -> Value%;
+};
+
+int main() safe {
+  Map<float, long> map { };
+
+  // Bind the key reference to a materialized temporary.
+  // The temporary expires at the end of this statement.
+  long% value1 = mut map.get1(3.14f);
+
+  // We can still access value, because it's not constrained on the 
+  // key argument.
+  *value1 = 1001;
+
+  // The call to get2 constrains the returned reference to the lifetime
+  // of the key temporary.
+  long% value2 = mut map.get2(1.6186f);
+
+  // This is ill-formed, because get2's key argument is out of scope.
+  *value2 = 1002;
+}
+
$ circle vector3.cxx 
+safety: during safety checking of int main() safe
+  borrow checking: vector3.cxx:30:4
+    *value2 = 1002; 
+     ^
+  use of value2 depends on expired loan
+  drop of temporary object float between its shared borrow and its use
+  loan created at vector3.cxx:27:31
+    long% value2 = mut map.get2(1.6186f); 
+                                ^
+

The constraint rules for non-static member functions reflect the idea +that resources are owned by class objects. Consider a map data structure +that associates values with keys. The map may be specialized with a key type +that’s expensive to copy, such as a string or another map. We don’t want +to compel the user to pass the key by value, because that may require +copying this expensive type. Naturally, we pass by const reference.

+

However, the accessor only needs the key inside the body of the +function. Once it locates the value, it should return a reference to +that, unconstrained by the lifetime of the key argument. Consider +passing a materialized temporary for a key: it goes out of scope at the +end of the full expression. get1 +uses the non-static member function constraint rules. The caller can use +the returned reference even after the key temporary goes out of scope. +get2 uses the free function +constraint rules, which constrains the return type to all of its +function parameters. This leaves the program ill-formed when the +returned reference is used after the expiration of the key +temporary.

+

In this model, lifetime constraints are not generally programmable, +but that design still provides a degree of freedom in the form of +non-static member functions.

+

vector4.cxx +– (Compiler Explorer)

+
#feature on safety
+
+template<typename Key, typename Value>
+class Map {
+public:
+  // Lifetime elision rules constrain the return by self.
+  auto get1(self^, const Key^ key) safe -> Value^;
+
+  // Use explicit parameterizations for alternate constraints.
+  auto get2/(a)(self^/a, const Key^/a key) safe -> Value^/a;
+};
+
+int main() safe {
+  Map<float, long> map { };
+
+  // Bind the key reference to a materialized temporary.
+  // The temporary expires at the end of this statement.
+  long^ value1 = mut map.get1(3.14f);
+
+  // We can still access value, because it's not constrained on the 
+  // key argument.
+  *value1 = 1001;
+
+  // The call to get2 constrains the returned reference to the lifetime
+  // of the key temporary.
+  long^ value2 = mut map.get2(1.6186f);
+
+  // This is ill-formed, because get2's key argument is out of scope.
+  *value2 = 1002;
+}
+
$ circle vector4.cxx 
+safety: during safety checking of int main() safe
+  borrow checking: vector4.cxx:29:4
+    *value2 = 1002; 
+     ^
+  use of value2 depends on expired loan
+  drop of temporary object float between its shared borrow and its use
+  loan created at vector4.cxx:26:31
+    long^ value2 = mut map.get2(1.6186f); 
+                                ^
+

The general borrow type +T^ has +programmable constraints. The map above declares accessor functions. +get1 relies on lifetime elision to +constrain the result object by the +self parameter. This is equivalent +to the non-static member function constraint rule. We can call +get1 and use the returned reference +even after the key temporary goes out of scope.

+

get2 includes lifetime +annotations to constrain the returned reference by both the +self and +key parameters. This is like the +free function constraint rules. The program fails borrow checking when +the returned reference value2 is +used after the expiration of its key temporary.

+

2 Second-class references

+

References can be taxonomized into two classes:[second-class]

+
    +
  • First-class references can pass data into functions, be returned +from functions, made into objects and be stored in structures.
  • +
  • Second-class references can pass data into functions but cannot be +returned from functions, made into objects or stored in structures.
  • +
+

Parameter-passing directives like +in and +inout are equivalent to second-class +references. The mutable value semantics[mutable-value-semantics] model uses +parameter-passing directives to pass objects to functions by reference +without involving the complexity of a borrow checker.

+
void func(Vec<float>% vec, float% x) safe;
+

In this fragment, the reference parameters +vec and +x serve as second-class +references. The compiler can achieve memory safety without +involving the complexity of borrow checking. Both references point at +data that outlives the duration of the call to +func. Exclusivity is enforced at the +point of the call, which prevents +vec and +x from aliasing. Since +vec and +x don’t alias, resizing or clearing +vec cannot invalidate the +x reference.

+

The safe references presented here are more powerful than +second-class references. While they don’t support all the capabilities +of borrows, they can be returned from functions and made into objects. +The compiler must implement borrow checking to support this additional +capability.

+

Borrow checking operates on a function lowering called mid-level IR +(MIR). A fresh region variable is provisioned for each local variable +with a safe reference type. Dataflow analysis populates each region +variable with the liveness of its reference. Assignments and function +calls involving references generate lifetime constraints. The +compiler solves the constraint equation to find the liveness of +each loan. All instructions in the MIR are scanned for +conflicting actions with any of the loans in scope at that +point. Conflicting actions raise borrow checker errors.

+

The Hylo[hylo] model is largely equivalent to +this model and it requires borrow checking technology. +let and +inout parameter directives use +mutable value semantics to ensure memory safety for objects passed by +reference into functions. But Hylo also supports returning references in +the form of subscripts:

+

Array.hylo

+
public conformance Array: Collection {
+  ...
+  public subscript(_ position: Int): Element {
+    let {
+      precondition((position >= 0) && (position < count()), "position is out of bounds")
+      yield pointer_to_element(at: position).unsafe[]
+    }
+    inout {
+      precondition((position >= 0) && (position < count()), "position is out of bounds")
+      yield &(pointer_to_element(at: position).unsafe[])
+    }
+  }
+}
+

Subscripts are reference-returning coroutines. Coroutines +with a single yield point are split into two normal functions: a ramp +function that starts at the top and returns the expression of the yield +statement, and a continuation function which resumes after the yield and +runs to the end. Local state that’s live over the yield point must live +in a coroutine frame so that it’s available to the continuation +function. These Array subscripts +don’t have instructions after the yield, so the continuation function is +empty and hopefully elided by the optimizer.

+
template<typename T>
+struct Vec {
+  const T% operator[](size_t idx) const % safe;
+        T% operator[](size_t idx)       % safe;
+};
+

The Hylo Array subscripts are +lowered to reference-returning ramp functions exactly like their C++ +Vec counterparts. For both +languages, the borrow checker relates lifetimes through the function +arguments and out the result object. This isn’t the simple safety of +second-class references/mutable value semantics. This is full-fat live +analysis.

+

Safe references without lifetime annotations shield users from +dealing with a new degree of freedom, but they don’t simplify the static +analysis that upholds lifetime safety. To prevent use-after-free +defects, compilers must still lower functions to mid-level IR, compute +non-lexical lifetimes[nll] and solve the constraint equation. +When it comes to returning references, in for a penny, in for a +pound.

+

Since Circle has already made the investment in borrow checking, +adding simplified safe references was an easy extension. If the +community is able to fill in our gaps in knowledge around this sort of +reference, the compiler could accommodate those advances as well.

+

3 Other aspects of safety

+

As detailed in the Safe C++[safecpp] proposal, there are four +categories of memory safety:

+
    +
  1. Lifetime safety - This proposal advances a simpler +form of safe references that provides safety against use-after-free +defects. The feature is complementary with borrow types +T^ that take +lifetime arguments. Both types can be used in the same translation unit, +and even the same function, without conflict.
  2. +
  3. Type safety - Relocation must replace move +semantics to eliminate unsafe null pointer exposure. Choice types and +pattern matching must be included for safe modeling of optional +types.
  4. +
  5. Thread safety - The +send and +sync interfaces account for which +types can be copied and shared between threads.
  6. +
  7. Runtime checks - The compiler automatically emits +runtime bounds checks on array and slice subscripts. It emits checks for +integer divide-by-zero and INT_MIN / -1, which are undefined behavior. +Conforming safe library functions must also implement panics to prevent +out-of-bounds access to heap allocations.
  8. +
+

Most critically, the safe-specifier is added to a function’s +type. Inside a safe function, only safe operations may be used, unless +escaped by an unsafe-block.

+

C++ must adopt a new standard library with a safe API, which observes +all four categories of safety. We need new tooling. But it’s not the +case that we have to rewrite all C++ code. Time has already shaken +out most of the vulnerabilities in old code. As demonstrated by the +recent Android study on memory safety[android], the benefits of rewriting are +often not worth the costs. What we have to prioritize is the transition +to safe coding practices[safe-coding] for new code.

+

4 Achieving first-class +references

+

The presented design is as far as I could go to address the goal of +“memory safety without lifetime parameters.” But safe references aren’t +yet powerful enough to replace all the unsafe mechanisms necessary for +productivity in C++. We need support for safe versions of idioms that +are central to the C++ experience, such as:

+
    +
  • Iterators.
  • +
  • Views like string_view and +span.
  • +
  • RAII types with reference semantics.
  • +
+

Let’s consider RAII types with reference semantics. An example is +std::lock_guard, +which keeps a reference to a mutex. When the +lock_guard goes out of scope its +destructor calls unlock on the +mutex. This is a challenge for safe references, because safe reference +data members aren’t supported. Normally those would require lifetime +parameters on the containing class.

+

What are some options for RAII reference semantics?

+
    +
  • Coroutines. This is the Hylo strategy. The ramp function locks a +mutex and returns a safe reference to the data within. The continuation +unlocks the mutex. The reference to the mutex is kept in the coroutine +frame. But this still reduces to supporting structs with reference data +members. In this case it’s not a user-defined type, but a +compiler-defined coroutine frame. I feel that the coroutine solution is +an unidiomatic fit for C++ for several reasons: static allocation of the +coroutine frame requires exposing the definition of the coroutine to the +caller, which breaks C++’s approach to modularity; the continuation is +called immediately after the last use of the yielded reference, which +runs counter to expectation that cleanup runs at the end of the +enclosing scope; and since the continuation is called implicitly, +there’s nothing textual on the caller side to indicate an unlock.
  • +
  • Defer expressions. Some garbage-collected languages include +defer expressions, which run after some condition is met. We +could defer a call to the mutex unlock until the end of the enclosing +lexical scope. This has the benefit of being explicit to the caller and +not requiring computation of a coroutine frame. But it introduces a +fundamental new control flow mechanism to the language with +applicability that almost perfectly overlaps with destructors.
  • +
  • Destructors. This is the idiomatic C++ choice. A local object is +destroyed when it goes out of scope (or is dropped, with the Safe C++ +drop keyword). The destructor calls +the mutex unlock.
  • +
+

It makes sense to strengthen safe references to support current RAII +practice. How do we support safe references as data members? A +reasonable starting point is to declare a class as having safe +reference semantics. class name %; +is a possible syntax. Inside these classes, you can declare data members +and base classes with safe reference semantics: that includes both safe +references and other classes with safe reference semantics.

+
class lock_guard % {
+  // Permitted because the containing class has safe reference semantics.
+  std2::mutex% mutex;
+public:
+  ~lock_guard() safe {
+    mutex.unlock();
+  }
+};
+

The constraint rules can apply to the new +lock_guard class exactly as they +apply to safe references. Returning a +lock_guard constrains its lifetime +by the lifetimes of the function arguments. Transitively, the lifetimes +of the data members are constrained by the lifetime of the containing +class.

+

Unfortunately, we run into problems immediately upon declaring member +functions that take safe reference objects or safe reference parameter +types.

+
class string_view %;
+
+template<typename T>
+class info % {
+  // Has reference semantics, but that's okay because the containing class does.
+  string_view sv;
+public:
+  void swap(info% rhs) % safe;
+};
+

Consider an info class that has +safe reference semantics and keeps a +string_view as a data member. The +string_view also has reference +semantics, so it constrains the underlying string that owns the data. +Declare a non-static member function that binds the implicit object with +the % +ref-qualifier and also takes an +info by safe reference. This is +uncharted water. The implicit object type +info has reference semantics, yet +we’re taking a reference to that with the +swap call. We’re also taking a +reference to info in the function +parameter. How do we deal with references to references? The existing +constraint rules only invent a single lifetime: if we used those, we’d +be clobbering the lifetime of the inner +string_view member.

+

There’s a big weakness with the safe reference type +T%: it’s +under-specified when dealing with references to references. We need a +fix that respects the lifetimes on the class’s data members.

+

lifetimes5.cxx +– (Compiler Explorer)

+
#feature on safety
+
+class string_view/(a) {
+  // Keep a borrow to a slice over the string data.
+  const [char; dyn]^/a p_;
+public:
+};
+
+class info/(a) {
+  // The handle has lifetime /a.
+  string_view/a sv;
+
+public:
+  void swap(self^, info^ rhs) safe {
+    string_view temp = self->sv;
+    self->sv = rhs->sv;
+    rhs->sv = temp;
+  }
+};
+
+void func/(a)(info/a^ lhs, info/a^ rhs) safe {
+  lhs.swap(rhs);
+}
+
+void func2(info^ lhs, info^ rhs) safe {
+  lhs.swap(rhs);
+}
+

Rust and Safe C++ have a way to keep the lifetime of the +string_view member distinct from the +lifetimes of the self and +rhs references: lifetime parameters. +func assumes that the +string_views of its parameters come +from sources with overlapping lifetimes, so it declares a lifetime +parameter /a +that’s common to both parameters. The lifetimes on the two references +are created implicitly by elision, as they don’t have to be related in +the swap call. +func compiles and doesn’t clobber +the lifetimes of the contained +string_views.

+
safety: during safety checking of void func2(info^, info^) safe
+  error: lifetimes5.cxx:26:12
+    lhs.swap(rhs); 
+             ^
+  function void func2(info^, info^) safe returns object with lifetime #0, but #0 doesn't outlive #2
+
+  error: lifetimes5.cxx:26:3
+    lhs.swap(rhs); 
+    ^
+  function void func2(info^, info^) safe returns object with lifetime #2, but #2 doesn't outlive #0
+

Compiling func2 raises borrow +checker errors. Instead of providing explicit lifetime annotations that +relate the lifetimes of the lhs and +rhs +info types, lifetime elision creates +four distinct lifetimes: +#0 for the +lhs +info, +#1 for the +lhs +info^, +#2 for the +rhs +info and +#3 for the +rhs +info^. The +lhs.swap(rhs) +call relates the lifetimes of the operands through the common lifetime +/a. But +these lifetimes aren’t related! The compiler has no information whether +#0 outlives +#2 or vice +versa. Since the lifetimes aren’t related in +func2’s declaration, the program is +rejected as ill-formed.

+

This contrasts with the safe reference constraint rules, which would +assign the same lifetime to all four lifetime binders and clobber the +string_view lifetimes, causing a +borrow checker failure further from the source and leaving the developer +without the possibility of a fix.

+

5 Lifetime parameters

+

If there’s a community-wide research effort among compiler experts to +evolve safe references it may be possible to get them into a state to +support the abstractions most important for C++. But soundness reasoning +is very subtle work. As you increase the indirection capability of safe +references, you invite networks of dependencies of implied constraints +and variances. This increases complexity for the compiler implementation +and puts a mental burden on the authors of unsafe code to properly +uphold the invariants assumed by safe references. A research project +must produce soundness doctrine, which is essential guidance on +how to interface safe and unsafe systems while upholding the soundness +invariants of the program.

+

But we don’t have to do the research. There’s already a solution +that’s been deployed in a successful production toolchain for a decade: +lifetime parameters as used in Rust. The soundness doctrine for +writing unsafe code that upholds the invariants established by lifetime +parameters is described in the Rustnomicon[rustnomicon].

+

This is the only known viable solution for first-class safe +references without garbage collection. It’s a critical lifeline that +addresses an existential problem facing C++. By adopting lifetime +parameters, C++ can achieve safety parity with the security community’s +favored languages.

+

Consider common objections to Rust’s lifetime-annotation flavor of +borrow checking:

+
    +
  1. You need heavy annotations. This concern is +misplaced. Are you intrigued by mutable value semantics, +parameter-passing directives or second-class references? Borrow checking +gives you those, without ever having to write lifetime arguments. If +your function only uses references as parameters, elision implicitly +annotates them in a way that can’t fail. You only have to involve +lifetime arguments when going beyond the capabilities of second-class +references or mutable value semantics. More advanced usages such as the +implementation of iterators, views and RAII wrappers with reference +semantics are where annotations most often appear, because those designs +deal with multiple levels of references.
  2. +
  3. Borrow checking doesn’t permit patterns such as +self-references. It’s true that checked references are less +flexible than unsafe references or pointers, but this objection is at +odds with the claim that lifetime parameters are too burdensome. +Lifetime parameters increase the expressiveness of safe +references. Additionally, they can reference things important to C++ +users that garbage collection can’t, such as variables on the stack. +Do we want more expressive references at the cost of annotations, or do +we want to get rid of lifetime parameters to make a simpler language? +Those are opposite goals.
  4. +
  5. Borrow checking with lifetimes is too different from normal +C++. Borrow checking is the safety technology most similar to +current C++ practice. This model replaces unchecked references with +checked references. Other safety models get rid of reference types +entirely or replace them with garbage collection which is incompatible +with C++’s manual memory management and RAII. The design philosophy of +borrow checking is to take normal references but constrain them to uses +that can be checked for soundness by the compiler.
  6. +
+

It’s not surprising that the C++ community hasn’t discovered a better +way to approach safe references than the lifetime parameter model. After +all, there isn’t a well-funded effort to advance C++ language-level +lifetime safety. But there is in the Rust community. Rust has made +valuable improvements to its lifetime safety design. Lots of effort goes +into making borrow checking more permissive: The integration of +mid-level IR and non-lexical lifetimes in 2016 revitalized the +toolchain. Polonius[polonius] approaches dataflow analysis +from the opposite direction, hoping to shake loose more improvements. +Ideas like view types[view-types] and the sentinel +pattern[sentinel-pattern] are being +investigated. But all this activity has not discovered a mechanism +that’s superior to lifetime parameters for specifying constraints. If +something had been discovered, it would be integrated into the Rust +language and I’d be proposing to adopt that into C++. For now, +lifetime parameters are the best solution that the world has to +offer.

+

The US government and major players in tech including Google[secure-by-design] and Microsoft[ms-vulnerabilities] are telling industry +to transition to memory-safe languages because C++ is too unsafe to use. +There’s already a proven safety technology compatible with C++’s goals +of performance and manual memory management. If the C++ community +rejects this robust safety solution on the grounds of slightly +inconvenient lifetime annotations, and allows C++ to limp forward as a +memory-unsafe language, can it still claim to care about software +quality? If the lifetime model is good enough for Rust, a safe +language that is enjoying snowballing investment in industry, why is it +not good enough for C++?

+

Finally, adoption of this feature brings a major benefit even if you +personally want to get off C++: It’s critical for improving +C++/Rust interop. Your C++ project is generating revenue and +there’s scant economic incentive to rewrite it. But there is an +incentive to pivot to a memory-safe language for new development, +because new code is how vulnerabilities get introduced.[android] Bringing C++ closer to Rust +with the inclusion of safe-specifier, relocation, choice types, +and, importantly, lifetime parameters, reduces the friction of +interfacing the two languages. The easier it is to interoperate with +Rust, the more options and freedom companies have to fulfill their +security mandate.[rust-interop]

+

An up-to-date draft of this document is maintained at safe-cpp.org/draft-lifetimes.html.

+

6 References

+
+
+[android] Eliminating Memory Safety Vulnerabilities at the Source.
https://security.googleblog.com/2024/09/eliminating-memory-safety-vulnerabilities-Android.html?m=1
+
+
+[hylo] Borrow checking Hylo.
https://2023.splashcon.org/details/iwaco-2023-papers/5/Borrow-checking-Hylo
+
+
+[ms-vulnerabilities] We need a safer systems programming language.
https://msrc.microsoft.com/blog/2019/07/we-need-a-safer-systems-programming-language
+
+
+[mutable-value-semantics] Implementation Strategies for Mutable Value +Semantics.
https://www.jot.fm/issues/issue_2022_02/article2.pdf
+
+
+[nll] The Rust RFC Book - Non-lexical lifetimes.
https://rust-lang.github.io/rfcs/2094-nll.html
+
+
+[polonius] Polonius revisited.
https://smallcultfollowing.com/babysteps/blog/2023/09/22/polonius-part-1/
+
+
+[rust-interop] Improving Interoperability Between Rust and C++.
https://security.googleblog.com/2024/02/improving-interoperability-between-rust-and-c.html
+
+
+[rustnomicon] Rustnomicon – The Dark Arts of Unsafe Rust.
https://doc.rust-lang.org/nomicon/intro.html
+
+
+[safe-coding] Tackling cybersecurity vulnerabilities through Secure by +Design.
https://blog.google/technology/safety-security/tackling-cybersecurity-vulnerabilities-through-secure-by-design/
+
+
+[safecpp] Safe C++.
https://safecpp.org/draft.html
+
+
+[second-class] Second-Class References.
https://borretti.me/article/second-class-references
+
+
+[secure-by-design] Secure by Design : Google’s Perspective on Memory +Safety.
https://research.google/pubs/secure-by-design-googles-perspective-on-memory-safety/
+
+
+[sentinel-pattern] After NLL: Moving from borrowed data and the sentinel +pattern.
https://smallcultfollowing.com/babysteps/blog/2018/11/10/after-nll-moving-from-borrowed-data-and-the-sentinel-pattern/
+
+
+[view-types] View types for Rust.
https://smallcultfollowing.com/babysteps/blog/2021/11/05/view-types/
+
+
+
+
+ + diff --git a/docs/draft-lifetimes.html b/docs/draft-lifetimes.html index fe6eaca..780215b 100644 --- a/docs/draft-lifetimes.html +++ b/docs/draft-lifetimes.html @@ -424,7 +424,7 @@

Memory safety without - + diff --git a/lifetimes/P3444P0.md b/lifetimes/P3444P0.md new file mode 100644 index 0000000..7bb1e3c --- /dev/null +++ b/lifetimes/P3444P0.md @@ -0,0 +1,573 @@ +--- +title: "Memory safety without lifetime parameters" +document: P3444R0 +date: 2024-10-15 +audience: SG23 +author: + - name: Sean Baxter + email: +toc: false +--- + +# Safe references + +"Safe C++"[@safecpp] introduced a comprehensive design for compile-time memory safety in C++. The borrow checking model in Safe C++ requires lifetime parameters, a feature that increases expressiveness but complicates the language's type system. This proposal describes an alternative style of borrow checking, guaranteeing lifetime safety without the involvement of lifetime annotations. + +First let's recap how lifetime parameters are declared and used. + +[**lifetimes1.cxx**](https://github.com/cppalliance/safe-cpp/blob/master/lifetimes/lifetimes1.cxx) -- [(Compiler Explorer)](https://godbolt.org/z/5s9qG1h4E) +```cpp +#feature on safety + +// Function parameters have different lifetime parameters. +// Return type is constrained by x's lifetime. +auto f1/(a, b)(int^/a x, int^/b y, bool pred) safe -> int^/a { + // Error: + // function auto f1/(a, b)(int^/a, int^/b) -> int^/a returns + // object with lifetime b, but b doesn't outlive a + // return y; + return pred ? x : y; +} + +// Function parameters have a common lifetime parameter. +auto f2/(a)(int^/a x, int^/a y, bool pred) safe -> int^/a { + // Ok + return pred ? x : y; +} + +// Error: +// cannot use lifetime elision for return type int^ +auto f3(int^ x, int^ y) safe -> int^; +``` + +In Safe C++, occurrences of the borrow type `T^` in function declarations and in data members require specialization with _lifetime arguments_. Lifetime arguments name _lifetime-parameters_ declared as part of the function declaration. Borrow types without lifetime arguments have _unbound lifetimes_ and borrows with lifetime arguments have _bound lifetimes_. 
These are treated as different entities by the language's type system, and there are subtle rules on how bound lifetimes decay to unbound lifetimes and how unbound lifetimes become bound. Lifetime annotations greatly improve the capability of safe references, but extend an already complicated type system. + +The above code declares functions `f1`, `f2` and `f3` with _lifetime-parameter-lists_. Borrows in function return types must be constrained by the lifetimes of one or more function parameters. Failure to match lifetime arguments between function parameters and return types will cause a borrow checker failure. `f1` fails to borrow check because the returned parameter `y` does not outlive the lifetime `/a` on the return type. + +Elision rules make lifetime annotations implicit in some cases. But elision can fail, requiring users to intervene with annotations. In the example above, the declaration of `f3` fails because the elision rules cannot determine the lifetime argument on the returned borrow. + +[**lifetimes2.cxx**](https://github.com/cppalliance/safe-cpp/blob/master/lifetimes/lifetimes2.cxx) -- [(Compiler Explorer)](https://godbolt.org/z/G6TWx83M9) +```cpp +#feature on safety + +// New elision rules: +// All parameters are constrained by a common lifetime. +// The common lifetime constrains the return type. +int% f4(int% x, int% y, bool pred) safe { + // Can return either x or y, because they outlive the common lifetime + // and the common lifetime outlives the result object. + return pred ? x : y; +} +``` + +This proposal introduces a new _safe reference_ marked by the reference declarator `T%`. Safe references do not take lifetime arguments and there is no notion of _bound_ or _unbound_ lifetimes. The lifetime parameterization is determined by the formation of the function type. For a free function, all function parameters outlive a single invented lifetime that extends through the duration of the function call. 
For a non-static member function with the `%` _ref-qualifier_, the implicit object parameter outlives the invented lifetime. In turn, this invented lifetime outlives the returned safe reference. + +## Exclusivity + +* `T%` is a _mutable safe reference_. It cannot alias other references to overlapping places. +* `const T%` is a _shared safe reference_. It may alias shared safe references to overlapping places, but may never overlap a mutable reference. + +If lifetime safety can be guaranteed without lifetime parameters, why involve a new reference type `T%` at all? Why not perform this form of borrow checking on the existing lvalue- and rvalue-references `T&` and `T&&`? The answer is that safe references enforce _exclusivity_ and legacy references do not. There may be one mutable reference to a place, or any number of shared (constant) references, but not both at the same time. This is the universal invariant of borrow checking. Borrow checking legacy reference types would break all existing code, because that code was written without upholding the exclusivity invariant. + +Exclusivity is a program-wide invariant. It doesn't hinge on the safeness of a function. + +* A safe function is sound for all valid inputs. +* An unsafe function has preconditions and may be unsound for some valid inputs. + +"Valid" borrow and safe reference inputs don't mutably alias. This is something a function can just _assume_; it doesn't need to check and there's no way to check. Borrow checking upholds exclusivity even for unsafe functions (when compiled under the `[safety]` feature). There are other assumptions C++ programmers already make about the validity of inputs: for instance, references never hold null addresses. Non-valid inputs are implicated in undefined behavior. + +By the parsimony principle you may suggest "rather than adding a new safe reference type, just enforce exclusivity on lvalue- and rvalue-references when compiled under the `[safety]` feature."
But that makes the soundness problem worse. New code will _assume_ legacy references don't mutably alias, but existing code doesn't uphold that invariant because it was written without even knowing about it. + +If safe code calls legacy code that returns a struct with a pair of references, do those references alias? Of course they may alias, but the parsimonious treatment claims that mutable references don't alias under the `[safety]` feature. We've already stumbled on a soundness bug. + +Coming from the other direction, it may be necessary to form aliasing references just to use the APIs for existing code. Consider a function that takes an lvalue reference to a container and an lvalue reference to one of its elements. If safe code can't even form aliased lvalue references, it wouldn't be able to use that API at all. + +Exclusivity is a program-wide invariant on safe references. We need separate safe and unsafe reference types for both soundness and expressiveness. + +[**vector1.cxx**](https://github.com/cppalliance/safe-cpp/blob/master/lifetimes/vector1.cxx) -- [(Compiler Explorer)](https://godbolt.org/z/KTEWEdEsM) +```cpp +#include + +void f1(std::vector& vec, float& x) { + // Do vec and x alias? If so, the push_back may invalidate x. + vec.push_back(6); + + // Potential UB: x may have been invalidated by the push_back. + x = 6; +} + +int main() { + std::vector vec { 1.0f }; + + // Legacy references permit aliasing. + f1(vec, vec[0]); +} +``` + +This example demonstrates how perilous mutable aliasing in C++ is. In `f1`, the compiler doesn't know if `vec` and `x` alias. Pushing to the vector may cause a buffer resize and copy its data into a new allocation, invalidating existing references or pointers into the container. As C++ doesn't enforce exclusivity on legacy references, the code in `main` is legal, even though it leads to a use-after-free defect. 
+ +[**vector2.cxx**](https://github.com/cppalliance/safe-cpp/blob/master/lifetimes/vector2.cxx) -- [(Compiler Explorer)](https://godbolt.org/z/ETenGYK8n) +```cpp +#feature on safety +#include + +template +class Vec { +public: + void push_back(T value) % safe; + + const T% operator[](size_t idx) const % safe; + T% operator[](size_t idx) % safe; +}; + +void f2(Vec% vec, float% x) safe { + // Does push_back potentially invalidate x? + // No! Exclusivity prevents vec and x from aliasing. + vec.push_back(7); + + // Okay to store to x, because it doesn't point into vec's data. + *x = 7; +} + +int main() safe { + Vec vec { }; + mut vec.push_back(1); + + // Ill-formed: mutable borrow of vec between its mutable borrow and its use + f2(mut vec, mut vec[0]); +} +``` +``` +$ circle vector2.cxx +safety: during safety checking of int main() safe + borrow checking: vector2.cxx:27:19 + f2(mut vec, mut vec[0]); + ^ + mutable borrow of vec between its mutable borrow and its use + loan created at vector2.cxx:27:10 + f2(mut vec, mut vec[0]); + ^ +``` + +Rewrite the example using our simplified safe references. In `main`, the user attempts to pass a safe reference to `vec` and a safe reference to one of its elements. This violates exclusivity, causing the program to be ill-formed. + +Mutable safe references are prohibited from aliasing. Exclusivity is enforced by the same MIR analysis that polices Safe C++'s more general borrow type `T^`. While enforcing exclusivity involves more complicated tooling, it simplifies reasoning about your functions. Since safe reference parameters don't alias, users don't even have to think about aliasing bugs. You're free to store to references without worrying about iterator invalidation or other side effects leading to use-after-free defects. + +## Constraint rules + +This proposal implements two sets of constraint rules. Free functions constrain return references by the shortest of the argument lifetimes. 
Non-static member functions constrain return references by the implicit object lifetime. + +[**lifetimes3.cxx**](https://github.com/cppalliance/safe-cpp/blob/master/lifetimes/lifetimes3.cxx) -- [(Compiler Explorer)](https://godbolt.org/z/Yb6EoMMb6) +```cpp +#feature on safety + +const int% f1(const int% x, const int% y, bool pred) safe { + // The return reference is constrained by all reference parameters: x and y. + return pred ? x : y; +} + +struct Obj { + const int% f2(const int% arg) const % safe { + // Non-static member functions are constrained by the implicit + // object lifetime. + // It's OK to return `x`, because self outlives the return. + return %x; + } + + const int% f3(const int% arg) const % safe { + // Error: arg does not outlive the return reference. + return arg; + } + + const int% f4(const self%, const int% arg) safe { + // OK - f4 is a free function with an explicit self parameter. + return arg; + } + + int x; +}; + +int main() { + int x = 1, y = 2; + f1(x, y, true); // OK + + Obj obj { }; + obj.f2(x); // OK + obj.f3(x); // Error + obj.f4(x); // OK. +} +``` +``` +$ circle lifetimes3.cxx +safety: during safety checking of const int% Obj::f3(const int%) const % safe + error: lifetimes3.cxx:18:12 + return arg; + ^ + function const int% Obj::f3(const int%) const % safe returns object with lifetime SCC-ref-1, but SCC-ref-1 doesn't outlive SCC-ref-0 +``` + +The definitions of free function `f1` and non-static member function `f2` compile, because they return function parameters that constrain the return type: the returned parameter _outlives_ the returned reference. The non-static member function `f3` fails to compile, because the returned parameter _does not outlive_ the return type. In a non-static member function, only the implicit object parameter outlives the return type.
`f4` returns a function parameter but compiles; it uses the explicit object syntax to gain the ergonomics of a non-static member function, but retains the constraint rules of a free function. + +[**vector3.cxx**](https://github.com/cppalliance/safe-cpp/blob/master/lifetimes/vector3.cxx) -- [(Compiler Explorer)](https://godbolt.org/z/KEr1chMac) +```cpp +#feature on safety + +template +class Map { +public: + // Non-static member functions do not constrain the result object to + // the function parameters. + auto get1(const Key% key) % safe -> Value%; + + // Free functions do constrain the result object to the function parameters. + auto get2(self%, const Key% key) safe -> Value%; +}; + +int main() safe { + Map map { }; + + // Bind the key reference to a materialized temporary. + // The temporary expires at the end of this statement. + long% value1 = mut map.get1(3.14f); + + // We can still access value, because it's not constrained on the + // key argument. + *value1 = 1001; + + // The call to get2 constrains the returned reference to the lifetime + // of the key temporary. + long% value2 = mut map.get2(1.6186f); + + // This is ill-formed, because get2's key argument is out of scope. + *value2 = 1002; +} +``` +``` +$ circle vector3.cxx +safety: during safety checking of int main() safe + borrow checking: vector3.cxx:30:4 + *value2 = 1002; + ^ + use of value2 depends on expired loan + drop of temporary object float between its shared borrow and its use + loan created at vector3.cxx:27:31 + long% value2 = mut map.get2(1.6186f); + ^ +``` + +The constraint rules for non-static member functions reflect the idea that resources are owned by class objects. Consider a map data structure that associates values with keys. The map may be specialized with a key type that's expensive to copy, such as a string or another map. We don't want to compel the user to pass the key by value, because that may require copying this expensive type. Naturally, we pass by const reference.
+ +However, the accessor only needs the key inside the body of the function. Once it locates the value, it should return a reference to that, unconstrained by the lifetime of the key argument. Consider passing a materialized temporary for a key: it goes out of scope at the end of the full expression. `get1` uses the non-static member function constraint rules. The caller can use the returned reference even after the key temporary goes out of scope. `get2` uses the free function constraint rules, which constrains the return type to all of its function parameters. This leaves the program ill-formed when the returned reference is used after the expiration of the key temporary. + +In this model, lifetime constraints are not generally programmable, but that design still provides a degree of freedom in the form of non-static member functions. + +[**vector4.cxx**](https://github.com/cppalliance/safe-cpp/blob/master/lifetimes/vector4.cxx) -- [(Compiler Explorer)](https://godbolt.org/z/hdMr5G3j1) +```cpp +#feature on safety + +template +class Map { +public: + // Lifetime elision rules constrain the return by self. + auto get1(self^, const Key^ key) safe -> Value^; + + // Use explicit parameterizations for alternate constraints. + auto get2/(a)(self^/a, const Key^/a key) safe -> Value^/a; +}; + +int main() safe { + Map map { }; + + // Bind the key reference to a materialized temporary. + // The temporary expires at the end of this statement. + long^ value1 = mut map.get1(3.14f); + + // We can still access value, because it's not constrained on the + // key argument. + *value1 = 1001; + + // The call to get2 constrains the returned reference to the lifetime + // of the key temporary. + long^ value2 = mut map.get2(1.6186f); + + // This is ill-formed, because get2's key argument is out of scope. 
+ *value2 = 1002; +} +``` +``` +$ circle vector4.cxx +safety: during safety checking of int main() safe + borrow checking: vector4.cxx:29:4 + *value2 = 1002; + ^ + use of value2 depends on expired loan + drop of temporary object float between its shared borrow and its use + loan created at vector4.cxx:26:31 + long^ value2 = mut map.get2(1.6186f); + ^ +``` + +The general borrow type `T^` has programmable constraints. The map above declares accessor functions. `get1` relies on lifetime elision to constrain the result object by the `self` parameter. This is equivalent to the non-static member function constraint rule. We can call `get1` and use the returned reference even after the key temporary goes out of scope. + +`get2` includes lifetime annotations to constrain the returned reference by both the `self` and `key` parameters. This is like the free function constraint rules. The program fails borrow checking when the returned reference `value2` is used after the expiration of its key temporary. + +# Second-class references + +References can be taxonomized into two classes:[@second-class] + +* First-class references can pass data into functions, be returned from functions, be made into objects and be stored in structures. +* Second-class references can pass data into functions but cannot be returned from functions, made into objects or stored in structures. + +_Parameter-passing directives_ like `in` and `inout` are equivalent to second-class references. The _mutable value semantics_[@mutable-value-semantics] model uses parameter-passing directives to pass objects to functions by reference without involving the complexity of a borrow checker. + +```cpp +void func(Vec% vec, float% x) safe; +``` + +In this fragment, the reference parameters `vec` and `x` serve as _second-class references_. The compiler can achieve memory safety without involving the complexity of borrow checking. Both references point at data that outlives the duration of the call to `func`.
Exclusivity is enforced at the point of the call, which prevents `vec` and `x` from aliasing. Since `vec` and `x` don't alias, resizing or clearing `vec` cannot invalidate the `x` reference. + +The safe references presented here are more powerful than second-class references. While they don't support all the capabilities of borrows, they can be returned from functions and made into objects. The compiler must implement borrow checking to support this additional capability. + +Borrow checking operates on a function lowering called mid-level IR (MIR). A fresh region variable is provisioned for each local variable with a safe reference type. Dataflow analysis populates each region variable with the liveness of its reference. Assignments and function calls involving references generate _lifetime constraints_. The compiler _solves the constraint equation_ to find the liveness of each _loan_. All instructions in the MIR are scanned for _conflicting actions_ with any of the loans in scope at that point. Conflicting actions raise borrow checker errors. + +The Hylo[@hylo] model is largely equivalent to this model and it requires borrow checking technology. `let` and `inout` parameter directives use mutable value semantics to ensure memory safety for objects passed by reference into functions. But Hylo also supports returning references in the form of subscripts: + +[**Array.hylo**](https://github.com/hylo-lang/hylo/blob/main/StandardLibrary/Sources/Array.hylo) +```swift +public conformance Array: Collection { + ... + public subscript(_ position: Int): Element { + let { + precondition((position >= 0) && (position < count()), "position is out of bounds") + yield pointer_to_element(at: position).unsafe[] + } + inout { + precondition((position >= 0) && (position < count()), "position is out of bounds") + yield &(pointer_to_element(at: position).unsafe[]) + } + } +} +``` + +Subscripts are reference-returning _coroutines_. 
Coroutines with a single yield point are split into two normal functions: a ramp function that starts at the top and returns the expression of the yield statement, and a continuation function which resumes after the yield and runs to the end. Local state that's live over the yield point must live in a _coroutine frame_ so that it's available to the continuation function. These `Array` subscripts don't have instructions after the yield, so the continuation function is empty and hopefully elided by the optimizer. + +```cpp +template +struct Vec { + const T% operator[](size_t idx) const % safe; + T% operator[](size_t idx) % safe; +}; +``` + +The Hylo `Array` subscripts are lowered to reference-returning ramp functions exactly like their C++ `Vec` counterparts. For both languages, the borrow checker relates lifetimes through the function arguments and out the result object. This isn't the simple safety of second-class references/mutable value semantics. This is full-fat liveness analysis. + +Safe references without lifetime annotations shield users from dealing with a new degree of freedom, but they don't simplify the static analysis that upholds lifetime safety. To prevent use-after-free defects, compilers must still lower functions to mid-level IR, compute non-lexical lifetimes[@nll] and solve the constraint equation. When it comes to returning references, in for a penny, in for a pound. + +Since Circle has already made the investment in borrow checking, adding simplified safe references was an easy extension. If the community is able to fill in our gaps in knowledge around this sort of reference, the compiler could accommodate those advances as well. + +# Other aspects of safety + +As detailed in the Safe C++[@safecpp] proposal, there are four categories of memory safety: + +1. **Lifetime safety** - This proposal advances a simpler form of safe references that provides safety against use-after-free defects.
The feature is complementary with borrow types `T^` that take lifetime arguments. Both types can be used in the same translation unit, and even the same function, without conflict. +2. **Type safety** - Relocation must replace move semantics to eliminate unsafe null pointer exposure. Choice types and pattern matching must be included for safe modeling of optional types. +3. **Thread safety** - The `send` and `sync` interfaces account for which types can be copied and shared between threads. +4. **Runtime checks** - The compiler automatically emits runtime bounds checks on array and slice subscripts. It emits checks for integer divide-by-zero and INT_MIN / -1, which are undefined behavior. Conforming safe library functions must also implement panics to prevent out-of-bounds access to heap allocations. + +Most critically, the _safe-specifier_ is added to a function's type. Inside a safe function, only safe operations may be used, unless escaped by an _unsafe-block_. + +C++ must adopt a new standard library with a safe API, which observes all four categories of safety. We need new tooling. _But it's not the case that we have to rewrite all C++ code_. Time has already shaken out most of the vulnerabilities in old code. As demonstrated by the recent Android study on memory safety[@android], the benefits of rewriting are often not worth the costs. What we have to prioritize is the transition to safe coding practices[@safe-coding] for new code. + +# Achieving first-class references + +The presented design is as far as I could go to address the goal of "memory safety without lifetime parameters." But safe references aren't yet powerful enough to replace all the unsafe mechanisms necessary for productivity in C++. We need support for safe versions of idioms that are central to the C++ experience, such as: + +* Iterators. +* Views like `string_view` and `span`. +* RAII types with reference semantics. + +Let's consider RAII types with reference semantics. 
An example is `std::lock_guard`, which keeps a reference to a mutex. When the `lock_guard` goes out of scope its destructor calls `unlock` on the mutex. This is a challenge for safe references, because safe reference data members aren't supported. Normally those would require lifetime parameters on the containing class. + +What are some options for RAII reference semantics? + +* Coroutines. This is the Hylo strategy. The ramp function locks a mutex and returns a safe reference to the data within. The continuation unlocks the mutex. The reference to the mutex is kept in the coroutine frame. But this still reduces to supporting structs with reference data members. In this case it's not a user-defined type, but a compiler-defined coroutine frame. I feel that the coroutine solution is an unidiomatic fit for C++ for several reasons: static allocation of the coroutine frame requires exposing the definition of the coroutine to the caller, which breaks C++'s approach to modularity; the continuation is called immediately after the last use of the yielded reference, which runs counter to expectation that cleanup runs at the end of the enclosing scope; and since the continuation is called implicitly, there's nothing textual on the caller side to indicate an unlock. +* Defer expressions. Some garbage-collected languages include _defer_ expressions, which run after some condition is met. We could defer a call to the mutex unlock until the end of the enclosing lexical scope. This has the benefit of being explicit to the caller and not requiring computation of a coroutine frame. But it introduces a fundamental new control flow mechanism to the language with applicability that almost perfectly overlaps with destructors. +* Destructors. This is the idiomatic C++ choice. A local object is destroyed when it goes out of scope (or is dropped, with the Safe C++ `drop` keyword). The destructor calls the mutex unlock. 
+ +It makes sense to strengthen safe references to support current RAII practice. How do we support safe references as data members? A reasonable starting point is to declare a class as having _safe reference semantics_. `class name %;` is a possible syntax. Inside these classes, you can declare data members and base classes with safe reference semantics: that includes both safe references and other classes with safe reference semantics. + +```cpp +class lock_guard % { + // Permitted because the containing class has safe reference semantics. + std2::mutex% mutex; +public: + ~lock_guard() safe { + mutex.unlock(); + } +}; +``` + +The constraint rules can apply to the new `lock_guard` class exactly as they apply to safe references. Returning a `lock_guard` constrains its lifetime by the lifetimes of the function arguments. Transitively, the lifetimes of the data members are constrained by the lifetime of the containing class. + +Unfortunately, we run into problems immediately upon declaring member functions that take safe reference objects or safe reference parameter types. + +```cpp +class string_view %; + +template +class info % { + // Has reference semantics, but that's okay because the containing class does. + string_view sv; +public: + void swap(info% rhs) % safe; +}; +``` + +Consider an `info` class that has _safe reference semantics_ and keeps a `string_view` as a data member. The `string_view` also has reference semantics, so it constrains the underlying string that owns the data. Declare a non-static member function that binds the implicit object with the `%` _ref-qualifier_ and also takes an `info` by safe reference. This is uncharted water. The implicit object type `info` has reference semantics, yet we're taking a reference to that with the `swap` call. We're also taking a reference to `info` in the function parameter. How do we deal with references to references?
The existing constraint rules only invent a single lifetime: if we used those, we'd be clobbering the lifetime of the inner `string_view` member. + +There's a big weakness with the safe reference type `T%`: it's under-specified when dealing with references to references. We need a fix that respects the lifetimes on the class's data members. + +[**lifetimes5.cxx**](https://github.com/cppalliance/safe-cpp/blob/master/lifetimes/lifetimes5.cxx) -- [(Compiler Explorer)](https://godbolt.org/z/Gj7zoq343) +```cpp +#feature on safety + +class string_view/(a) { + // Keep a borrow to a slice over the string data. + const [char; dyn]^/a p_; +public: +}; + +class info/(a) { + // The handle has lifetime /a. + string_view/a sv; + +public: + void swap(self^, info^ rhs) safe { + string_view temp = self->sv; + self->sv = rhs->sv; + rhs->sv = temp; + } +}; + +void func/(a)(info/a^ lhs, info/a^ rhs) safe { + lhs.swap(rhs); +} + +void func2(info^ lhs, info^ rhs) safe { + lhs.swap(rhs); +} +``` + +Rust and Safe C++ have a way to keep the lifetime of the `string_view` member distinct from the lifetimes of the `self` and `rhs` references: lifetime parameters. `func` assumes that the `string_view`s of its parameters come from sources with overlapping lifetimes, so it declares a lifetime parameter `/a` that's common to both parameters. The lifetimes on the two references are created implicitly by elision, as they don't have to be related in the `swap` call. `func` compiles and doesn't clobber the lifetimes of the contained `string_view`s. + +``` +safety: during safety checking of void func2(info^, info^) safe + error: lifetimes5.cxx:26:12 + lhs.swap(rhs); + ^ + function void func2(info^, info^) safe returns object with lifetime #0, but #0 doesn't outlive #2 + + error: lifetimes5.cxx:26:3 + lhs.swap(rhs); + ^ + function void func2(info^, info^) safe returns object with lifetime #2, but #2 doesn't outlive #0 +``` + +Compiling `func2` raises borrow checker errors. 
Instead of providing explicit lifetime annotations that relate the lifetimes of the `lhs` and `rhs` `info` types, lifetime elision creates four distinct lifetimes: `#0` for the `lhs` `info`, `#1` for the `lhs` `info^`, `#2` for the `rhs` `info` and `#3` for the `rhs` `info^`. The `lhs.swap(rhs)` call relates the lifetimes of the operands through the common lifetime `/a`. But these lifetimes aren't related! The compiler has no information whether `#0` outlives `#2` or vice versa. Since the lifetimes aren't related in `func2`'s declaration, the program is rejected as ill-formed. + +This contrasts with the safe reference constraint rules, which would assign the same lifetime to all four lifetime binders and clobber the `string_view` lifetimes, causing a borrow checker failure further from the source and leaving the developer without the possibility of a fix. + +# Lifetime parameters + +If there's a community-wide research effort among compiler experts to evolve safe references it may be possible to get them into a state to support the abstractions most important for C++. But soundness reasoning is very subtle work. As you increase the indirection capability of safe references, you invite networks of dependencies of implied constraints and variances. This increases complexity for the compiler implementation and puts a mental burden on the authors of unsafe code to properly uphold the invariants assumed by safe references. A research project must produce _soundness doctrine_, which is essential guidance on how to interface safe and unsafe systems while upholding the soundness invariants of the program. + +But we don't have to do the research. There's already a solution that's been deployed in a successful production toolchain for a decade: _lifetime parameters_ as used in Rust. The soundness doctrine for writing unsafe code that upholds the invariants established by lifetime parameters is described in the Rustnomicon[@rustnomicon].
+ +This is the only known viable solution for first-class safe references without garbage collection. It's a critical lifeline that addresses an existential problem facing C++. By adopting lifetime parameters, C++ can achieve safety parity with the security community's favored languages. + +Consider common objections to Rust's lifetime-annotation flavor of borrow checking: + +1. **You need heavy annotations.** This concern is misplaced. Are you intrigued by mutable value semantics, parameter-passing directives or second-class references? Borrow checking gives you those, without ever having to write lifetime arguments. If your function only uses references as parameters, elision implicitly annotates them in a way that can't fail. You only have to involve lifetime arguments when going beyond the capabilities of second-class references or mutable value semantics. More advanced usages such as the implementation of iterators, views and RAII wrappers with reference semantics are where annotations most often appear, because those designs deal with multiple levels of references. +2. **Borrow checking doesn't permit patterns such as self-references.** It's true that checked references are less flexible than unsafe references or pointers, but this objection is at odds with the claim that lifetime parameters are too burdensome. Lifetime parameters _increase_ the expressiveness of safe references. Additionally, they can reference things important to C++ users that garbage collection can't, such as variables on the stack. Do we want more expressive references at the cost of annotations, or do we want to get rid of lifetime parameters to make a simpler language? Those are opposite goals. +3. **Borrow checking with lifetimes is too different from normal C++.** Borrow checking is the safety technology most similar to current C++ practice. This model replaces unchecked references with checked references.
Other safety models get rid of reference types entirely or replace them with garbage collection which is incompatible with C++'s manual memory management and RAII. The design philosophy of borrow checking is to take normal references but constrain them to uses that can be checked for soundness by the compiler. + +It's not surprising that the C++ community hasn't discovered a better way to approach safe references than the lifetime parameter model. After all, there isn't a well-funded effort to advance C++ language-level lifetime safety. But there is in the Rust community. Rust has made valuable improvements to its lifetime safety design. Lots of effort goes into making borrow checking more permissive: The integration of mid-level IR and non-lexical lifetimes in 2016 revitalized the toolchain. Polonius[@polonius] approaches dataflow analysis from the opposite direction, hoping to shake loose more improvements. Ideas like view types[@view-types] and the sentinel pattern[@sentinel-pattern] are being investigated. But all this activity has not discovered a mechanism that's superior to lifetime parameters for specifying constraints. If something had been discovered, it would be integrated into the Rust language and I'd be proposing to adopt _that_ into C++. For now, lifetime parameters are the best solution that the world has to offer. + +The US government and major players in tech including Google[@secure-by-design] and Microsoft[@ms-vulnerabilities] are telling industry to transition to memory-safe languages because C++ is too unsafe to use. There's already a proven safety technology compatible with C++'s goals of performance and manual memory management. If the C++ community rejects this robust safety solution on the grounds of slightly inconvenient lifetime annotations, and allows C++ to limp forward as a memory-unsafe language, can it still claim to care about software quality? 
If the lifetime model is good enough for Rust, a safe language that is enjoying snowballing investment in industry, why is it not good enough for C++? + +Finally, adoption of this feature brings a major benefit even if you personally want to get off C++: It's critical for **improving C++/Rust interop**. Your C++ project is generating revenue and there's scant economic incentive to rewrite it. But there is an incentive to pivot to a memory-safe language for new development, because new code is how vulnerabilities get introduced.[@android] Bringing C++ closer to Rust with the inclusion of _safe-specifier_, relocation, choice types, and, importantly, lifetime parameters, reduces the friction of interfacing the two languages. The easier it is to interoperate with Rust, the more options and freedom companies have to fulfill their security mandate.[@rust-interop] + +An up-to-date draft of this document is maintained at [safe-cpp.org/draft-lifetimes.html](https://www.safe-cpp.org/draft-lifetimes.html).
+ +--- +references: + - id: safecpp + citation-label: safecpp + title: Safe C++ + URL: https://safecpp.org/draft.html + + - id: second-class + citation-label: second-class + title: Second-Class References + URL: https://borretti.me/article/second-class-references + + - id: mutable-value-semantics + citation-label: mutable-value-semantics + title: Implementation Strategies for Mutable Value Semantics + URL: https://www.jot.fm/issues/issue_2022_02/article2.pdf + + - id: hylo + citation-label: hylo + title: Borrow checking Hylo + URL: https://2023.splashcon.org/details/iwaco-2023-papers/5/Borrow-checking-Hylo + + - id: nll + citation-label: nll + title: The Rust RFC Book - Non-lexical lifetimes + URL: https://rust-lang.github.io/rfcs/2094-nll.html + + - id: android + citation-label: android + title: Eliminating Memory Safety Vulnerabilities at the Source + URL: https://security.googleblog.com/2024/09/eliminating-memory-safety-vulnerabilities-Android.html?m=1 + + - id: safe-coding + citation-label: safe-coding + title: Tackling cybersecurity vulnerabilities through Secure by Design + URL: https://blog.google/technology/safety-security/tackling-cybersecurity-vulnerabilities-through-secure-by-design/ + + - id: rustnomicon + citation-label: rustnomicon + title: Rustnomicon -- The Dark Arts of Unsafe Rust + URL: https://doc.rust-lang.org/nomicon/intro.html + + - id: polonius + citation-label: polonius + title: Polonius revisited + URL: https://smallcultfollowing.com/babysteps/blog/2023/09/22/polonius-part-1/ + + - id: view-types + citation-label: view-types + title: View types for Rust + URL: https://smallcultfollowing.com/babysteps/blog/2021/11/05/view-types/ + + - id: sentinel-pattern + citation-label: sentinel-pattern + title: After NLL: Moving from borrowed data and the sentinel pattern + URL: https://smallcultfollowing.com/babysteps/blog/2018/11/10/after-nll-moving-from-borrowed-data-and-the-sentinel-pattern/ + + - id: secure-by-design + citation-label:
secure-by-design + title: Secure by Design: Google's Perspective on Memory Safety + URL: https://research.google/pubs/secure-by-design-googles-perspective-on-memory-safety/ + + - id: ms-vulnerabilities + citation-label: ms-vulnerabilities + title: We need a safer systems programming language + URL: https://msrc.microsoft.com/blog/2019/07/we-need-a-safer-systems-programming-language + + - id: rust-interop + citation-label: rust-interop + title: Improving Interoperability Between Rust and C++ + URL: https://security.googleblog.com/2024/02/improving-interoperability-between-rust-and-c.html +--- \ No newline at end of file diff --git a/lifetimes/draft-lifetimes.md b/lifetimes/draft-lifetimes.md index 125f0b3..eaa91f8 100644 --- a/lifetimes/draft-lifetimes.md +++ b/lifetimes/draft-lifetimes.md @@ -1,6 +1,6 @@ --- title: "Memory safety without lifetime parameters" -document: DXXXX +document: D3444 date: 2024-10-15 audience: SG23 author: From 64f7a2aed61f16e267b0c80b7e40062a0e3a24af Mon Sep 17 00:00:00 2001 From: Sean Baxter Date: Mon, 14 Oct 2024 12:48:25 -0400 Subject: [PATCH 15/27] Fixed P3444R0 filenames --- docs/{P3444P0.html => P3444R0.html} | 0 lifetimes/{P3444P0.md => P3444R0.md} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename docs/{P3444P0.html => P3444R0.html} (100%) rename lifetimes/{P3444P0.md => P3444R0.md} (100%) diff --git a/docs/P3444P0.html b/docs/P3444R0.html similarity index 100% rename from docs/P3444P0.html rename to docs/P3444R0.html diff --git a/lifetimes/P3444P0.md b/lifetimes/P3444R0.md similarity index 100% rename from lifetimes/P3444P0.md rename to lifetimes/P3444R0.md From c4e41dc8750ea7f70a446d9e5e42d1a2b2745b9b Mon Sep 17 00:00:00 2001 From: Sean Baxter Date: Mon, 14 Oct 2024 13:45:04 -0400 Subject: [PATCH 16/27] Updated P3390R0 --- docs/P3444R0.html | 14 +++++++++----- docs/draft-lifetimes.html | 2 +- lifetimes/P3444R0.md | 8 ++++---- lifetimes/draft-lifetimes.md | 6 ++++-- 4 files changed, 18 insertions(+), 12 deletions(-)
diff --git a/docs/P3444R0.html b/docs/P3444R0.html index 29f6815..0bc6f91 100644 --- a/docs/P3444R0.html +++ b/docs/P3444R0.html @@ -571,13 +571,13 @@

1.1 -

By the parsimony principal you may suggest “rather than adding a new +

With a desire to simplify, you may suggest “rather than adding a new safe reference type, just enforce exclusivity on lvalue- and rvalue-references when compiled under the [safety] feature.” But that makes the soundness problem worse. New code will assume legacy references don’t mutably alias, but existing code -doesn’t uphold that invariant because it was written without even -knowing about it.

+doesn’t uphold that invariant because it was written without considering +exclusivity.

If safe code calls legacy code that returns a struct with a pair of references, do those references alias? Of course they may alias, but the parsimonious treatment claims that mutable references don’t alias under @@ -1007,6 +1007,11 @@

shared_ptr).

What are some options for RAII reference semantics?

  • Coroutines. This is the Hylo strategy. The ramp function locks a @@ -1268,7 +1273,6 @@

    [rust-interop]

    -

    An up-to-date draft of this document is maintained at safe-cpp.org/draft-lifetimes.html.

    6 References

    @@ -1301,7 +1305,7 @@

    6
    https://blog.google/technology/safety-security/tackling-cybersecurity-vulnerabilities-through-secure-by-design/

    -[safecpp] Safe C++.
    https://safecpp.org/draft.html
    +[safecpp] P3390 – Safe C++.
    https://safecpp.org/draft.html
    [second-class] Second-Class References.
    https://borretti.me/article/second-class-references
    diff --git a/docs/draft-lifetimes.html b/docs/draft-lifetimes.html index 780215b..80e89e2 100644 --- a/docs/draft-lifetimes.html +++ b/docs/draft-lifetimes.html @@ -1300,7 +1300,7 @@

    6
    https://blog.google/technology/safety-security/tackling-cybersecurity-vulnerabilities-through-secure-by-design/

    -[safecpp] Safe C++.
    https://safecpp.org/draft.html
    +[safecpp] P3390 – Safe C++.
    https://safecpp.org/draft.html
    [second-class] Second-Class References.
    https://borretti.me/article/second-class-references
    diff --git a/lifetimes/P3444R0.md b/lifetimes/P3444R0.md index 7bb1e3c..334f120 100644 --- a/lifetimes/P3444R0.md +++ b/lifetimes/P3444R0.md @@ -76,7 +76,7 @@ Exclusivity is a program-wide invariant. It doesn't hinge on the safeness of a f "Valid" borrow and safe reference inputs don't mutably alias. This is something a function can just _assume_; it doesn't need to check and there's no way to check. Borrow checking upholds exclusivity even for unsafe functions (when compiled under the `[safety]` feature). There are other assumptions C++ programmers already make about the validity of inputs: for instance, references never hold null addresses. Non-valid inputs are implicated in undefined behavior. -By the parsimony principal you may suggest "rather than adding a new safe reference type, just enforce exclusivity on lvalue- and rvalue-references when compiled under the `[safety]` feature." But that makes the soundness problem worse. New code will _assume_ legacy references don't mutably alias, but existing code doesn't uphold that invariant because it was written without even knowing about it. +With a desire to simplify, you may suggest "rather than adding a new safe reference type, just enforce exclusivity on lvalue- and rvalue-references when compiled under the `[safety]` feature." But that makes the soundness problem worse. New code will _assume_ legacy references don't mutably alias, but existing code doesn't uphold that invariant because it was written without considering exclusivity. If safe code calls legacy code that returns a struct with a pair of references, do those references alias? Of course they may alias, but the parsimonious treatment claims that mutable references don't alias under the `[safety]` feature. We've already stumbled on a soundness bug. @@ -388,6 +388,8 @@ The presented design is as far as I could go to address the goal of "memory safe Let's consider RAII types with reference semantics. 
An example is `std::lock_guard`, which keeps a reference to a mutex. When the `lock_guard` goes out of scope its destructor calls `unlock` on the mutex. This is a challenge for safe references, because safe reference data members aren't supported. Normally those would require lifetime parameters on the containing class. +Robust support for user-defined types with reference data members isn't just a convenience in a safe C++ system. It's a necessary part of _interior mutability_, the core design pattern for implementing shared ownership of mutable state (think safe versions of `shared_ptr`). + What are some options for RAII reference semantics? * Coroutines. This is the Hylo strategy. The ramp function locks a mutex and returns a safe reference to the data within. The continuation unlocks the mutex. The reference to the mutex is kept in the coroutine frame. But this still reduces to supporting structs with reference data members. In this case it's not a user-defined type, but a compiler-defined coroutine frame. I feel that the coroutine solution is an unidiomatic fit for C++ for several reasons: static allocation of the coroutine frame requires exposing the definition of the coroutine to the caller, which breaks C++'s approach to modularity; the continuation is called immediately after the last use of the yielded reference, which runs counter to expectation that cleanup runs at the end of the enclosing scope; and since the continuation is called implicitly, there's nothing textual on the caller side to indicate an unlock. @@ -497,13 +499,11 @@ The US government and major players in tech including Google[@secure-by-design] Finally, adoption of this feature brings a major benefit even if you personally want to get off C++: It's critical for **improving C++/Rust interop**. Your C++ project is generating revenue and there's scant economic incentive to rewrite it. 
But there is an incentive to pivot to a memory-safe language for new development, because new code is how vulnerabilities get introduced.[@android] Bringing C++ closer to Rust with the inclusion of _safe-specifier_, relocation, choice types, and, importantly, lifetime parameters, reduces the friction of interfacing the two languages. The easier it is to interoperate with Rust, the more options and freedom companies have to fulfill with their security mandate.[@rust-interop] -An up-to-date draft of this document is maintained at [safe-cpp.org/draft-lifetimes.html](https://www.safe-cpp.org/draft-lifetimes.html). - --- references: - id: safecpp citation-label: safecpp - title: Safe C++ + title: P3390 -- Safe C++ URL: https://safecpp.org/draft.html - id: second-class diff --git a/lifetimes/draft-lifetimes.md b/lifetimes/draft-lifetimes.md index eaa91f8..5ef6948 100644 --- a/lifetimes/draft-lifetimes.md +++ b/lifetimes/draft-lifetimes.md @@ -76,7 +76,7 @@ Exclusivity is a program-wide invariant. It doesn't hinge on the safeness of a f "Valid" borrow and safe reference inputs don't mutably alias. This is something a function can just _assume_; it doesn't need to check and there's no way to check. Borrow checking upholds exclusivity even for unsafe functions (when compiled under the `[safety]` feature). There are other assumptions C++ programmers already make about the validity of inputs: for instance, references never hold null addresses. Non-valid inputs are implicated in undefined behavior. -By the parsimony principal you may suggest "rather than adding a new safe reference type, just enforce exclusivity on lvalue- and rvalue-references when compiled under the `[safety]` feature." But that makes the soundness problem worse. New code will _assume_ legacy references don't mutably alias, but existing code doesn't uphold that invariant because it was written without even knowing about it. 
+With a desire to simplify, you may suggest "rather than adding a new safe reference type, just enforce exclusivity on lvalue- and rvalue-references when compiled under the `[safety]` feature." But that makes the soundness problem worse. New code will _assume_ legacy references don't mutably alias, but existing code doesn't uphold that invariant because it was written without considering exclusivity. If safe code calls legacy code that returns a struct with a pair of references, do those references alias? Of course they may alias, but the parsimonious treatment claims that mutable references don't alias under the `[safety]` feature. We've already stumbled on a soundness bug. @@ -388,6 +388,8 @@ The presented design is as far as I could go to address the goal of "memory safe Let's consider RAII types with reference semantics. An example is `std::lock_guard`, which keeps a reference to a mutex. When the `lock_guard` goes out of scope its destructor calls `unlock` on the mutex. This is a challenge for safe references, because safe reference data members aren't supported. Normally those would require lifetime parameters on the containing class. +Robust support for user-defined types with reference data members isn't just a convenience in a safe C++ system. It's a necessary part of _interior mutability_, the core design pattern for implementing shared ownership of mutable state (think safe versions of `shared_ptr`). + What are some options for RAII reference semantics? * Coroutines. This is the Hylo strategy. The ramp function locks a mutex and returns a safe reference to the data within. The continuation unlocks the mutex. The reference to the mutex is kept in the coroutine frame. But this still reduces to supporting structs with reference data members. In this case it's not a user-defined type, but a compiler-defined coroutine frame. 
I feel that the coroutine solution is an unidiomatic fit for C++ for several reasons: static allocation of the coroutine frame requires exposing the definition of the coroutine to the caller, which breaks C++'s approach to modularity; the continuation is called immediately after the last use of the yielded reference, which runs counter to expectation that cleanup runs at the end of the enclosing scope; and since the continuation is called implicitly, there's nothing textual on the caller side to indicate an unlock. @@ -501,7 +503,7 @@ Finally, adoption of this feature brings a major benefit even if you personally references: - id: safecpp citation-label: safecpp - title: Safe C++ + title: P3390 -- Safe C++ URL: https://safecpp.org/draft.html - id: second-class From 7446ab46b341228db4a0cfc94d351bd1f51bed94 Mon Sep 17 00:00:00 2001 From: Sean Baxter Date: Mon, 14 Oct 2024 14:03:36 -0400 Subject: [PATCH 17/27] Editing changes to P3444 --- docs/P3444R0.html | 16 +++++++++------- lifetimes/P3444R0.md | 10 +++++----- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/docs/P3444R0.html b/docs/P3444R0.html index 0bc6f91..0c18f9f 100644 --- a/docs/P3444R0.html +++ b/docs/P3444R0.html @@ -898,7 +898,9 @@

    [hylo] model is largely equivalent to this model and it requires borrow checking technology. let and @@ -928,7 +930,7 @@

    Array subscripts don’t have instructions after the yield, so the continuation function is -empty and hopefully elided by the optimizer.

    +empty and hopefully optimized away.

    template<typename T>
     struct Vec {
       const T% operator[](size_t idx) const % safe;
    @@ -973,8 +975,8 @@ 

    class name %; is a possible syntax. Inside these classes, you can declare data members and base classes with safe reference semantics: that includes both safe -reference and other classes with safe reference semantics.

    +references and other classes with safe reference semantics.

    class lock_guard % {
       // Permitted because the containing class has safe reference semantics.
       std2::mutex% mutex;
    diff --git a/lifetimes/P3444R0.md b/lifetimes/P3444R0.md
    index 334f120..e0ee1dc 100644
    --- a/lifetimes/P3444R0.md
    +++ b/lifetimes/P3444R0.md
    @@ -328,7 +328,7 @@ In this fragment, the reference parameters `vec` and `x` serve as _second-class
     
     The safe references presented here are more powerful than second-class references. While they don't support all the capabilities of borrows, they can be returned from functions and made into objects. The compiler must implement borrow checking to support this additional capability.
     
    -Borrow checking operates on a function lowering called mid-level IR (MIR). A fresh region variable is provisioned for each local variable with a safe reference type. Dataflow analysis populates each region variable with the liveness of its reference. Assignments and function calls involving references generate _lifetime constraints_. The compiler _solves the constraint equation_ to find the liveness of each _loan_. All instructions in the MIR are scanned for _conflicting actions_ with any of the loans in scope at that point. Conflicting actions raise borrow checker errors.
    +Borrow checking operates on a function lowering called mid-level IR (MIR). A fresh region variable is provisioned for each local variable with a safe reference type. Dataflow analysis populates each region variable with the liveness of its reference. Assignments and function calls involving references generate _lifetime constraints_. The compiler _solves the constraint equation_ to find the liveness of each _loan_. All instructions in the MIR are scanned for _conflicting actions_ with any of the loans in scope at that point. Examples of conflicting actions are stores to places with live shared borrows or loads from places with live mutable borrows. Conflicting actions raise borrow checker errors.
     
     The Hylo[@hylo] model is largely equivalent to this model and it requires borrow checking technology. `let` and `inout` parameter directives use mutable value semantics to ensure memory safety for objects passed by reference into functions. But Hylo also supports returning references in the form of subscripts:
     
    @@ -349,7 +349,7 @@ public conformance Array: Collection {
     }
     ```
     
    -Subscripts are reference-returning _coroutines_. Coroutines with a single yield point are split into two normal functions: a ramp function that starts at the top and returns the expression of the yield statement, and a continuation function which resumes after the yield and runs to the end. Local state that's live over the yield point must live in a _coroutine frame_ so that it's available to the continuation function. These `Array` subscripts don't have instructions after the yield, so the continuation function is empty and hopefully elided by the optimizer.
    +Subscripts are reference-returning _coroutines_. Coroutines with a single yield point are split into two normal functions: a ramp function that starts at the top and returns the expression of the yield statement, and a continuation function which resumes after the yield and runs to the end. Local state that's live over the yield point must live in a _coroutine frame_ so that it's available to the continuation function. These `Array` subscripts don't have instructions after the yield, so the continuation function is empty and hopefully optimized away.
     
     ```cpp
     template
    @@ -372,7 +372,7 @@ As detailed in the Safe C++[@safecpp] proposal, there are four categories of mem
     1. **Lifetime safety** - This proposal advances a simpler form of safe references that provides safety against use-after-free defects. The feature is complementary with borrow types `T^` that take lifetime arguments. Both types can be used in the same translation unit, and even the same function, without conflict.
     2. **Type safety** - Relocation must replace move semantics to eliminate unsafe null pointer exposure. Choice types and pattern matching must be included for safe modeling of optional types.
     3. **Thread safety** - The `send` and `sync` interfaces account for which types can be copied and shared between threads. 
    -4. **Runtime checks** - The compiler automatically emits runtime bounds checks on array and slice subscripts. It emits checks for integer divide-by-zero and INT_MIN / -1, which are undefined behavior. Conforming safe library functions must also implement panics to prevent out-of-bounds access to heap allocations.
    +4. **Runtime checks** - The compiler automatically emits runtime bounds checks on array and slice subscripts. It emits checks for integer divide-by-zero and INT_MIN / -1, which are undefined behavior. Conforming safe library functions must panic to prevent out-of-bounds access to heap allocations.
     
     Most critically, the _safe-specifier_ is added to a function's type. Inside a safe function, only safe operations may be used, unless escaped by an _unsafe-block_. 
     
    @@ -392,11 +392,11 @@ Robust support for user-defined types with reference data members isn't just a c
     
     What are some options for RAII reference semantics?
     
    -* Coroutines. This is the Hylo strategy. The ramp function locks a mutex and returns a safe reference to the data within. The continuation unlocks the mutex. The reference to the mutex is kept in the coroutine frame. But this still reduces to supporting structs with reference data members. In this case it's not a user-defined type, but a compiler-defined coroutine frame. I feel that the coroutine solution is an unidiomatic fit for C++ for several reasons: static allocation of the coroutine frame requires exposing the definition of the coroutine to the caller, which breaks C++'s approach to modularity; the continuation is called immediately after the last use of the yielded reference, which runs counter to expectation that cleanup runs at the end of the enclosing scope; and since the continuation is called implicitly, there's nothing textual on the caller side to indicate an unlock.
    +* Coroutines. This is the Hylo strategy. The ramp function locks a mutex and returns a safe reference to the data within. The continuation unlocks the mutex. The reference to the mutex is kept in the coroutine frame. But this still reduces to supporting structs with reference data members. In this case it's not a user-defined type, but a compiler-defined coroutine frame. The coroutine solution is an unidiomatic fit for C++ for several reasons: static allocation of the coroutine frame requires exposing the definition of the coroutine to the caller, which breaks C++'s approach to modularity; the continuation is called immediately after the last use of the yielded reference, which runs counter to expectation that cleanup runs at the end of the enclosing scope; and since the continuation is called implicitly, there's nothing textual on the caller side to indicate an unlock.
     * Defer expressions. Some garbage-collected languages include _defer_ expressions, which run after some condition is met. We could defer a call to the mutex unlock until the end of the enclosing lexical scope. This has the benefit of being explicit to the caller and not requiring computation of a coroutine frame. But it introduces a fundamental new control flow mechanism to the language with applicability that almost perfectly overlaps with destructors.
     * Destructors. This is the idiomatic C++ choice. A local object is destroyed when it goes out of scope (or is dropped, with the Safe C++ `drop` keyword). The destructor calls the mutex unlock.
     
    -It makes sense to strengthen safe references to support current RAII practice. How do we support safe references as data members? A reasonable starting point is to declare a class as having _safe reference semantics_. `class name %;` is a possible syntax. Inside these classes, you can declare data members and base classes with safe reference semantics: that includes both safe reference and other classes with safe reference semantics.
    +It makes sense to strengthen safe references to support current RAII practice. How do we support safe references as data members? A reasonable starting point is to declare a class as having _safe reference semantics_. `class name %;` is a possible syntax. Inside these classes, you can declare data members and base classes with safe reference semantics: that includes both safe references and other classes with safe reference semantics.
     
     ```cpp
     class lock_guard % {
    
    From 37381a125cc532952fccfe8747ee5f2a2b045315 Mon Sep 17 00:00:00 2001
    From: Sean Baxter 
    Date: Mon, 14 Oct 2024 15:05:16 -0400
    Subject: [PATCH 18/27] Added exclusive1 example
    
    ---
     docs/P3444R0.html        | 433 +++++++++++++++++++++------------------
     lifetimes/P3444R0.md     |  32 ++-
     lifetimes/exclusive1.cxx |  18 ++
     3 files changed, 278 insertions(+), 205 deletions(-)
     create mode 100644 lifetimes/exclusive1.cxx
    
    diff --git a/docs/P3444R0.html b/docs/P3444R0.html
    index 0c18f9f..97589b3 100644
    --- a/docs/P3444R0.html
    +++ b/docs/P3444R0.html
    @@ -671,6 +671,37 @@ 

    1.1 +

    exclusive1.cxx +– (Compiler Explorer)

    +
    #feature on safety
    +
    +void f(int% x, int% y) safe;
    +
    +void g(int& x, int& y) safe {
    +  unsafe {
    +    // Enter an unsafe block to dereference legacy references.
    +    // The precondition to the unsafe-block is that the legacy
    +    // references *do not alias* and *do not dangle*.
    +    f(%*x, %*y);
    +  }
    +}
    +
    +void f(int% x, int% y) safe {
    +  // We can demote safe references to legacy references without 
    +  // an unsafe block. There are no preconditions to enforce.
    +  g(&*x, &*y);
    +}
    +

    While safe references and legacy references are different types, +they’re inter-convertible. Converting a safe reference to a legacy +reference can be done safely, because it doesn’t involve any +preconditions. Function f converts a +safe reference x to an lvalue +reference with a dereference and borrow: +&*x. +Going the other way is unsafe: the precondition of the +unsafe-block is that the legacy references do not +alias and do not dangle: +%*x.

    1.2 Constraint rules

    This proposal implements two sets of constraint rules. Free functions constrain return references by the shortest of the argument lifetimes. @@ -678,49 +709,49 @@

    object lifetime.

    lifetimes3.cxx(Compiler Explorer)

    -
    #feature on safety
    -
    -const int% f1(const int% x, const int% y, bool pred) safe {
    -  // The return reference is constrained by all reference parameters: x and y.
    -  return pred ? x : y;
    -}
    -
    -struct Obj {
    -  const int% f2(const int% arg) const % safe {
    -    // Non-static member functions are constrained by the implicit 
    -    // object lifetime.
    -    // It's OK to return `x`, because self outlives the return.
    -    return %x;
    -  }
    -
    -  const int% f3(const int% arg) const % safe {
    -    // Error: arg does not outlive the return reference.
    -    return arg;
    -  }
    -
    -  const int% f4(const self%, const int% arg) safe {
    -    // OK - f4 is a free function with an explicit self parameter.
    -    return arg;
    -  }
    -
    -  int x;
    -};
    -
    -int main() {
    -  int x = 1, y = 2;
    -  f1(x, y, true); // OK
    -
    -  Obj obj { };
    -  obj.f2(x);  // OK
    -  obj.f3(x);  // Error
    -  obj.f4(x);  // OK.
    -}
    -
    $ circle lifetimes3.cxx 
    -safety: during safety checking of const int% Obj::f3(const int%) const % safe
    -  error: lifetimes3.cxx:18:12
    -      return arg; 
    -             ^
    -  function const int% Obj::f3(const int%) const % safe returns object with lifetime SCC-ref-1, but SCC-ref-1 doesn't outlive SCC-ref-0
    +
    #feature on safety
    +
    +const int% f1(const int% x, const int% y, bool pred) safe {
    +  // The return reference is constrained by all reference parameters: x and y.
    +  return pred ? x : y;
    +}
    +
    +struct Obj {
    +  const int% f2(const int% arg) const % safe {
    +    // Non-static member functions are constrained by the implicit 
    +    // object lifetime.
    +    // It's OK to return `x`, because self outlives the return.
    +    return %x;
    +  }
    +
    +  const int% f3(const int% arg) const % safe {
    +    // Error: arg does not outlive the return reference.
    +    return arg;
    +  }
    +
    +  const int% f4(const self%, const int% arg) safe {
    +    // OK - f4 is a free function with an explicit self parameter.
    +    return arg;
    +  }
    +
    +  int x;
    +};
    +
    +int main() {
    +  int x = 1, y = 2;
    +  f1(x, y, true); // OK
    +
    +  Obj obj { };
    +  obj.f2(x);  // OK
    +  obj.f3(x);  // Error
    +  obj.f4(x);  // OK.
    +}
    +
    $ circle lifetimes3.cxx 
    +safety: during safety checking of const int% Obj::f3(const int%) const % safe
    +  error: lifetimes3.cxx:18:12
    +      return arg; 
    +             ^
    +  function const int% Obj::f3(const int%) const % safe returns object with lifetime SCC-ref-1, but SCC-ref-1 doesn't outlive SCC-ref-0

    The definitions of free function f1 and non-static member function f2 compile, because they return @@ -736,47 +767,47 @@

    function.

    vector3.cxx(Compiler Explorer)

    -
    #feature on safety
    -
    -template<typename Key, typename Value>
    -class Map {
    -public:
    -  // Non-static member functions do not constrain the result object to
    -  // the function parameters.
    -  auto get1(const Key% key) % safe -> Value%;
    -
    -  // Free function do constrain the result object to the function parameters.
    -  auto get2(self%, const Key% key) safe -> Value%;
    -};
    -
    -int main() safe {
    -  Map<float, long> map { };
    -
    -  // Bind the key reference to a materialized temporary.
    -  // The temporary expires at the end of this statement.
    -  long% value1 = mut map.get1(3.14f);
    -
    -  // We can still access value, because it's not constrained on the 
    -  // key argument.
    -  *value1 = 1001;
    -
    -  // The call to get2 constrains the returned reference to the lifetime
    -  // of the key temporary.
    -  long% value2 = mut map.get2(1.6186f);
    -
    -  // This is ill-formed, because get2's key argument is out of scope.
    -  *value2 = 1002;
    -}
    -
    $ circle vector3.cxx 
    -safety: during safety checking of int main() safe
    -  borrow checking: vector3.cxx:30:4
    -    *value2 = 1002; 
    -     ^
    -  use of value2 depends on expired loan
    -  drop of temporary object float between its shared borrow and its use
    -  loan created at vector3.cxx:27:31
    -    long% value2 = mut map.get2(1.6186f); 
    -                                ^
    +
    #feature on safety
    +
    +template<typename Key, typename Value>
    +class Map {
    +public:
    +  // Non-static member functions do not constrain the result object to
    +  // the function parameters.
    +  auto get1(const Key% key) % safe -> Value%;
    +
    +  // Free functions do constrain the result object to the function parameters.
    +  auto get2(self%, const Key% key) safe -> Value%;
    +};
    +
    +int main() safe {
    +  Map<float, long> map { };
    +
    +  // Bind the key reference to a materialized temporary.
    +  // The temporary expires at the end of this statement.
    +  long% value1 = mut map.get1(3.14f);
    +
    +  // We can still access value, because it's not constrained on the 
    +  // key argument.
    +  *value1 = 1001;
    +
    +  // The call to get2 constrains the returned reference to the lifetime
    +  // of the key temporary.
    +  long% value2 = mut map.get2(1.6186f);
    +
    +  // This is ill-formed, because get2's key argument is out of scope.
    +  *value2 = 1002;
    +}
    +
    $ circle vector3.cxx 
    +safety: during safety checking of int main() safe
    +  borrow checking: vector3.cxx:30:4
    +    *value2 = 1002; 
    +     ^
    +  use of value2 depends on expired loan
    +  drop of temporary object float between its shared borrow and its use
    +  loan created at vector3.cxx:27:31
    +    long% value2 = mut map.get2(1.6186f); 
    +                                ^

    The constraint rules for non-static member functions reflect the idea that resources are owned by class objects. Consider a map data structure that associates values with keys. The map may be specialized a key type @@ -800,46 +831,46 @@

    non-static member functions.

    vector4.cxx(Compiler Explorer)

    -
    #feature on safety
    -
    -template<typename Key, typename Value>
    -class Map {
    -public:
    -  // Lifetime elision rules constrain the return by self.
    -  auto get1(self^, const Key^ key) safe -> Value^;
    -
    -  // Use explicit parameterizations for alternate constraints.
    -  auto get2/(a)(self^/a, const Key^/a key) safe -> Value^/a;
    -};
    -
    -int main() safe {
    -  Map<float, long> map { };
    -
    -  // Bind the key reference to a materialized temporary.
    -  // The temporary expires at the end of this statement.
    -  long^ value1 = mut map.get1(3.14f);
    -
    -  // We can still access value, because it's not constrained on the 
    -  // key argument.
    -  *value1 = 1001;
    -
    -  // The call to get2 constrains the returned reference to the lifetime
    -  // of the key temporary.
    -  long^ value2 = mut map.get2(1.6186f);
    -
    -  // This is ill-formed, because get2's key argument is out of scope.
    -  *value2 = 1002;
    -}
    -
    $ circle vector4.cxx 
    -safety: during safety checking of int main() safe
    -  borrow checking: vector4.cxx:29:4
    -    *value2 = 1002; 
    -     ^
    -  use of value2 depends on expired loan
    -  drop of temporary object float between its shared borrow and its use
    -  loan created at vector4.cxx:26:31
    -    long^ value2 = mut map.get2(1.6186f); 
    -                                ^
    +
    #feature on safety
    +
    +template<typename Key, typename Value>
    +class Map {
    +public:
    +  // Lifetime elision rules constrain the return by self.
    +  auto get1(self^, const Key^ key) safe -> Value^;
    +
    +  // Use explicit parameterizations for alternate constraints.
    +  auto get2/(a)(self^/a, const Key^/a key) safe -> Value^/a;
    +};
    +
    +int main() safe {
    +  Map<float, long> map { };
    +
    +  // Bind the key reference to a materialized temporary.
    +  // The temporary expires at the end of this statement.
    +  long^ value1 = mut map.get1(3.14f);
    +
    +  // We can still access value, because it's not constrained on the 
    +  // key argument.
    +  *value1 = 1001;
    +
    +  // The call to get2 constrains the returned reference to the lifetime
    +  // of the key temporary.
    +  long^ value2 = mut map.get2(1.6186f);
    +
    +  // This is ill-formed, because get2's key argument is out of scope.
    +  *value2 = 1002;
    +}
    +
    $ circle vector4.cxx 
    +safety: during safety checking of int main() safe
    +  borrow checking: vector4.cxx:29:4
    +    *value2 = 1002; 
    +     ^
    +  use of value2 depends on expired loan
    +  drop of temporary object float between its shared borrow and its use
    +  loan created at vector4.cxx:26:31
    +    long^ value2 = mut map.get2(1.6186f); 
    +                                ^

    The general borrow type T^ has programmable constraints. The map above declares accessor functions. @@ -870,7 +901,7 @@

    [mutable-value-semantics] model uses parameter-passing directives to pass objects to functions by reference without involving the complexity of a borrow checker.

    -
    void func(Vec<float>% vec, float% x) safe;
    +
    void func(Vec<float>% vec, float% x) safe;

    In this fragment, the reference parameters vec and x serve as second-class @@ -931,11 +962,11 @@

    Array subscripts don’t have instructions after the yield, so the continuation function is empty and hopefully optimized away.

    -
    template<typename T>
    -struct Vec {
    -  const T% operator[](size_t idx) const % safe;
    -        T% operator[](size_t idx)       % safe;
    -};
    +
    template<typename T>
    +struct Vec {
    +  const T% operator[](size_t idx) const % safe;
    +        T% operator[](size_t idx)       % safe;
    +};

    The Hylo Array subscripts are lowered to reference-returning ramp functions exactly like their C++ Vec counterparts. For both @@ -1016,12 +1047,12 @@

    shared_ptr).

    What are some options for RAII reference semantics?

      -
    • Coroutines. This is the Hylo strategy. The ramp function locks a -mutex and returns a safe reference to the data within. The continuation -unlocks the mutex. The reference to the mutex is kept in the coroutine -frame. But this still reduces to supporting structs with reference data -members. In this case it’s not a user-defined type, but a -compiler-defined coroutine frame. The coroutine solution is an +
    • Coroutines. This is the Hylo strategy. The ramp +function locks a mutex and returns a safe reference to the data within. +The continuation unlocks the mutex. The reference to the mutex is kept +in the coroutine frame. But this still reduces to supporting structs +with reference data members. In this case it’s not a user-defined type, +but a compiler-defined coroutine frame. The coroutine solution is an unidiomatic fit for C++ for several reasons: static allocation of the coroutine frame requires exposing the definition of the coroutine to the caller, which breaks C++’s approach to modularity; the continuation is @@ -1029,17 +1060,17 @@

      drop keyword). The destructor calls -the mutex unlock.

    • +
    • Destructors. This is the idiomatic C++ choice. A +local object is destroyed when it goes out of scope (or is dropped, with +the Safe C++ drop keyword). The +destructor calls the mutex unlock.

    It makes sense to strengthen safe references to support current RAII practice. How do we support safe references as data members? A @@ -1048,33 +1079,33 @@

    class lock_guard % {
    -  // Permitted because the containing class has safe reference semantics.
    -  std2::mutex% mutex;
    -public:
    -  ~lock_guard() safe {
    -    mutex.unlock();
    -  }
    -};

    +
    class lock_guard % {
    +  // Permitted because the containing class has safe reference semantics.
    +  std2::mutex% mutex;
    +public:
    +  ~lock_guard() safe {
    +    mutex.unlock();
    +  }
    +};

    The constraint rules can apply to the new lock_guard class exactly as it applies to safe references. Returning a -lock_guard constraints its lifetime +lock_guard constrains its lifetime by the lifetimes of the function arguments. Transitively, the lifetimes of the data members are constrained by the lifetime of the containing class.

    Unfortunately, we run into problems immediately upon declaring member functions that take safe reference objects or safe reference parameter types.

    -
    class string_view %;
    -
    -template<typename T>
    -class info % {
    -  // Has reference semantics, but that's okay because the containing class does.
    -  string_view sv;
    -public:
    -  void swap(info% rhs) % safe;
    -};
    +
    class string_view %;
    +
    +template<typename T>
    +class info % {
    +  // Has reference semantics, but that's okay because the containing class does.
    +  string_view sv;
    +public:
    +  void swap(info% rhs) % safe;
    +};

    Consider an info class that has safe reference semantics and keeps a string_view as a data member. The @@ -1099,33 +1130,33 @@

    lifetimes5.cxx(Compiler Explorer)

    -
    #feature on safety
    -
    -class string_view/(a) {
    -  // Keep a borrow to a slice over the string data.
    -  const [char; dyn]^/a p_;
    -public:
    -};
    -
    -class info/(a) {
    -  // The handle has lifetime /a.
    -  string_view/a sv;
    -
    -public:
    -  void swap(self^, info^ rhs) safe {
    -    string_view temp = self->sv;
    -    self->sv = rhs->sv;
    -    rhs->sv = temp;
    -  }
    -};
    -
    -void func/(a)(info/a^ lhs, info/a^ rhs) safe {
    -  lhs.swap(rhs);
    -}
    -
    -void func2(info^ lhs, info^ rhs) safe {
    -  lhs.swap(rhs);
    -}
    +
    #feature on safety
    +
    +class string_view/(a) {
    +  // Keep a borrow to a slice over the string data.
    +  const [char; dyn]^/a p_;
    +public:
    +};
    +
    +class info/(a) {
    +  // The handle has lifetime /a.
    +  string_view/a sv;
    +
    +public:
    +  void swap(self^, info^ rhs) safe {
    +    string_view temp = self->sv;
    +    self->sv = rhs->sv;
    +    rhs->sv = temp;
    +  }
    +};
    +
    +void func/(a)(info/a^ lhs, info/a^ rhs) safe {
    +  lhs.swap(rhs);
    +}
    +
    +void func2(info^ lhs, info^ rhs) safe {
    +  lhs.swap(rhs);
    +}

    Rust and Safe C++ have a way to keep the lifetime of the string_view member distinct from the lifetimes of the self and @@ -1140,16 +1171,16 @@

    func compiles and doesn’t clobber the lifetimes of the contained string_views.

    -
    safety: during safety checking of void func2(info^, info^) safe
    -  error: lifetimes5.cxx:26:12
    -    lhs.swap(rhs); 
    -             ^
    -  function void func2(info^, info^) safe returns object with lifetime #0, but #0 doesn't outlive #2
    -
    -  error: lifetimes5.cxx:26:3
    -    lhs.swap(rhs); 
    -    ^
    -  function void func2(info^, info^) safe returns object with lifetime #2, but #2 doesn't outlive #0
    +
    safety: during safety checking of void func2(info^, info^) safe
    +  error: lifetimes5.cxx:26:12
    +    lhs.swap(rhs); 
    +             ^
    +  function void func2(info^, info^) safe returns object with lifetime #0, but #0 doesn't outlive #2
    +
    +  error: lifetimes5.cxx:26:3
    +    lhs.swap(rhs); 
    +    ^
    +  function void func2(info^, info^) safe returns object with lifetime #2, but #2 doesn't outlive #0

    Compiling func2 raises borrow checker errors. Instead of providing explicit lifetime annotations that relate the lifetimes of the lhs and diff --git a/lifetimes/P3444R0.md b/lifetimes/P3444R0.md index e0ee1dc..fe66d6e 100644 --- a/lifetimes/P3444R0.md +++ b/lifetimes/P3444R0.md @@ -153,6 +153,30 @@ Rewrite the example using our simplified safe references. In `main`, the user at Mutable safe references are prohibited from aliasing. Exclusivity is enforced by the same MIR analysis that polices Safe C++'s more general borrow type `T^`. While enforcing exclusivity involves more complicated tooling, it simplifies reasoning about your functions. Since safe reference parameters don't alias, users don't even have to think about aliasing bugs. You're free to store to references without worrying about iterator invalidation or other side effects leading to use-after-free defects. +[**exclusive1.cxx**](https://github.com/cppalliance/safe-cpp/blob/master/lifetimes/exclusive1.cxx) -- [(Compiler Explorer)](https://godbolt.org/z/xEh9arYK4) +```cpp +#feature on safety + +void f(int% x, int% y) safe; + +void g(int& x, int& y) safe { + unsafe { + // Enter an unsafe block to dereference legacy references. + // The precondition to the unsafe-block is that the legacy + // references *do not alias* and *do not dangle*. + f(%*x, %*y); + } +} + +void f(int% x, int% y) safe { + // We can demote safe references to legacy references without + // an unsafe block. There are no preconditions to enforce. + g(&*x, &*y); +} +``` + +While safe references and legacy references are different types, they're inter-convertible. Converting a safe reference to a legacy reference can be done safely, because it doesn't involve any preconditions. Function `f` converts a safe reference `x` to an lvalue reference with a dereference and borrow: `&*x`. Going the other way is unsafe: the precondition of the _unsafe-block_ is that the legacy references _do not alias_ and _do not dangle_: `%*x`. 
+ ## Constraint rules This proposal implements two sets of constraint rules. Free functions constrain return references by the shortest of the argument lifetimes. Non-static member functions constrain return references by the implicit object lifetime. @@ -392,9 +416,9 @@ Robust support for user-defined types with reference data members isn't just a c What are some options for RAII reference semantics? -* Coroutines. This is the Hylo strategy. The ramp function locks a mutex and returns a safe reference to the data within. The continuation unlocks the mutex. The reference to the mutex is kept in the coroutine frame. But this still reduces to supporting structs with reference data members. In this case it's not a user-defined type, but a compiler-defined coroutine frame. The coroutine solution is an unidiomatic fit for C++ for several reasons: static allocation of the coroutine frame requires exposing the definition of the coroutine to the caller, which breaks C++'s approach to modularity; the continuation is called immediately after the last use of the yielded reference, which runs counter to expectation that cleanup runs at the end of the enclosing scope; and since the continuation is called implicitly, there's nothing textual on the caller side to indicate an unlock. -* Defer expressions. Some garbage-collected languages include _defer_ expressions, which run after some condition is met. We could defer a call to the mutex unlock until the end of the enclosing lexical scope. This has the benefit of being explicit to the caller and not requiring computation of a coroutine frame. But it introduces a fundamental new control flow mechanism to the language with applicability that almost perfectly overlaps with destructors. -* Destructors. This is the idiomatic C++ choice. A local object is destroyed when it goes out of scope (or is dropped, with the Safe C++ `drop` keyword). The destructor calls the mutex unlock. +* **Coroutines**. This is the Hylo strategy. 
The ramp function locks a mutex and returns a safe reference to the data within. The continuation unlocks the mutex. The reference to the mutex is kept in the coroutine frame. But this still reduces to supporting structs with reference data members. In this case it's not a user-defined type, but a compiler-defined coroutine frame. The coroutine solution is an unidiomatic fit for C++ for several reasons: static allocation of the coroutine frame requires exposing the definition of the coroutine to the caller, which breaks C++'s approach to modularity; the continuation is called immediately after the last use of the yielded reference, which runs counter to the expectation that cleanup runs at the end of the enclosing scope; and since the continuation is called implicitly, there's nothing textual on the caller side to indicate an unlock. +* **Defer expressions**. Some garbage-collected languages include _defer_ expressions, which run after some condition is met. We could defer a call to the mutex unlock until the end of the enclosing lexical scope. This has the benefit of being explicit to the caller and not requiring computation of a coroutine frame. But it introduces a fundamental new control flow mechanism to the language with applicability that almost perfectly overlaps with destructors. +* **Destructors**. This is the idiomatic C++ choice. A local object is destroyed when it goes out of scope (or is dropped, with the Safe C++ `drop` keyword). The destructor calls the mutex unlock. It makes sense to strengthen safe references to support current RAII practice. How do we support safe references as data members? A reasonable starting point is to declare a class as having _safe reference semantics_. `class name %;` is a possible syntax. Inside these classes, you can declare data members and base classes with safe reference semantics: that includes both safe references and other classes with safe reference semantics. 
@@ -409,7 +433,7 @@ public: }; ``` -The constraint rules can apply to the new `lock_guard` class exactly as it applies to safe references. Returning a `lock_guard` constraints its lifetime by the lifetimes of the function arguments. Transitively, the lifetimes of the data members are constrained by the lifetime of the containing class. +The constraint rules can apply to the new `lock_guard` class exactly as it applies to safe references. Returning a `lock_guard` constrains its lifetime by the lifetimes of the function arguments. Transitively, the lifetimes of the data members are constrained by the lifetime of the containing class. Unfortunately, we run into problems immediately upon declaring member functions that take safe reference objects or safe reference parameter types. diff --git a/lifetimes/exclusive1.cxx b/lifetimes/exclusive1.cxx new file mode 100644 index 0000000..ba3e86c --- /dev/null +++ b/lifetimes/exclusive1.cxx @@ -0,0 +1,18 @@ +#feature on safety + +void f(int% x, int% y) safe; + +void g(int& x, int& y) safe { + unsafe { + // Enter an unsafe block to dereference legacy references. + // The precondition to the unsafe-block is that the legacy + // references *do not alias* and *do not dangle*. + f(%*x, %*y); + } +} + +void f(int% x, int% y) safe { + // We can demote safe references to legacy references without + // an unsafe block. There are no preconditions to enforce. + g(&*x, &*y); +} \ No newline at end of file From 442225ffea046cb83740dea42ad1d2299259c88c Mon Sep 17 00:00:00 2001 From: Sean Baxter Date: Mon, 14 Oct 2024 15:38:51 -0400 Subject: [PATCH 19/27] Updated exclusivity example --- docs/P3444R0.html | 12 ++++++++---- lifetimes/P3444R0.md | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/docs/P3444R0.html b/docs/P3444R0.html index 97589b3..97fbfe4 100644 --- a/docs/P3444R0.html +++ b/docs/P3444R0.html @@ -584,10 +584,14 @@

    1.1[safety] feature. We’ve already stumbled on a soundness bug.

    Coming from the other direction, it may be necessary to form aliasing -references just to use the APIs for existing code. Consider a function -that takes an lvalue reference to a container and an lvalue reference to -one of its elements. If safe code can’t even form aliased lvalue -references, it wouldn’t be able to use that API at all.

    +references just to use the APIs for existing code. Consider a call to +vec.push_back(vec[0]). +This is impossible to express without mutable aliasing: we form +a mutable lvalue reference to vec +and a const lvalue reference to one of +vec’s elements. If safe code can’t +even form aliased lvalue references, it won’t be able to use this API at +all.

    Exclusivity is a program-wide invariant on safe references. We need separate safe and unsafe reference types for both soundness and expressiveness.

    diff --git a/lifetimes/P3444R0.md b/lifetimes/P3444R0.md index fe66d6e..3cc7e4f 100644 --- a/lifetimes/P3444R0.md +++ b/lifetimes/P3444R0.md @@ -80,7 +80,7 @@ With a desire to simplify, you may suggest "rather than adding a new safe refere If safe code calls legacy code that returns a struct with a pair of references, do those references alias? Of course they may alias, but the parsimonious treatment claims that mutable references don't alias under the `[safety]` feature. We've already stumbled on a soundness bug. -Coming from the other direction, it may be necessary to form aliasing references just to use the APIs for existing code. Consider a function that takes an lvalue reference to a container and an lvalue reference to one of its elements. If safe code can't even form aliased lvalue references, it wouldn't be able to use that API at all. +Coming from the other direction, it may be necessary to form aliasing references just to use the APIs for existing code. Consider a call to `vec.push_back(vec[0])`. This is _impossible to express_ without mutable aliasing: we form a mutable lvalue reference to `vec` and a const lvalue reference to one of `vec`'s elements. If safe code can't even form aliased lvalue references, it won't be able to use this API at all. Exclusivity is a program-wide invariant on safe references. We need separate safe and unsafe reference types for both soundness and expressiveness. 
From 5bee7ea533be51c4e19a0c7c496f27965d5c200c Mon Sep 17 00:00:00 2001 From: Sean Baxter Date: Mon, 14 Oct 2024 17:36:00 -0400 Subject: [PATCH 20/27] Put the P and D version in sync --- docs/P3444R0.html | 8 +- docs/draft-lifetimes.html | 478 +++++++++++++++++++---------------- lifetimes/P3444R0.md | 4 +- lifetimes/draft-lifetimes.md | 46 +++- 4 files changed, 301 insertions(+), 235 deletions(-) diff --git a/docs/P3444R0.html b/docs/P3444R0.html index 97fbfe4..1fa63a4 100644 --- a/docs/P3444R0.html +++ b/docs/P3444R0.html @@ -5,7 +5,7 @@ - Memory safety without lifetime parameters + Memory Safety without Lifetime Parameters

Document #:DXXXXD3444
Date: