Skip to content
This repository was archived by the owner on Aug 1, 2024. It is now read-only.

Commit 8e46b3a

Browse files
author
Giuseppe Baccini
committed
rgw/sfs: honor retry_raced_bucket_write mechanism
Updating bucket's metadata concurrently by two or more threads is allowed in radosgw. There is a retry mechanism: retry_raced_bucket_write(), that expects the bucket references to fetch the latest data from the persistent store. rgw/sfs driver didn't implement try_refresh_info() in its bucket class definition; this could cause two references to the same bucket to potentially lead to partial metadata updates. Fixes: https://github.com/aquarist-labs/s3gw/issues/637 Signed-off-by: Giuseppe Baccini <[email protected]>
1 parent 4976c75 commit 8e46b3a

File tree

2 files changed

+263
-3
lines changed

2 files changed

+263
-3
lines changed

src/rgw/driver/sfs/bucket.cc

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -534,11 +534,39 @@ int SFSBucket::abort_multiparts(
534534
return sfs::SFSMultipartUploadV2::abort_multiparts(dpp, store, this);
535535
}
536536

537+
/**
538+
* @brief Refresh this bucket's rep with the state obtained from the store.
539+
* Indeed it can happen that the rep of this bucket is obsolete due to
540+
* concurrent threads updating metadata using their own SFSBucket instance.
541+
*/
537542
int SFSBucket::try_refresh_info(
538543
const DoutPrefixProvider* dpp, ceph::real_time* /*pmtime*/
539544
) {
540-
ldpp_dout(dpp, 10) << __func__ << ": TODO" << dendl;
541-
return -ENOTSUP;
545+
auto bref = store->get_bucket_ref(get_name());
546+
// maybe excessively safe approach: instead of directly assign the returned
547+
// value to this.bucket let's check first if bref is valid.
548+
if (!bref) {
549+
lsfs_dout(dpp, 0) << fmt::format("no such bucket! {}", get_name()) << dendl;
550+
return -ERR_NO_SUCH_BUCKET;
551+
}
552+
bucket = bref;
553+
554+
// update views like we do in the constructor
555+
556+
// info view
557+
get_info() = bucket->get_info();
558+
559+
// attrs view
560+
set_attrs(bucket->get_attrs());
561+
562+
// acls view
563+
auto it = attrs.find(RGW_ATTR_ACL);
564+
if (it != attrs.end()) {
565+
auto lval = it->second.cbegin();
566+
acls.decode(lval);
567+
}
568+
569+
return 0;
542570
}
543571

544572
int SFSBucket::read_usage(

src/test/rgw/sfs/test_rgw_sfs_sfs_bucket.cc

Lines changed: 233 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,49 @@
1515
#include "rgw/driver/sfs/sqlite/sqlite_users.h"
1616
#include "rgw/rgw_sal_sfs.h"
1717

18+
/*
19+
These structs are in-memory mockable versions of actual structs/classes
20+
that have a private rep.
21+
Real types normally populate their rep via encode/decode methods.
22+
For the sake of convenience, we define binary equivalent types with
23+
public editable members.
24+
*/
25+
namespace mockable {
26+
struct DefaultRetention {
27+
std::string mode;
28+
int days;
29+
int years;
30+
31+
bool operator==(const DefaultRetention& other) const {
32+
return this->mode == other.mode && this->days == other.days &&
33+
this->years == other.years;
34+
}
35+
};
36+
37+
struct ObjectLockRule {
38+
mockable::DefaultRetention defaultRetention;
39+
40+
bool operator==(const ObjectLockRule& other) const {
41+
return this->defaultRetention == other.defaultRetention;
42+
}
43+
};
44+
45+
struct RGWObjectLock {
46+
bool enabled;
47+
bool rule_exist;
48+
mockable::ObjectLockRule rule;
49+
50+
bool operator==(const RGWObjectLock& other) const {
51+
return this->enabled == other.enabled &&
52+
this->rule_exist == other.rule_exist && this->rule == other.rule;
53+
}
54+
};
55+
56+
mockable::RGWObjectLock& actual2mock(::RGWObjectLock& actual) {
57+
return (mockable::RGWObjectLock&)actual;
58+
}
59+
} // namespace mockable
60+
1861
/*
1962
HINT
2063
s3gw.db will create here: /tmp/rgw_sfs_tests
@@ -1437,7 +1480,8 @@ TEST_F(TestSFSBucket, ListNamespaceMultipartsBasics) {
14371480
.object_name = "object_name",
14381481
.path_uuid = uuid,
14391482
.meta_str = "metastr",
1440-
.mtime = now};
1483+
.mtime = now
1484+
};
14411485
int id = multipart.insert(mpop);
14421486
ASSERT_GE(id, 0);
14431487

@@ -1453,3 +1497,191 @@ TEST_F(TestSFSBucket, ListNamespaceMultipartsBasics) {
14531497
EXPECT_EQ(results.objs[0].key.name, std::to_string(id));
14541498
EXPECT_EQ(results.objs[0].meta.mtime, now);
14551499
}
1500+
1501+
TEST_F(TestSFSBucket, RacedBucketMetadataWriteOperations) {
1502+
auto ceph_context = std::make_shared<CephContext>(CEPH_ENTITY_TYPE_CLIENT);
1503+
ceph_context->_conf.set_val("rgw_sfs_data_path", getTestDir());
1504+
ceph_context->_log->start();
1505+
auto store = new rgw::sal::SFStore(ceph_context.get(), getTestDir());
1506+
1507+
NoDoutPrefix ndp(ceph_context.get(), 1);
1508+
RGWEnv env;
1509+
env.init(ceph_context.get());
1510+
createUser("usr_id", store->db_conn);
1511+
1512+
rgw_user arg_user("", "usr_id", "");
1513+
auto user = store->get_user(arg_user);
1514+
1515+
rgw_bucket arg_bucket("t_id", "b_name", "");
1516+
rgw_placement_rule arg_pl_rule("default", "STANDARD");
1517+
std::string arg_swift_ver_location;
1518+
RGWQuotaInfo arg_quota_info;
1519+
RGWAccessControlPolicy arg_aclp_d = get_aclp_default();
1520+
rgw::sal::Attrs arg_attrs;
1521+
1522+
RGWBucketInfo arg_info = get_binfo();
1523+
obj_version arg_objv;
1524+
bool existed = false;
1525+
req_info arg_req_info(ceph_context.get(), &env);
1526+
1527+
std::unique_ptr<rgw::sal::Bucket> bucket_from_create;
1528+
1529+
EXPECT_EQ(
1530+
user->create_bucket(
1531+
&ndp, //dpp
1532+
arg_bucket, //b
1533+
"zg1", //zonegroup_id
1534+
arg_pl_rule, //placement_rule
1535+
arg_swift_ver_location, //swift_ver_location
1536+
&arg_quota_info, //pquota_info
1537+
arg_aclp_d, //policy
1538+
arg_attrs, //attrs
1539+
arg_info, //info
1540+
arg_objv, //ep_objv
1541+
false, //exclusive
1542+
false, //obj_lock_enabled
1543+
&existed, //existed
1544+
arg_req_info, //req_info
1545+
&bucket_from_create, //bucket
1546+
null_yield //optional_yield
1547+
),
1548+
0
1549+
);
1550+
1551+
std::unique_ptr<rgw::sal::Bucket> bucket_from_store_1;
1552+
1553+
EXPECT_EQ(
1554+
store->get_bucket(
1555+
&ndp, user.get(), arg_info.bucket, &bucket_from_store_1, null_yield
1556+
),
1557+
0
1558+
);
1559+
1560+
std::unique_ptr<rgw::sal::Bucket> bucket_from_store_2;
1561+
1562+
EXPECT_EQ(
1563+
store->get_bucket(
1564+
&ndp, user.get(), arg_info.bucket, &bucket_from_store_2, null_yield
1565+
),
1566+
0
1567+
);
1568+
1569+
EXPECT_EQ(*bucket_from_store_1, *bucket_from_store_2);
1570+
1571+
// merge_and_store_attrs
1572+
1573+
rgw::sal::Attrs new_attrs;
1574+
RGWAccessControlPolicy arg_aclp = get_aclp_1();
1575+
{
1576+
bufferlist acl_bl;
1577+
arg_aclp.encode(acl_bl);
1578+
new_attrs[RGW_ATTR_ACL] = acl_bl;
1579+
}
1580+
1581+
EXPECT_EQ(
1582+
bucket_from_store_1->merge_and_store_attrs(&ndp, new_attrs, null_yield), 0
1583+
);
1584+
1585+
// assert b1 contains the RGW_ATTR_ACL attribute
1586+
auto acl_bl_1 = bucket_from_store_1->get_attrs().find(RGW_ATTR_ACL);
1587+
EXPECT_NE(bucket_from_store_1->get_attrs().end(), acl_bl_1);
1588+
1589+
// assert b2 does not contain the RGW_ATTR_ACL attribute
1590+
auto acl_bl_2 = bucket_from_store_2->get_attrs().find(RGW_ATTR_ACL);
1591+
EXPECT_EQ(bucket_from_store_2->get_attrs().end(), acl_bl_2);
1592+
1593+
// put_info
1594+
1595+
RGWObjectLock obj_lock;
1596+
mockable::RGWObjectLock& ol = mockable::actual2mock(obj_lock);
1597+
ol.enabled = true;
1598+
ol.rule.defaultRetention.years = 12;
1599+
ol.rule.defaultRetention.days = 31;
1600+
ol.rule.defaultRetention.mode = "GOVERNANCE";
1601+
ol.rule_exist = true;
1602+
1603+
bucket_from_store_2->get_info().obj_lock = obj_lock;
1604+
EXPECT_EQ(bucket_from_store_2->put_info(&ndp, false, real_time()), 0);
1605+
1606+
auto& ol1 = mockable::actual2mock(bucket_from_store_1->get_info().obj_lock);
1607+
auto& ol2 = mockable::actual2mock(bucket_from_store_2->get_info().obj_lock);
1608+
1609+
// obj lock structure in memory cannot be equal at this point in memory for the
1610+
// two b1 and b2 objects; this simulates two threads updating metadata over the
1611+
// same bucket using their own bucket reference (as it happens actually when 2
1612+
// concurrent calls are issued from one or more S3 clients).
1613+
EXPECT_NE(ol1, ol2);
1614+
1615+
// Getting now a third object from the backing store should fetch an image equal to
1616+
// b2 since that object is the latest one that did a put_info().
1617+
// merge_and_store_attrs() done with b1 should now be lost due to b2.put_info().
1618+
std::unique_ptr<rgw::sal::Bucket> bucket_from_store_3;
1619+
EXPECT_EQ(
1620+
store->get_bucket(
1621+
&ndp, user.get(), arg_info.bucket, &bucket_from_store_3, null_yield
1622+
),
1623+
0
1624+
);
1625+
1626+
// ol2 and ol3 should be the same.
1627+
auto& ol3 = mockable::actual2mock(bucket_from_store_3->get_info().obj_lock);
1628+
EXPECT_EQ(ol2, ol3);
1629+
1630+
// We expect to have lost RGW_ATTR_ACL attribute in the backing store.
1631+
auto acl_bl_3 = bucket_from_store_3->get_attrs().find(RGW_ATTR_ACL);
1632+
EXPECT_EQ(bucket_from_store_3->get_attrs().end(), acl_bl_3);
1633+
1634+
// Now we repeat the updates interposing the try_refresh_info() on b2.
1635+
// try_refresh_info() refreshes b2's rep with the state obtained
1636+
// from the store.
1637+
EXPECT_EQ(
1638+
bucket_from_store_1->merge_and_store_attrs(&ndp, new_attrs, null_yield), 0
1639+
);
1640+
EXPECT_EQ(bucket_from_store_2->try_refresh_info(&ndp, nullptr), 0);
1641+
EXPECT_EQ(bucket_from_store_2->put_info(&ndp, false, real_time()), 0);
1642+
1643+
// let's refetch b3 from store.
1644+
EXPECT_EQ(
1645+
store->get_bucket(
1646+
&ndp, user.get(), arg_info.bucket, &bucket_from_store_3, null_yield
1647+
),
1648+
0
1649+
);
1650+
1651+
// Now all the views over b1, b2 and b3 should be the same, given that the
1652+
// underlying reps are (hopefully) the same.
1653+
1654+
// get_info() view
1655+
ol1 = mockable::actual2mock(bucket_from_store_1->get_info().obj_lock);
1656+
ol2 = mockable::actual2mock(bucket_from_store_2->get_info().obj_lock);
1657+
ol3 = mockable::actual2mock(bucket_from_store_3->get_info().obj_lock);
1658+
EXPECT_EQ(ol1, ol2);
1659+
EXPECT_EQ(ol2, ol3);
1660+
1661+
// get_attrs() view and acls views
1662+
acl_bl_1 = bucket_from_store_1->get_attrs().find(RGW_ATTR_ACL);
1663+
EXPECT_NE(bucket_from_store_1->get_attrs().end(), acl_bl_1);
1664+
acl_bl_2 = bucket_from_store_2->get_attrs().find(RGW_ATTR_ACL);
1665+
EXPECT_NE(bucket_from_store_2->get_attrs().end(), acl_bl_2);
1666+
acl_bl_3 = bucket_from_store_3->get_attrs().find(RGW_ATTR_ACL);
1667+
EXPECT_NE(bucket_from_store_3->get_attrs().end(), acl_bl_3);
1668+
1669+
{
1670+
RGWAccessControlPolicy aclp;
1671+
auto ci_lval = acl_bl_1->second.cbegin();
1672+
aclp.decode(ci_lval);
1673+
EXPECT_EQ(aclp, arg_aclp);
1674+
}
1675+
{
1676+
RGWAccessControlPolicy aclp;
1677+
auto ci_lval = acl_bl_2->second.cbegin();
1678+
aclp.decode(ci_lval);
1679+
EXPECT_EQ(aclp, arg_aclp);
1680+
}
1681+
{
1682+
RGWAccessControlPolicy aclp;
1683+
auto ci_lval = acl_bl_3->second.cbegin();
1684+
aclp.decode(ci_lval);
1685+
EXPECT_EQ(aclp, arg_aclp);
1686+
}
1687+
}

0 commit comments

Comments
 (0)