Skip to content

Commit e7c892c

Browse files
committed
[ntuple] schema evolution for fixed-size array --> std::vector<bool>
This reverts the addition of RArrayAsBoolVectorField. It seems that for the std::vector<bool> specialization, it is easier to directly add the support for schema evolution from fixed-size arrays. This is different from the general RVectorField case because the bool vector is much simpler to begin with and it is also already inherently slow.
1 parent dcc3429 commit e7c892c

File tree

5 files changed

+71
-168
lines changed

5 files changed

+71
-168
lines changed

tree/ntuple/inc/ROOT/RField/RFieldSequenceContainer.hxx

Lines changed: 4 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,9 @@ template <>
284284
class RField<std::vector<bool>> final : public RFieldBase {
285285
private:
286286
ROOT::Internal::RColumnIndex fNWritten{0};
287+
/// If schema-evolved from an std::array, fOnDiskNRepetition is > 0 and there will be no
288+
/// principal column.
289+
std::size_t fOnDiskNRepetitions = 0;
287290

288291
protected:
289292
std::unique_ptr<RFieldBase> CloneImpl(std::string_view newName) const final
@@ -300,8 +303,8 @@ protected:
300303

301304
std::size_t AppendImpl(const void *from) final;
302305
void ReadGlobalImpl(ROOT::NTupleSize_t globalIndex, void *to) final;
306+
void ReadInClusterImpl(ROOT::RNTupleLocalIndex localIndex, void *to) final;
303307

304-
std::unique_ptr<RFieldBase> BeforeConnectPageSource(ROOT::Internal::RPageSource &pageSource) final;
305308
void ReconcileOnDiskField(const RNTupleDescriptor &desc) final;
306309

307310
void CommitClusterImpl() final { fNWritten = 0; }
@@ -417,48 +420,6 @@ public:
417420
void AcceptVisitor(ROOT::Detail::RFieldVisitor &visitor) const final;
418421
};
419422

420-
/**
421-
\class ROOT::RArrayAsBoolVectorField
422-
\brief A field for fixed-size arrays that are represented as std::vector<bool> in memory.
423-
\ingroup NTuple
424-
This class is used only for reading. In particular, it helps for schema evolution of fixed-size arrays into vectors.
425-
*/
426-
class RArrayAsBoolVectorField final : public RFieldBase {
427-
private:
428-
std::size_t fArrayLength; ///< The length of the arrays in this field
429-
430-
protected:
431-
std::unique_ptr<RFieldBase> CloneImpl(std::string_view newName) const final;
432-
433-
void GenerateColumns() final;
434-
using RFieldBase::GenerateColumns;
435-
436-
void ConstructValue(void *where) const final { new (where) std::vector<bool>(); }
437-
std::unique_ptr<RDeleter> GetDeleter() const final { return std::make_unique<RTypedDeleter<std::vector<bool>>>(); }
438-
439-
void ReadGlobalImpl(ROOT::NTupleSize_t globalIndex, void *to) final;
440-
void ReadInClusterImpl(RNTupleLocalIndex localIndex, void *to) final;
441-
442-
void ReconcileOnDiskField(const RNTupleDescriptor &desc) final;
443-
444-
public:
445-
/// The `itemField` argument represents the inner item of the on-disk array,
446-
/// i.e. for an `std::array<bool>` it is the `bool`
447-
RArrayAsBoolVectorField(std::string_view fieldName, std::unique_ptr<RField<bool>> itemField,
448-
std::size_t arrayLength);
449-
RArrayAsBoolVectorField(const RArrayAsBoolVectorField &other) = delete;
450-
RArrayAsBoolVectorField &operator=(const RArrayAsBoolVectorField &other) = delete;
451-
RArrayAsBoolVectorField(RArrayAsBoolVectorField &&other) = default;
452-
RArrayAsBoolVectorField &operator=(RArrayAsBoolVectorField &&other) = default;
453-
~RArrayAsBoolVectorField() final = default;
454-
455-
size_t GetValueSize() const final { return sizeof(std::vector<bool>); }
456-
size_t GetAlignment() const final { return std::alignment_of<std::vector<bool>>(); }
457-
458-
std::vector<RFieldBase::RValue> SplitValue(const RFieldBase::RValue &value) const final;
459-
void AcceptVisitor(ROOT::Detail::RFieldVisitor &visitor) const final;
460-
};
461-
462423
} // namespace ROOT
463424

464425
#endif

tree/ntuple/inc/ROOT/RFieldVisitor.hxx

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@ public:
6262
virtual void VisitArrayField(const ROOT::RArrayField &field) { VisitField(field); }
6363
virtual void VisitArrayAsRVecField(const ROOT::RArrayAsRVecField &field) { VisitField(field); }
6464
virtual void VisitArrayAsVectorField(const ROOT::RArrayAsVectorField &field) { VisitField(field); }
65-
virtual void VisitArrayAsBoolVectorField(const ROOT::RArrayAsBoolVectorField &field) { VisitField(field); }
6665
virtual void VisitAtomicField(const ROOT::RAtomicField &field) { VisitField(field); }
6766
virtual void VisitBitsetField(const ROOT::RBitsetField &field) { VisitField(field); }
6867
virtual void VisitBoolField(const ROOT::RField<bool> &field) { VisitField(field); }
@@ -238,7 +237,6 @@ public:
238237
void VisitArrayField(const ROOT::RArrayField &field) final;
239238
void VisitArrayAsRVecField(const ROOT::RArrayAsRVecField &field) final;
240239
void VisitArrayAsVectorField(const ROOT::RArrayAsVectorField &field) final;
241-
void VisitArrayAsBoolVectorField(const ROOT::RArrayAsBoolVectorField &field) final;
242240
void VisitClassField(const ROOT::RClassField &field) final;
243241
void VisitTObjectField(const ROOT::RField<TObject> &field) final;
244242
void VisitStreamerField(const ROOT::RStreamerField &field) final;

tree/ntuple/src/RFieldSequenceContainer.cxx

Lines changed: 65 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -112,22 +112,6 @@ std::vector<ROOT::RFieldBase::RValue> SplitVector(std::shared_ptr<void> valuePtr
112112
return result;
113113
}
114114

115-
std::vector<ROOT::RFieldBase::RValue>
116-
SplitBoolVector(std::shared_ptr<std::vector<bool>> boolVec, ROOT::RFieldBase &itemField)
117-
{
118-
auto count = boolVec->size();
119-
std::vector<ROOT::RFieldBase::RValue> result;
120-
result.reserve(count);
121-
for (unsigned i = 0; i < count; ++i) {
122-
if ((*boolVec)[i]) {
123-
result.emplace_back(itemField.BindValue(std::shared_ptr<bool>(new bool(true))));
124-
} else {
125-
result.emplace_back(itemField.BindValue(std::shared_ptr<bool>(new bool(false))));
126-
}
127-
}
128-
return result;
129-
}
130-
131115
} // anonymous namespace
132116

133117
ROOT::RArrayField::RArrayField(std::string_view fieldName, std::unique_ptr<RFieldBase> itemField,
@@ -785,15 +769,47 @@ void ROOT::RField<std::vector<bool>>::ReadGlobalImpl(ROOT::NTupleSize_t globalIn
785769
{
786770
auto typedValue = static_cast<std::vector<bool> *>(to);
787771

788-
ROOT::NTupleSize_t nItems;
789-
RNTupleLocalIndex collectionStart;
790-
fPrincipalColumn->GetCollectionInfo(globalIndex, &collectionStart, &nItems);
772+
if (fOnDiskNRepetitions == 0) {
773+
ROOT::NTupleSize_t nItems;
774+
RNTupleLocalIndex collectionStart;
775+
fPrincipalColumn->GetCollectionInfo(globalIndex, &collectionStart, &nItems);
776+
typedValue->resize(nItems);
777+
for (std::size_t i = 0; i < nItems; ++i) {
778+
bool bval;
779+
CallReadOn(*fSubfields[0], collectionStart + i, &bval);
780+
(*typedValue)[i] = bval;
781+
}
782+
} else {
783+
typedValue->resize(fOnDiskNRepetitions);
784+
for (std::size_t i = 0; i < fOnDiskNRepetitions; ++i) {
785+
bool bval;
786+
CallReadOn(*fSubfields[0], globalIndex * fOnDiskNRepetitions + i, &bval);
787+
(*typedValue)[i] = bval;
788+
}
789+
}
790+
}
791791

792-
typedValue->resize(nItems);
793-
for (unsigned i = 0; i < nItems; ++i) {
794-
bool bval;
795-
CallReadOn(*fSubfields[0], collectionStart + i, &bval);
796-
(*typedValue)[i] = bval;
792+
void ROOT::RField<std::vector<bool>>::ReadInClusterImpl(ROOT::RNTupleLocalIndex localIndex, void *to)
793+
{
794+
auto typedValue = static_cast<std::vector<bool> *>(to);
795+
796+
if (fOnDiskNRepetitions == 0) {
797+
ROOT::NTupleSize_t nItems;
798+
RNTupleLocalIndex collectionStart;
799+
fPrincipalColumn->GetCollectionInfo(localIndex, &collectionStart, &nItems);
800+
typedValue->resize(nItems);
801+
for (std::size_t i = 0; i < nItems; ++i) {
802+
bool bval;
803+
CallReadOn(*fSubfields[0], collectionStart + i, &bval);
804+
(*typedValue)[i] = bval;
805+
}
806+
} else {
807+
typedValue->resize(fOnDiskNRepetitions);
808+
for (std::size_t i = 0; i < fOnDiskNRepetitions; ++i) {
809+
bool bval;
810+
CallReadOn(*fSubfields[0], localIndex * fOnDiskNRepetitions + i, &bval);
811+
(*typedValue)[i] = bval;
812+
}
797813
}
798814
}
799815

@@ -803,7 +819,7 @@ const ROOT::RFieldBase::RColumnRepresentations &ROOT::RField<std::vector<bool>>:
803819
{ENTupleColumnType::kIndex64},
804820
{ENTupleColumnType::kSplitIndex32},
805821
{ENTupleColumnType::kIndex32}},
806-
{});
822+
{{}});
807823
return representations;
808824
}
809825

@@ -814,36 +830,39 @@ void ROOT::RField<std::vector<bool>>::GenerateColumns()
814830

815831
void ROOT::RField<std::vector<bool>>::GenerateColumns(const ROOT::RNTupleDescriptor &desc)
816832
{
817-
GenerateColumnsImpl<ROOT::Internal::RColumnIndex>(desc);
833+
if (fOnDiskNRepetitions == 0)
834+
GenerateColumnsImpl<ROOT::Internal::RColumnIndex>(desc);
818835
}
819836

820-
std::unique_ptr<ROOT::RFieldBase>
821-
ROOT::RField<std::vector<bool>>::BeforeConnectPageSource(Internal::RPageSource &pageSource)
837+
void ROOT::RField<std::vector<bool>>::ReconcileOnDiskField(const RNTupleDescriptor &desc)
822838
{
823-
if (GetOnDiskId() == kInvalidDescriptorId)
824-
return nullptr;
825-
826-
const auto descGuard = pageSource.GetSharedDescriptorGuard();
827-
const auto &fieldDesc = descGuard->GetFieldDescriptor(GetOnDiskId());
839+
const auto &fieldDesc = desc.GetFieldDescriptor(GetOnDiskId());
828840
if (fieldDesc.GetTypeName().rfind("std::array<", 0) == 0) {
829-
auto itemClone = fSubfields[0]->Clone(fSubfields[0]->GetFieldName()).release();
830-
auto substitute = std::make_unique<RArrayAsBoolVectorField>(
831-
GetFieldName(), std::unique_ptr<RField<bool>>(static_cast<RField<bool> *>(itemClone)),
832-
fieldDesc.GetNRepetitions());
833-
substitute->SetOnDiskId(GetOnDiskId());
834-
return substitute;
841+
EnsureMatchingOnDiskField(fieldDesc, kDiffTypeName | kDiffStructure | kDiffNRepetitions).ThrowOnError();
842+
if (fieldDesc.GetNRepetitions() == 0)
843+
throw RException(R__FAIL("fixed-size array --> std::vector<bool>: expected repetition count > 0"));
844+
if (fieldDesc.GetStructure() != ENTupleStructure::kPlain)
845+
throw RException(R__FAIL("fixed-size array --> std::vector<bool>: expected plain on-disk field"));
846+
fOnDiskNRepetitions = fieldDesc.GetNRepetitions();
847+
} else {
848+
EnsureMatchingOnDiskField(fieldDesc, kDiffTypeName).ThrowOnError();
835849
}
836-
return nullptr;
837-
}
838-
839-
void ROOT::RField<std::vector<bool>>::ReconcileOnDiskField(const RNTupleDescriptor &desc)
840-
{
841-
EnsureMatchingOnDiskField(desc.GetFieldDescriptor(GetOnDiskId()), kDiffTypeName).ThrowOnError();
842850
}
843851

844852
std::vector<ROOT::RFieldBase::RValue> ROOT::RField<std::vector<bool>>::SplitValue(const RValue &value) const
845853
{
846-
return SplitBoolVector(value.GetPtr<std::vector<bool>>(), *fSubfields[0]);
854+
const auto typedValue = value.GetPtr<std::vector<bool>>();
855+
auto count = typedValue->size();
856+
std::vector<ROOT::RFieldBase::RValue> result;
857+
result.reserve(count);
858+
for (unsigned i = 0; i < count; ++i) {
859+
if ((*typedValue)[i]) {
860+
result.emplace_back(fSubfields[0]->BindValue(std::shared_ptr<bool>(new bool(true))));
861+
} else {
862+
result.emplace_back(fSubfields[0]->BindValue(std::shared_ptr<bool>(new bool(false))));
863+
}
864+
}
865+
return result;
847866
}
848867

849868
void ROOT::RField<std::vector<bool>>::AcceptVisitor(ROOT::Detail::RFieldVisitor &visitor) const
@@ -1036,71 +1055,3 @@ void ROOT::RArrayAsVectorField::AcceptVisitor(ROOT::Detail::RFieldVisitor &visit
10361055
{
10371056
visitor.VisitArrayAsVectorField(*this);
10381057
}
1039-
1040-
//------------------------------------------------------------------------------
1041-
1042-
ROOT::RArrayAsBoolVectorField::RArrayAsBoolVectorField(std::string_view fieldName,
1043-
std::unique_ptr<ROOT::RField<bool>> itemField,
1044-
std::size_t arrayLength)
1045-
: ROOT::RFieldBase(fieldName, "std::vector<bool>", ROOT::ENTupleStructure::kCollection, false /* isSimple */),
1046-
fArrayLength(arrayLength)
1047-
{
1048-
Attach(std::move(itemField));
1049-
}
1050-
1051-
std::unique_ptr<ROOT::RFieldBase> ROOT::RArrayAsBoolVectorField::CloneImpl(std::string_view newName) const
1052-
{
1053-
auto clone = fSubfields[0]->Clone(fSubfields[0]->GetFieldName()).release();
1054-
auto newItemField = std::unique_ptr<RField<bool>>(static_cast<RField<bool> *>(clone));
1055-
return std::make_unique<RArrayAsBoolVectorField>(newName, std::move(newItemField), fArrayLength);
1056-
}
1057-
1058-
void ROOT::RArrayAsBoolVectorField::GenerateColumns()
1059-
{
1060-
throw RException(R__FAIL("RArrayAsBoolVectorField fields must only be used for reading"));
1061-
}
1062-
1063-
void ROOT::RArrayAsBoolVectorField::ReadGlobalImpl(ROOT::NTupleSize_t globalIndex, void *to)
1064-
{
1065-
auto typedValue = static_cast<std::vector<bool> *>(to);
1066-
1067-
typedValue->resize(fArrayLength);
1068-
for (std::size_t i = 0; i < fArrayLength; ++i) {
1069-
bool val;
1070-
CallReadOn(*fSubfields[0], globalIndex * fArrayLength + i, &val);
1071-
(*typedValue)[i] = val;
1072-
}
1073-
}
1074-
1075-
void ROOT::RArrayAsBoolVectorField::ReadInClusterImpl(ROOT::RNTupleLocalIndex localIndex, void *to)
1076-
{
1077-
auto typedValue = static_cast<std::vector<char> *>(to);
1078-
1079-
typedValue->resize(fArrayLength);
1080-
for (std::size_t i = 0; i < fArrayLength; ++i) {
1081-
bool val;
1082-
CallReadOn(*fSubfields[0], localIndex * fArrayLength + i, &val);
1083-
(*typedValue)[i] = val;
1084-
}
1085-
}
1086-
1087-
void ROOT::RArrayAsBoolVectorField::ReconcileOnDiskField(const RNTupleDescriptor &desc)
1088-
{
1089-
const auto &fieldDesc = desc.GetFieldDescriptor(GetOnDiskId());
1090-
EnsureMatchingOnDiskField(fieldDesc, kDiffTypeName | kDiffTypeVersion | kDiffStructure | kDiffNRepetitions);
1091-
if (fieldDesc.GetTypeName().rfind("std::array<", 0) != 0) {
1092-
throw RException(
1093-
R__FAIL("RArrayAsBoolVectorField " + GetQualifiedFieldName() + " expects an on-disk array field"));
1094-
}
1095-
}
1096-
1097-
std::vector<ROOT::RFieldBase::RValue>
1098-
ROOT::RArrayAsBoolVectorField::SplitValue(const ROOT::RFieldBase::RValue &value) const
1099-
{
1100-
return SplitBoolVector(value.GetPtr<std::vector<bool>>(), *fSubfields[0]);
1101-
}
1102-
1103-
void ROOT::RArrayAsBoolVectorField::AcceptVisitor(ROOT::Detail::RFieldVisitor &visitor) const
1104-
{
1105-
visitor.VisitArrayAsBoolVectorField(*this);
1106-
}

tree/ntuple/src/RFieldVisitor.cxx

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -330,11 +330,6 @@ void ROOT::Internal::RPrintValueVisitor::VisitArrayAsVectorField(const ROOT::RAr
330330
PrintCollection(field);
331331
}
332332

333-
void ROOT::Internal::RPrintValueVisitor::VisitArrayAsBoolVectorField(const ROOT::RArrayAsBoolVectorField &field)
334-
{
335-
PrintCollection(field);
336-
}
337-
338333
void ROOT::Internal::RPrintValueVisitor::VisitStreamerField(const ROOT::RStreamerField &field)
339334
{
340335
PrintIndent();

tree/ntuple/test/ntuple_evolution_type.cxx

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -267,15 +267,13 @@ TEST(RNTupleEvolution, ArrayAsVector)
267267
auto reader = RNTupleReader::Open("ntpl", fileGuard.GetPath());
268268

269269
auto aAsShort = reader->GetView<std::vector<short>>("a");
270-
const auto &f1 = aAsShort.GetField(); // necessary to silence clang warning
271-
EXPECT_EQ(typeid(f1), typeid(ROOT::RArrayAsVectorField));
270+
const auto &f = aAsShort.GetField(); // necessary to silence clang warning
271+
EXPECT_EQ(typeid(f), typeid(ROOT::RArrayAsVectorField));
272272
EXPECT_EQ(2u, aAsShort(0).size());
273273
EXPECT_EQ(0, aAsShort(0)[0]);
274274
EXPECT_EQ(1, aAsShort(0)[1]);
275275

276276
auto aAsBool = reader->GetView<std::vector<bool>>("a");
277-
const auto &f2 = aAsBool.GetField(); // necessary to silence clang warning
278-
EXPECT_EQ(typeid(f2), typeid(ROOT::RArrayAsBoolVectorField));
279277
EXPECT_EQ(2u, aAsBool(0).size());
280278
EXPECT_FALSE(aAsBool(0)[0]);
281279
EXPECT_TRUE(aAsBool(0)[1]);

0 commit comments

Comments
 (0)