Skip to content

Commit b94a6b8

Browse files
committed
[df] Check format-specific snapshot options
... and warn users when an option has been set that has no effect on the chosen output format.
1 parent 602fbbb commit b94a6b8

File tree

6 files changed

+107
-9
lines changed

6 files changed

+107
-9
lines changed

tree/dataframe/inc/ROOT/RDF/InterfaceUtils.hxx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -800,6 +800,8 @@ struct IsDeque_t<std::deque<T>> : std::true_type {};
800800

801801
void CheckForDuplicateSnapshotColumns(const ColumnNames_t &cols);
802802

803+
void CheckSnapshotOptionsFormatCompatibility(const ROOT::RDF::RSnapshotOptions &opts);
804+
803805
template <typename T>
804806
struct InnerValueType {
805807
using type = T; // fallback for when T is not a nested RVec

tree/dataframe/inc/ROOT/RDF/RInterface.hxx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1376,6 +1376,8 @@ public:
13761376
}
13771377
};
13781378

1379+
RDFInternal::CheckSnapshotOptionsFormatCompatibility(options);
1380+
13791381
if (options.fOutputFormat == ESnapshotOutputFormat::kRNTuple) {
13801382
// The data source of the RNTuple resulting from the Snapshot action does not exist yet here, so we create one
13811383
// without a data source for now, and set it once the actual data source can be created (i.e., after

tree/dataframe/src/RDFInterfaceUtils.cxx

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -939,6 +939,37 @@ void CheckForDuplicateSnapshotColumns(const ColumnNames_t &cols)
939939
}
940940
}
941941

942+
void CheckSnapshotOptionsFormatCompatibility(const ROOT::RDF::RSnapshotOptions &opts)
943+
{
944+
const ROOT::RDF::RSnapshotOptions defaultSnapshotOpts;
945+
if ((opts.fOutputFormat == ROOT::RDF::ESnapshotOutputFormat::kTTree ||
946+
opts.fOutputFormat == ROOT::RDF::ESnapshotOutputFormat::kDefault) &&
947+
opts.fNTupleWriteOpts != defaultSnapshotOpts.fNTupleWriteOpts) {
948+
Warning(
949+
"Snapshot",
950+
"The RNTuple-specific fNTupleWriteOptions option in RSnapshotOptions has been set, but the output format is "
951+
"set to TTree, so this option won't have any effect. Use the other options available in RSnapshotOptions to "
952+
"configure the output TTree. Alternatively, change fOutputFormat to snapshot to RNTuple instead.");
953+
} else if (opts.fOutputFormat == ROOT::RDF::ESnapshotOutputFormat::kRNTuple) {
954+
if (opts.fAutoFlush != defaultSnapshotOpts.fAutoFlush) {
955+
Warning("Snapshot", "The TTree-specific fAutoFlush option in RSnapshotOptions has been set, but the output "
956+
"format is set to RNTuple, so this option won't have any effect. Use the "
957+
"fNTupleWriteOptions option available in RSnapshotOptions to configure the output "
958+
"RNTuple. Alternatively, change fOutputFormat to snapshot to TTree instead.");
959+
} else if (opts.fSplitLevel != defaultSnapshotOpts.fSplitLevel) {
960+
Warning("Snapshot", "The TTree-specific fSplitLevel option in RSnapshotOptions has been set, but the output "
961+
"format is set to RNTuple, so this option won't have any effect. Use the "
962+
"fNTupleWriteOptions option available in RSnapshotOptions to configure the output "
963+
"RNTuple. Alternatively, change fOutputFormat to snapshot to TTree instead.");
964+
} else if (opts.fBasketSize != defaultSnapshotOpts.fBasketSize) {
965+
Warning("Snapshot", "The TTree-specific fBasketSize option in RSnapshotOptions has been set, but the output "
966+
"format is set to RNTuple, so this option won't have any effect. Use the "
967+
"fNTupleWriteOptions option available in RSnapshotOptions to configure the output "
968+
"RNTuple. Alternatively, change fOutputFormat to snapshot to TTree instead.");
969+
}
970+
}
971+
}
972+
942973
/// Return copies of colsWithoutAliases and colsWithAliases with size branches for variable-sized array branches added
943974
/// in the right positions (i.e. before the array branches that need them).
944975
std::pair<std::vector<std::string>, std::vector<std::string>>

tree/dataframe/test/dataframe_snapshot.cxx

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -447,6 +447,18 @@ TEST_F(RDFSnapshot, Snapshot_action_with_options)
447447
test_snapshot_options(tdf);
448448
}
449449

450+
TEST_F(RDFSnapshot, Snapshot_action_warn_on_rntuple_specific_opts)
451+
{
452+
RSnapshotOptions opts;
453+
opts.fNTupleWriteOpts.SetApproxZippedClusterSize(64);
454+
455+
ROOT_EXPECT_WARNING(
456+
tdf.Snapshot("ntuple", "snapshot_test_warn_on_rntuple_specific_opts.root", "", opts), "Snapshot",
457+
"The RNTuple-specific fNTupleWriteOptions option in RSnapshotOptions has been set, but the output format is set "
458+
"to TTree, so this option won't have any effect. Use the other options available in RSnapshotOptions to "
459+
"configure the output TTree. Alternatively, change fOutputFormat to snapshot to RNTuple instead.");
460+
}
461+
450462
void checkSnapshotArrayFile(RResultPtr<RInterface<RLoopManager>> &df, unsigned int kNEvents)
451463
{
452464
// fixedSizeArr and varSizeArr are RResultPtr<vector<vector<T>>>

tree/dataframe/test/dataframe_snapshot_ntuple.cxx

Lines changed: 46 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -108,19 +108,56 @@ TEST(RDFSnapshotRNTuple, WriteOpts)
108108

109109
auto df = ROOT::RDataFrame(25ull).Define("x", [] { return 10; });
110110

111-
ROOT::RNTupleWriteOptions writeOpts;
112-
writeOpts.SetEnablePageChecksums(false);
111+
{
112+
ROOT::RNTupleWriteOptions writeOpts;
113+
writeOpts.SetEnablePageChecksums(false);
113114

114-
RSnapshotOptions opts;
115-
opts.fOutputFormat = ROOT::RDF::ESnapshotOutputFormat::kRNTuple;
116-
opts.fNTupleWriteOpts = writeOpts;
115+
RSnapshotOptions opts;
116+
opts.fOutputFormat = ROOT::RDF::ESnapshotOutputFormat::kRNTuple;
117+
opts.fNTupleWriteOpts = writeOpts;
117118

118-
auto sdf = df.Snapshot("ntuple", fileGuard.GetPath(), "x", opts);
119+
auto sdf = df.Snapshot("ntuple", fileGuard.GetPath(), "x", opts);
119120

120-
EXPECT_EQ(columns, sdf->GetColumnNames());
121+
EXPECT_EQ(columns, sdf->GetColumnNames());
121122

122-
auto reader = RNTupleReader::Open("ntuple", fileGuard.GetPath());
123-
EXPECT_FALSE(reader->GetDescriptor().GetClusterDescriptor(0).GetPageRange(0).GetPageInfos()[0].HasChecksum());
123+
auto reader = RNTupleReader::Open("ntuple", fileGuard.GetPath());
124+
EXPECT_FALSE(reader->GetDescriptor().GetClusterDescriptor(0).GetPageRange(0).GetPageInfos()[0].HasChecksum());
125+
}
126+
127+
// Setting TTree-specific options while the output format is set to RNTuple should result in a warning
128+
{
129+
RSnapshotOptions opts;
130+
opts.fOutputFormat = ROOT::RDF::ESnapshotOutputFormat::kRNTuple;
131+
opts.fAutoFlush = 1;
132+
133+
ROOT_EXPECT_WARNING(df.Snapshot("ntuple", fileGuard.GetPath(), "x", opts), "Snapshot",
134+
"The TTree-specific fAutoFlush option in RSnapshotOptions has been set, but the output "
135+
"format is set to RNTuple, so this option won't have any effect. Use the fNTupleWriteOptions "
136+
"option available in RSnapshotOptions to configure the output RNTuple. Alternatively, change "
137+
"fOutputFormat to snapshot to TTree instead.");
138+
}
139+
{
140+
RSnapshotOptions opts;
141+
opts.fOutputFormat = ROOT::RDF::ESnapshotOutputFormat::kRNTuple;
142+
opts.fSplitLevel = 1;
143+
144+
ROOT_EXPECT_WARNING(df.Snapshot("ntuple", fileGuard.GetPath(), "x", opts), "Snapshot",
145+
"The TTree-specific fSplitLevel option in RSnapshotOptions has been set, but the output "
146+
"format is set to RNTuple, so this option won't have any effect. Use the fNTupleWriteOptions "
147+
"option available in RSnapshotOptions to configure the output RNTuple. Alternatively, change "
148+
"fOutputFormat to snapshot to TTree instead.");
149+
}
150+
{
151+
RSnapshotOptions opts;
152+
opts.fOutputFormat = ROOT::RDF::ESnapshotOutputFormat::kRNTuple;
153+
opts.fBasketSize = 64000;
154+
155+
ROOT_EXPECT_WARNING(df.Snapshot("ntuple", fileGuard.GetPath(), "x", opts), "Snapshot",
156+
"The TTree-specific fBasketSize option in RSnapshotOptions has been set, but the output "
157+
"format is set to RNTuple, so this option won't have any effect. Use the fNTupleWriteOptions "
158+
"option available in RSnapshotOptions to configure the output RNTuple. Alternatively, change "
159+
"fOutputFormat to snapshot to TTree instead.");
160+
}
124161
}
125162

126163
TEST(RDFSnapshotRNTuple, Compression)

tree/ntuple/inc/ROOT/RNTupleWriteOptions.hxx

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,6 +263,20 @@ public:
263263
void SetEnableSamePageMerging(bool val);
264264

265265
std::uint64_t GetMaxKeySize() const { return fMaxKeySize; }
266+
267+
friend bool operator==(const RNTupleWriteOptions &lhs, const RNTupleWriteOptions &rhs)
268+
{
269+
return lhs.fCompression == rhs.fCompression && lhs.fApproxZippedClusterSize == rhs.fApproxZippedClusterSize &&
270+
lhs.fMaxUnzippedClusterSize == rhs.fMaxUnzippedClusterSize &&
271+
lhs.fInitialUnzippedPageSize == rhs.fInitialUnzippedPageSize &&
272+
lhs.fMaxUnzippedPageSize == rhs.fMaxUnzippedPageSize && lhs.fPageBufferBudget == rhs.fPageBufferBudget &&
273+
lhs.fUseBufferedWrite == rhs.fUseBufferedWrite && lhs.fUseDirectIO == rhs.fUseDirectIO &&
274+
lhs.fWriteBufferSize == rhs.fWriteBufferSize && lhs.fUseImplicitMT == rhs.fUseImplicitMT &&
275+
lhs.fEnablePageChecksums == rhs.fEnablePageChecksums &&
276+
lhs.fEnableSamePageMerging == rhs.fEnableSamePageMerging && lhs.fMaxKeySize == rhs.fMaxKeySize;
277+
}
278+
279+
friend bool operator!=(const RNTupleWriteOptions &lhs, const RNTupleWriteOptions &rhs) { return !(lhs == rhs); }
266280
};
267281

268282
namespace Internal {

0 commit comments

Comments
 (0)