Skip to content

Commit 602fbbb

Browse files
committed
[df] Add RNTupleWriteOptions to RSnapshotOptions
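N.B.: compression settings set directly through the snapshot options (fCompressionAlgorithm, fCompressionLevel) are propagated to the RNTuple write options only if the compression in the write options is still at its default value; when both are set, the RNTuple write options take precedence.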
1 parent db22551 commit 602fbbb

File tree

3 files changed

+90
-11
lines changed

3 files changed

+90
-11
lines changed

tree/dataframe/inc/ROOT/RSnapshotOptions.hxx

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,9 @@
1111
#ifndef ROOT_RSNAPSHOTOPTIONS
1212
#define ROOT_RSNAPSHOTOPTIONS
1313

14+
#include "ROOT/RNTupleWriteOptions.hxx"
1415
#include <Compression.h>
15-
#include <optional>
16+
1617
#include <string_view>
1718
#include <string>
1819

@@ -31,6 +32,7 @@ struct RSnapshotOptions {
3132
RSnapshotOptions() = default;
3233
RSnapshotOptions(std::string_view mode, ECAlgo comprAlgo, int comprLevel, int autoFlush, int splitLevel, bool lazy,
3334
bool overwriteIfExists = false, bool vector2RVec = true, int basketSize = -1,
35+
ROOT::RNTupleWriteOptions ntupleWriteOpts = ROOT::RNTupleWriteOptions(),
3436
ESnapshotOutputFormat outputFormat = ESnapshotOutputFormat::kDefault)
3537
: fMode(mode),
3638
fCompressionAlgorithm(comprAlgo),
@@ -41,6 +43,7 @@ struct RSnapshotOptions {
4143
fOverwriteIfExists(overwriteIfExists),
4244
fVector2RVec(vector2RVec),
4345
fBasketSize(basketSize),
46+
fNTupleWriteOpts(ntupleWriteOpts),
4447
fOutputFormat(outputFormat)
4548
{
4649
}
@@ -55,7 +58,8 @@ struct RSnapshotOptions {
5558
bool fVector2RVec = true; ///< If set to true will convert std::vector columns to RVec when saving to disk
5659
int fBasketSize = -1; ///< Set a custom basket size option. For more details, see
5760
///< https://root.cern/manual/trees/#baskets-clusters-and-the-tree-header
58-
ESnapshotOutputFormat fOutputFormat = ESnapshotOutputFormat::kDefault; ///< Which data format to write to
61+
ROOT::RNTupleWriteOptions fNTupleWriteOpts = ROOT::RNTupleWriteOptions(); ///< RNTuple-specific write options
62+
ESnapshotOutputFormat fOutputFormat = ESnapshotOutputFormat::kDefault; ///< Which data format to write to
5963
};
6064
} // namespace RDF
6165
} // namespace ROOT

tree/dataframe/src/RDFSnapshotHelpers.cxx

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -852,8 +852,14 @@ void ROOT::Internal::RDF::UntypedSnapshotRNTupleHelper::Initialize()
852852
}
853853
model->Freeze();
854854

855-
ROOT::RNTupleWriteOptions writeOptions;
856-
writeOptions.SetCompression(fOptions.fCompressionAlgorithm, fOptions.fCompressionLevel);
855+
// Propagate fOptions.fCompressionAlgorithm and fOptions.fCompressionLevel to fOptions.fNTupleWriteOpts *only* if the
856+
// compression settings in fNTupleWriteOpts have not been changed, and the compression algorithm or level in fOptions
857+
// have.
858+
if (fOptions.fNTupleWriteOpts.GetCompression() == RCompressionSetting::EDefaults::kUseGeneralPurpose &&
859+
(fOptions.fCompressionAlgorithm != RCompressionSetting::EAlgorithm::kZLIB ||
860+
fOptions.fCompressionLevel != RCompressionSetting::ELevel::kDefaultZLIB)) {
861+
fOptions.fNTupleWriteOpts.SetCompression(fOptions.fCompressionAlgorithm, fOptions.fCompressionLevel);
862+
}
857863

858864
fOutputFile.reset(TFile::Open(fFileName.c_str(), fOptions.fMode.c_str()));
859865
if (!fOutputFile)
@@ -871,7 +877,8 @@ void ROOT::Internal::RDF::UntypedSnapshotRNTupleHelper::Initialize()
871877

872878
// The RNTupleParallelWriter has exclusive access to the underlying TFile, no further synchronization is needed for
873879
// calls to Fill() (in Exec) and FlushCluster() (in FinalizeTask).
874-
fWriter = ROOT::Experimental::RNTupleParallelWriter::Append(std::move(model), fNTupleName, *outputDir, writeOptions);
880+
fWriter = ROOT::Experimental::RNTupleParallelWriter::Append(std::move(model), fNTupleName, *outputDir,
881+
fOptions.fNTupleWriteOpts);
875882
}
876883

877884
void ROOT::Internal::RDF::UntypedSnapshotRNTupleHelper::InitTask(TTreeReader *, unsigned int slot)

tree/dataframe/test/dataframe_snapshot_ntuple.cxx

Lines changed: 74 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -101,25 +101,93 @@ TEST(RDFSnapshotRNTuple, LazyNotTriggered)
101101
EXPECT_TRUE(gSystem->AccessPathName(fileGuard.GetPath().c_str()));
102102
}
103103

104-
TEST(RDFSnapshotRNTuple, Compression)
104+
TEST(RDFSnapshotRNTuple, WriteOpts)
105105
{
106-
FileRAII fileGuard{"RDFSnapshotRNTuple_compression.root"};
106+
FileRAII fileGuard{"RDFSnapshotRNTuple_write_opts.root"};
107107
const std::vector<std::string> columns = {"x"};
108108

109109
auto df = ROOT::RDataFrame(25ull).Define("x", [] { return 10; });
110110

111+
ROOT::RNTupleWriteOptions writeOpts;
112+
writeOpts.SetEnablePageChecksums(false);
113+
111114
RSnapshotOptions opts;
112115
opts.fOutputFormat = ROOT::RDF::ESnapshotOutputFormat::kRNTuple;
113-
opts.fCompressionAlgorithm = ROOT::RCompressionSetting::EAlgorithm::kLZ4;
114-
opts.fCompressionLevel = 4;
116+
opts.fNTupleWriteOpts = writeOpts;
115117

116118
auto sdf = df.Snapshot("ntuple", fileGuard.GetPath(), "x", opts);
117119

118120
EXPECT_EQ(columns, sdf->GetColumnNames());
119121

120122
auto reader = RNTupleReader::Open("ntuple", fileGuard.GetPath());
121-
auto compSettings = *reader->GetDescriptor().GetClusterDescriptor(0).GetColumnRange(0).GetCompressionSettings();
122-
EXPECT_EQ(404, compSettings);
123+
EXPECT_FALSE(reader->GetDescriptor().GetClusterDescriptor(0).GetPageRange(0).GetPageInfos()[0].HasChecksum());
124+
}
125+
126+
TEST(RDFSnapshotRNTuple, Compression)
127+
{
128+
FileRAII fileGuard{"RDFSnapshotRNTuple_compression.root"};
129+
const std::vector<std::string> columns = {"x"};
130+
131+
auto df = ROOT::RDataFrame(25ull).Define("x", [] { return 10; });
132+
133+
// Default should be taken from RNTupleWriteOptions provided by RSnapshotOptions (zstd)
134+
{
135+
RSnapshotOptions opts;
136+
opts.fOutputFormat = ROOT::RDF::ESnapshotOutputFormat::kRNTuple;
137+
138+
auto sdf = df.Snapshot("ntuple", fileGuard.GetPath(), "x", opts);
139+
140+
EXPECT_EQ(columns, sdf->GetColumnNames());
141+
142+
auto reader = RNTupleReader::Open("ntuple", fileGuard.GetPath());
143+
auto compSettings = *reader->GetDescriptor().GetClusterDescriptor(0).GetColumnRange(0).GetCompressionSettings();
144+
EXPECT_EQ(505, compSettings);
145+
}
146+
// Directly through RSnapshotOptions
147+
{
148+
RSnapshotOptions opts;
149+
opts.fOutputFormat = ROOT::RDF::ESnapshotOutputFormat::kRNTuple;
150+
opts.fCompressionAlgorithm = ROOT::RCompressionSetting::EAlgorithm::kLZ4;
151+
opts.fCompressionLevel = 4;
152+
153+
auto sdf = df.Snapshot("ntuple", fileGuard.GetPath(), "x", opts);
154+
155+
EXPECT_EQ(columns, sdf->GetColumnNames());
156+
157+
auto reader = RNTupleReader::Open("ntuple", fileGuard.GetPath());
158+
auto compSettings = *reader->GetDescriptor().GetClusterDescriptor(0).GetColumnRange(0).GetCompressionSettings();
159+
EXPECT_EQ(404, compSettings);
160+
}
161+
// Through RNTupleWriteOptions provided by RSnapshotOptions
162+
{
163+
RSnapshotOptions opts;
164+
opts.fOutputFormat = ROOT::RDF::ESnapshotOutputFormat::kRNTuple;
165+
opts.fNTupleWriteOpts.SetCompression(404);
166+
167+
auto sdf = df.Snapshot("ntuple", fileGuard.GetPath(), "x", opts);
168+
169+
EXPECT_EQ(columns, sdf->GetColumnNames());
170+
171+
auto reader = RNTupleReader::Open("ntuple", fileGuard.GetPath());
172+
auto compSettings = *reader->GetDescriptor().GetClusterDescriptor(0).GetColumnRange(0).GetCompressionSettings();
173+
EXPECT_EQ(404, compSettings);
174+
}
175+
// When both are set, preference is given to RNTupleWriteOptions
176+
{
177+
RSnapshotOptions opts;
178+
opts.fOutputFormat = ROOT::RDF::ESnapshotOutputFormat::kRNTuple;
179+
opts.fCompressionAlgorithm = ROOT::RCompressionSetting::EAlgorithm::kLZ4;
180+
opts.fCompressionLevel = 4;
181+
opts.fNTupleWriteOpts.SetCompression(207);
182+
183+
auto sdf = df.Snapshot("ntuple", fileGuard.GetPath(), "x", opts);
184+
185+
EXPECT_EQ(columns, sdf->GetColumnNames());
186+
187+
auto reader = RNTupleReader::Open("ntuple", fileGuard.GetPath());
188+
auto compSettings = *reader->GetDescriptor().GetClusterDescriptor(0).GetColumnRange(0).GetCompressionSettings();
189+
EXPECT_EQ(207, compSettings);
190+
}
123191
}
124192

125193
class RDFSnapshotRNTupleTest : public ::testing::Test {

0 commit comments

Comments
 (0)