Skip to content

Commit e3747e0

Browse files
committed
[ntuple] improve type name renormalization
Add a separate code path for renormalizing a demangled typeid name. Such type names come from RField<T>::TypeName(). In this case, the compiler has already done most of the normalization work, so we don't need to go through ROOT meta. However, some of the steps that ROOT meta performs have to be repeated for this case, e.g. dropping the optional template arguments of stdlib containers.
1 parent d176ad1 commit e3747e0

File tree

6 files changed

+253
-93
lines changed

6 files changed

+253
-93
lines changed

tree/ntuple/inc/ROOT/RField.hxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ public:
284284
template <typename T, typename = void>
285285
class RField final : public RClassField {
286286
public:
287-
static std::string TypeName() { return ROOT::Internal::GetRenormalizedDemangledTypeName(typeid(T)); }
287+
static std::string TypeName() { return ROOT::Internal::GetRenormalizedTypeName(typeid(T)); }
288288
RField(std::string_view name) : RClassField(name, TypeName())
289289
{
290290
static_assert(std::is_class_v<T>, "no I/O support for this basic C++ type");

tree/ntuple/inc/ROOT/RField/RFieldProxiedCollection.hxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ struct IsCollectionProxy : HasCollectionProxyMemberType<T> {
265265
template <typename T>
266266
class RField<T, typename std::enable_if<IsCollectionProxy<T>::value>::type> final : public RProxiedCollectionField {
267267
public:
268-
static std::string TypeName() { return ROOT::Internal::GetRenormalizedDemangledTypeName(typeid(T)); }
268+
static std::string TypeName() { return ROOT::Internal::GetRenormalizedTypeName(typeid(T)); }
269269
RField(std::string_view name) : RProxiedCollectionField(name, TypeName())
270270
{
271271
static_assert(std::is_class<T>::value, "collection proxy unsupported for fundamental types");

tree/ntuple/inc/ROOT/RFieldUtils.hxx

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ std::string GetRenormalizedTypeName(const std::string &metaNormalizedName);
2727

2828
/// Given a type info, ask ROOT meta to demangle it, then renormalize the resulting type name for RNTuple. Useful to
2929
/// ensure that e.g. fundamental types are normalized to the type used by RNTuple (e.g. int -> std::int32_t).
30-
std::string GetRenormalizedDemangledTypeName(const std::type_info &ti);
30+
std::string GetRenormalizedTypeName(const std::type_info &ti);
3131

3232
/// Applies all RNTuple type normalization rules except typedef resolution.
3333
std::string GetNormalizedUnresolvedTypeName(const std::string &origName);
@@ -57,8 +57,10 @@ std::tuple<std::string, std::vector<std::size_t>> ParseArrayType(const std::stri
5757

5858
/// Used in RFieldBase::Create() in order to get the comma-separated list of template types
5959
/// E.g., gets {"int", "std::variant<double,int>"} from "int,std::variant<double,int>".
60+
/// If maxArgs > 0, stop tokenizing once the given number of tokens has been found. Used to strip
61+
/// STL allocator and other optional arguments.
6062
/// TODO(jblomer): Try to merge with TClassEdit::TSplitType
61-
std::vector<std::string> TokenizeTypeList(std::string_view templateType);
63+
std::vector<std::string> TokenizeTypeList(std::string_view templateType, std::size_t maxArgs = 0);
6264

6365
} // namespace Internal
6466
} // namespace ROOT

tree/ntuple/inc/ROOT/RNTupleReader.hxx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -324,7 +324,7 @@ public:
324324
/// \sa GetView(std::string_view, std::shared_ptr<T>)
325325
ROOT::RNTupleView<void> GetView(std::string_view fieldName, void *rawPtr, const std::type_info &ti)
326326
{
327-
return GetView(RetrieveFieldId(fieldName), rawPtr, ROOT::Internal::GetRenormalizedDemangledTypeName(ti));
327+
return GetView(RetrieveFieldId(fieldName), rawPtr, ROOT::Internal::GetRenormalizedTypeName(ti));
328328
}
329329

330330
/// Provides access to an individual (sub)field from its on-disk ID.
@@ -377,7 +377,7 @@ public:
377377
/// \sa GetView(std::string_view, std::shared_ptr<T>)
378378
ROOT::RNTupleView<void> GetView(ROOT::DescriptorId_t fieldId, void *rawPtr, const std::type_info &ti)
379379
{
380-
return GetView(fieldId, rawPtr, ROOT::Internal::GetRenormalizedDemangledTypeName(ti));
380+
return GetView(fieldId, rawPtr, ROOT::Internal::GetRenormalizedTypeName(ti));
381381
}
382382

383383
/// Provides direct access to the I/O buffers of a **mappable** (sub)field.

tree/ntuple/src/RFieldUtils.cxx

Lines changed: 198 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -118,25 +118,14 @@ std::vector<AnglePos> FindTemplateAngleBrackets(const std::string &typeName)
118118
return result;
119119
}
120120

121-
} // namespace
122-
123-
std::string ROOT::Internal::GetCanonicalTypePrefix(const std::string &typeName)
121+
// TClassEdit::CleanType and the name demangling insert blanks between closing angle brackets,
122+
// as they were required before C++11. We want to remove them for RNTuple.
123+
void RemoveSpaceBeforeClosingAngleBracket(std::string &typeName)
124124
{
125-
std::string canonicalType{TClassEdit::CleanType(typeName.c_str(), /*mode=*/1)};
126-
if (canonicalType.substr(0, 7) == "struct ") {
127-
canonicalType.erase(0, 7);
128-
} else if (canonicalType.substr(0, 5) == "enum ") {
129-
canonicalType.erase(0, 5);
130-
} else if (canonicalType.substr(0, 2) == "::") {
131-
canonicalType.erase(0, 2);
132-
}
133-
134-
// TClassEdit::CleanType inserts blanks between closing angle brackets, as they were required before C++11. We want
135-
// to remove them for RNTuple.
136-
auto angle = canonicalType.find('<');
125+
auto angle = typeName.find('<');
137126
if (angle != std::string::npos) {
138-
auto dst = canonicalType.begin() + angle;
139-
auto end = canonicalType.end();
127+
auto dst = typeName.begin() + angle;
128+
auto end = typeName.end();
140129
for (auto src = dst; src != end; ++src) {
141130
if (*src == ' ') {
142131
auto next = src + 1;
@@ -147,9 +136,185 @@ std::string ROOT::Internal::GetCanonicalTypePrefix(const std::string &typeName)
147136
}
148137
*(dst++) = *src;
149138
}
150-
canonicalType.erase(dst, end);
139+
typeName.erase(dst, end);
140+
}
141+
}
142+
143+
// The demangled name adds spaces after commas
144+
void RemoveSpaceAfterComma(std::string &typeName)
145+
{
146+
auto itr = typeName.begin();
147+
while (itr != typeName.end()) {
148+
auto c = *itr;
149+
itr++;
150+
151+
if (c != ',')
152+
continue;
153+
154+
R__ASSERT(itr != typeName.end());
155+
if (*itr == ' ') {
156+
itr = typeName.erase(itr);
157+
}
158+
}
159+
}
160+
161+
// Map fundamental integer types to stdint integer types (e.g. int --> std::int32_t)
162+
void MapIntegerType(std::string &typeName)
163+
{
164+
if (typeName == "signed char") {
165+
typeName = ROOT::RField<signed char>::TypeName();
166+
} else if (typeName == "unsigned char") {
167+
typeName = ROOT::RField<unsigned char>::TypeName();
168+
} else if (typeName == "short" || typeName == "short int" || typeName == "signed short" ||
169+
typeName == "signed short int") {
170+
typeName = ROOT::RField<short int>::TypeName();
171+
} else if (typeName == "unsigned short" || typeName == "unsigned short int") {
172+
typeName = ROOT::RField<unsigned short int>::TypeName();
173+
} else if (typeName == "int" || typeName == "signed" || typeName == "signed int") {
174+
typeName = ROOT::RField<int>::TypeName();
175+
} else if (typeName == "unsigned" || typeName == "unsigned int") {
176+
typeName = ROOT::RField<unsigned int>::TypeName();
177+
} else if (typeName == "long" || typeName == "long int" || typeName == "signed long" ||
178+
typeName == "signed long int") {
179+
typeName = ROOT::RField<long int>::TypeName();
180+
} else if (typeName == "unsigned long" || typeName == "unsigned long int") {
181+
typeName = ROOT::RField<unsigned long int>::TypeName();
182+
} else if (typeName == "long long" || typeName == "long long int" || typeName == "signed long long" ||
183+
typeName == "signed long long int") {
184+
typeName = ROOT::RField<long long int>::TypeName();
185+
} else if (typeName == "unsigned long long" || typeName == "unsigned long long int") {
186+
typeName = ROOT::RField<unsigned long long int>::TypeName();
187+
}
188+
}
189+
190+
std::string GetRenormalizedMetaTypeName(const std::string &metaNormalizedName)
191+
{
192+
const std::string canonicalTypePrefix{ROOT::Internal::GetCanonicalTypePrefix(metaNormalizedName)};
193+
// RNTuple resolves Double32_t for the normalized type name but keeps Double32_t for the type alias
194+
// (also in template parameters)
195+
if (canonicalTypePrefix == "Double32_t")
196+
return "double";
197+
198+
if (canonicalTypePrefix.find('<') == std::string::npos) {
199+
// If there are no templates, the function is done.
200+
return canonicalTypePrefix;
201+
}
202+
203+
const auto angleBrackets = FindTemplateAngleBrackets(canonicalTypePrefix);
204+
R__ASSERT(!angleBrackets.empty());
205+
206+
std::string normName;
207+
std::string::size_type currentPos = 0;
208+
for (std::size_t i = 0; i < angleBrackets.size(); i++) {
209+
const auto [posOpen, posClose] = angleBrackets[i];
210+
// Append the type prefix until the open angle bracket.
211+
normName += canonicalTypePrefix.substr(currentPos, posOpen + 1 - currentPos);
212+
213+
const auto argList = canonicalTypePrefix.substr(posOpen + 1, posClose - posOpen - 1);
214+
const auto templateArgs = ROOT::Internal::TokenizeTypeList(argList);
215+
R__ASSERT(!templateArgs.empty());
216+
217+
for (const auto &a : templateArgs) {
218+
normName += GetNormalizedTemplateArg(a, GetRenormalizedMetaTypeName) + ",";
219+
}
220+
221+
normName[normName.size() - 1] = '>';
222+
currentPos = posClose + 1;
223+
}
224+
225+
// Append the rest of the type from the last closing angle bracket.
226+
const auto lastClosePos = angleBrackets.back().second;
227+
normName += canonicalTypePrefix.substr(lastClosePos + 1);
228+
229+
return normName;
230+
}
231+
232+
std::string GetRenormalizedDemangledTypeName(const std::string &demangledName)
233+
{
234+
std::string canonicalTypePrefix{demangledName};
235+
MapIntegerType(canonicalTypePrefix);
236+
237+
if (canonicalTypePrefix.find('<') == std::string::npos) {
238+
// If there are no templates, the function is done.
239+
return canonicalTypePrefix;
240+
}
241+
RemoveSpaceBeforeClosingAngleBracket(canonicalTypePrefix);
242+
RemoveSpaceAfterComma(canonicalTypePrefix);
243+
244+
const auto angleBrackets = FindTemplateAngleBrackets(canonicalTypePrefix);
245+
R__ASSERT(!angleBrackets.empty());
246+
247+
// Remove optional stdlib template arguments
248+
int maxTemplateArgs = 0;
249+
if (canonicalTypePrefix.rfind("std::vector<", 0) == 0 || canonicalTypePrefix.rfind("std::set<", 0) == 0 ||
250+
canonicalTypePrefix.rfind("std::unordered_set<", 0) == 0 ||
251+
canonicalTypePrefix.rfind("std::multiset<", 0) == 0 ||
252+
canonicalTypePrefix.rfind("std::unordered_multiset<", 0) == 0 ||
253+
canonicalTypePrefix.rfind("std::unique_ptr<", 0) == 0) {
254+
maxTemplateArgs = 1;
255+
} else if (canonicalTypePrefix.rfind("std::map<", 0) == 0 ||
256+
canonicalTypePrefix.rfind("std::unordered_map<", 0) == 0 ||
257+
canonicalTypePrefix.rfind("std::multimap<", 0) == 0 ||
258+
canonicalTypePrefix.rfind("std::unordered_multimap<", 0) == 0) {
259+
maxTemplateArgs = 2;
260+
}
261+
262+
std::string normName;
263+
std::string::size_type currentPos = 0;
264+
for (std::size_t i = 0; i < angleBrackets.size(); i++) {
265+
const auto [posOpen, posClose] = angleBrackets[i];
266+
// Append the type prefix until the open angle bracket.
267+
normName += canonicalTypePrefix.substr(currentPos, posOpen + 1 - currentPos);
268+
269+
const auto argList = canonicalTypePrefix.substr(posOpen + 1, posClose - posOpen - 1);
270+
auto templateArgs = ROOT::Internal::TokenizeTypeList(argList, maxTemplateArgs);
271+
R__ASSERT(!templateArgs.empty());
272+
273+
for (const auto &a : templateArgs) {
274+
normName += GetNormalizedTemplateArg(a, GetRenormalizedDemangledTypeName) + ",";
275+
}
276+
277+
normName[normName.size() - 1] = '>';
278+
currentPos = posClose + 1;
279+
}
280+
281+
// Append the rest of the type from the last closing angle bracket.
282+
const auto lastClosePos = angleBrackets.back().second;
283+
normName += canonicalTypePrefix.substr(lastClosePos + 1);
284+
285+
// Reverse std::string --> std::basic_string<char> typedef
286+
static std::string stringName = []() {
287+
int e;
288+
char *str = TClassEdit::DemangleName(typeid(std::string).name(), e);
289+
R__ASSERT(str && e == 0);
290+
std::string res{str};
291+
free(str);
292+
res.erase(std::remove(res.begin(), res.end(), ' '), res.end());
293+
return res;
294+
}();
295+
if (normName == stringName)
296+
return "std::string";
297+
298+
return normName;
299+
}
300+
301+
} // namespace
302+
303+
std::string ROOT::Internal::GetCanonicalTypePrefix(const std::string &typeName)
304+
{
305+
// Remove outer cv qualifiers
306+
std::string canonicalType{TClassEdit::CleanType(typeName.c_str(), /*mode=*/1)};
307+
308+
if (canonicalType.substr(0, 7) == "struct ") {
309+
canonicalType.erase(0, 7);
310+
} else if (canonicalType.substr(0, 5) == "enum ") {
311+
canonicalType.erase(0, 5);
312+
} else if (canonicalType.substr(0, 2) == "::") {
313+
canonicalType.erase(0, 2);
151314
}
152315

316+
RemoveSpaceBeforeClosingAngleBracket(canonicalType);
317+
153318
if (canonicalType.substr(0, 6) == "array<") {
154319
canonicalType = "std::" + canonicalType;
155320
} else if (canonicalType.substr(0, 7) == "atomic<") {
@@ -191,77 +356,27 @@ std::string ROOT::Internal::GetCanonicalTypePrefix(const std::string &typeName)
191356
canonicalType = it->second;
192357
}
193358

194-
// Map fundamental integer types to stdint integer types (e.g. int --> std::int32_t)
195-
if (canonicalType == "signed char") {
196-
canonicalType = RField<signed char>::TypeName();
197-
} else if (canonicalType == "unsigned char") {
198-
canonicalType = RField<unsigned char>::TypeName();
199-
} else if (canonicalType == "short" || canonicalType == "short int" || canonicalType == "signed short" ||
200-
canonicalType == "signed short int") {
201-
canonicalType = RField<short int>::TypeName();
202-
} else if (canonicalType == "unsigned short" || canonicalType == "unsigned short int") {
203-
canonicalType = RField<unsigned short int>::TypeName();
204-
} else if (canonicalType == "int" || canonicalType == "signed" || canonicalType == "signed int") {
205-
canonicalType = RField<int>::TypeName();
206-
} else if (canonicalType == "unsigned" || canonicalType == "unsigned int") {
207-
canonicalType = RField<unsigned int>::TypeName();
208-
} else if (canonicalType == "long" || canonicalType == "long int" || canonicalType == "signed long" ||
209-
canonicalType == "signed long int") {
210-
canonicalType = RField<long int>::TypeName();
211-
} else if (canonicalType == "unsigned long" || canonicalType == "unsigned long int") {
212-
canonicalType = RField<unsigned long int>::TypeName();
213-
} else if (canonicalType == "long long" || canonicalType == "long long int" || canonicalType == "signed long long" ||
214-
canonicalType == "signed long long int") {
215-
canonicalType = RField<long long int>::TypeName();
216-
} else if (canonicalType == "unsigned long long" || canonicalType == "unsigned long long int") {
217-
canonicalType = RField<unsigned long long int>::TypeName();
218-
}
359+
MapIntegerType(canonicalType);
219360

220361
return canonicalType;
221362
}
222363

223-
std::string ROOT::Internal::GetRenormalizedTypeName(const std::string &metaNormalizedName)
364+
std::string ROOT::Internal::GetRenormalizedTypeName(const std::type_info &ti)
224365
{
225-
const std::string canonicalTypePrefix{GetCanonicalTypePrefix(metaNormalizedName)};
226-
// RNTuple resolves Double32_t for the normalized type name but keeps Double32_t for the type alias
227-
// (also in template parameters)
228-
if (canonicalTypePrefix == "Double32_t")
229-
return "double";
230-
231-
if (canonicalTypePrefix.find('<') == std::string::npos) {
232-
// If there are no templates, the function is done.
233-
return canonicalTypePrefix;
234-
}
235-
236-
const auto angleBrackets = FindTemplateAngleBrackets(canonicalTypePrefix);
237-
R__ASSERT(!angleBrackets.empty());
238-
239-
std::string normName;
240-
std::string::size_type currentPos = 0;
241-
for (std::size_t i = 0; i < angleBrackets.size(); i++) {
242-
const auto [posOpen, posClose] = angleBrackets[i];
243-
// Append the type prefix until the open angle bracket.
244-
normName += canonicalTypePrefix.substr(currentPos, posOpen + 1 - currentPos);
245-
246-
const auto argList = canonicalTypePrefix.substr(posOpen + 1, posClose - posOpen - 1);
247-
const auto templateArgs = TokenizeTypeList(argList);
248-
R__ASSERT(!templateArgs.empty());
249-
250-
for (const auto &a : templateArgs) {
251-
normName += GetNormalizedTemplateArg(a, GetRenormalizedTypeName) + ",";
252-
}
253-
254-
normName[normName.size() - 1] = '>';
255-
currentPos = posClose + 1;
256-
}
257-
258-
// Append the rest of the type from the last closing angle bracket.
259-
const auto lastClosePos = angleBrackets.back().second;
260-
normName += canonicalTypePrefix.substr(lastClosePos + 1);
366+
int errCode;
367+
char *cstrDemangledName = TClassEdit::DemangleName(ti.name(), errCode);
368+
R__ASSERT(cstrDemangledName && errCode == 0);
369+
std::string normName{GetRenormalizedDemangledTypeName(cstrDemangledName)};
370+
free(cstrDemangledName);
261371

262372
return normName;
263373
}
264374

375+
std::string ROOT::Internal::GetRenormalizedTypeName(const std::string &metaNormalizedName)
376+
{
377+
return GetRenormalizedMetaTypeName(metaNormalizedName);
378+
}
379+
265380
std::string ROOT::Internal::GetNormalizedUnresolvedTypeName(const std::string &origName)
266381
{
267382
const TClassEdit::EModType modType = static_cast<TClassEdit::EModType>(
@@ -435,7 +550,7 @@ std::tuple<std::string, std::vector<std::size_t>> ROOT::Internal::ParseArrayType
435550
return std::make_tuple(prefix, sizeVec);
436551
}
437552

438-
std::vector<std::string> ROOT::Internal::TokenizeTypeList(std::string_view templateType)
553+
std::vector<std::string> ROOT::Internal::TokenizeTypeList(std::string_view templateType, std::size_t maxArgs)
439554
{
440555
std::vector<std::string> result;
441556
if (templateType.empty())
@@ -452,6 +567,8 @@ std::vector<std::string> ROOT::Internal::TokenizeTypeList(std::string_view templ
452567
case ',':
453568
if (nestingLevel == 0) {
454569
result.push_back(std::string(typeBegin, typeCursor - typeBegin));
570+
if (maxArgs && result.size() == maxArgs)
571+
return result;
455572
typeBegin = typeCursor + 1;
456573
}
457574
break;
@@ -461,8 +578,3 @@ std::vector<std::string> ROOT::Internal::TokenizeTypeList(std::string_view templ
461578
result.push_back(std::string(typeBegin, typeCursor - typeBegin));
462579
return result;
463580
}
464-
465-
std::string ROOT::Internal::GetRenormalizedDemangledTypeName(const std::type_info &ti)
466-
{
467-
return ROOT::Internal::GetRenormalizedTypeName(ROOT::Internal::GetDemangledTypeName(ti));
468-
}

0 commit comments

Comments
 (0)