From 5f8fde7fc0d4f668ae2abfc312bde8f511ccafc9 Mon Sep 17 00:00:00 2001 From: Matthias Kretz Date: Fri, 28 Jun 2024 15:32:28 -0500 Subject: [PATCH 01/14] Make intrinsics conversion recommended practice. ChangeLog: * P1928/changelog.tex: * P1928/wording.tex: --- P1928/changelog.tex | 1 + P1928/wording.tex | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/P1928/changelog.tex b/P1928/changelog.tex index 06205ba..fd35bd6 100644 --- a/P1928/changelog.tex +++ b/P1928/changelog.tex @@ -172,6 +172,7 @@ \section{Changelog} \item Say “\textit{op}” instead of “the indicated operator” \item Fix constraints on shift operators with \simdsizetype{} on the right operand. \item Remove wording removed by P3275 (non-const \code{operator[]}). +\item Make intrinsics conversion recommended practice. %\todo Reorder \code{simd} and \code{simd_mask} specification in the wording (mask first). \end{revision} diff --git a/P1928/wording.tex b/P1928/wording.tex index b726629..81b8c1e 100644 --- a/P1928/wording.tex +++ b/P1928/wording.tex @@ -561,8 +561,8 @@ If \tcode{basic_simd} is supported, \tcode{basic_simd} is trivially copyable. -\pnum -Implementations are encouraged to enable explicit conversion from and to +\pnum\recommended: +Implementations should enable explicit conversion from and to implementation-defined types. This adds one or more of the following declarations to class \tcode{basic_simd}: From 139172b17ba09ba15011bd7f31e130e2cf07a202 Mon Sep 17 00:00:00 2001 From: Matthias Kretz Date: Fri, 28 Jun 2024 15:33:47 -0500 Subject: [PATCH 02/14] Make simd_flags template arguments exposition-only. 
ChangeLog: * P1928/changelog.tex: * P1928/main.tex: * P1928/wording.tex: --- P1928/changelog.tex | 1 + P1928/main.tex | 3 +++ P1928/wording.tex | 54 ++++++++++++++++++++++----------------------- 3 files changed, 30 insertions(+), 28 deletions(-) diff --git a/P1928/changelog.tex b/P1928/changelog.tex index fd35bd6..948cd0f 100644 --- a/P1928/changelog.tex +++ b/P1928/changelog.tex @@ -173,6 +173,7 @@ \section{Changelog} \item Fix constraints on shift operators with \simdsizetype{} on the right operand. \item Remove wording removed by P3275 (non-const \code{operator[]}). \item Make intrinsics conversion recommended practice. +\item Make \code{simd_flags} template arguments exposition-only. %\todo Reorder \code{simd} and \code{simd_mask} specification in the wording (mask first). \end{revision} diff --git a/P1928/main.tex b/P1928/main.tex index 9194dc4..42004e1 100644 --- a/P1928/main.tex +++ b/P1928/main.tex @@ -36,6 +36,9 @@ \newcommand\maskelementsize{\UNSP{mask-element-size}} \newcommand\integerfrom{\UNSP{integer-from}} \newcommand\constexprwrapperlike{\UNSP{constexpr-wrapper-like}} +\newcommand\convertflag{\UNSP{convert-flag}} +\newcommand\alignedflag{\UNSP{aligned-flag}} +\newcommand\overalignedflag{\UNSP{overaligned-flag}} \renewcommand{\lst}[1]{Listing~\ref{#1}} \renewcommand{\sect}[1]{Section~\ref{#1}} diff --git a/P1928/wording.tex b/P1928/wording.tex index 81b8c1e..a3e5c6b 100644 --- a/P1928/wording.tex +++ b/P1928/wording.tex @@ -14,17 +14,17 @@ [\tcode{#1, #1 + $i$ + 1}) is a valid range.} \newcommand\flagsRequires[2]{ -\item If the template parameter pack \tcode{Flags} contains the type - identifying \tcode{simd_flag_aligned}, \tcode{addressof(*first)} shall - point to storage aligned by \tcode{simd_alignment_v<#1>}. 
-\item If the template parameter pack \tcode{Flags} contains the type - identifying \tcode{simd_flag_overaligned}, \tcode{addressof(*first)} +\item If the template parameter pack \tcode{Flags} contains + \tcode{\alignedflag}, \tcode{addressof(*first)} shall point to storage + aligned by \tcode{simd_alignment_v<#1>}. +\item If the template parameter pack \tcode{Flags} contains + \tcode{\overalignedflag}, \tcode{addressof(*first)} points to storage aligned by \tcode{N}. } \newcommand\conversionFlagsMandate[2]{ - If the template parameter pack \tcode{Flags} does not contain the type - identifying \tcode{simd_flag_convert}, then the conversion from \tcode{#1} to + If the template parameter pack \tcode{Flags} does not contain + \tcode{\convertflag}, then the conversion from \tcode{#1} to \tcode{#2} is value-preserving. } @@ -115,12 +115,16 @@ template<@\simdsizetype@ N, class V> using resize_simd_t = typename resize_simd::type; // \ref{simd.flags}, Load and store flags + struct @\convertflag@; // \expos + struct @\alignedflag@; // \expos + template struct @\overalignedflag@; // \expos + template struct simd_flags; inline constexpr simd_flags<> simd_flag_default{}; - inline constexpr simd_flags<@\seebelow@> simd_flag_convert{}; - inline constexpr simd_flags<@\seebelow@> simd_flag_aligned{}; + inline constexpr simd_flags<@\convertflag@> simd_flag_convert{}; + inline constexpr simd_flags<@\alignedflag@> simd_flag_aligned{}; template requires (has_single_bit(N)) - inline constexpr simd_flags<@\seebelow@> simd_flag_overaligned{}; + inline constexpr simd_flags<@\overalignedflag@> simd_flag_overaligned{}; // \ref{simd.class}, Class template \tcode{basic_simd} template> class basic_simd; @@ -400,19 +404,6 @@ \rSec1[simd.flags]{Load and store flags} -\begin{itemdecl} -inline constexpr simd_flags<@\seebelow@> simd_flag_convert{}; -inline constexpr simd_flags<@\seebelow@> simd_flag_aligned{}; -template requires (has_single_bit(N)) - inline constexpr simd_flags<@\seebelow@> 
simd_flag_overaligned{}; -\end{itemdecl} - -\begin{itemdescr} - \pnum - The template arguments to \tcode{simd_flags} are unspecified types used - by the implementation to identify the different load and store flags. -\end{itemdescr} - \rSec2[simd.flags.overview]{Class template \tcode{simd_flags} overview} \begin{codeblock} @@ -429,9 +420,10 @@ \end{note} \pnum\constraints -Every type in \code{Flags} is a valid template argument to one of -\code{simd_flag_convert}, \tcode{simd_flag_aligned}, or -\tcode{simd_flag_overaligned}. +Every type in \tcode{Flags} is one of \tcode{\convertflag}, +\tcode{\alignedflag}, or \tcode{\overalignedflag}. + +\FIXME{“\ldots for any \tcode{N}”?} \rSec2[simd.flags.oper]{\tcode{simd_flags} operators} @@ -442,8 +434,14 @@ \begin{itemdescr} \pnum\returns - A specialization of \tcode{simd_flags} identifying all load and - store flags identified either by \tcode{a} or \tcode{b}. + A default-initialized object of type \tcode{simd_flags} where + every type in pack \tcode{Flags2} is present either in pack \tcode{Flags} or + pack \tcode{Other} and every type in packs \tcode{Flags} and \tcode{Other} is + present in \tcode{Flags2}. + Additionally, if the packs \tcode{Flags} and \tcode{Other} contain two + different specializations \tcode{\overalignedflag} and + \tcode{\overalignedflag}, \tcode{Flags2} does not have to contain the + specialization \tcode{\overalignedflag}. 
\end{itemdescr} \rSec1[simd.class]{Class template \tcode{basic_simd}} From d9836220b0f701017a22cd55395de5674b58cd03 Mon Sep 17 00:00:00 2001 From: Matthias Kretz Date: Thu, 4 Jul 2024 21:18:28 +0200 Subject: [PATCH 03/14] Use \range and \crange macro from standard repo ChangeLog: * P1928/wording.tex: * wg21macros.sty: --- P1928/wording.tex | 24 +++++++++++------------- wg21macros.sty | 8 ++++++++ 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/P1928/wording.tex b/P1928/wording.tex index a3e5c6b..8662997 100644 --- a/P1928/wording.tex +++ b/P1928/wording.tex @@ -5,13 +5,11 @@ \or\error \fi} -\newcommand\openrange[1]{{[}\tcode{#1})} - -\renewcommand\foralli[1][]{for all $i$ in the range of [\tcode{0, #1size()})} +\renewcommand\foralli[1][]{for all $i$ in the range of \range{0}{#1size()}} %\renewcommand\forallmaskedi{for all selected indices $i$} \newcommand\validMaskedRange[1][first]{For all selected indices $i$, -[\tcode{#1, #1 + $i$ + 1}) is a valid range.} +\range{#1}{#1 + $i$ + 1} is a valid range.} \newcommand\flagsRequires[2]{ \item If the template parameter pack \tcode{Flags} contains @@ -73,8 +71,8 @@ \pnum\label{wording.selected.indices}% Given a \tcode{basic_simd_mask} object \tcode{mask}, the -\defn{selected indices} signify the integers $i$ in the range \tcode{[0, -mask.size())} for which \tcode{mask[$i$]} is \tcode{true}. +\defn{selected indices} signify the integers $i$ in the range +\range{0}{mask.size()} for which \tcode{mask[$i$]} is \tcode{true}. Given an object \tcode{data} of type \tcode{basic_simd} or \tcode{basic_simd_mask}, the \defn{selected elements} signify the elements \tcode{data[$i$]} for all selected indices $i$. @@ -540,7 +538,7 @@ \pnum Every specialization of \tcode{basic_simd} is a complete type. The types \tcode{basic_simd>} for all vectorizable -\tcode{T} and with \tcode{N} in the range of \tcode{[1, 64]} are \defn{supported}. +\tcode{T} and with \tcode{N} in the range of \crange{1}{64} are \defn{supported}. 
It is \impdef\ whether any other \tcode{basic_simd} specializations with vectorizable \tcode{T} are supported. @@ -686,7 +684,7 @@ \begin{itemdescr} \SimdLoadDescr - {\openrange{first, first + size()} is a valid range.} + {\range{first}{first + size()} is a valid range.} {Initializes the $i^\text{th}$ element with \tcode{static_cast(first[$i$])} \foralli.} \end{itemdescr} @@ -710,7 +708,7 @@ \begin{itemdescr} \SimdLoadDescr - {\openrange{first, first + size()} is a valid range.} + {\range{first}{first + size()} is a valid range.} {Replaces the elements of the \tcode{basic_simd} object such that the $i^\text{th}$ element is assigned with \tcode{static_cast(first[$i$])} \foralli.} \end{itemdescr} @@ -752,7 +750,7 @@ \begin{itemdescr} \SimdStoreDescr - {\openrange{first, first + size()} is a valid range.} + {\range{first}{first + size()} is a valid range.} {Copies all \tcode{basic_simd} elements as if \tcode{first[$i$] = static_cast>(operator[]($i$))} \foralli.} \end{itemdescr} @@ -1527,7 +1525,7 @@ \begin{itemdescr} \MaskLoadDescr - {\openrange{first, first + size()} is a valid range.} + {\range{first}{first + size()} is a valid range.} {Initializes the $i^\text{th}$ element with \tcode{first[$i$]} \foralli.} \end{itemdescr} @@ -1551,7 +1549,7 @@ \begin{itemdescr} \MaskLoadDescr - {\openrange{first, first + size()} is a valid range.} + {\range{first}{first + size()} is a valid range.} {Replaces the elements of the \tcode{basic_simd_mask} object such that the $i^\text{th}$ element is replaced with \tcode{first[$i$]} \foralli.} \end{itemdescr} @@ -1592,7 +1590,7 @@ \begin{itemdescr} \MaskStoreDescr - {\openrange{first, first + size()} is a valid range.} + {\range{first}{first + size()} is a valid range.} {Copies all \tcode{basic_simd_mask} elements as if \tcode{first[$i$] = operator[]($i$)} \foralli.} \end{itemdescr} diff --git a/wg21macros.sty b/wg21macros.sty index e840098..1dda115 100644 --- a/wg21macros.sty +++ b/wg21macros.sty @@ -415,6 +415,14 @@ 
\newcommand{\UNSP}[1]{\textit{\texttt{#1}}} \newcommand{\seebelow}{\UNSP{see below}} +%% Ranges +\newcommand{\Range}[4]{\ensuremath{#1}\tcode{#3}\ensuremath{,}\,\penalty2000{}\tcode{#4}\ensuremath{#2}} +\newcommand{\crange}[2]{\Range{[}{]}{#1}{#2}} +\newcommand{\brange}[2]{\Range{(}{]}{#1}{#2}} +\newcommand{\orange}[2]{\Range{(}{)}{#1}{#2}} +\newcommand{\range}[2]{\Range{[}{)}{#1}{#2}} +\newcommand{\countedrange}[2]{$\tcode{#1} + \range{0}{#2}$} + %% Change descriptions \newcommand{\diffhead}[1]{\textbf{#1:}\space} \newcommand{\diffdef}[1]{\ifvmode\else\hfill\break\fi\diffhead{#1}} From 22ad5b6709e89cf8a5b7e416f78528fe6a64a3aa Mon Sep 17 00:00:00 2001 From: Matthias Kretz Date: Thu, 4 Jul 2024 21:20:19 +0200 Subject: [PATCH 04/14] LWG feedback from Friday ChangeLog: * P1928/wording.tex: --- P1928/wording.tex | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) diff --git a/P1928/wording.tex b/P1928/wording.tex index 8662997..a0dfa77 100644 --- a/P1928/wording.tex +++ b/P1928/wording.tex @@ -6,7 +6,7 @@ \fi} \renewcommand\foralli[1][]{for all $i$ in the range of \range{0}{#1size()}} -%\renewcommand\forallmaskedi{for all selected indices $i$} +\renewcommand\forallmaskedi{for all selected indices $i$ of \tcode{mask}} \newcommand\validMaskedRange[1][first]{For all selected indices $i$, \range{#1}{#1 + $i$ + 1} is a valid range.} @@ -526,7 +526,7 @@ friend constexpr mask_type operator>(const basic_simd&, const basic_simd&) noexcept; friend constexpr mask_type operator<(const basic_simd&, const basic_simd&) noexcept; - // \ref{simd.cond}, \tcode{basic_simd} conditional operators + // \ref{simd.cond}, \tcode{basic_simd} exposition-only conditional operators friend constexpr basic_simd @\simdselect@( const mask_type&, const basic_simd&, const basic_simd&) noexcept; }; @@ -989,7 +989,7 @@ A \tcode{basic_simd_mask} object initialized with the results of applying \op{} to \tcode{lhs} and \tcode{rhs} as a binary element-wise operation. 
\end{itemdescr} -\rSec2[simd.cond]{\tcode{basic_simd} conditional operators} +\rSec2[simd.cond]{\tcode{basic_simd} exposition-only conditional operators} \begin{itemdecl} friend constexpr basic_simd @@ -1006,11 +1006,6 @@ \pnum In [simd.reductions], \tcode{BinaryOperation} shall be a binary element-wise operation. -\INFO{ - In principle, reductions over signed integral \code{T} have a precondition. - It could actually be nice to call this out here, and implement a precondition check in simd reductions. -} - \begin{itemdecl} template> constexpr T reduce(const basic_simd& x, BinaryOperation binary_op = {}); @@ -1060,7 +1055,7 @@ \end{itemize} \pnum\returns - If \tcode{none_of(mask)}, returns \tcode{identity_element}. + If \tcode{none_of(mask)} is \tcode{true}, returns \tcode{identity_element}. Otherwise, returns \tcode{\placeholdernc{GENERALIZED_SUM}(binary_op, simd(x[$i$]), ...)[0]} \forallmaskedi. \pnum\throws @@ -1075,7 +1070,7 @@ \begin{itemdescr} \pnum\returns - If \tcode{none_of(mask)}, returns \tcode{T()}. Otherwise, returns \tcode{\placeholdernc{GENERALIZED_SUM}(binary_op, x[$i$], ...)} \forallmaskedi. + If \tcode{none_of(mask)} is \tcode{true}, returns \tcode{T()}. Otherwise, returns \tcode{\placeholdernc{GENERALIZED_SUM}(binary_op, x[$i$], ...)} \forallmaskedi. \end{itemdescr} \begin{itemdecl} @@ -1086,7 +1081,7 @@ \begin{itemdescr} \pnum\returns - If \tcode{none_of(x)}, returns \tcode{1}. Otherwise, returns \tcode{\placeholdernc{GENERALIZED_SUM}(binary_op, x[$i$], ...)} \forallmaskedi. + If \tcode{none_of(x)} is \tcode{true}, returns \tcode{T(1)}. Otherwise, returns \tcode{\placeholdernc{GENERALIZED_SUM}(binary_op, x[$i$], ...)} \forallmaskedi. \end{itemdescr} \begin{itemdecl} @@ -1100,7 +1095,7 @@ \tcode{is_integral_v} is \tcode{true}. \pnum\returns - If \tcode{none_of(mask)}, returns \tcode{\~{}T()}. Otherwise, returns \tcode{\placeholdernc{GENERALIZED_SUM}(binary_op, x[$i$], ...)} \forallmaskedi. 
+ If \tcode{none_of(mask)} is \tcode{true}, returns \tcode{T(\~{}T())}. Otherwise, returns \tcode{\placeholdernc{GENERALIZED_SUM}(binary_op, x[$i$], ...)} \forallmaskedi. \end{itemdescr} \begin{itemdecl} @@ -1117,7 +1112,7 @@ \tcode{is_integral_v} is \tcode{true}. \pnum\returns - If \tcode{none_of(mask)}, returns \tcode{T()}. Otherwise, returns \tcode{\placeholdernc{GENERALIZED_SUM}(binary_op, x[$i$], ...)} \forallmaskedi. + If \tcode{none_of(mask)} is \tcode{true}, returns \tcode{T()}. Otherwise, returns \tcode{\placeholdernc{GENERALIZED_SUM}(binary_op, x[$i$], ...)} \forallmaskedi. \end{itemdescr} \begin{itemdecl} @@ -1143,7 +1138,7 @@ \tcode{T} models \tcode{totally_ordered}. \pnum\returns - If \tcode{none_of(mask)}, returns \tcode{numeric_limits::max()}. + If \tcode{none_of(mask)} is \tcode{true}, returns \tcode{numeric_limits::max()}. Otherwise, returns the value of a selected element \tcode{x[$j$]} for which \tcode{x[$i$] < x[$j$]} is \tcode{false} \forallmaskedi. \end{itemdescr} @@ -1170,7 +1165,7 @@ \tcode{T} models \tcode{totally_ordered}. \pnum\returns - If \tcode{none_of(mask)}, returns \tcode{numeric_limits::lowest()}. + If \tcode{none_of(mask)} is \tcode{true}, returns \tcode{numeric_limits::lowest()}. Otherwise, returns the value of a selected element \tcode{x[$j$]} for which \tcode{x[$j$] < x[$i$]} is \tcode{false} \forallmaskedi. 
\end{itemdescr} @@ -1418,7 +1413,7 @@ friend constexpr basic_simd_mask operator<(const basic_simd_mask&, const basic_simd_mask&) noexcept; - // \ref{simd.mask.cond}, \tcode{basic_simd_mask} conditional operators + // \ref{simd.mask.cond}, \tcode{basic_simd_mask} exposition-only conditional operators friend constexpr basic_simd_mask @\simdselect@( const basic_simd_mask&, const basic_simd_mask&, const basic_simd_mask&) noexcept; friend constexpr basic_simd_mask @\simdselect@( @@ -1724,7 +1719,7 @@ A \tcode{basic_simd_mask} object initialized with the results of applying the indicated operator to \tcode{lhs} and \tcode{rhs} as a binary element-wise operation. \end{itemdescr} -\rSec2[simd.mask.cond]{\tcode{basic_simd_mask} conditional operators} +\rSec2[simd.mask.cond]{\tcode{basic_simd_mask} exposition-only conditional operators} \begin{itemdecl} friend constexpr basic_simd_mask @\simdselect@( From c4e74d890eb422fe2a1194f25eaab811eb4544e5 Mon Sep 17 00:00:00 2001 From: Matthias Kretz Date: Thu, 4 Jul 2024 21:20:46 +0200 Subject: [PATCH 05/14] Use \tcode instead of \code ChangeLog: * P1928/wording.tex: --- P1928/wording.tex | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/P1928/wording.tex b/P1928/wording.tex index a0dfa77..24a64f9 100644 --- a/P1928/wording.tex +++ b/P1928/wording.tex @@ -586,7 +586,7 @@ \end{itemdecl} \begin{itemdescr} - \pnum Let \tcode{From} denote the type \code{remove_cvref_t}. + \pnum Let \tcode{From} denote the type \tcode{remove_cvref_t}. 
\pnum\constraints \tcode{From} satisfies \tcode{convertible_to}, and either @@ -622,7 +622,7 @@ \pnum\remarks %The constructor is \tcode{explicit} if - The expression inside \code{explicit} evaluates to \code{true} if either + The expression inside \tcode{explicit} evaluates to \tcode{true} if either \begin{itemize} \item the conversion from \tcode{U} to \tcode{value_type} is not value-preserving, or @@ -1202,7 +1202,7 @@ The $i^\text{th}$ \simd or \mask element of the $j^\text{th}$ \tcode{tuple} element of type \tcode{T} is initialized to the value of the element in \tcode{x} with index \tcode{$i$ + $j$ * T::size()}. - The $i^\text{th}$ \simd or \mask element of the \code{N}$^\text{th}$ + The $i^\text{th}$ \simd or \mask element of the \tcode{N}$^\text{th}$ \tcode{tuple} element is initialized to the value of the element in \tcode{x} with index \tcode{$i$ + $N$ * T::size()}. \end{itemize} @@ -1855,7 +1855,7 @@ \pnum\expects The value of the argument is \tcode{true}. - \pnum\returns \code{0}. + \pnum\returns \tcode{0}. \end{itemdescr} % vim: tw=0 From d2cd321f21e47a6281e1d6775a26d64bff767886 Mon Sep 17 00:00:00 2001 From: Matthias Kretz Date: Mon, 8 Jul 2024 10:13:33 +0200 Subject: [PATCH 06/14] Consistently don't use "shall" in alignment preconditions ChangeLog: * P1928/wording.tex: --- P1928/wording.tex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/P1928/wording.tex b/P1928/wording.tex index 24a64f9..b7ae4df 100644 --- a/P1928/wording.tex +++ b/P1928/wording.tex @@ -13,7 +13,7 @@ \newcommand\flagsRequires[2]{ \item If the template parameter pack \tcode{Flags} contains - \tcode{\alignedflag}, \tcode{addressof(*first)} shall point to storage + \tcode{\alignedflag}, \tcode{addressof(*first)} points to storage aligned by \tcode{simd_alignment_v<#1>}. 
\item If the template parameter pack \tcode{Flags} contains \tcode{\overalignedflag}, \tcode{addressof(*first)} From 923ce4e708d3f82a75a027cfe073d25970d31d15 Mon Sep 17 00:00:00 2001 From: Matthias Kretz Date: Wed, 10 Jul 2024 17:32:11 +0200 Subject: [PATCH 07/14] Linebreaks in LaTeX, no change to the PDF ChangeLog: * P1928/wording.tex: --- P1928/wording.tex | 206 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 142 insertions(+), 64 deletions(-) diff --git a/P1928/wording.tex b/P1928/wording.tex index b7ae4df..5fc9773 100644 --- a/P1928/wording.tex +++ b/P1928/wording.tex @@ -57,7 +57,9 @@ \tcode{std::float64_t} are vectorizable types if defined ([basic.extended.fp]). \pnum -The term \defn{data-parallel type} refers to all supported (\ref{simd.overview}) specializations of the \tcode{basic_simd} and \tcode{basic_simd_mask} class templates. A \defn{data-parallel object} is an object of \term{data-parallel type}. +The term \defn{data-parallel type} refers to all supported (\ref{simd.overview}) specializations of +the \tcode{basic_simd} and \tcode{basic_simd_mask} class templates. A \defn{data-parallel object} is +an object of \term{data-parallel type}. \pnum A data-parallel type consists of one or more elements of an underlying vectorizable type, @@ -67,7 +69,11 @@ The elements in a data-parallel type are indexed from 0 to $\textrm{width} - 1$. \pnum -An \defn{element-wise operation} applies a specified operation to the elements of one or more data-parallel objects. Each such application is unsequenced with respect to the others. A \defn{unary element-wise operation} is an element-wise operation that applies a unary operation to each element of a data-parallel object. A \defn{binary element-wise operation} is an element-wise operation that applies a binary operation to corresponding elements of two data-parallel objects. +An \defn{element-wise operation} applies a specified operation to the elements of one or more +data-parallel objects. 
Each such application is unsequenced with respect to the others. A +\defn{unary element-wise operation} is an element-wise operation that applies a unary operation to +each element of a data-parallel object. A \defn{binary element-wise operation} is an element-wise +operation that applies a binary operation to corresponding elements of two data-parallel objects. \pnum\label{wording.selected.indices}% Given a \tcode{basic_simd_mask} object \tcode{mask}, the @@ -77,7 +83,8 @@ the \defn{selected elements} signify the elements \tcode{data[$i$]} for all selected indices $i$. \pnum -The conversion from vectorizable type \tcode{U} to vectorizable type \tcode{T} is \defn{value-preserving} if +The conversion from vectorizable type \tcode{U} to vectorizable type \tcode{T} is +\defn{value-preserving} if all possible values of \tcode{U} can be represented with type \tcode{T}. \rSec1[simd.syn]{Header \texorpdfstring{\tcode{}}{} synopsis} @@ -242,7 +249,8 @@ \tcode{\maskelementsize>} has the value \tcode{Bytes}. \pnum -\tcode{\integerfrom} is an alias for a signed integer type \tcode{T} so that \tcode{sizeof(T) == Bytes}. +\tcode{\integerfrom} is an alias for a signed integer type \tcode{T} so that \tcode{sizeof(T) +== Bytes}. \rSec1[simd.abi]{\tcode{simd} ABI tags} @@ -341,7 +349,11 @@ \end{itemize} \pnum -If \tcode{value} is present, the type \tcode{simd_alignment} is a \tcode{BinaryTypeTrait} with a base characteristic of \tcode{integral_constant} for some implementation-defined \tcode{N} (see \ref{simd.copy} and \ref{simd.mask.copy}). \begin{note}\tcode{value} identifies the alignment restrictions on pointers used for (converting) loads and stores for the give type \tcode{T} on arrays of type \tcode{U}.\end{note} +If \tcode{value} is present, the type \tcode{simd_alignment} is a \tcode{BinaryTypeTrait} with +a base characteristic of \tcode{integral_constant} for some implementation-defined +\tcode{N} (see \ref{simd.copy} and \ref{simd.mask.copy}). 
\begin{note}\tcode{value} identifies the +alignment restrictions on pointers used for (converting) loads and stores for the given type +\tcode{T} on arrays of type \tcode{U}.\end{note} \pnum The behavior of a program that adds specializations for \tcode{simd_alignment} is undefined. @@ -568,7 +580,8 @@ \end{codeblock} \begin{example} - Consider an implementation that supports the type \tcode{__vec4f} and the function \tcode{__vec4f _vec4f_addsub(__vec4f, __vec4f)} for the currently targeted system. + Consider an implementation that supports the type \tcode{__vec4f} and the function \tcode{__vec4f + _vec4f_addsub(__vec4f, __vec4f)} for the currently targeted system. A user may require the use of \tcode{_vec4f_addsub} for maximum performance and thus writes: \begin{codeblock} using V = basic_simd; @@ -696,7 +709,8 @@ \begin{itemdescr} \SimdLoadDescr {\validMaskedRange} - {Initializes the $i^\text{th}$ element with \tcode{mask[$i$] ? static_cast(first[$i$]) : T()} \foralli.} + {Initializes the $i^\text{th}$ element with \tcode{mask[$i$] ?
static_cast(first[$i$]) : T()} + \foralli.} \end{itemdescr} \rSec2[simd.copy]{\tcode{basic_simd} copy functions} @@ -709,7 +723,8 @@ \begin{itemdescr} \SimdLoadDescr {\range{first}{first + size()} is a valid range.} - {Replaces the elements of the \tcode{basic_simd} object such that the $i^\text{th}$ element is assigned with \tcode{static_cast(first[$i$])} \foralli.} + {Replaces the elements of the \tcode{basic_simd} object such that the $i^\text{th}$ element is + assigned with \tcode{static_cast(first[$i$])} \foralli.} \end{itemdescr} \begin{itemdecl} @@ -720,7 +735,8 @@ \begin{itemdescr} \SimdLoadDescr {\validMaskedRange} - {Replaces the selected elements of the \tcode{basic_simd} object such that the $i^\text{th}$ element is replaced with \tcode{static_cast(first[$i$])} \forallmaskedi.} + {Replaces the selected elements of the \tcode{basic_simd} object such that the $i^\text{th}$ + element is replaced with \tcode{static_cast(first[$i$])} \forallmaskedi.} \end{itemdescr} \newcommand\SimdStoreDescr[2]{ @@ -751,7 +767,8 @@ \begin{itemdescr} \SimdStoreDescr {\range{first}{first + size()} is a valid range.} - {Copies all \tcode{basic_simd} elements as if \tcode{first[$i$] = static_cast>(operator[]($i$))} \foralli.} + {Copies all \tcode{basic_simd} elements as if \tcode{first[$i$] = + static_cast>(operator[]($i$))} \foralli.} \end{itemdescr} \begin{itemdecl} @@ -762,7 +779,8 @@ \begin{itemdescr} \SimdStoreDescr {\validMaskedRange} - {Copies the selected elements as if \tcode{first[$i$] = static_cast>(operator[]($i$))} \forallmaskedi.} + {Copies the selected elements as if \tcode{first[$i$] = + static_cast>(operator[]($i$))} \forallmaskedi.} \end{itemdescr} \rSec2[simd.subscr]{\tcode{basic_simd} subscript operator} @@ -850,7 +868,8 @@ \pnum\ConstraintUnaryOperatorWellFormed{!a} \pnum\returns - A \tcode{basic_simd_mask} object with the $i^\text{th}$ element set to \tcode{!operator[]($i$)} \foralli. 
+ A \tcode{basic_simd_mask} object with the $i^\text{th}$ element set to \tcode{!operator[]($i$)} + \foralli. \end{itemdescr} \begin{itemdecl} @@ -861,7 +880,8 @@ \pnum\ConstraintUnaryOperatorWellFormed{\~{}a} \pnum\returns - A \tcode{basic_simd} object with the $i^\text{th}$ element set to \tcode{\~{}operator[]($i$)} \foralli. + A \tcode{basic_simd} object with the $i^\text{th}$ element set to \tcode{\~{}operator[]($i$)} + \foralli. \end{itemdescr} \begin{itemdecl} @@ -883,7 +903,8 @@ \pnum\ConstraintUnaryOperatorWellFormed{-a} \pnum\returns - A \tcode{basic_simd} object where the $i^\text{th}$ element is initialized to \tcode{-operator[]($i$)} \foralli. + A \tcode{basic_simd} object where the $i^\text{th}$ element is initialized to + \tcode{-operator[]($i$)} \foralli. \end{itemdescr} \rSec1[simd.nonmembers]{\tcode{basic_simd} non-member operations} @@ -909,7 +930,8 @@ \pnum\ConstraintOperatorTWellFormed \pnum\returns - A \tcode{basic_simd} object initialized with the results of applying \op{} to \tcode{lhs} and \tcode{rhs} as a binary element-wise operation. + A \tcode{basic_simd} object initialized with the results of applying \op{} to \tcode{lhs} and + \tcode{rhs} as a binary element-wise operation. \end{itemdescr} \begin{itemdecl} @@ -924,7 +946,8 @@ \tcode{requires (value_type a, \simdsizetype{} b) \{ a \op{} b; \}} is \tcode{true}. \pnum\returns - A \tcode{basic_simd} object where the $i^\text{th}$ element is initialized to the result of applying \op{} to \tcode{v[$i$]} and \tcode{n} \foralli. + A \tcode{basic_simd} object where the $i^\text{th}$ element is initialized to the result of + applying \op{} to \tcode{v[$i$]} and \tcode{n} \foralli. \end{itemdescr} \rSec2[simd.cassign]{\tcode{basic_simd} compound assignment} @@ -948,7 +971,8 @@ \pnum\ConstraintOperatorTWellFormed \pnum\effects - These operators apply the indicated operator to \tcode{lhs} and \tcode{rhs} as an element-wise operation. 
+ These operators apply the indicated operator to \tcode{lhs} and \tcode{rhs} as an element-wise + operation. \pnum\returns \tcode{lhs}. @@ -986,7 +1010,8 @@ \pnum\ConstraintOperatorTWellFormed \pnum\returns - A \tcode{basic_simd_mask} object initialized with the results of applying \op{} to \tcode{lhs} and \tcode{rhs} as a binary element-wise operation. + A \tcode{basic_simd_mask} object initialized with the results of applying \op{} to \tcode{lhs} and + \tcode{rhs} as a binary element-wise operation. \end{itemdescr} \rSec2[simd.cond]{\tcode{basic_simd} exposition-only conditional operators} @@ -998,7 +1023,8 @@ \begin{itemdescr} \pnum\returns - A \tcode{basic_simd} object where the $i^\text{th}$ element equals \tcode{mask[$i$] ? a[$i$] : b[$i$]} \foralli. + A \tcode{basic_simd} object where the $i^\text{th}$ element equals \tcode{mask[$i$] ? a[$i$] : + b[$i$]} \foralli. \end{itemdescr} \rSec2[simd.reductions]{\tcode{basic_simd} reductions} @@ -1024,7 +1050,8 @@ \tcode{BinaryOperation} does not modify \tcode{x}. \pnum\returns - \tcode{\placeholdernc{GENERALIZED_SUM}(binary_op, simd(x[$i$]), ...)[0]} \foralli{} (\iref{numerics.defns}). + \tcode{\placeholdernc{GENERALIZED_SUM}(binary_op, simd(x[$i$]), ...)[0]} \foralli{} + (\iref{numerics.defns}). \pnum\throws Any exception thrown from \tcode{binary_op}. @@ -1041,7 +1068,8 @@ \tcode{BinaryOperation} satisfies \tcode{invocable, simd>}. \pnum\mandates - \tcode{binary_op} can be invoked with two arguments of type \tcode{basic_simd} returning \tcode{basic_simd} for every \tcode{A1} that is an ABI tag type. + \tcode{binary_op} can be invoked with two arguments of type \tcode{basic_simd} returning + \tcode{basic_simd} for every \tcode{A1} that is an ABI tag type. \pnum\expects \begin{itemize} @@ -1056,7 +1084,8 @@ \pnum\returns If \tcode{none_of(mask)} is \tcode{true}, returns \tcode{identity_element}. - Otherwise, returns \tcode{\placeholdernc{GENERALIZED_SUM}(binary_op, simd(x[$i$]), ...)[0]} \forallmaskedi. 
+ Otherwise, returns \tcode{\placeholdernc{GENERALIZED_SUM}(binary_op, simd(x[$i$]), ...)[0]} + \forallmaskedi. \pnum\throws Any exception thrown from \tcode{binary_op}. @@ -1070,7 +1099,8 @@ \begin{itemdescr} \pnum\returns - If \tcode{none_of(mask)} is \tcode{true}, returns \tcode{T()}. Otherwise, returns \tcode{\placeholdernc{GENERALIZED_SUM}(binary_op, x[$i$], ...)} \forallmaskedi. + If \tcode{none_of(mask)} is \tcode{true}, returns \tcode{T()}. Otherwise, returns + \tcode{\placeholdernc{GENERALIZED_SUM}(binary_op, x[$i$], ...)} \forallmaskedi. \end{itemdescr} \begin{itemdecl} @@ -1081,7 +1111,8 @@ \begin{itemdescr} \pnum\returns - If \tcode{none_of(x)} is \tcode{true}, returns \tcode{T(1)}. Otherwise, returns \tcode{\placeholdernc{GENERALIZED_SUM}(binary_op, x[$i$], ...)} \forallmaskedi. + If \tcode{none_of(x)} is \tcode{true}, returns \tcode{T(1)}. Otherwise, returns + \tcode{\placeholdernc{GENERALIZED_SUM}(binary_op, x[$i$], ...)} \forallmaskedi. \end{itemdescr} \begin{itemdecl} @@ -1095,7 +1126,8 @@ \tcode{is_integral_v} is \tcode{true}. \pnum\returns - If \tcode{none_of(mask)} is \tcode{true}, returns \tcode{T(\~{}T())}. Otherwise, returns \tcode{\placeholdernc{GENERALIZED_SUM}(binary_op, x[$i$], ...)} \forallmaskedi. + If \tcode{none_of(mask)} is \tcode{true}, returns \tcode{T(\~{}T())}. Otherwise, returns + \tcode{\placeholdernc{GENERALIZED_SUM}(binary_op, x[$i$], ...)} \forallmaskedi. \end{itemdescr} \begin{itemdecl} @@ -1112,7 +1144,8 @@ \tcode{is_integral_v} is \tcode{true}. \pnum\returns - If \tcode{none_of(mask)} is \tcode{true}, returns \tcode{T()}. Otherwise, returns \tcode{\placeholdernc{GENERALIZED_SUM}(binary_op, x[$i$], ...)} \forallmaskedi. + If \tcode{none_of(mask)} is \tcode{true}, returns \tcode{T()}. Otherwise, returns + \tcode{\placeholdernc{GENERALIZED_SUM}(binary_op, x[$i$], ...)} \forallmaskedi. \end{itemdescr} \begin{itemdecl} @@ -1124,7 +1157,8 @@ \tcode{T} models \tcode{totally_ordered}. 
\pnum\returns - The value of an element \tcode{x[$j$]} for which \tcode{x[$i$] < x[$j$]} is \tcode{false} \foralli. + The value of an element \tcode{x[$j$]} for which \tcode{x[$i$] < x[$j$]} is \tcode{false} + \foralli. \end{itemdescr} \begin{itemdecl} @@ -1139,7 +1173,8 @@ \pnum\returns If \tcode{none_of(mask)} is \tcode{true}, returns \tcode{numeric_limits::max()}. - Otherwise, returns the value of a selected element \tcode{x[$j$]} for which \tcode{x[$i$] < x[$j$]} is \tcode{false} \forallmaskedi. + Otherwise, returns the value of a selected element \tcode{x[$j$]} for which \tcode{x[$i$] < + x[$j$]} is \tcode{false} \forallmaskedi. \end{itemdescr} \begin{itemdecl} @@ -1151,7 +1186,8 @@ \tcode{T} models \tcode{totally_ordered}. \pnum\returns - The value of an element \tcode{x[$j$]} for which \tcode{x[$j$] < x[$i$]} is \tcode{false} \foralli. + The value of an element \tcode{x[$j$]} for which \tcode{x[$j$] < x[$i$]} is \tcode{false} + \foralli. \end{itemdescr} \begin{itemdecl} @@ -1166,7 +1202,8 @@ \pnum\returns If \tcode{none_of(mask)} is \tcode{true}, returns \tcode{numeric_limits::lowest()}. - Otherwise, returns the value of a selected element \tcode{x[$j$]} for which \tcode{x[$j$] < x[$i$]} is \tcode{false} \forallmaskedi. + Otherwise, returns the value of a selected element \tcode{x[$j$]} for which \tcode{x[$j$] < + x[$i$]} is \tcode{false} \forallmaskedi. \end{itemdescr} \rSec2[simd.creation]{\tcode{basic_simd} and \tcode{basic_simd_mask} creation} @@ -1220,7 +1257,10 @@ \begin{itemdescr} \pnum\returns - A data-parallel object initialized with the concatenated values in the \tcode{xs} pack of data-parallel objects: The $i^\text{th}$ \tcode{basic_simd}/\tcode{basic_simd_mask} element of the $j^\text{th}$ parameter in the \tcode{xs} pack is copied to the return value's element with index $i$ + the sum of the width of the first $j$ parameters in the \tcode{xs} pack. 
+ A data-parallel object initialized with the concatenated values in the \tcode{xs} pack of + data-parallel objects: The $i^\text{th}$ \tcode{basic_simd}/\tcode{basic_simd_mask} element of the + $j^\text{th}$ parameter in the \tcode{xs} pack is copied to the return value's element with index + $i$ + the sum of the width of the first $j$ parameters in the \tcode{xs} pack. \end{itemdescr} \rSec2[simd.alg]{Algorithms} @@ -1264,8 +1304,10 @@ \pnum\returns A \tcode{pair} initialized with \begin{itemize} - \item the result of element-wise application of \tcode{std::min(a[$i$], b[$i$])} \foralli{} in the \tcode{first} member, and - \item the result of element-wise application of \tcode{std::max(a[$i$], b[$i$])} \foralli{} in the \tcode{second} member. + \item the result of element-wise application of \tcode{std::min(a[$i$], b[$i$])} \foralli{} in + the \tcode{first} member, and + \item the result of element-wise application of \tcode{std::max(a[$i$], b[$i$])} \foralli{} in + the \tcode{second} member. \end{itemize} \end{itemdescr} @@ -1309,28 +1351,44 @@ \rSec2[simd.math]{\tcode{basic_simd} math library} \pnum -For each set of overloaded functions within \tcode{}, there shall be additional overloads sufficient to ensure that if any argument corresponding to a \tcode{double} parameter has type \tcode{basic_simd}, where \tcode{is_floating_point_v} is \tcode{true}, then: +For each set of overloaded functions within \tcode{}, there shall be additional overloads +sufficient to ensure that if any argument corresponding to a \tcode{double} parameter has type +\tcode{basic_simd}, where \tcode{is_floating_point_v} is \tcode{true}, then: \begin{itemize} - \item All arguments corresponding to \tcode{double} parameters shall be convertible to \tcode{basic_simd}. - \item All arguments corresponding to \tcode{double*} parameters shall be of type \tcode{basic_simd*}. 
- \item All arguments corresponding to parameters of integral type \tcode{U} shall be convertible to \tcode{rebind_simd_t>}. - \item All arguments corresponding to \tcode{U*}, where \tcode{U} is integral, shall be of type \tcode{rebind_simd_t>*}. - \item If the corresponding return type is \tcode{double}, the return type of the additional overloads is \tcode{basic_simd}. Otherwise, if the corresponding return type is \tcode{bool}, the return type of the additional overload is \tcode{basic_simd::mask_type}. Otherwise, the return type is \tcode{rebind_simd_t>}, with \tcode{R} denoting the corresponding return type. + \item All arguments corresponding to \tcode{double} parameters shall be convertible to + \tcode{basic_simd}. + \item All arguments corresponding to \tcode{double*} parameters shall be of type + \tcode{basic_simd*}. + \item All arguments corresponding to parameters of integral type \tcode{U} shall be convertible to + \tcode{rebind_simd_t>}. + \item All arguments corresponding to \tcode{U*}, where \tcode{U} is integral, shall be of type + \tcode{rebind_simd_t>*}. + \item If the corresponding return type is \tcode{double}, the return type of the additional + overloads is \tcode{basic_simd}. Otherwise, if the corresponding return type is + \tcode{bool}, the return type of the additional overload is \tcode{basic_simd::mask_type}. Otherwise, the return type is \tcode{rebind_simd_t>}, + with \tcode{R} denoting the corresponding return type. \end{itemize} -It is unspecified whether a call to these overloads with arguments that are all convertible to \tcode{basic_simd} but are not of type \tcode{basic_simd} is well-formed. +It is unspecified whether a call to these overloads with arguments that are all convertible to +\tcode{basic_simd} but are not of type \tcode{basic_simd} is well-formed. \pnum -Each function overload produced by the above rules applies the indicated \tcode{} function element-wise. 
For the mathematical functions, the results per element only need to be approximately equal to the application of the function which is overloaded for the element type. +Each function overload produced by the above rules applies the indicated \tcode{} function +element-wise. For the mathematical functions, the results per element only need to be approximately +equal to the application of the function which is overloaded for the element type. \pnum -The result is unspecified if a domain, pole, or range error occurs when the input argument(s) are applied to the indicated \tcode{} function. -\begin{note}Implementations are encouraged to follow the C specification (especially Annex F).\end{note} +The result is unspecified if a domain, pole, or range error occurs when the input argument(s) are +applied to the indicated \tcode{} function. +\begin{note}Implementations are encouraged to follow the C specification (especially Annex +F).\end{note} \pnum \FIXME{Allow \tcode{abs(basic_simd<\textrm{signed-integral}>)}.} \pnum -If \tcode{abs} is called with an argument of type \tcode{basic_simd} for which \tcode{is_unsigned_v} is \tcode{true}, the program is ill-formed. +If \tcode{abs} is called with an argument of type \tcode{basic_simd} for which +\tcode{is_unsigned_v} is \tcode{true}, the program is ill-formed. \rSec1[simd.mask.class]{Class template \tcode{basic_simd_mask}} @@ -1425,16 +1483,20 @@ \end{codeblock} \pnum -The specializations of class template \tcode{basic_simd_mask} are data-parallel types with element type \tcode{bool}. +The specializations of class template \tcode{basic_simd_mask} are data-parallel types with element +type \tcode{bool}. \pnum Every specialization of \tcode{basic_simd_mask} is a complete type. -The specialization \tcode{basic_simd_mask} is not supported if the type \tcode{\integerfrom} does not exist or if it is not a vectorizable type. 
+The specialization \tcode{basic_simd_mask} is not supported if the type +\tcode{\integerfrom} does not exist or if it is not a vectorizable type. It is implementation-defined whether \tcode{basic_simd_mask} is supported. -\begin{note}The intent is for implementations to decide on the basis of the currently targeted system.\end{note} +\begin{note}The intent is for implementations to decide on the basis of the currently targeted +system.\end{note} -If \tcode{basic_simd_mask} is not supported, the specialization shall have a deleted default constructor, deleted destructor, deleted copy constructor, and deleted copy assignment. +If \tcode{basic_simd_mask} is not supported, the specialization shall have a deleted +default constructor, deleted destructor, deleted copy constructor, and deleted copy assignment. Otherwise, the following are true: \begin{itemize} \item \tcode{is_nothrow_move_constructible_v>}, and @@ -1443,10 +1505,13 @@ \end{itemize} \pnum -Default initialization performs no initialization of the elements; value-initialization initializes each element with \tcode{false}. \begin{note}Thus, default initialization leaves the elements in an indeterminate state.\end{note} +Default initialization performs no initialization of the elements; value-initialization initializes +each element with \tcode{false}. \begin{note}Thus, default initialization leaves the elements in an +indeterminate state.\end{note} \pnum -Implementations should enable explicit conversion from and to implementation-defined types. This adds one or more of the following declarations to class \tcode{basic_simd_mask}: +Implementations should enable explicit conversion from and to implementation-defined types. This +adds one or more of the following declarations to class \tcode{basic_simd_mask}: \begin{codeblock} constexpr explicit operator @\impdef@() const; @@ -1492,7 +1557,8 @@ \pnum The calls to \tcode{gen} are unsequenced with respect to each other. 
- Vectorization-unsafe standard library functions may not be invoked by \tcode{gen} (\iref{algorithms.parallel.exec}). + Vectorization-unsafe standard library functions may not be invoked by \tcode{gen} + (\iref{algorithms.parallel.exec}). \end{itemdescr} \newcommand\MaskLoadDescr[2]{ @@ -1545,7 +1611,8 @@ \begin{itemdescr} \MaskLoadDescr {\range{first}{first + size()} is a valid range.} - {Replaces the elements of the \tcode{basic_simd_mask} object such that the $i^\text{th}$ element is replaced with \tcode{first[$i$]} \foralli.} + {Replaces the elements of the \tcode{basic_simd_mask} object such that the $i^\text{th}$ element + is replaced with \tcode{first[$i$]} \foralli.} \end{itemdescr} \begin{itemdecl} @@ -1556,7 +1623,8 @@ \begin{itemdescr} \MaskLoadDescr {\validMaskedRange} - {Replaces the selected elements of the \tcode{basic_simd_mask} object such that the $i^\text{th}$ element is replaced with \tcode{first[$i$]} \forallmaskedi.} + {Replaces the selected elements of the \tcode{basic_simd_mask} object such that the + $i^\text{th}$ element is replaced with \tcode{first[$i$]} \forallmaskedi.} \end{itemdescr} \newcommand\MaskStoreDescr[2]{ @@ -1586,7 +1654,8 @@ \begin{itemdescr} \MaskStoreDescr {\range{first}{first + size()} is a valid range.} - {Copies all \tcode{basic_simd_mask} elements as if \tcode{first[$i$] = operator[]($i$)} \foralli.} + {Copies all \tcode{basic_simd_mask} elements as if \tcode{first[$i$] = operator[]($i$)} + \foralli.} \end{itemdescr} \begin{itemdecl} @@ -1653,7 +1722,8 @@ \tcode{\simdsizev == \simdsizev}. \pnum\returns - An object where the $i^\text{th}$ element is initialized to \tcode{static_cast(operator[]($i$))}. + An object where the $i^\text{th}$ element is initialized to + \tcode{static_cast(operator[]($i$))}. 
\end{itemdescr} \rSec1[simd.mask.nonmembers]{Non-member operations} @@ -1675,7 +1745,8 @@ \begin{itemdescr} \pnum\returns - A \tcode{basic_simd_mask} object initialized with the results of applying the indicated operator to \tcode{lhs} and \tcode{rhs} as a binary element-wise operation. + A \tcode{basic_simd_mask} object initialized with the results of applying the indicated operator + to \tcode{lhs} and \tcode{rhs} as a binary element-wise operation. \end{itemdescr} \rSec2[simd.mask.cassign]{\tcode{basic_simd_mask} compound assignment} @@ -1691,7 +1762,8 @@ \begin{itemdescr} \pnum\effects - These operators apply the indicated operator to \tcode{lhs} and \tcode{rhs} as a binary element-wise operation. + These operators apply the indicated operator to \tcode{lhs} and \tcode{rhs} as a binary + element-wise operation. \pnum\returns \tcode{lhs}. @@ -1716,7 +1788,8 @@ \begin{itemdescr} \pnum\returns - A \tcode{basic_simd_mask} object initialized with the results of applying the indicated operator to \tcode{lhs} and \tcode{rhs} as a binary element-wise operation. + A \tcode{basic_simd_mask} object initialized with the results of applying the indicated operator + to \tcode{lhs} and \tcode{rhs} as a binary element-wise operation. \end{itemdescr} \rSec2[simd.mask.cond]{\tcode{basic_simd_mask} exposition-only conditional operators} @@ -1728,7 +1801,8 @@ \begin{itemdescr} \pnum\returns - A \tcode{basic_simd_mask} object where the $i^\text{th}$ element equals \tcode{mask[$i$] ? a[$i$] : b[$i$]} \foralli. + A \tcode{basic_simd_mask} object where the $i^\text{th}$ element equals \tcode{mask[$i$] ? a[$i$] + : b[$i$]} \foralli. \end{itemdescr} \begin{itemdecl} @@ -1738,7 +1812,8 @@ \begin{itemdescr} \pnum\returns - A \tcode{basic_simd_mask} object where the $i^\text{th}$ element equals \tcode{mask[$i$] ? a : b} \foralli. + A \tcode{basic_simd_mask} object where the $i^\text{th}$ element equals \tcode{mask[$i$] ? a : b} + \foralli. 
\end{itemdescr} \begin{itemdecl} @@ -1761,7 +1836,8 @@ \end{itemize} \pnum\returns - A \tcode{basic_simd} object where the $i^\text{th}$ element equals \tcode{mask[$i$] ? a : b} \foralli. + A \tcode{basic_simd} object where the $i^\text{th}$ element equals \tcode{mask[$i$] ? a : + b} \foralli. \end{itemdescr} \rSec2[simd.mask.reductions]{\tcode{basic_simd_mask} reductions} @@ -1783,7 +1859,8 @@ \begin{itemdescr} \pnum\returns - \tcode{true} if at least one boolean element in \tcode{k} is \tcode{true}, \tcode{false} otherwise. + \tcode{true} if at least one boolean element in \tcode{k} is \tcode{true}, \tcode{false} + otherwise. \end{itemdescr} \begin{itemdecl} @@ -1793,7 +1870,8 @@ \begin{itemdescr} \pnum\returns - \tcode{true} if none of the one boolean elements in \tcode{k} is \tcode{true}, \tcode{false} otherwise. + \tcode{true} if none of the boolean elements in \tcode{k} is \tcode{true}, \tcode{false} + otherwise. \end{itemdescr} \begin{itemdecl} @@ -1858,4 +1936,4 @@ \pnum\returns \tcode{0}. \end{itemdescr} -% vim: tw=0 +% vim: tw=100 From f279053d7e64eb12fee0e9641b7815f0f30ed135 Mon Sep 17 00:00:00 2001 From: Matthias Kretz Date: Wed, 10 Jul 2024 17:40:21 +0200 Subject: [PATCH 08/14] simd_alignment is not implementation-defined ChangeLog: * P1928/changelog.tex: * P1928/wording.tex: --- P1928/changelog.tex | 1 + P1928/wording.tex | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/P1928/changelog.tex b/P1928/changelog.tex index 948cd0f..9465ad2 100644 --- a/P1928/changelog.tex +++ b/P1928/changelog.tex @@ -174,6 +174,7 @@ \section{Changelog} \item Remove wording removed by P3275 (non-const \code{operator[]}). \item Make intrinsics conversion recommended practice. \item Make \code{simd_flags} template arguments exposition-only. +\item Make \code{simd_alignment} \emph{not} implementation-defined. %\todo Reorder \code{simd} and \code{simd_mask} specification in the wording (mask first).
\end{revision} diff --git a/P1928/wording.tex b/P1928/wording.tex index 5fc9773..550b97b 100644 --- a/P1928/wording.tex +++ b/P1928/wording.tex @@ -350,7 +350,7 @@ \pnum If \tcode{value} is present, the type \tcode{simd_alignment} is a \tcode{BinaryTypeTrait} with -a base characteristic of \tcode{integral_constant} for some implementation-defined +a base characteristic of \tcode{integral_constant} for some unspecified \tcode{N} (see \ref{simd.copy} and \ref{simd.mask.copy}). \begin{note}\tcode{value} identifies the alignment restrictions on pointers used for (converting) loads and stores for the give type \tcode{T} on arrays of type \tcode{U}.\end{note} From 3379b9f40296c9e08079dea295d980ece00dbecc Mon Sep 17 00:00:00 2001 From: Matthias Kretz Date: Thu, 11 Jul 2024 09:24:42 +0200 Subject: [PATCH 09/14] Add notes on reduce requirement on binary_op ChangeLog: * P1928/wording.tex: --- P1928/wording.tex | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/P1928/wording.tex b/P1928/wording.tex index 550b97b..f1cf23e 100644 --- a/P1928/wording.tex +++ b/P1928/wording.tex @@ -1045,6 +1045,13 @@ \tcode{binary_op} can be invoked with two arguments of type \tcode{basic_simd} returning \tcode{basic_simd} for every \tcode{A1} that is an ABI tag type. + \INFO{Better alternative? “[\ldots] for zero or more unspecified ABI tag types \tcode{A1}.”} + \FIXME{This is not supposed to require exhaustive testing of all ABI tags. + What we need to express is that the user-supplied \tcode{binary_op} \emph{can} be called + with every possible ABI tag since different implementations / compiler flags / targets will lead + to a different subset getting called. + Basically, (start waving hands) “\tcode{binary_op} can be invoked with the specializations of + \tcode{basic_simd} that the implementation needs” (stop waving hands).} \pnum\expects \tcode{BinaryOperation} does not modify \tcode{x}. 
From cca4c33e2ffb40c20cc417f2e3aa9175fd1f4613 Mon Sep 17 00:00:00 2001 From: Matthias Kretz Date: Thu, 11 Jul 2024 11:41:00 +0200 Subject: [PATCH 10/14] Reword supported to use enabled/disabled ChangeLog: * P1928/changelog.tex: * P1928/wording.tex: --- P1928/changelog.tex | 2 ++ P1928/wording.tex | 54 ++++++++++++++++++++++++--------------------- 2 files changed, 31 insertions(+), 25 deletions(-) diff --git a/P1928/changelog.tex b/P1928/changelog.tex index 9465ad2..2739bbe 100644 --- a/P1928/changelog.tex +++ b/P1928/changelog.tex @@ -175,6 +175,8 @@ \section{Changelog} \item Make intrinsics conversion recommended practice. \item Make \code{simd_flags} template arguments exposition-only. \item Make \code{simd_alignment} \emph{not} implementation-defined. +\item Reword “supported” to “enabled or disabled”. +\item Apply improved wording from \ref{simd.overview} to \ref{simd.mask.overview}. %\todo Reorder \code{simd} and \code{simd_mask} specification in the wording (mask first). \end{revision} diff --git a/P1928/wording.tex b/P1928/wording.tex index f1cf23e..b2add54 100644 --- a/P1928/wording.tex +++ b/P1928/wording.tex @@ -57,10 +57,14 @@ \tcode{std::float64_t} are vectorizable types if defined ([basic.extended.fp]). \pnum -The term \defn{data-parallel type} refers to all supported (\ref{simd.overview}) specializations of +The term \defn{data-parallel type} refers to all enabled specializations of the \tcode{basic_simd} and \tcode{basic_simd_mask} class templates. A \defn{data-parallel object} is an object of \term{data-parallel type}. +\pnum +Each specialization of \tcode{basic_simd} or \tcode{basic_simd_mask} is either enabled or disabled, +as described in \ref{simd.overview} and \ref{simd.mask.overview}. + \pnum A data-parallel type consists of one or more elements of an underlying vectorizable type, called the \defn{element type}. 
@@ -294,7 +298,7 @@ Where present, \tcode{\deducet} names an ABI tag type that satisfies \begin{itemize} \item \tcode{\simdsizev> == N}, and - \item \tcode{basic_simd>} is supported (see \ref{simd.overview}). + \item \tcode{basic_simd>} is enabled (see \ref{simd.overview}). \end{itemize} % TODO: If we really want the Abis... pack back this needs a paper to LEWG. @@ -316,9 +320,10 @@ \pnum \tcode{\nativeabi} is an implementation-defined alias for an ABI tag. +\tcode{basic_simd} is an enabled specialization. \begin{note} The intent is to use the ABI tag producing the most efficient data-parallel -execution for the element type \tcode{T} that is supported on the currently +execution for the element type \tcode{T} on the currently targeted system. For target architectures with ISA extensions, compiler flags can change the type of the \tcode{\nativeabi} alias. @@ -550,23 +555,24 @@ \pnum Every specialization of \tcode{basic_simd} is a complete type. The types \tcode{basic_simd>} for all vectorizable -\tcode{T} and with \tcode{N} in the range of \crange{1}{64} are \defn{supported}. -It is \impdef\ whether any other \tcode{basic_simd} specializations -with vectorizable \tcode{T} are supported. +\tcode{T} and with \tcode{N} in the range of \crange{1}{64} are enabled. +It is implementation-defined whether any other \tcode{basic_simd} specialization +with vectorizable \tcode{T} is enabled. +Any other specialization of \tcode{basic_simd} is disabled. \begin{note} The intent is for implementations to determine on the basis of the currently - targeted system, whether \tcode{basic_simd} is supported. + targeted system, whether \tcode{basic_simd} is enabled. \end{note} \FIXME{drop the note?} -If \tcode{basic_simd} is not supported, the specialization has a +If \tcode{basic_simd} is disabled, the specialization has a deleted default constructor, deleted destructor, deleted copy constructor, and deleted copy assignment. 
In addition only the \tcode{value_type}, \tcode{abi_type}, and \tcode{mask_type} members are present. -If \tcode{basic_simd} is supported, \tcode{basic_simd} is +If \tcode{basic_simd} is enabled, \tcode{basic_simd} is trivially copyable. \pnum\recommended: @@ -1495,26 +1501,24 @@ \pnum Every specialization of \tcode{basic_simd_mask} is a complete type. -The specialization \tcode{basic_simd_mask} is not supported if the type -\tcode{\integerfrom} does not exist or if it is not a vectorizable type. +The types \tcode{basic_simd_mask>} for all vectorizable +\tcode{T} and with \tcode{N} in the range of \crange{1}{64} are enabled. +It is implementation-defined whether any other \tcode{basic_simd_mask} +specialization with vectorizable \tcode{T} is enabled. +Any other specialization of \tcode{basic_simd_mask} is disabled. -It is implementation-defined whether \tcode{basic_simd_mask} is supported. -\begin{note}The intent is for implementations to decide on the basis of the currently targeted -system.\end{note} +\begin{note} + The intent is for implementations to determine on the basis of the currently + targeted system, whether \tcode{basic_simd_mask} is enabled. +\end{note} +\FIXME{drop the note?} -If \tcode{basic_simd_mask} is not supported, the specialization shall have a deleted +If \tcode{basic_simd_mask} is disabled, the specialization has a deleted default constructor, deleted destructor, deleted copy constructor, and deleted copy assignment. -Otherwise, the following are true: -\begin{itemize} - \item \tcode{is_nothrow_move_constructible_v>}, and - \item \tcode{is_nothrow_move_assignable_v>}, and - \item \tcode{is_nothrow_default_constructible_v>}. -\end{itemize} +In addition only the \tcode{value_type} and \tcode{abi_type} members are present. -\pnum -Default initialization performs no initialization of the elements; value-initialization initializes -each element with \tcode{false}. 
\begin{note}Thus, default initialization leaves the elements in an -indeterminate state.\end{note} +If \tcode{basic_simd_mask} is enabled, \tcode{basic_simd_mask} is +trivially copyable. \pnum Implementations should enable explicit conversion from and to implementation-defined types. This From f2032b1a844d2ac26d2d7f6e224af1fa76d99bd0 Mon Sep 17 00:00:00 2001 From: Matthias Kretz Date: Fri, 12 Jul 2024 09:13:16 +0200 Subject: [PATCH 11/14] Align \expos comments in synopsis ChangeLog: * P1928/wording.tex: --- P1928/wording.tex | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/P1928/wording.tex b/P1928/wording.tex index b2add54..62ae34f 100644 --- a/P1928/wording.tex +++ b/P1928/wording.tex @@ -96,22 +96,22 @@ %\indexhdr{simd} \begin{codeblock} namespace std { - using @\simdsizetype@ = @\seebelow@; // \expos + using @\simdsizetype@ = @\seebelow@; // \expos template constexpr @\simdsizetype\ \simdsizev@ = @\seebelow@; // \expos template constexpr size_t @\maskelementsize@ = @\seebelow@; // \expos - template using @\integerfrom@ = @\seebelow@; // \expos + template using @\integerfrom@ = @\seebelow@; // \expos template - concept @\constexprwrapperlike@ = // \expos + concept @\constexprwrapperlike@ = // \expos convertible_to && equality_comparable_with && bool_constant::value && bool_constant(T()) == T::value>::value; // \ref{simd.abi}, \tcode{simd} ABI tags - template using @\nativeabi@ = @\seebelow@; // \expos - template using @\deducet@ = @\seebelow@; // \expos + template using @\nativeabi@ = @\seebelow@; // \expos + template using @\deducet@ = @\seebelow@; // \expos // \ref{simd.traits}, \tcode{simd} type traits template struct simd_alignment; From 4b9dba644dd7026843c0a5bba6d80104f5cb7ac0 Mon Sep 17 00:00:00 2001 From: Matthias Kretz Date: Tue, 16 Jul 2024 10:52:35 +0200 Subject: [PATCH 12/14] Improve broadcast and generator constraints ChangeLog: * P1928/changelog.tex: * P1928/wording.tex: --- P1928/changelog.tex | 4 +++- P1928/wording.tex 
| 41 +++++++++++++++++++++++++++++++---------- 2 files changed, 34 insertions(+), 11 deletions(-) diff --git a/P1928/changelog.tex b/P1928/changelog.tex index 2739bbe..b7891a6 100644 --- a/P1928/changelog.tex +++ b/P1928/changelog.tex @@ -176,7 +176,9 @@ \section{Changelog} \item Make \code{simd_flags} template arguments exposition-only. \item Make \code{simd_alignment} \emph{not} implementation-defined. \item Reword “supported” to “enabled or disabled”. -\item Apply improved wording from \ref{simd.overview} to \ref{simd.mask.overview}. +\item Apply improved wording from \ref{sec:simd.overview} to \ref{sec:simd.mask.overview}. +\item Add comments for LWG to address to broadcast ctor (\ref{sec:simd.ctor}). +\item Respecify generator ctor to not reuse broadcast constraint (\ref{sec:simd.ctor}). %\todo Reorder \code{simd} and \code{simd_mask} specification in the wording (mask first). \end{revision} diff --git a/P1928/wording.tex b/P1928/wording.tex index 62ae34f..74824df 100644 --- a/P1928/wording.tex +++ b/P1928/wording.tex @@ -610,17 +610,35 @@ \pnum\constraints \tcode{From} satisfies \tcode{convertible_to}, and either \begin{itemize} - \item \tcode{From} satisfies \tcode{\constexprwrapperlike} (\ref{simd.syn}) - and the actual value of \tcode{From::value} after conversion to - \tcode{value_type} will fit into \tcode{value_type} and will produce the - original value when converted back to \tcode{decltype(From::value)}, or - \item \tcode{From} is a vectorizable type and the conversion from \tcode{From} to \tcode{value_type} is value-preserving (\ref{simd.general}), or \item \tcode{From} is not an arithmetic type and does not satisfy - \tcode{\constexprwrapperlike}. 
+ \tcode{\constexprwrapperlike}, or + + \item \tcode{From} satisfies \tcode{\constexprwrapperlike} (\ref{simd.syn}) + and the actual value of \tcode{From::value} after conversion to + \tcode{value_type} will fit into \tcode{value_type} and will produce the + original value when converted back to \tcode{decltype(From::value)}. + \FIXME{ + A value “after conversion to \tcode{To}” is always representable by \tcode{To}. + What I actually implemented is + \tcode{!(unsigned_integral \&\& From::value < decltype(From::value)() + \&\& From::value <= numeric_limits::max() + \&\& From::value >= numeric_limits::lowest()} + } + \INFO{ + Design intent: + I'm trying to allow \tcode{1.f $\rightarrow$ int} while disallowing \tcode{1.1f $\rightarrow$ + int}. + Also, if \tcode{From::value} is a UDT, e.g. fixed-point, I believe we cannot use wording + such as “value can be represented” because how can we speak about the numerical value of a + UDT? Or more importantly, how would you implement such a constraint? That'd be hand waving + at best. We can speak about the value after conversion. But then we don't know what was lost + until we convert it back.\\ + Ultimately, I think we need to aim for a reasonable heuristic, no more. + } \end{itemize} \pnum\effects @@ -661,14 +679,17 @@ \end{itemdecl} \begin{itemdescr} + \pnum Let \tcode{From}$_i$ denote the type + \tcode{decltype(gen(integral_constant<\simdsizetype, $i$>()))}. + \pnum\constraints - \tcode{basic_simd(gen(integral_constant<\simdsizetype, $i$>()))} is well-formed - and the return type of \tcode{gen(integral_constant<\simdsizetype, $i$>())} - satisfies \tcode{convertible_to} \foralli. + \tcode{From}$_i$ satisfies \tcode{convertible_to} \foralli. + In addition, \foralli, if \tcode{From}$_i$ is a vectorizable type, conversion from + \tcode{From}$_i$ to \tcode{value_type} is value-preserving. \pnum\effects Initializes the $i^\text{th}$ element with - \tcode{gen(integral_constant<\simdsizetype, i>())} \foralli. 
+ \tcode{static_cast(gen(integral_constant<\simdsizetype, $i$>()))} \foralli. \pnum The calls to \tcode{gen} are unsequenced with respect to each other. From 78f3310319d878aa6081436f3af0d6838df24fd8 Mon Sep 17 00:00:00 2001 From: Matthias Kretz Date: Tue, 16 Jul 2024 11:03:18 +0200 Subject: [PATCH 13/14] Use to_address on contiguous iterators ChangeLog: * P1928/changelog.tex: * P1928/wording.tex: --- P1928/changelog.tex | 3 +++ P1928/wording.tex | 18 ++++++++++-------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/P1928/changelog.tex b/P1928/changelog.tex index b7891a6..b50cc62 100644 --- a/P1928/changelog.tex +++ b/P1928/changelog.tex @@ -179,6 +179,9 @@ \section{Changelog} \item Apply improved wording from \ref{sec:simd.overview} to \ref{sec:simd.mask.overview}. \item Add comments for LWG to address to broadcast ctor (\ref{sec:simd.ctor}). \item Respecify generator ctor to not reuse broadcast constraint (\ref{sec:simd.ctor}). +\item Use \code{to_address} on contiguous iterators (\ref{sec:simd.ctor} and \ref{sec:simd.copy}). + This is more explicit about allowing memcpy on the complete range rather than + having to iterate the range per element. %\todo Reorder \code{simd} and \code{simd_mask} specification in the wording (mask first). \end{revision} diff --git a/P1928/wording.tex b/P1928/wording.tex index 74824df..14d8506 100644 --- a/P1928/wording.tex +++ b/P1928/wording.tex @@ -13,10 +13,10 @@ \newcommand\flagsRequires[2]{ \item If the template parameter pack \tcode{Flags} contains - \tcode{\alignedflag}, \tcode{addressof(*first)} points to storage + \tcode{\alignedflag}, \tcode{to_address(first)} points to storage aligned by \tcode{simd_alignment_v<#1>}. \item If the template parameter pack \tcode{Flags} contains - \tcode{\overalignedflag}, \tcode{addressof(*first)} + \tcode{\overalignedflag}, \tcode{to_address(first)} points to storage aligned by \tcode{N}.
} @@ -725,7 +725,8 @@ \begin{itemdescr} \SimdLoadDescr {\range{first}{first + size()} is a valid range.} - {Initializes the $i^\text{th}$ element with \tcode{static_cast(first[$i$])} \foralli.} + {Initializes the $i^\text{th}$ element with \tcode{static_cast(to_address(first)[$i$])} + \foralli.} \end{itemdescr} \begin{itemdecl} @@ -736,7 +737,8 @@ \begin{itemdescr} \SimdLoadDescr {\validMaskedRange} - {Initializes the $i^\text{th}$ element with \tcode{mask[$i$] ? static_cast(first[$i$]) : T()} + {Initializes the $i^\text{th}$ element with \tcode{mask[$i$] ? + static_cast(to_address(first)[$i$]) : T()} \foralli.} \end{itemdescr} @@ -751,7 +753,7 @@ \SimdLoadDescr {\range{first}{first + size()} is a valid range.} {Replaces the elements of the \tcode{basic_simd} object such that the $i^\text{th}$ element is - assigned with \tcode{static_cast(first[$i$])} \foralli.} + assigned with \tcode{static_cast(to_address(first)[$i$])} \foralli.} \end{itemdescr} \begin{itemdecl} @@ -763,7 +765,7 @@ \SimdLoadDescr {\validMaskedRange} {Replaces the selected elements of the \tcode{basic_simd} object such that the $i^\text{th}$ - element is replaced with \tcode{static_cast(first[$i$])} \forallmaskedi.} + element is replaced with \tcode{static_cast(to_address(first)[$i$])} \forallmaskedi.} \end{itemdescr} \newcommand\SimdStoreDescr[2]{ @@ -794,7 +796,7 @@ \begin{itemdescr} \SimdStoreDescr {\range{first}{first + size()} is a valid range.} - {Copies all \tcode{basic_simd} elements as if \tcode{first[$i$] = + {Copies all \tcode{basic_simd} elements as if \tcode{to_address(first)[$i$] = static_cast>(operator[]($i$))} \foralli.} \end{itemdescr} @@ -806,7 +808,7 @@ \begin{itemdescr} \SimdStoreDescr {\validMaskedRange} - {Copies the selected elements as if \tcode{first[$i$] = + {Copies the selected elements as if \tcode{to_address(first)[$i$] = static_cast>(operator[]($i$))} \forallmaskedi.} \end{itemdescr} From 2807273535caf4714e8f29a6a4f495b13794b107 Mon Sep 17 00:00:00 2001 From: Matthias 
Kretz Date: Tue, 16 Jul 2024 15:03:09 +0200 Subject: [PATCH 14/14] P1928R11 ChangeLog: * P1928/main.tex: --- P1928/main.tex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/P1928/main.tex b/P1928/main.tex index 42004e1..5174843 100644 --- a/P1928/main.tex +++ b/P1928/main.tex @@ -1,7 +1,7 @@ \newcommand\wgTitle{std::simd --- merge data-parallel types from the Parallelism TS 2} \newcommand\wgName{Matthias Kretz } -\newcommand\wgDocumentNumber{D1928R11} -\newcommand\wgGroup{LWG, LEWG} +\newcommand\wgDocumentNumber{P1928R11} +\newcommand\wgGroup{LWG} \newcommand\wgTarget{\CC{}26} \newcommand\wgAcknowledgements{Thanks to Daniel Towner, Ruslan Arutyunyan, Jonathan Müller, Jeff Garland, and Nicolas Morales for discussions and/or pull requests on this/previous paper(s).}