From 2c038db7cc54bc99afb914b2ab10a0eca499c98d Mon Sep 17 00:00:00 2001
From: Matthias Kretz <m.kretz@gsi.de>
Date: Wed, 22 May 2024 16:39:56 +0200
Subject: [PATCH] Finalize P1928R9

ChangeLog:

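	* P1928/changelog.tex: Note the restored precondition on
	reduce_min_index and reduce_max_index.
	* P1928/main.tex: Set the document number to P1928R9, drop the
	resolved open questions, and comment out the post-approval
	discussion for LEWG.
	* P1928/strawpolls.tex: Add the LEWG telecon 2024-01-16 polls.
	* P1928/wording.tex: Remove noexcept from reduce_min_index and
	reduce_max_index and revise the ABI tag wording.
	* wg21.bib: Update bibliography.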
---
 P1928/changelog.tex  |   4 +
 P1928/main.tex       | 643 +++++++++++++++++++++----------------------
 P1928/strawpolls.tex |  18 ++
 P1928/wording.tex    | 190 ++++++-------
 wg21.bib             |  16 ++
 5 files changed, 440 insertions(+), 431 deletions(-)
diff --git a/P1928/changelog.tex b/P1928/changelog.tex
index 83c4e11..a1eb07b 100644
--- a/P1928/changelog.tex
+++ b/P1928/changelog.tex
@@ -140,4 +140,8 @@ \section{Changelog}
 \item Add instructions to add a new subclause to the table in [numerics.general].
 \item Add instructions to add \code{<simd>} [diff.23.library].
 \item Add \simdsizev to the wording and replace \code{simd_size_v} to actually implement “Make \code{simd_size} exposition-only.”
+\item Restore precondition (and remove \code{noexcept}) on
+  \code{reduce_min_index} and \code{reduce_max_index} as directed by LEWG.
 \end{revision}
+
+  %\todo Reorder \code{simd} and \code{simd_mask} specification in the wording (mask first).
diff --git a/P1928/main.tex b/P1928/main.tex
index 640d0c1..90bf21f 100644
--- a/P1928/main.tex
+++ b/P1928/main.tex
@@ -1,6 +1,6 @@
 \newcommand\wgTitle{std::simd --- merge data-parallel types from the Parallelism TS 2}
 \newcommand\wgName{Matthias Kretz <m.kretz@gsi.de>}
-\newcommand\wgDocumentNumber{D1928R9}
+\newcommand\wgDocumentNumber{P1928R9}
 \newcommand\wgGroup{LWG, LEWG}
 \newcommand\wgTarget{\CC{}26}
 \newcommand\wgAcknowledgements{Thanks to Daniel Towner, Ruslan Arutyunyan, Jonathan Müller, Jeff Garland, and Nicolas Morales for discussions and/or pull requests on this/previous paper(s).}
@@ -490,38 +490,17 @@ \subsection{No freestanding SIMD}\label{sec:freestanding}
 Note that freestanding is just the baseline requirement and embedded targets
 are still free to add \code{simd} support.
 
-\section{Open questions / Outlook}
-\subsection{Correct place for \code{simd} in the IS?}
-
-While \code{simd} is certainly very important for numerics and therefore fits into the “Numerics library” clause, it is also more than that.
-E.g. \code{simd} can be used for vectorization of text processing.
-In principle \code{simd} should be understood similar to fundamental types.
-Is the “General utilities library” clause a better place?
-Or rename “Concurrency support library” to “Parallelism and concurrency support library” and put it there?
-Alternatively, add a new library clause?
-
-I am seeking feedback before making a recommendation.
-
-\TODO{
-  \todoitem LWG prefers to append “Data-parallel types” to the “Numerics library”.
-}
-
+\section{Outlook}
 \subsection{\code{element_reference} is overspecified}
 \code{element_reference} is spelled out in a lot of detail.
 It may be better to define its requirements in a list of requirements or a table instead.
 
 This change is not reflected in the wording, pending encouragement from WG21 (mostly LWG).
 
-\subsection{Implementation hints}\label{sec:implnote}
-We should consider the addition of a note recommending implementations let
-\simd and \mask operations behave like operations of built-in types.
-Specifically, built-in operators are never function calls\footnote{The
-exception may be soft-float?}.
-(cf. \href{https://gcc.gnu.org/PR108030}{GCC PR108030})
-
-\TODO{
-  \todoitem Left completely to QoI. No new note.
-}
+As an alternative, \cite{P3275R0} discusses the removal of non-const
+subscripts altogether.
+This would imply the removal of \code{element_reference}, simplifying the
+wording by a good chunk.
 
 \subsection{Clean up math function overloads}
 The wording that produces \simd overloads misses a few cases and leaves room for ambiguity.
@@ -606,311 +585,311 @@ \subsection{Formatting support}\label{sec:formatting}
 This seems to be a good solution unless there is a demand to format \code{simd}
 objects differently from \code{random_access_range}.
 
-\section{Changes after LEWG approval (for LEWG)}
-
-\subsection{\code{simd_select} overload set}\label{sec:simdselectwording}
-
-\wglink{P1928R6} presented the following overload set of the exposition-only
-hidden friend \code{\simdselect}:
-\begin{codeblock}
-template<class T, class Abi> class basic_simd {
-  // [...]
-  friend constexpr basic_simd @\simdselect@(
-    const mask_type&, const basic_simd&, const basic_simd&) noexcept; // #1
-};
-// [...]
-template<size_t Bytes, class Abi> class basic_simd_mask {
-  // [...]
-  friend constexpr basic_simd_mask @\simdselect@(
-    const basic_simd_mask&, const basic_simd_mask&, const basic_simd_mask&) noexcept; // #2
-  friend constexpr basic_simd_mask @\simdselect@(
-    const basic_simd_mask&, bool, bool) noexcept; // #3
-  template <class T0, class T1>
-    friend constexpr basic_simd<@\seebelow@, Abi>
-      @\simdselect@(const basic_simd_mask&, const T0&, const T1&) noexcept; // #4
-};
-\end{codeblock}
-
-Given \code{std::simd_select(std::simd<double, 4>() == 0, 1, 2)}, the compiler
-would choose overload \code{\#3} because \code{int} is convertible to
-\code{bool} and \code{\#4} is constrained, requiring
-\code{sizeof(\UNSP{non-promoting-common-type}<T0, T1>) == sizeof(double)}.
-That does not match the design intent.
-The intent was for non-boolean and non-simd arguments to pick overload
-\code{\#4} or fail to compile.
-This can be achieved either by replacing \code{bool} with a type that is
-convertible from \code{bool} only, or via \code{same_as<bool> auto} instead of
-\code{bool}.
-The former leads to puzzling error messages, because overload \code{\#4} is not
-mentioned in the resulting diagnostics.
-The latter will lead to a listing of all candidates and the reason why they
-were not viable.
-
-Therefore, the wording for overload \code{\#3} was changed to say
-\code{same_as<bool> auto} instead of \code{bool}.
-
-
-\subsection{Tighten \code{simd_split} specification}\label{sec:bettersimdsplitwording}
-
-The reviewed wording (Varna 2023) for \code{simd_split} left the “epilogue”
-\simd object(s) unspecified.
-A user of \code{simd_split} therefore would have to cope with implementations
-returning one or more \simd objects for the otherwise same input parameters.
-Consider the case
-\code{simd_split\MayBreak{}<\MayBreak{}simd\MayBreak{}<\MayBreak{}int, 8>>(x)}
-with \code{simd\MayBreak{}<\MayBreak{}int, 15>}.
-One implementation might return
-\\\code{tuple<simd<int, 8>, simd<int, 7>>}\\
-while another implementation returns
-\\\code{tuple<simd<int, 8>, simd<int, 4>, simd<int, 2>, simd<int, 1>>}\\
-and yet another could choose to return
-\\\code{tuple<simd<int, 8>, simd<int, 4>, simd<int, 3>>}.
-There are good reasons for either one of these.
-However, letting the implementation choose which one is best doesn't really
-help the user of the interface.
-Therefore, the wording was modified to return a single “epilogue” \simd object.
-In the example above, the user is thus returned a \code{simd<int, 7>} on every
-implementation and can choose to apply another \code{simd_split} to arrive at
-\code{tuple<simd<int, 4>, simd<int, 3>>} and so on.
-
-\subsection{Reconsider precondition on mask reductions}\label{sec:removemaskreductionprecondition}
-
-As directed by LEWG, the precondition on \code{reduce_min_index} and
-\code{reduce_max_index} was removed from the latest wording.
-This required a specification of the return value for the missing case.
-The following results were chosen:
-\begin{enumerate}
-  \item \code{reduce_min_index(simd_mask<int, 4>(false))} returns \code{4} (the SIMD width)
-
-  \item \code{reduce_max_index(simd_mask<int, 4>(false))} returns \code{-1}
-
-  \item \code{reduce_min_index(false)} returns \code{1}
-
-  \item \code{reduce_max_index(false)} returns \code{-1}
-\end{enumerate}
-
-\subsubsection{New information}
-It was always stated in LEWG discussions that removal of the precondition has
-no performance cost on modern processors.
-This is true for some cases but not in general.
-Consider \code{reduce_min_index(simd_mask<int, 4>(...))}: A reasonable x86
-implementation will either already use a bit-mask (AVX512) or turn the
-vector-mask into a bit-mask (e.g. \code{movmskps}).
-\std\code{countr_zero} can be used to determine the position of the first
-non-zero bit in the bit-mask.
-If, however, the given mask was empty, then \code{countr_zero} will return the
-width of the given integer type, typically \code{32}.
-The correct answer for \code{reduce_min_index} needs to be \code{4}, though.
-So a fix-up is required.
-This could either be a branch on \code{32} or the implementation can
-unconditionally set the bit at index 4 before calling \code{countr_zero}.
-In any case, code size increases.
-In the branch-free implementation, the latency of the \code{reduce_min_index}
-call unconditionally increases by one clock cycle.
-
-While avoiding UB is nice, the usefulness of returning \mask\code{::size()} or
-\code{-1} is questionable.
-How can these numbers be used other than for branching?
-Isn't it better to branch on \code{none_of(mask)} before calling
-\code{reduce_min_index}?
-If the goal is to avoid UB, then we need to consider whether the current state
-actually helps. Consider:
-\medskip\begin{lstlisting}
-auto f(std::simd<float> x) {
-  return x[std::reduce_min_index(x < 0.f)];
-}
-\end{lstlisting}
-Here we see a precondition violation on subscripting \code{x}, unless at least one value in \code{x} is negative.
-Currently there are two possible solutions:
-\medskip\begin{lstlisting}[style=Vc]
-auto f(std::simd<float> x) {
-  if (any_of(x < 0.f)
-    return x[std::reduce_min_index(x < 0.f)];
-  return 0.f;
-}
-\end{lstlisting}
-or
-\medskip\begin{lstlisting}[style=Vc]
-auto f(std::simd<float> x) {
-  int i = std::reduce_min_index(x < 0.f);
-  if (i < x.size)
-    return x[i];
-  return 0.f;
-}
-\end{lstlisting}
-The first solution is more efficient and, in my opinion, more readable.
-If unchecked use of \code{reduce_min_index}/\code{reduce_max_index} doesn't
-lead to UB, then it likely leads to logical errors.
-
-In order to follow the “don't pay for whay you don't use” guideline, the
-precondition should be restored. Or:
-
-\subsubsection{Alternative 1}
-Instead of UB, the reduction functions could also return an unspecified value.
-Better even, an unspecified value outside of the range of valid subscript
-indices could be returned (e.g. 32 instead of 4).
-Maybe debug builds can be encouraged to diagnose calls with an empty
-mask\footnote{I guess Contracts would only trigger for a real precondition,
-a.k.a. UB?}.
-
-\subsubsection{Alternative 2}
-The functions could also return \code{std::optional<\simdsizetype>}.
-Exploration results using GCC 13 below.
-The current state of how GCC optimizes the \code{std::optional} case is
-really good, but still more expensive on average than the solution returning an
-unspecified value.
-Note the \code{unspecified_value_nocheck} solution which optimizes even further
-by dropping the compare instruction, reducing execution latency even more.
-(This is valid because \code{vmovmskps} extracts the sign bits into the
-\code{eax} register.)
-
-Personally, I'm also bothered by the “out of place” need to call
-\code{.value()} or add a \code{*}.
-This introduces an inconsistency in how a very similar set of functions needs
-to be used.
-
-Considering the typical use (that the code branches before calling
-\code{reduce_min_index}) and our ability to avoid UB I recommend to return any
-unspecified value if \code{none_of(mask)}.
-
-\paragraph{unspecified value}
-\medskip\begin{lstlisting}
-int unspecified_value(std::simd<int> x)
-{
-  if (any_of(x < 0))
-    return reduce_min_index(x < 0);
-  return -1;
-}
-\end{lstlisting}
-
-\medskip\begin{lstlisting}[language=asm]
-unspecified_value(std::basic_simd<int, std::experimental::parallelism_v2::simd_abi::_VecBuiltin<32> >):
-	vmovdqa	ymm1, ymm0
-	vpxor	xmm0, xmm0, xmm0
-	vpcmpgtd	ymm0, ymm0, ymm1
-	vptest	ymm0, ymm0
-	je	.L10
-	vmovmskps	eax, ymm0
-	tzcnt	eax, eax
-	ret
-.L10:
-	mov	eax, -1
-	ret
-\end{lstlisting}
-
-\medskip\begin{lstlisting}
-int unspecified_value_nocheck(std::simd<int> x)
-{
-  x[1] = -1;
-  return reduce_min_index(x < 0);
-}
-\end{lstlisting}
-
-\medskip\begin{lstlisting}[language=asm]
-unspecified_value_nocheck(std::basic_simd<int, std::experimental::parallelism_v2::simd_abi::_VecBuiltin<32> >):
-	mov	eax, -1
-	vpinsrd	xmm1, xmm0, eax, 1
-	vinserti128	ymm0, ymm0, xmm1, 0x0
-	vmovmskps	eax, ymm0
-	tzcnt	eax, eax
-	ret
-\end{lstlisting}
-
-\paragraph{optional}
-
-\medskip\begin{lstlisting}
-int opt1(std::simd<int> x)
-{
-  if (any_of(x < 0))
-    return reduce_min_index_opt(x < 0).value();
-  return -1;
-}
-\end{lstlisting}
-
-\medskip\begin{lstlisting}[language=asm]
-opt1(std::basic_simd<int, std::experimental::parallelism_v2::simd_abi::_VecBuiltin<32> >):
-	vmovdqa	ymm1, ymm0
-	vpxor	xmm0, xmm0, xmm0
-	vpcmpgtd	ymm0, ymm0, ymm1
-	vptest	ymm0, ymm0
-	je	.L13
-	vmovmskps	eax, ymm0
-	tzcnt	eax, eax
-	ret
----------------------------------------------
-.L13:
-	mov	eax, -1
-	ret
-\end{lstlisting}
-
-\medskip\begin{lstlisting}
-int opt2(std::simd<int> x)
-{
-  auto opt = reduce_min_index_opt(x < 0);
-  return opt.value_or(-1);
-}
-\end{lstlisting}
-
-\medskip\begin{lstlisting}[language=asm]
-opt2(std::basic_simd<int, std::experimental::parallelism_v2::simd_abi::_VecBuiltin<32> >):
-	vmovdqa	ymm1, ymm0
-	vpxor	xmm0, xmm0, xmm0
-	xor	edx, edx
-	vpcmpgtd	ymm0, ymm0, ymm1
-	vptest	ymm0, ymm0
-	vmovmskps	eax, ymm0
-	sete	dl
-	tzcnt	eax, eax
-	test	edx, edx
-	mov	edx, -1
-	cmovne	eax, edx
-	ret
-\end{lstlisting}
-
-\medskip\begin{lstlisting}
-int opt_nocheck(std::simd<int> x)
-{
-  x[1] = -1;
-  return reduce_min_index_opt(x < 0).value();
-}
-\end{lstlisting}
-
-\medskip\begin{lstlisting}[language=asm]
-opt_nocheck(std::basic_simd<int, std::experimental::parallelism_v2::simd_abi::_VecBuiltin<32> >):
-	mov	eax, -1
-	vpinsrd	xmm1, xmm0, eax, 1
-	vinserti128	ymm0, ymm0, xmm1, 0x0
-	vpxor	xmm1, xmm1, xmm1
-	vpcmpgtd	ymm0, ymm1, ymm0
-	vptest	ymm0, ymm0
-	je	.L19
-	vmovmskps	eax, ymm0
-	tzcnt	eax, eax
-	ret
----------------------------------------------
-opt_nocheck(std::basic_simd<int, std::experimental::parallelism_v2::simd_abi::_VecBuiltin<32> >) [clone .cold]:
----------------------------------------------
-.L19:
-	push	rbp
-	mov	rbp, rsp
-	and	rsp, -32
-	vzeroupper
-	call	std::__throw_bad_optional_access()
-\end{lstlisting}
-
-\subsubsection{Suggested Polls}
-\wgPoll{Restore the precondition on \code{reduce_min_index(empty_mask)} and
-\code{reduce_max_index(empty_mask)} (TS status quo).}
-{&&&&}
-
-\wgPoll{Return an unspecified value on \code{reduce_min_index(empty_mask)} and
-\code{reduce_max_index(empty_mask)}.}
-{&&&&}
-
-\wgPoll{Return \code{std::optional<\simdsizetype>} from
-\code{reduce_min_index} and \code{reduce_max_index}.}
-{&&&&}
-
+%\section{Changes after LEWG approval (for LEWG)}
+%
+%\subsection{\code{simd_select} overload set}\label{sec:simdselectwording}
+%
+%\wglink{P1928R6} presented the following overload set of the exposition-only
+%hidden friend \code{\simdselect}:
+%\begin{codeblock}
+%template<class T, class Abi> class basic_simd {
+%  // [...]
+%  friend constexpr basic_simd @\simdselect@(
+%    const mask_type&, const basic_simd&, const basic_simd&) noexcept; // #1
+%};
+%// [...]
+%template<size_t Bytes, class Abi> class basic_simd_mask {
+%  // [...]
+%  friend constexpr basic_simd_mask @\simdselect@(
+%    const basic_simd_mask&, const basic_simd_mask&, const basic_simd_mask&) noexcept; // #2
+%  friend constexpr basic_simd_mask @\simdselect@(
+%    const basic_simd_mask&, bool, bool) noexcept; // #3
+%  template <class T0, class T1>
+%    friend constexpr basic_simd<@\seebelow@, Abi>
+%      @\simdselect@(const basic_simd_mask&, const T0&, const T1&) noexcept; // #4
+%};
+%\end{codeblock}
+%
+%Given \code{std::simd_select(std::simd<double, 4>() == 0, 1, 2)}, the compiler
+%would choose overload \code{\#3} because \code{int} is convertible to
+%\code{bool} and \code{\#4} is constrained, requiring
+%\code{sizeof(\UNSP{non-promoting-common-type}<T0, T1>) == sizeof(double)}.
+%That does not match the design intent.
+%The intent was for non-boolean and non-simd arguments to pick overload
+%\code{\#4} or fail to compile.
+%This can be achieved either by replacing \code{bool} with a type that is
+%convertible from \code{bool} only, or via \code{same_as<bool> auto} instead of
+%\code{bool}.
+%The former leads to puzzling error messages, because overload \code{\#4} is not
+%mentioned in the resulting diagnostics.
+%The latter will lead to a listing of all candidates and the reason why they
+%were not viable.
+%
+%Therefore, the wording for overload \code{\#3} was changed to say
+%\code{same_as<bool> auto} instead of \code{bool}.
+%
+%
+%\subsection{Tighten \code{simd_split} specification}\label{sec:bettersimdsplitwording}
+%
+%The reviewed wording (Varna 2023) for \code{simd_split} left the “epilogue”
+%\simd object(s) unspecified.
+%A user of \code{simd_split} therefore would have to cope with implementations
+%returning one or more \simd objects for the otherwise same input parameters.
+%Consider the case
+%\code{simd_split\MayBreak{}<\MayBreak{}simd\MayBreak{}<\MayBreak{}int, 8>>(x)}
+%with \code{simd\MayBreak{}<\MayBreak{}int, 15>}.
+%One implementation might return
+%\\\code{tuple<simd<int, 8>, simd<int, 7>>}\\
+%while another implementation returns
+%\\\code{tuple<simd<int, 8>, simd<int, 4>, simd<int, 2>, simd<int, 1>>}\\
+%and yet another could choose to return
+%\\\code{tuple<simd<int, 8>, simd<int, 4>, simd<int, 3>>}.
+%There are good reasons for either one of these.
+%However, letting the implementation choose which one is best doesn't really
+%help the user of the interface.
+%Therefore, the wording was modified to return a single “epilogue” \simd object.
+%In the example above, the user is thus returned a \code{simd<int, 7>} on every
+%implementation and can choose to apply another \code{simd_split} to arrive at
+%\code{tuple<simd<int, 4>, simd<int, 3>>} and so on.
+%
+%\subsection{Reconsider precondition on mask reductions}\label{sec:removemaskreductionprecondition}
+%
+%As directed by LEWG, the precondition on \code{reduce_min_index} and
+%\code{reduce_max_index} was removed from the latest wording.
+%This required a specification of the return value for the missing case.
+%The following results were chosen:
+%\begin{enumerate}
+%  \item \code{reduce_min_index(simd_mask<int, 4>(false))} returns \code{4} (the SIMD width)
+%
+%  \item \code{reduce_max_index(simd_mask<int, 4>(false))} returns \code{-1}
+%
+%  \item \code{reduce_min_index(false)} returns \code{1}
+%
+%  \item \code{reduce_max_index(false)} returns \code{-1}
+%\end{enumerate}
+%
+%\subsubsection{New information}
+%It was always stated in LEWG discussions that removal of the precondition has
+%no performance cost on modern processors.
+%This is true for some cases but not in general.
+%Consider \code{reduce_min_index(simd_mask<int, 4>(...))}: A reasonable x86
+%implementation will either already use a bit-mask (AVX512) or turn the
+%vector-mask into a bit-mask (e.g. \code{movmskps}).
+%\std\code{countr_zero} can be used to determine the position of the first
+%non-zero bit in the bit-mask.
+%If, however, the given mask was empty, then \code{countr_zero} will return the
+%width of the given integer type, typically \code{32}.
+%The correct answer for \code{reduce_min_index} needs to be \code{4}, though.
+%So a fix-up is required.
+%This could either be a branch on \code{32} or the implementation can
+%unconditionally set the bit at index 4 before calling \code{countr_zero}.
+%In any case, code size increases.
+%In the branch-free implementation, the latency of the \code{reduce_min_index}
+%call unconditionally increases by one clock cycle.
+%
+%While avoiding UB is nice, the usefulness of returning \mask\code{::size()} or
+%\code{-1} is questionable.
+%How can these numbers be used other than for branching?
+%Isn't it better to branch on \code{none_of(mask)} before calling
+%\code{reduce_min_index}?
+%If the goal is to avoid UB, then we need to consider whether the current state
+%actually helps. Consider:
+%\medskip\begin{lstlisting}
+%auto f(std::simd<float> x) {
+%  return x[std::reduce_min_index(x < 0.f)];
+%}
+%\end{lstlisting}
+%Here we see a precondition violation on subscripting \code{x}, unless at least one value in \code{x} is negative.
+%Currently there are two possible solutions:
+%\medskip\begin{lstlisting}[style=Vc]
+%auto f(std::simd<float> x) {
+%  if (any_of(x < 0.f))
+%    return x[std::reduce_min_index(x < 0.f)];
+%  return 0.f;
+%}
+%\end{lstlisting}
+%or
+%\medskip\begin{lstlisting}[style=Vc]
+%auto f(std::simd<float> x) {
+%  int i = std::reduce_min_index(x < 0.f);
+%  if (i < x.size)
+%    return x[i];
+%  return 0.f;
+%}
+%\end{lstlisting}
+%The first solution is more efficient and, in my opinion, more readable.
+%If unchecked use of \code{reduce_min_index}/\code{reduce_max_index} doesn't
+%lead to UB, then it likely leads to logical errors.
+%
+%In order to follow the “don't pay for what you don't use” guideline, the
+%precondition should be restored. Or:
+%
+%\subsubsection{Alternative 1}
+%Instead of UB, the reduction functions could also return an unspecified value.
+%Better even, an unspecified value outside of the range of valid subscript
+%indices could be returned (e.g. 32 instead of 4).
+%Maybe debug builds can be encouraged to diagnose calls with an empty
+%mask\footnote{I guess Contracts would only trigger for a real precondition,
+%a.k.a. UB?}.
+%
+%\subsubsection{Alternative 2}
+%The functions could also return \code{std::optional<\simdsizetype>}.
+%Exploration results using GCC 13 below.
+%The current state of how GCC optimizes the \code{std::optional} case is
+%really good, but still more expensive on average than the solution returning an
+%unspecified value.
+%Note the \code{unspecified_value_nocheck} solution which optimizes even further
+%by dropping the compare instruction, reducing execution latency even more.
+%(This is valid because \code{vmovmskps} extracts the sign bits into the
+%\code{eax} register.)
+%
+%Personally, I'm also bothered by the “out of place” need to call
+%\code{.value()} or add a \code{*}.
+%This introduces an inconsistency in how a very similar set of functions needs
+%to be used.
+%
+%Considering the typical use (that the code branches before calling
+%\code{reduce_min_index}) and our ability to avoid UB I recommend to return any
+%unspecified value if \code{none_of(mask)}.
+%
+%\paragraph{unspecified value}
+%\medskip\begin{lstlisting}
+%int unspecified_value(std::simd<int> x)
+%{
+%  if (any_of(x < 0))
+%    return reduce_min_index(x < 0);
+%  return -1;
+%}
+%\end{lstlisting}
+%
+%\medskip\begin{lstlisting}[language=asm]
+%unspecified_value(std::basic_simd<int, std::experimental::parallelism_v2::simd_abi::_VecBuiltin<32> >):
+%	vmovdqa	ymm1, ymm0
+%	vpxor	xmm0, xmm0, xmm0
+%	vpcmpgtd	ymm0, ymm0, ymm1
+%	vptest	ymm0, ymm0
+%	je	.L10
+%	vmovmskps	eax, ymm0
+%	tzcnt	eax, eax
+%	ret
+%.L10:
+%	mov	eax, -1
+%	ret
+%\end{lstlisting}
+%
+%\medskip\begin{lstlisting}
+%int unspecified_value_nocheck(std::simd<int> x)
+%{
+%  x[1] = -1;
+%  return reduce_min_index(x < 0);
+%}
+%\end{lstlisting}
+%
+%\medskip\begin{lstlisting}[language=asm]
+%unspecified_value_nocheck(std::basic_simd<int, std::experimental::parallelism_v2::simd_abi::_VecBuiltin<32> >):
+%	mov	eax, -1
+%	vpinsrd	xmm1, xmm0, eax, 1
+%	vinserti128	ymm0, ymm0, xmm1, 0x0
+%	vmovmskps	eax, ymm0
+%	tzcnt	eax, eax
+%	ret
+%\end{lstlisting}
+%
+%\paragraph{optional}
+%
+%\medskip\begin{lstlisting}
+%int opt1(std::simd<int> x)
+%{
+%  if (any_of(x < 0))
+%    return reduce_min_index_opt(x < 0).value();
+%  return -1;
+%}
+%\end{lstlisting}
+%
+%\medskip\begin{lstlisting}[language=asm]
+%opt1(std::basic_simd<int, std::experimental::parallelism_v2::simd_abi::_VecBuiltin<32> >):
+%	vmovdqa	ymm1, ymm0
+%	vpxor	xmm0, xmm0, xmm0
+%	vpcmpgtd	ymm0, ymm0, ymm1
+%	vptest	ymm0, ymm0
+%	je	.L13
+%	vmovmskps	eax, ymm0
+%	tzcnt	eax, eax
+%	ret
+%---------------------------------------------
+%.L13:
+%	mov	eax, -1
+%	ret
+%\end{lstlisting}
+%
+%\medskip\begin{lstlisting}
+%int opt2(std::simd<int> x)
+%{
+%  auto opt = reduce_min_index_opt(x < 0);
+%  return opt.value_or(-1);
+%}
+%\end{lstlisting}
+%
+%\medskip\begin{lstlisting}[language=asm]
+%opt2(std::basic_simd<int, std::experimental::parallelism_v2::simd_abi::_VecBuiltin<32> >):
+%	vmovdqa	ymm1, ymm0
+%	vpxor	xmm0, xmm0, xmm0
+%	xor	edx, edx
+%	vpcmpgtd	ymm0, ymm0, ymm1
+%	vptest	ymm0, ymm0
+%	vmovmskps	eax, ymm0
+%	sete	dl
+%	tzcnt	eax, eax
+%	test	edx, edx
+%	mov	edx, -1
+%	cmovne	eax, edx
+%	ret
+%\end{lstlisting}
+%
+%\medskip\begin{lstlisting}
+%int opt_nocheck(std::simd<int> x)
+%{
+%  x[1] = -1;
+%  return reduce_min_index_opt(x < 0).value();
+%}
+%\end{lstlisting}
+%
+%\medskip\begin{lstlisting}[language=asm]
+%opt_nocheck(std::basic_simd<int, std::experimental::parallelism_v2::simd_abi::_VecBuiltin<32> >):
+%	mov	eax, -1
+%	vpinsrd	xmm1, xmm0, eax, 1
+%	vinserti128	ymm0, ymm0, xmm1, 0x0
+%	vpxor	xmm1, xmm1, xmm1
+%	vpcmpgtd	ymm0, ymm1, ymm0
+%	vptest	ymm0, ymm0
+%	je	.L19
+%	vmovmskps	eax, ymm0
+%	tzcnt	eax, eax
+%	ret
+%---------------------------------------------
+%opt_nocheck(std::basic_simd<int, std::experimental::parallelism_v2::simd_abi::_VecBuiltin<32> >) [clone .cold]:
+%---------------------------------------------
+%.L19:
+%	push	rbp
+%	mov	rbp, rsp
+%	and	rsp, -32
+%	vzeroupper
+%	call	std::__throw_bad_optional_access()
+%\end{lstlisting}
+%
+%\subsubsection{Suggested Polls}
+%\wgPoll{Restore the precondition on \code{reduce_min_index(empty_mask)} and
+%\code{reduce_max_index(empty_mask)} (TS status quo).}
+%{&&&&}
+%
+%\wgPoll{Return an unspecified value on \code{reduce_min_index(empty_mask)} and
+%\code{reduce_max_index(empty_mask)}.}
+%{&&&&}
+%
+%\wgPoll{Return \code{std::optional<\simdsizetype>} from
+%\code{reduce_min_index} and \code{reduce_max_index}.}
+%{&&&&}
+%
 \section{Wording: Add Section 9 of N4808 with modifications}\label{sec:wording}
 
 The following section presents the wording to be applied against the \CC{}
diff --git a/P1928/strawpolls.tex b/P1928/strawpolls.tex
index 1ff493b..70da51a 100644
--- a/P1928/strawpolls.tex
+++ b/P1928/strawpolls.tex
@@ -194,3 +194,21 @@ \subsection{LEWG at Varna 2023}
 \wgPoll{Modify P1928D6 (“simd”) as described above, and then send the revised paper to library for \CC{}26, to be confirmed with a library evolution electronic poll.}
 {16  & 3  & 1  &  0 &0   }
 
+\subsection{LEWG Telecon 2024-01-16}
+\wgPoll{Restore the precondition on \code{reduce_min_index(empty_mask)} and
+\code{reduce_max_index(empty_mask)} (TS status quo, UB).}
+{7 &6 &1 &0 &0}
+
+\wgPoll{Return an unspecified value on \code{reduce_min_index(empty_mask)} and
+\code{reduce_max_index(empty_mask)}.}
+{0 &7 &3 &1 &2}
+
+\wgPoll{Return \code{std::optional} from \code{reduce_min_index} and \code{reduce_max_index}.}
+{0 &1 &2 &7 &4}
+
+\wgPoll{Modify P1928R8 (Merge data-parallel types from the Parallelism TS 2) by
+  restoring the TS specification for
+  \code{reduce_min_index}/\code{reduce_max_index} and adding the change to
+  16.4.2.3 to list the header, and then send the revised paper to LWG for \CC{}26,
+to be confirmed with a Library Evolution electronic poll.}
+{9 &2 &0 &1 &1}
diff --git a/P1928/wording.tex b/P1928/wording.tex
index c9512cc..2ec1a4a 100644
--- a/P1928/wording.tex
+++ b/P1928/wording.tex
@@ -43,15 +43,15 @@
 %transparent fallback to sequential execution.
 \end{note}
 
+\pnum
+The term \defn{data-parallel type} refers to all supported (\ref{simd.overview}) specializations of the \tcode{basic_simd} and \tcode{basic_simd_mask} class templates. A \defn{data-parallel object} is an object of \term{data-parallel type}.
+
 \pnum
 A data-parallel type consists of one or more elements of an underlying vectorizable type,
 called the \defn{element type}.
 The number of elements is a constant for each data-parallel type and called the
 \defn{width} of that type.
-The sequence of elements contained in a data-parallel-type 
-
-\pnum
-The term \defn{data-parallel type} refers to all supported (\ref{simd.overview}) specializations of the \tcode{basic_simd} and \tcode{basic_simd_mask} class templates. A \defn{data-parallel object} is an object of \term{data-parallel type}.
+The elements in a data-parallel type are indexed from 0 to $\textrm{width} - 1$.
 
 \pnum\label{wording.vectorizable.types}%
 The set of \defn{vectorizable types} comprises all standard integer types,
@@ -150,16 +150,16 @@
   template<size_t Bs, class Abi>
     constexpr @\simdsizetype@ reduce_count(const basic_simd_mask<Bs, Abi>&) noexcept;
   template<size_t Bs, class Abi>
-    constexpr @\simdsizetype@ reduce_min_index(const basic_simd_mask<Bs, Abi>&) noexcept;
+    constexpr @\simdsizetype@ reduce_min_index(const basic_simd_mask<Bs, Abi>&);
   template<size_t Bs, class Abi>
-    constexpr @\simdsizetype@ reduce_max_index(const basic_simd_mask<Bs, Abi>&) noexcept;
+    constexpr @\simdsizetype@ reduce_max_index(const basic_simd_mask<Bs, Abi>&);
 
   constexpr bool all_of(same_as<bool> auto) noexcept;
   constexpr bool any_of(same_as<bool> auto) noexcept;
   constexpr bool none_of(same_as<bool> auto) noexcept;
   constexpr @\simdsizetype@ reduce_count(same_as<bool> auto) noexcept;
-  constexpr @\simdsizetype@ reduce_min_index(same_as<bool> auto) noexcept;
-  constexpr @\simdsizetype@ reduce_max_index(same_as<bool> auto) noexcept;
+  constexpr @\simdsizetype@ reduce_min_index(same_as<bool> auto);
+  constexpr @\simdsizetype@ reduce_max_index(same_as<bool> auto);
 
   // \ref{simd.reductions}, \tcode{basic_simd} reductions
   template<class T, class Abi, class BinaryOperation = plus<>>
@@ -263,27 +263,40 @@
 An implementation defines ABI tag types as necessary for the following
 exposition-only aliases.
 
-\pnum
-\tcode{\deducet<T, N>} results in a substitution failure if
+\pnum\label{wording.deducet}
+\tcode{\deducet<T, N>} is defined if
 \begin{itemize}
-  \item \tcode{T} is not a vectorizable type, or
-  \item \tcode{N} is larger than an implementation-defined maximum.
+  \item \tcode{T} is a vectorizable type, and
+  \item \tcode{N} is greater than zero but no larger than an implementation-defined maximum.
 \end{itemize}
-The implementation-defined maximum for \tcode{N} is no smaller than 64.
+The implementation-defined maximum for \tcode{N} is no smaller than 64
+and can differ depending on \tcode{T}.
 
 \pnum
 Where present, \tcode{\deducet<T, N>} names an ABI tag type that satisfies
 \begin{itemize}
   \item \tcode{\simdsizev<T, \deducet<T, N>> == N}, and
-  \item \tcode{basic_simd<T, \deducet<T, N>>} is default constructible (see \ref{simd.overview}).
+  \item \tcode{basic_simd<T, \deducet<T, N>>} is supported
+    (see \ref{simd.overview}).
 \end{itemize}
 
-%\pnum TODO???
+% TODO: If we really want the Abis... pack back this needs a paper to LEWG.
+%       The pack allows e.g. to stay with ymm registers even when zmm is available,
+%         or to implement an MMX ABI that doesn't get used unless explicitly called for.
+%       Also one can imagine a target with multiple different SIMD execution facilities
+%         where moving between them has a high cost and shouldn't happen without a request.
 %\begin{note}
-%The value of \tcode{max_fixed_size<T>} can depend on compiler flags and can
-%change between different compiler versions.
+  %If multiple ABI tags can satisfy the above conditions, differences in \tcode{Abis...} can lead to different results.
 %\end{note}
 
+\INFO{I removed the paragraph saying “The type of \tcode{\deducet<T, N>} in
+  translation unit 1 differs from the type of \tcode{\deducet<T, N>} in
+  translation unit 2 if and only if the type of \tcode{\nativeabi<T>} in
+  translation unit 1 differs from the type of \tcode{\nativeabi<T>} in
+  translation unit 2.” after consulting Jens.
+  He said I can't reasonably say anything about working around ODR problems in an implementation.
+Implementations thus have to figure this out on their own.}
+
 \pnum
 \tcode{\nativeabi<T>} is an implementation-defined alias for an ABI tag.
 \begin{note}
@@ -304,12 +317,6 @@
   \end{itemize}
 \end{example}
 
-\pnum\label{wording.deducet}
-The type of \tcode{\deducet<T, N>} in translation unit 1 differs from the type
-of \tcode{\deducet<T, N>} in translation unit 2 if and only if the type of
-\tcode{\nativeabi<T>} in translation unit 1 differs from the type of
-\tcode{\nativeabi<T>} in translation unit 2.
-
 \rSec1[simd.traits]{\tcode{simd} type traits}
 
 \begin{itemdecl}
@@ -341,11 +348,11 @@
   \begin{itemize}
     \item \tcode{V} is either \tcode{basic_simd<U, Abi0>} or \tcode{basic_simd_mask<UBytes, Abi0>}, where \tcode{U}, \tcode{UBytes}, and \tcode{Abi0} are deduced from \tcode{V}, and
     \item \tcode{T} is a vectorizable type, and
-    \item \tcode{simd_abi::deduce<T, basic_simd<U, Abi0>::size, Abi0>} has a member type \tcode{type}.
+    \item \tcode{\deducet<T, V::size()>} has a member type \tcode{type}.
   \end{itemize}
 
   \pnum
-  Let \tcode{Abi1} denote the type \tcode{deduce_t<T, basic_simd<U, Abi0>::size, Abi0>}.
+  Let \tcode{Abi1} denote the type \tcode{\deducet<T, V::size()>}.
   Where present, the member typedef \tcode{type} names
   \tcode{basic_simd<T, Abi1>} if \tcode V is \tcode{basic_simd<U, Abi0>} or
   \tcode{basic_simd_mask<sizeof(T), Abi1>} if \tcode V is \tcode{basic_simd_mask<UBytes, Abi0>}.
@@ -430,7 +437,7 @@
   using mask_type = basic_simd_mask<sizeof(T), Abi>;
   using abi_type = Abi;
 
-  static constexpr integral_constant<@\simdsizetype@, @\seebelow@> size;
+  static constexpr integral_constant<@\simdsizetype@, @\simdsizev@<T, Abi>> size {};
 
   constexpr basic_simd() noexcept = default;
 
@@ -511,52 +518,39 @@
 \end{codeblock}
 
 \pnum
-The class template \tcode{basic_simd} is a data-parallel type. The width of a given \tcode{basic_simd} specialization is a constant expression, determined by the template parameters.
-
-\TODO{
-  \todoitem \code{basic_simd} is not a data-parallel type. Only its supported
-  specializations are.
-}
+The specializations of class template \tcode{basic_simd} are data-parallel types.
 
 \pnum
 Every specialization of \tcode{basic_simd} is a complete type.
-The specialization \tcode{basic_simd<T, Abi>} is \defn{supported} if \tcode{T} is a vectorizable type and
-\begin{itemize}
-  \item \tcode{Abi} is \tcode{simd_abi::scalar}, or
-  \item \tcode{Abi} is \tcode{simd_abi::fixed_size<N>}, with \tcode{N} constrained as defined in \ref{simd.abi}.
-\end{itemize}
+The types \tcode{basic_simd<T, \deducet<T, N>>} for all vectorizable
+\tcode{T} and with \tcode{N} in the range of \tcode{[1, 64]} are \defn{supported}.
+It is \impdef\ whether any other \tcode{basic_simd<T, Abi>} specializations
+with vectorizable \tcode{T} are supported.
 
-It is implementation-defined whether \tcode{basic_simd<T, Abi>} is supported.
 \begin{note}
-The intent is for implementations to decide on the basis of the currently
-targeted system.
+  The intent is for implementations to determine, on the basis of the currently
+  targeted system, whether \tcode{basic_simd<T, Abi>} is supported.
 \end{note}
+\FIXME{drop the note?}
 
-If \tcode{basic_simd<T, Abi>} is not supported, the specialization shall have a deleted default constructor, deleted destructor, deleted copy constructor, and deleted copy assignment.
-Otherwise, the following are true:
-\begin{itemize}
-  \item \tcode{is_nothrow_move_constructible_v<basic_simd<T, Abi>>}, and
-  \item \tcode{is_nothrow_move_assignable_v<basic_simd<T, Abi>>}, and
-  \item \tcode{is_nothrow_default_constructible_v<basic_simd<T, Abi>>}.
-\end{itemize}
+If \tcode{basic_simd<T, Abi>} is not supported, the specialization has a
+deleted default constructor, deleted destructor, deleted copy constructor, and
+deleted copy assignment.
+In addition, only the \tcode{value_type}, \tcode{abi_type}, and
+\tcode{mask_type} members are present.
 
-\begin{example}
-  Consider an implementation that defines the ABI tags \tcode{__simd_x} and \tcode{__gpu_y}. When the compiler is invoked to translate to a machine that has support for the \tcode{__simd_x} ABI tag for all arithmetic types other than \tcode{long double} and no support for the \tcode{__gpu_y} ABI tag, then:
-  \begin{itemize}
-    \item \tcode{basic_simd<T, simd_abi::__gpu_y>} is not supported for any \tcode{T} and has a deleted constructor.
-    \item \tcode{basic_simd<long double, simd_abi::__simd_x>} is not supported and has a deleted constructor.
-    \item \tcode{basic_simd<double, simd_abi::__simd_x>} is supported.
-    \item \tcode{basic_simd<long double, simd_abi::scalar>} is supported.
-  \end{itemize}
-\end{example}
+If \tcode{basic_simd<T, Abi>} is supported, \tcode{is_trivial_v<basic_simd<T, Abi>>} is \tcode{true}.
 
 \pnum
 Default initialization performs no initialization of the elements;
 value-initialization initializes each element with \tcode{T()}.
 \begin{note}Thus, default initialization leaves the elements in an indeterminate state.\end{note}
+\FIXME{This follows from \tcode{is_trivial}, so just drop it?}
 
 \pnum
-Implementations should enable explicit conversion from and to implementation-defined types. This adds one or more of the following declarations to class \tcode{basic_simd}:
+Implementations are encouraged to enable explicit conversion from and to
+implementation-defined types. This adds one or more of the following
+declarations to class \tcode{basic_simd}:
 
 \begin{codeblock}
 constexpr explicit operator @\impdef@() const;
@@ -574,17 +568,6 @@
   \end{codeblock}
 \end{example}
 
-\rSec2[simd.width]{\tcode{basic_simd} width}
-
-\begin{itemdecl}
-static constexpr integral_constant<@\simdsizetype@, @\seebelow@> size;
-\end{itemdecl}
-
-\begin{itemdescr}
-    \pnum \tcode{size} is an \tcode{integral_constant<@\simdsizetype@, N>} with \tcode{N} equal to the number of elements in a \tcode{basic_simd} object.
-    \pnum \begin{note}This member is present even if the particular \tcode{basic_simd} specialization is not supported.\end{note}
-\end{itemdescr}
-
 \rSec2[simd.reference]{Element references}
 
 \pnum
@@ -753,7 +736,7 @@
 
 \begin{itemdescr}
   \pnum\constraints
-  \tcode{basic_simd<U, UAbi>::size() == size()}.
+  \tcode{\simdsizev<U, UAbi> == size()}.
 
   \pnum\effects
   Constructs an object where the $i^\text{th}$ element equals \tcode{static_cast<T>(x[$i$])} \foralli.
@@ -1316,14 +1299,14 @@
 
 \begin{itemdecl}
 template<class T, class Abi>
-  constexpr auto simd_split(const basic_simd<typename V::value_type, Abi>& x) noexcept;
+  constexpr auto simd_split(const basic_simd<typename T::value_type, Abi>& x) noexcept;
 template<class T, class Abi>
   constexpr auto simd_split(const basic_simd_mask<@\maskelementsize@<T>, Abi>& x) noexcept;
 \end{itemdecl}
 
 \begin{itemdescr}
   % probably not necessary/helpful:
-  %\pnum\mandates \tcode{V::size() <= basic_simd<V::value_type, Abi>::size}.
+  %\pnum\mandates \tcode{T::size() <= \simdsizev<typename T::value_type, Abi>}.
 
   \pnum\constraints
   \begin{itemize}
@@ -1331,23 +1314,23 @@
     \item For the second overload \tcode{T} is a specialization of \tcode{basic_simd_mask}.
   \end{itemize}
 
-  \pnum Let $N$ be \tcode{x.size() / V::size()}.
+  \pnum Let $N$ be \tcode{x.size() / T::size()}.
 
     \pnum\returns
     \begin{itemize}
-      \item If \tcode{x.size() \% V::size() == 0}, an \tcode{array<T, $N$>} with
+      \item If \tcode{x.size() \% T::size() == 0}, an \tcode{array<T, $N$>} with
         the $i^\text{th}$ \simd or \mask element of the $j^\text{th}$ \tcode{array}
         element initialized to the value of the element in \tcode{x} with index
-        \tcode{$i$ + $j$ * V::size()}.
+        \tcode{$i$ + $j$ * T::size()}.
 
       \item Otherwise, a \tcode{tuple} of $N$ objects of type \tcode{T} and one
-        object of type \tcode{resize_simd_t<x.size() \% V::size(), T>}.
+        object of type \tcode{resize_simd_t<x.size() \% T::size(), T>}.
         The $i^\text{th}$ \simd or \mask element of the $j^\text{th}$
         \tcode{tuple} element of type \tcode{T} is initialized to the value of
-        the element in \tcode{x} with index \tcode{$i$ + $j$ * V::size()}.
+        the element in \tcode{x} with index \tcode{$i$ + $j$ * T::size()}.
         The $i^\text{th}$ \simd or \mask element of the \code{N}$^\text{th}$
         \tcode{tuple} element is initialized to the value of the element in
-        \tcode{x} with index \tcode{$i$ + $N$ * V::size()}.
+        \tcode{x} with index \tcode{$i$ + $N$ * T::size()}.
     \end{itemize}
   \end{itemdescr}
 
@@ -1468,9 +1451,9 @@
 \begin{itemize}
   \item All arguments corresponding to \tcode{double} parameters shall be convertible to \tcode{basic_simd<T, Abi>}.
   \item All arguments corresponding to \tcode{double*} parameters shall be of type \tcode{basic_simd<T, Abi>*}.
-  \item All arguments corresponding to parameters of integral type \tcode{U} shall be convertible to \tcode{simd<U, basic_simd<T, Abi>::size>}.
-  \item All arguments corresponding to \tcode{U*}, where \tcode{U} is integral, shall be of type \tcode{simd<U, basic_simd<T, Abi>::size>*}.
-  \item If the corresponding return type is \tcode{double}, the return type of the additional overloads is \tcode{basic_simd<T, Abi>}. Otherwise, if the corresponding return type is \tcode{bool}, the return type of the additional overload is \tcode{basic_simd_mask<T, Abi>}. Otherwise, the return type is \tcode{simd<R, basic_simd<T, Abi>::size>}, with \tcode{R} denoting the corresponding return type.
+  \item All arguments corresponding to parameters of integral type \tcode{U} shall be convertible to \tcode{rebind_simd_t<U, basic_simd<T, Abi>>}.
+  \item All arguments corresponding to \tcode{U*}, where \tcode{U} is integral, shall be of type \tcode{rebind_simd_t<U, basic_simd<T, Abi>>*}.
+  \item If the corresponding return type is \tcode{double}, the return type of the additional overloads is \tcode{basic_simd<T, Abi>}. Otherwise, if the corresponding return type is \tcode{bool}, the return type of the additional overload is \tcode{basic_simd<T, Abi>::mask_type}. Otherwise, the return type is \tcode{rebind_simd_t<R, basic_simd<T, Abi>>}, with \tcode{R} denoting the corresponding return type.
 \end{itemize}
 It is unspecified whether a call to these overloads with arguments that are all convertible to \tcode{basic_simd<T, Abi>} but are not of type \tcode{basic_simd<T, Abi>} is well-formed.
 
@@ -1482,7 +1465,7 @@
 \begin{note}Implementations are encouraged to follow the C specification (especially Annex F).\end{note}
 
 \pnum
-TODO: Allow \tcode{abs(basic_simd<\textrm{signed-integral}>)}.
+\FIXME{Allow \tcode{abs(basic_simd<\textrm{signed-integral}>)}.}
 
 \pnum
 If \tcode{abs} is called with an argument of type \tcode{basic_simd<X, Abi>} for which \tcode{is_unsigned_v<X>} is \tcode{true}, the program is ill-formed.
@@ -1498,7 +1481,7 @@
   using reference = @\seebelow@;
   using abi_type = Abi;
 
-  static constexpr auto size = basic_simd<@\integerfrom@<Bytes>, Abi>::size;
+  static constexpr integral_constant<@\simdsizetype@, @\simdsizev@<@\integerfrom@<Bytes>, Abi>> size {};
 
   constexpr basic_simd_mask() noexcept = default;
 
@@ -1582,14 +1565,11 @@
 \end{codeblock}
 
 \pnum
-The class template \tcode{basic_simd_mask} is a data-parallel type with the element type \tcode{bool}. The width of a given \tcode{basic_simd_mask} specialization is a constant expression, determined by the template parameters. Specifically, \tcode{basic_simd_mask<T, Abi>::size() == basic_simd<T, Abi>::size()}.
+The specializations of class template \tcode{basic_simd_mask} are data-parallel types with element type \tcode{bool}.
 
 \pnum
-Every specialization of \tcode{basic_simd_mask} is a complete type. The specialization \tcode{basic_simd_mask<T, Abi>} is supported if \tcode{T} is a vectorizable type and
-\begin{itemize}
-  \item \tcode{Abi} is \tcode{simd_abi::scalar}, or
-  \item \tcode{Abi} is \tcode{simd_abi::fixed_size<N>}, with \tcode{N} constrained as defined in (\ref{simd.abi}).
-\end{itemize}
+Every specialization of \tcode{basic_simd_mask} is a complete type.
+The specialization \tcode{basic_simd_mask<Bytes, Abi>} is not supported if the type \tcode{\integerfrom<Bytes>} does not exist or if it is not a vectorizable type.
 
 It is implementation-defined whether \tcode{basic_simd_mask<T, Abi>} is supported.
 \begin{note}The intent is for implementations to decide on the basis of the currently targeted system.\end{note}
@@ -1988,24 +1968,28 @@
 
 \begin{itemdecl}
 template<size_t Bytes, class Abi>
-  constexpr @\simdsizetype@ reduce_min_index(const basic_simd_mask<Bytes, Abi>& k) noexcept;
+  constexpr @\simdsizetype@ reduce_min_index(const basic_simd_mask<Bytes, Abi>& k);
 \end{itemdecl}
 
 \begin{itemdescr}
+  \pnum\expects
+  \tcode{any_of(k)} is \tcode{true}.
+
   \pnum\returns
-  If \tcode{none_of(k)} is \tcode{true}, \tcode{size()}, otherwise the lowest
-  element index $i$ where \tcode{k[$i$]} is \tcode{true}.
+  The lowest element index $i$ where \tcode{k[$i$]} is \tcode{true}.
 \end{itemdescr}
 
 \begin{itemdecl}
 template<size_t Bytes, class Abi>
-  constexpr @\simdsizetype@ reduce_max_index(const basic_simd_mask<Bytes, Abi>& k) noexcept;
+  constexpr @\simdsizetype@ reduce_max_index(const basic_simd_mask<Bytes, Abi>& k);
 \end{itemdecl}
 
 \begin{itemdescr}
+  \pnum\expects
+  \tcode{any_of(k)} is \tcode{true}.
+
   \pnum\returns
-  If \tcode{none_of(k)} is \tcode{true}, \tcode{-1}, otherwise the greatest
-  element index $i$ where \tcode{k[$i$]} is \tcode{true}.
+  The greatest element index $i$ where \tcode{k[$i$]} is \tcode{true}.
 \end{itemdescr}
 
 \begin{itemdecl}
@@ -2013,17 +1997,25 @@
 constexpr bool any_of(same_as<bool> auto) noexcept;
 constexpr bool none_of(same_as<bool> auto) noexcept;
 constexpr @\simdsizetype@ reduce_count(same_as<bool> auto x) noexcept;
-constexpr @\simdsizetype@ reduce_min_index(same_as<bool> auto y) noexcept;
-constexpr @\simdsizetype@ reduce_max_index(same_as<bool> auto z) noexcept;
 \end{itemdecl}
 
 \begin{itemdescr}
   \pnum\returns
   \tcode{all_of} and \tcode{any_of} return their arguments; \tcode{none_of}
   returns the negation of its argument; \tcode{reduce_count} returns the
-  integral representation of \tcode{x}; \tcode{reduce_min_index} returns the
-  integral representation of \tcode{!y}; \tcode{reduce_max_index} returns
-  \tcode{-!z}.
+  integral representation of \tcode{x}.
+\end{itemdescr}
+
+\begin{itemdecl}
+constexpr @\simdsizetype@ reduce_min_index(same_as<bool> auto y);
+constexpr @\simdsizetype@ reduce_max_index(same_as<bool> auto z);
+\end{itemdecl}
+
+\begin{itemdescr}
+  \pnum\expects
+  The value of the argument is \tcode{true}.
+
+  \pnum\returns \tcode{0}.
 \end{itemdescr}
 
 % vim: tw=0
diff --git a/wg21.bib b/wg21.bib
index 6bf896a..311ad10 100644
--- a/wg21.bib
+++ b/wg21.bib
@@ -642,3 +642,19 @@ @misc{P2725R0
     url = {https://wg21.link/p2725r0},
     year = {2022}
 }
+@misc{P3275R0,
+    shorthand = {P3275R0},
+    author = {Kretz, Matthias},
+    howpublished = {ISO/IEC \CC{} Standards Committee Paper},
+    title = {{P3275R0: Replace simd operator[] with getter and setter functions --- or not}},
+    url = {https://wg21.link/p3275r0},
+    year = {2024}
+}
+@misc{P3287R0,
+    shorthand = {P3287R0},
+    author = {Kretz, Matthias},
+    howpublished = {ISO/IEC \CC{} Standards Committee Paper},
+    title = {{P3287R0: Exploration of namespaces for std::simd}},
+    url = {https://wg21.link/p3287r0},
+    year = {2024}
+}