Skip to content

Commit

Permalink
Add P3319R0 iota_v<simd> paper
Browse files Browse the repository at this point in the history
ChangeLog:

	* P3319_iota/Makefile: New file.
	* P3319_iota/changelog.tex: New file.
	* P3319_iota/main.tex: New file.
	* P3319_iota/strawpolls.tex: New file.
  • Loading branch information
mattkretz committed Jun 20, 2024
1 parent 5c58e5f commit 043fa34
Show file tree
Hide file tree
Showing 4 changed files with 283 additions and 0 deletions.
133 changes: 133 additions & 0 deletions P3319_iota/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# Document number, read from main.tex: the text between the first '{' and the
# next '}' on the line(s) matching 'newcommand.wgDocumentNumber'.
NAME := $(shell grep 'newcommand.wgDocumentNumber' main.tex|cut -f2 -d'{'|cut -f1 -d'}')
# Base name of the draft build output.
DRAFT := $(NAME)_draft
# TeX search path handed to the LaTeX run: parent directory, then this one.
# NOTE(review): the trailing ':' presumably appends TeX's default search path
# (kpathsea convention) - confirm.
TEXINPUTS := ..:.:
# LaTeX command line. Despite the variable name this runs lualatex, with
# TEXINPUTS set in its environment and wrapped in `flock .`
# (presumably to serialise concurrent runs in this directory - confirm).
PDFLATEX := TEXINPUTS=$(TEXINPUTS) flock . lualatex --shell-escape --halt-on-error --file-line-error --interaction nonstopmode
# Bibliography processor: input directory is the parent, output directory is here.
BIBER := biber --input-directory .. --output-directory .
# Index processor.
MAKEINDEX := makeindex
# Recipes are executed with bash.
SHELL := /bin/bash
# Style files from this and the parent directory, plus the ._dummy.tex placeholder.
EXTRA_DEPS := $(wildcard *.sty ../*.sty) ._dummy.tex
# Shell snippet: write md5 sums of every .aux/.bbl/.toc/.tex/.ind file found
# below the current directory to ._aux-md5sums.
CREATEMD5 := md5sum `find . -regex '.*\.\(aux\|bbl\|toc\|tex\|ind\)'` > ._aux-md5sums
# Shell snippet: check the files against ._aux-md5sums; the PDF rules rerun
# LaTeX for as long as this check fails.
CHECKMD5 := md5sum --quiet -c ._aux-md5sums
# Name used on the web server: 'D', characters 2-5 of $(NAME), then '.pdf'.
UPLOADNAME := D$(shell echo $(NAME)|cut -c2-5).pdf

# Default goal: build the final PDF.
all: final

# Placeholder listed in EXTRA_DEPS and passed as an extra file name to the
# egrep calls that scan for \input/\include; created empty when it is missing.
._dummy.tex:
	touch $@

# Print the names of the main goals, one per line.
help:
	@printf '%s\n' "all" "devel" "draft-loop" "final-loop" "final-figure<number>-loop" "draft" "final" "clean" "autocommit"

# Dependency discovery: scan the LaTeX sources at parse time and collect every
# file the document pulls in, so the PDF rules can list them as prerequisites.

# Level 1: lines of main.tex that contain \input{...} or \include{...} before
# any '%'; the brace argument gets '.tex' appended.
TEX_INPUTS := $(shell egrep '^[^%]*\\in(put|clude){.*?}' main.tex|sed 's@^.*{\(.*\)}.*$$@\1.tex@')
# Level 2: the same scan over the level-1 files (skipped when there are none).
# Arguments without a '.' get '.tex' appended, others are kept as written.
# The level-1 list is appended to the result.
# NOTE(review): ._dummy.tex is passed as an additional file, presumably so
# egrep always receives more than one file name - confirm.
TEX_INPUTS2 := $(shell test -n "$(TEX_INPUTS)" && egrep '^[^%]*\\in(put|clude){.*?}' $(TEX_INPUTS) ._dummy.tex|sed -e 's@^.*{\([^.]*\)}.*$$@\1.tex@' -e 's@^.*{\(.*\)}.*$$@\1@') $(TEX_INPUTS)
# Level 3: scan the level-2 list once more, then replace TEX_INPUTS with the
# union of all levels plus main.tex itself (entries may repeat).
TEX_INPUTS := $(shell test -n "$(TEX_INPUTS2)" && egrep '^[^%]*\\in(put|clude){.*?}' $(TEX_INPUTS2) ._dummy.tex|sed -e 's@^.*{\([^.]*\)}.*$$@\1.tex@' -e 's@^.*{\(.*\)}.*$$@\1@') $(TEX_INPUTS2) $(TEX_INPUTS) main.tex
# Files named in \lstinputlisting[...]{file}. The awk program has a separate
# branch for option lists that are not closed on the same line; sed keeps only
# the brace argument and sort -u removes duplicates.
TEX_LISTINGS := $(shell test -n "$(TEX_INPUTS)" && awk -F '{|}' -- '/lstinputlisting\[[^\]]*$$/ { getline; while($$0 !~ /\] *{.*\..*}$$/) getline; sub(/^.*\]/, ""); print } /lstinputlisting\[.*\] *{.*}/ { print }' $(TEX_INPUTS) |sed 's=.*\(\[[^]]*\]\)\?{\([^}]*\)}.*=\2='| sort -u)
# Data files named in "\addplot ... file {...}".
TEX_PLOTDATA := $(shell grep '\\addplot.* file' $(TEX_INPUTS)|sed 's=^.*\\addplot.* file *{\(.*\)}.*$$=\1=')
# Arguments of \includegraphics (lines mentioning newcommand are skipped),
# each resolved to the first directory of $(TEXINPUTS) in which it exists;
# names that are found nowhere are dropped.
GRAPHICS := $(shell grep 'includegraphics' main.tex $(TEX_INPUTS)|grep -v newcommand|sed -e 's=.*\\includegraphics[^{]*{\([^}]*\)}.*=\1=' |sort -u|while read f; do for d in $(subst :, ,$(TEXINPUTS)); do test -f $$d/$$f && echo $$d/$$f && break; done; done)
# Everything a PDF depends on besides its top-level driver .tex file.
# NOTE(review): ALL_DEPS is assigned a second time further down in this file,
# before the PDF rules; that later assignment is the one those rules see.
ALL_DEPS := $(TEX_INPUTS) $(TEX_LISTINGS) $(TEX_PLOTDATA) $(GRAPHICS) $(EXTRA_DEPS)

# Short goal names.
# devel: run the continuous rebuild loop for the final PDF.
devel: final-loop

# draft / final / mobile: build the corresponding PDF once. The mobile PDF is
# produced by the $(NAME)_%.pdf pattern rule.
draft: $(DRAFT).pdf
final: $(NAME).pdf
mobile: $(NAME)_mobile.pdf

# Copy the final PDF to the web server under $(UPLOADNAME). Each scp attempt is
# limited to 120 s; a dot is printed per failed attempt and the copy is retried
# until it succeeds.
upload: $(NAME).pdf
	@echo -n "Uploading $< ."; \
	until timeout 120s scp -B $< lxpool.gsi.de:web-docs/$(UPLOADNAME); do \
		echo -n .; \
	done; \
	echo .

# Continuous rebuild loop behind the *-loop goals. Not meant to be called
# directly; the callers pass two variables on the command line:
#   loopinternaltarget  base name of the PDF to keep up to date
#   MORE_DEPS           files to watch in addition to $(ALL_DEPS)
#
# Inner loop: once per second ask `make -q` whether the PDF is out of date and
# rebuild it (at idle CPU/IO priority) if so. A failed build leaves the inner
# loop; the terminal title is set to 'failed', a kdialog popup and the terminal
# bell are emitted, and the loop then waits until the newest watched file
# changes before it tries again.
#
# The stamp taken before the build must cover the same files as the stamps
# taken after a failure. If it left out $(MORE_DEPS) and one of those files
# were the newest, the stamps would differ immediately and a failing build
# would be retried in a tight loop.
loop-internal:
	@screenTitle() { case "$$TERM" in screen*|tmux*) printf '\ek%s\e\\' "$$*";; *) printf '\e]1;%s:q\a' "$$*";; esac; }; \
	while true; do \
		while true; do \
			screenTitle 'LaTeX waiting: $(loopinternaltarget)'; \
			if ! $(MAKE) -q $(loopinternaltarget).pdf>/dev/null; then \
				screenTitle 'LaTeX build: $(loopinternaltarget)'; \
				ls -lt $(ALL_DEPS) $(MORE_DEPS) | head -n1 > ._stamp; \
				nice ionice -c idle $(MAKE) $(loopinternaltarget).pdf || break; \
			fi; \
			sleep 1s; \
		done; \
		screenTitle 'failed'; \
		kdialog --passivepopup "LaTeX ($(NAME)) failed" 1; \
		echo -e '\a'; \
		ls -lt $(ALL_DEPS) $(MORE_DEPS) | head -n1 > ._stamp.new; \
		while diff -q ._stamp ._stamp.new; do \
			sleep 1s; \
			ls -lt $(ALL_DEPS) $(MORE_DEPS) | head -n1 > ._stamp.new; \
		done; \
	done

# Entry points for the continuous rebuild loop. Each one re-invokes make on
# loop-internal, telling it which PDF to watch and which driver file to track
# in addition to $(ALL_DEPS).

# Keep the draft PDF up to date.
draft-loop:
	$(MAKE) loop-internal loopinternaltarget=$(DRAFT) MORE_DEPS=../draft.tex

# Keep the final PDF up to date.
final-loop:
	$(MAKE) loop-internal loopinternaltarget=$(NAME) MORE_DEPS=../final.tex

# Any other variant <x>: watches $(NAME)_<x>.pdf, driven by ../<x>.tex.
%-loop:
	$(MAKE) loop-internal loopinternaltarget=$(NAME)_$* MORE_DEPS=../$*.tex

# Keep one externalised figure PDF up to date: once per second, rebuild
# final-figure<number>.pdf if `make -q` reports it out of date.
# The sleep is unconditional so that a failing build is retried once per
# second rather than immediately in a busy loop.
final-figure%-loop:
	@while true; do \
		$(MAKE) -q final-figure$*.pdf || $(MAKE) final-figure$*.pdf; \
		sleep 1s; \
	done

# Remove the final and draft PDFs, LaTeX/biber by-products, and the
# bookkeeping files written by the rebuild loop and the checksum helper.
clean:
	rm -f \
	  $(NAME).pdf $(DRAFT).pdf \
	  *.aux *.bbl *.blg *.brf *.lof *.log *.lol *.lot *.out *.toc \
	  */*.aux *.auxlock ._aux-md5sums \
	  *.bcf *.run.xml \
	  ._stamp ._stamp.new

# Prerequisite list shared by the PDF rules below. It includes $(GRAPHICS) so
# that changing an included image triggers a rebuild; this assignment replaces
# any earlier value of ALL_DEPS, so every dependency class must be listed here.
ALL_DEPS := $(TEX_INPUTS) $(TEX_LISTINGS) $(TEX_PLOTDATA) $(GRAPHICS) $(EXTRA_DEPS)

# Draft PDF: a single LaTeX pass over ../draft.tex. The result is written as
# draft.pdf and renamed to the target (the recipe fails if it is missing or
# empty); biber runs afterwards and its failure is ignored.
$(DRAFT).pdf: ../draft.tex $(ALL_DEPS)
	$(PDFLATEX) ../draft
	@[ -s draft.pdf ] && mv draft.pdf $@
	$(BIBER) draft || true

# Final PDF. Checksums of the auxiliary files are recorded before every LaTeX
# pass; after the first pass and biber, LaTeX is rerun until a pass leaves the
# checksums unchanged. A biber failure is ignored. Finally, for user "mkretz"
# only, the PDF is copied to the web server in the background.
$(NAME).pdf: ../final.tex $(ALL_DEPS)
	@$(CREATEMD5)
	$(PDFLATEX) ../final
	@[ -s final.pdf ] && mv final.pdf $@
	$(BIBER) final || true
	@until $(CHECKMD5); do \
		$(CREATEMD5); \
		$(PDFLATEX) ../final; \
		[ -s final.pdf ] && mv final.pdf $@; \
	done
	test "$(USER)" = "mkretz" && timeout 20s scp -B $@ lxpool.gsi.de:web-docs/$(UPLOADNAME) &

# Variant PDFs ($(NAME)_<x>.pdf built from ../<x>.tex, e.g. "mobile").
# Same scheme as the final PDF: record checksums, run LaTeX, then rerun until
# the auxiliary files stop changing. biber and makeindex run only when their
# input file (<x>.bcf / <x>.ind) exists, and their failures are ignored.
#
# Inside the rerun loop the optional makeindex step runs before the PDF is
# moved into place and is guarded with `|| true`. The loop's exit status is
# that of its last command, so a document without an index would otherwise
# fail the recipe after a successful rerun.
$(NAME)_%.pdf: ../%.tex $(ALL_DEPS)
	@$(CREATEMD5)
	$(PDFLATEX) ../$*
	@test -s $*.pdf && mv $*.pdf $@
	test -f $*.bcf && $(BIBER) $* || true
	test -f $*.ind && $(MAKEINDEX) $* || true
	while ! $(CHECKMD5); do \
		$(CREATEMD5); \
		$(PDFLATEX) ../$*; \
		test -f $*.ind && $(MAKEINDEX) $* || true; \
		test -s $*.pdf && mv $*.pdf $@; \
	done

# Build a single externalised TikZ figure by running LaTeX with the job name
# final-figure<number> and \tikzexternalrealjob set to "final".
# The driver file is ../final.tex, as in the $(NAME).pdf rule; \input{final}
# locates it through the ".." entry of TEXINPUTS.
final-figure%.pdf: ../final.tex $(ALL_DEPS)
	$(PDFLATEX) --halt-on-error --interaction=nonstopmode --jobname "final-figure$*" "\def\tikzexternalrealjob{final}\input{final}"

# Once a minute, commit all tracked changes (message: "auto: " followed by the
# changed paths) and push. The terminal title reflects the current state when
# running inside screen.
#
# screenTitle always returns success, including when $TERM is not a screen
# terminal, so it cannot break the `commit && ... && push` chain. The call
# before the loop is terminated with ';' so the loop is parsed as a loop.
autocommit:
	@screenTitle() { case "$$TERM" in *screen*) printf '\ek%s\e\\' "$$*";; esac; }; \
	screenTitle 'autocommit'; \
	while true; do \
		git commit -am "auto: `git status --porcelain|grep -v '^??'|cut -c4-|paste -s -d' '`" && \
		screenTitle 'committed' && \
		git push && \
		screenTitle 'committed & pushed'; \
		sleep 5s; \
		screenTitle 'autocommit'; \
		sleep 55s; \
	done

# Goals that name actions rather than files; declaring them phony keeps a
# stray file of the same name from masking them. Pattern-matched goals
# (%-loop, final-figure%-loop) are left out on purpose: phony targets are not
# subject to implicit rule search.
.PHONY: all help devel draft final mobile upload loop-internal draft-loop final-loop clean autocommit
5 changes: 5 additions & 0 deletions P3319_iota/changelog.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
\section{Changelog}
(placeholder)
%\begin{revision}
% \todo Everything
%\end{revision}
143 changes: 143 additions & 0 deletions P3319_iota/main.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
\newcommand\wgTitle{Add an iota object for simd (and more)}
\newcommand\wgName{Matthias Kretz <[email protected]>}
\newcommand\wgDocumentNumber{P3319R0}
\newcommand\wgGroup{LEWG}
\newcommand\wgTarget{\CC{}26}
%\newcommand\wgAcknowledgements{Daniel Towner and Ruslan Arutyunyan contributed to this paper via discussions / reviews. Thanks also to Jeff Garland for reviewing.}

\usepackage{mymacros}
\usepackage{wg21}
\setcounter{tocdepth}{2} % show sections and subsections in TOC
\hypersetup{bookmarksdepth=5}
\usepackage{changelog}
\usepackage{underscore}
\usepackage{multirow}

\addbibresource{extra.bib}

\newcommand\simd[1][]{\type{ba\-sic\_simd#1}\xspace}
\newcommand\simdT{\type{ba\-sic\_simd\MayBreak<\MayBreak{}T>}\xspace}
\newcommand\valuetype{\type{val\-ue\_type}\xspace}
\newcommand\referencetype{\type{ref\-er\-ence}\xspace}
\newcommand\mask[1][]{\type{ba\-sic\_simd\_mask#1}\xspace}
\newcommand\maskT{\type{ba\-sic\_simd\_mask\MayBreak<\MayBreak{}T>}\xspace}
\newcommand\wglink[1]{\href{https://wg21.link/#1}{#1}}

\newcommand\nativeabi{\UNSP{native-abi}}
\newcommand\deducet{\UNSP{deduce-t}}
\newcommand\simdsizev{\UNSP{simd-size-v}}
\newcommand\simdsizetype{\UNSP{simd-size-type}}
\newcommand\simdselect{\UNSP{simd-select-impl}}
\newcommand\maskelementsize{\UNSP{mask-element-size}}
\newcommand\integerfrom{\UNSP{integer-from}}
\newcommand\constexprwrapperlike{\UNSP{constexpr-wrapper-like}}

\renewcommand{\lst}[1]{Listing~\ref{#1}}
\renewcommand{\sect}[1]{Section~\ref{#1}}
\renewcommand{\ttref}[1]{Tony~Table~\ref{#1}}
\renewcommand{\tabref}[1]{Table~\ref{#1}}

\begin{document}
\selectlanguage{american}
\begin{wgTitlepage}
There is one important constant in SIMD programming: \code{{0, 1, 2, 3,
...}}. In the standard library we have an algorithm called \code{iota} that
can initialize a range with such values. For \code{simd} we want to have
simple-to-spell constants that scale with the SIMD width. This paper proposes
a simple facility that can be generalized.
\end{wgTitlepage}

\pagestyle{scrheadings}

\input{changelog}
\input{strawpolls}

\section{Motivation}
The 90\% use case for simd generator constructors is a simd with values 0, 1,
2, 3, \ldots{} potentially with scaling and offset applied.
However, often it would be easier and more readable to use an “iota”
\code{simd} object instead.

\begingroup
\smaller[1]
\begin{tabular}{p{.45\textwidth}|p{.45\textwidth}}
generator ctor & iota \\
\hline
\begin{lstlisting}
std::simd<int> a([](int i) { return i; });

std::simd<int> b([](int i) { return 2 + 3 * i; });
\end{lstlisting}
&
\begin{lstlisting}
auto a = std::iota_v<std::simd<int>>;

auto b = 2 + 3 * std::iota_v<std::simd<int>>;
\end{lstlisting}
\end{tabular}
\endgroup

The minimal definition I propose for \simd can look like this:
\medskip\begin{lstlisting}[style=Vc]
template <class T>
inline constexpr T
iota_v;

template <class T>
requires(std::is_arithmetic_v<T>)
inline constexpr T
iota_v<T> = T();

template <detail::simd_type T>
inline constexpr T
iota_v<T> = T([](auto i) { return static_cast<typename T::value_type>(i); });
\end{lstlisting}


\section{Generalization}
If we define a (\code{constexpr}) variable template \std\code{iota_v<T>} where
\code{T} must be a \simd type, we're simply filling a sequence of values.
We can create such an object for any type with static extent.
This is especially interesting for the degenerate case in SIMD-generic
programming, where \code{T} could \eg be an \code{int}.
A \std\code{iota_v<int>} is nothing other than an object of type \code{int} with value
\code{0}.
We can easily generalize to \code{iota_v<std::array<T, N>>} and
\code{iota_v<T[N]>}.
And the next step then is to allow any type that
\begin{itemize}
\item has a static extent,
\item has a \code{value_type} member type,
\item can be list-initialized with \code{N} numbers of type
\code{value_type}, where \code{N} equals the static extent of the type, and
\item where \code{value_type() + 1} is a constant expression and convertible to \code{value_type}.
\end{itemize}

Consequently you could write
\medskip\begin{lstlisting}[style=Vc]
auto x = std::iota_v<float[5]>;
auto y = std::iota_v<std::array<my_fixed_point, 8>>;
// ...
\end{lstlisting}

\section{Relation to list-initialization of \code{simd}}
If we add a constructor to \simd that enables list-initialization, then many
users might use that in place of a generator constructor.
This leads to code that doesn't scale with the vector width anymore.
Therefore we should provide a simple facility that works better and is more
portable.

\section{Proposed polls}

\wgPoll{We want an iota facility for \simd}{&&&&}

\wgPoll{The iota facility should be generalized to scalars}{&&&&}

\wgPoll{The iota facility should be generalized to any sequence of static extent}{&&&&}

\section{Wording}\label{sec:wording}

TBD after deciding on the preferred solution.

\end{document}
% vim: sw=2 sts=2 ai et tw=0
2 changes: 2 additions & 0 deletions P3319_iota/strawpolls.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
\section{Straw Polls}
(placeholder)

0 comments on commit 043fa34

Please sign in to comment.