Skip to content

Commit

Permalink
D3287R0 (exploration of simd namespaces) draft ready for review
Browse files Browse the repository at this point in the history
ChangeLog:

	* P3287_exploration_of_namespaces_for_simd/Makefile: New file.
	* P3287_exploration_of_namespaces_for_simd/alt1.tex: New file.
	* P3287_exploration_of_namespaces_for_simd/alt2.tex: New file.
	* P3287_exploration_of_namespaces_for_simd/alt3.tex: New file.
	* P3287_exploration_of_namespaces_for_simd/alt4.tex: New file.
	* P3287_exploration_of_namespaces_for_simd/alt5.tex: New file.
	* P3287_exploration_of_namespaces_for_simd/alt6.tex: New file.
	* P3287_exploration_of_namespaces_for_simd/alt7.tex: New file.
	* P3287_exploration_of_namespaces_for_simd/changelog.tex: New file.
	* P3287_exploration_of_namespaces_for_simd/main.tex: New file.
	* P3287_exploration_of_namespaces_for_simd/strawpolls.tex: New file.
	* listingscpp.sty:
	* wg21.sty:
  • Loading branch information
mattkretz committed May 21, 2024
1 parent 1cd964d commit 0c41051
Show file tree
Hide file tree
Showing 13 changed files with 1,462 additions and 20 deletions.
133 changes: 133 additions & 0 deletions P3287_exploration_of_namespaces_for_simd/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# Document number: the text between the first '{' and the next '}' on the
# line of main.tex that matches 'newcommand.wgDocumentNumber'.
NAME := $(shell grep 'newcommand.wgDocumentNumber' main.tex|cut -f2 -d'{'|cut -f1 -d'}')
# Base name of the draft PDF.
DRAFT := $(NAME)_draft
# TeX search path: parent directory first, then this directory.
# NOTE(review): the trailing ':' presumably keeps TeX's default search path -- confirm.
TEXINPUTS := ..:.:
# LaTeX command: lualatex, run under `flock .` with TEXINPUTS set in its environment.
PDFLATEX := TEXINPUTS=$(TEXINPUTS) flock . lualatex --shell-escape --halt-on-error --file-line-error --interaction nonstopmode
# Bibliography command: input directory is the parent, output directory is this one.
BIBER := biber --input-directory .. --output-directory .
MAKEINDEX := makeindex
# Recipes and $(shell ...) calls run under bash.
SHELL := /bin/bash
# Style files from this and the parent directory, plus the ._dummy.tex placeholder.
EXTRA_DEPS := $(wildcard *.sty ../*.sty) ._dummy.tex
# Snapshot the checksums of every .aux/.bbl/.toc/.tex/.ind file below this
# directory into ._aux-md5sums ...
CREATEMD5 := md5sum `find . -regex '.*\.\(aux\|bbl\|toc\|tex\|ind\)'` > ._aux-md5sums
# ... and verify that snapshot later; the rules below rerun LaTeX while this fails.
CHECKMD5 := md5sum --quiet -c ._aux-md5sums
# Remote file name used for uploads: "D" + characters 2-5 of NAME + ".pdf".
UPLOADNAME := D$(shell echo $(NAME)|cut -c2-5).pdf

# Default goal: build the final PDF.
all: final

# Placeholder file created empty on demand. It is listed in EXTRA_DEPS and is
# passed as an extra file argument to the egrep calls that compute
# TEX_INPUTS2 / TEX_INPUTS.
._dummy.tex:
touch $@

# Print the names of the main targets, one per line.
# The `mobile` and `upload` targets defined in this file are not listed here.
help:
@echo "all"
@echo "devel"
@echo "draft-loop"
@echo "final-loop"
@echo "final-figure<number>-loop"
@echo "draft"
@echo "final"
@echo "clean"
@echo "autocommit"

# Dependency discovery, evaluated once when the Makefile is read.
#
# Pass 1: names given to \input{...} / \include{...} in main.tex (only where the
# command appears before any '%' on the line), each with ".tex" appended.
TEX_INPUTS := $(shell egrep '^[^%]*\\in(put|clude){.*?}' main.tex|sed 's@^.*{\(.*\)}.*$$@\1.tex@')
# Pass 2: the same scan over the files found in pass 1. Here ".tex" is appended
# only to names that contain no '.'; other names are kept as written.
# The pass-1 files are appended to the result.
TEX_INPUTS2 := $(shell test -n "$(TEX_INPUTS)" && egrep '^[^%]*\\in(put|clude){.*?}' $(TEX_INPUTS) ._dummy.tex|sed -e 's@^.*{\([^.]*\)}.*$$@\1.tex@' -e 's@^.*{\(.*\)}.*$$@\1@') $(TEX_INPUTS)
# Pass 3: scan the pass-2 list once more, then overwrite TEX_INPUTS with the
# new names followed by the pass-2 list, the pass-1 list and main.tex.
TEX_INPUTS := $(shell test -n "$(TEX_INPUTS2)" && egrep '^[^%]*\\in(put|clude){.*?}' $(TEX_INPUTS2) ._dummy.tex|sed -e 's@^.*{\([^.]*\)}.*$$@\1.tex@' -e 's@^.*{\(.*\)}.*$$@\1@') $(TEX_INPUTS2) $(TEX_INPUTS) main.tex
# Files named by \lstinputlisting[...]{file} in the TeX inputs, including the
# case where the option list continues on following lines; sorted, duplicates removed.
TEX_LISTINGS := $(shell test -n "$(TEX_INPUTS)" && awk -F '{|}' -- '/lstinputlisting\[[^\]]*$$/ { getline; while($$0 !~ /\] *{.*\..*}$$/) getline; sub(/^.*\]/, ""); print } /lstinputlisting\[.*\] *{.*}/ { print }' $(TEX_INPUTS) |sed 's=.*\(\[[^]]*\]\)\?{\([^}]*\)}.*=\2='| sort -u)
# Data files named by "\addplot ... file {name}" in the TeX inputs.
TEX_PLOTDATA := $(shell grep '\\addplot.* file' $(TEX_INPUTS)|sed 's=^.*\\addplot.* file *{\(.*\)}.*$$=\1=')
# \includegraphics arguments (lines containing "newcommand" are skipped), each
# resolved to the first directory of TEXINPUTS in which the file exists.
# Names that are found in none of those directories are dropped.
GRAPHICS := $(shell grep 'includegraphics' main.tex $(TEX_INPUTS)|grep -v newcommand|sed -e 's=.*\\includegraphics[^{]*{\([^}]*\)}.*=\1=' |sort -u|while read f; do for d in $(subst :, ,$(TEXINPUTS)); do test -f $$d/$$f && echo $$d/$$f && break; done; done)
# Everything a PDF build depends on besides its top-level .tex file.
ALL_DEPS := $(TEX_INPUTS) $(TEX_LISTINGS) $(TEX_PLOTDATA) $(GRAPHICS) $(EXTRA_DEPS)

# Convenience aliases for the loop target and the individual PDFs.
devel: final-loop

draft: $(DRAFT).pdf
final: $(NAME).pdf
mobile: $(NAME)_mobile.pdf

# Copy the final PDF to lxpool.gsi.de:web-docs/$(UPLOADNAME). Each scp attempt
# is limited to 120 seconds and is retried until one succeeds; a dot is printed
# for every failed attempt.
upload: $(NAME).pdf
@echo -n "Uploading $< ."
@while ! timeout 120s scp -B $< lxpool.gsi.de:web-docs/$(UPLOADNAME); do echo -n .; done
@echo .

# Rebuild-on-change loop; never terminates on its own.
#
# Expects two variables from the invoking make command line:
#   loopinternaltarget  base name of the PDF to keep up to date
#   MORE_DEPS           additional files to watch (the top-level .tex file)
#
# Inner loop: once per second ask `make -q` whether the PDF is out of date and,
# if so, rebuild it at idle priority. A failed build leaves the inner loop.
# After a failure: set the terminal title to 'failed', show a kdialog popup,
# ring the bell, then wait until the newest watched file differs from the one
# recorded in ._stamp before trying again.
#
# ._stamp and ._stamp.new must be computed from the same file list
# ($(ALL_DEPS) $(MORE_DEPS)). Otherwise, whenever the MORE_DEPS file is the
# newest one, the two stamps differ immediately and a failing build is retried
# without waiting for an edit.
loop-internal:
	@screenTitle() { case "$$TERM" in screen*|tmux*) printf '\ek%s\e\\' "$$*";; *) printf '\e]1;%s:q\a' "$$*";; esac }; \
	while true; do \
	  while true; do \
	    screenTitle 'LaTeX waiting: $(loopinternaltarget)'; \
	    if ! $(MAKE) -q $(loopinternaltarget).pdf>/dev/null; then \
	      screenTitle 'LaTeX build: $(loopinternaltarget)'; \
	      ls -lt $(ALL_DEPS) $(MORE_DEPS) | head -n1 > ._stamp; \
	      nice ionice -c idle $(MAKE) $(loopinternaltarget).pdf || break; \
	    fi; \
	    sleep 1s; \
	  done; \
	  screenTitle 'failed'; \
	  kdialog --passivepopup "LaTeX ($(NAME)) failed" 1; \
	  echo -e '\a'; \
	  ls -lt $(ALL_DEPS) $(MORE_DEPS) | head -n1 > ._stamp.new; \
	  while diff -q ._stamp ._stamp.new; do \
	    sleep 1s; \
	    ls -lt $(ALL_DEPS) $(MORE_DEPS) | head -n1 > ._stamp.new; \
	  done; \
	done

# Entry points for the rebuild loop. Each one re-invokes make on loop-internal
# with the PDF base name to watch (loopinternaltarget) and the matching
# top-level .tex file from the parent directory (MORE_DEPS).
draft-loop:
$(MAKE) loopinternaltarget=$(DRAFT) MORE_DEPS=../draft.tex loop-internal

final-loop:
$(MAKE) loopinternaltarget=$(NAME) MORE_DEPS=../final.tex loop-internal

# Generic variant: <x>-loop watches $(NAME)_<x>.pdf and ../<x>.tex.
%-loop:
$(MAKE) loopinternaltarget=$(NAME)_$* MORE_DEPS=../$*.tex loop-internal

# Keep final-figure<N>.pdf up to date forever: once per second check with
# `make -q` whether it is out of date and rebuild it if so.
# The sleep is a separate command on purpose: written as `A || B && sleep`,
# the shell groups it as `(A || B) && sleep`, so a failing build would skip the
# sleep and be retried in a busy loop.
final-figure%-loop:
	@while true; do $(MAKE) -q final-figure$*.pdf || $(MAKE) final-figure$*.pdf; sleep 1s; done

# Remove the final and draft PDFs, LaTeX/biber intermediate files, the checksum
# snapshot and the stamp files written by loop-internal.
clean:
rm -f $(NAME).pdf $(DRAFT).pdf *.aux *.bbl *.blg *.brf *.lof *.log *.lol *.lot *.out *.toc */*.aux *.auxlock ._aux-md5sums *.bcf *.run.xml ._stamp ._stamp.new

# NOTE(review): this replaces the earlier `ALL_DEPS :=` definition for every
# rule below and for all recipes, and it omits $(GRAPHICS), so included
# graphics do not trigger rebuilds. Confirm whether dropping them is intended.
ALL_DEPS=$(TEX_INPUTS) $(TEX_LISTINGS) $(EXTRA_DEPS) $(TEX_PLOTDATA)

# Draft PDF: a single LaTeX pass over ../draft, after which the non-empty
# draft.pdf is moved to the target name. biber runs afterwards and its failure
# is ignored; no further LaTeX pass follows in this rule.
$(DRAFT).pdf: ../draft.tex $(ALL_DEPS)
$(PDFLATEX) ../draft
@test -s draft.pdf && mv draft.pdf $@
$(BIBER) draft || true

# Final PDF. Snapshot the checksums of the auxiliary files, run LaTeX on
# ../final, move the non-empty final.pdf to the target name and run biber
# (failure ignored). Then repeat snapshot + LaTeX + move until the checksum
# check passes, i.e. until a LaTeX run no longer changes the tracked files.
# The last line starts a background upload (20 second limit) only when
# $(USER) is "mkretz". Because it is backgrounded, it does not affect the
# recipe's result.
$(NAME).pdf: ../final.tex $(ALL_DEPS)
@$(CREATEMD5)
$(PDFLATEX) ../final
@test -s final.pdf && mv final.pdf $@
$(BIBER) final || true
@while ! $(CHECKMD5); do \
$(CREATEMD5); \
$(PDFLATEX) ../final; \
test -s final.pdf && mv final.pdf $@; \
done
test "$(USER)" = "mkretz" && timeout 20s scp -B $@ lxpool.gsi.de:web-docs/$(UPLOADNAME) &

# Variant PDFs: $(NAME)_<x>.pdf is built from ../<x>.tex.
#
# Snapshot the checksums of the auxiliary files, run LaTeX, move the non-empty
# <x>.pdf to the target name, then run biber and makeindex only if their input
# files (<x>.bcf, <x>.ind) exist. Afterwards repeat snapshot + LaTeX + move +
# makeindex until the checksum check passes.
#
# The makeindex step inside the loop ends in `|| true`. It is the last command
# of the loop body, and a while loop returns the status of the last body
# command it ran. Without it, a document that has no .ind file would make the
# recipe fail after an otherwise successful build.
$(NAME)_%.pdf: ../%.tex $(ALL_DEPS)
	@$(CREATEMD5)
	$(PDFLATEX) ../$*
	@test -s $*.pdf && mv $*.pdf $@
	test -f $*.bcf && $(BIBER) $* || true
	test -f $*.ind && $(MAKEINDEX) $* || true
	while ! $(CHECKMD5); do \
	  $(CREATEMD5); \
	  $(PDFLATEX) ../$*; \
	  test -s $*.pdf && mv $*.pdf $@; \
	  test -f $*.ind && $(MAKEINDEX) $* || true; \
	done

# Build final-figure<N>.pdf: run $(PDFLATEX) with the job name
# "final-figure<N>" on \input{final}, with \tikzexternalrealjob defined as
# "final". --halt-on-error and the nonstopmode interaction are already part of
# $(PDFLATEX) and are repeated here.
# NOTE(review): the prerequisite is final.tex in this directory, whereas the
# other rules depend on ../final.tex -- confirm which one is intended.
final-figure%.pdf: final.tex $(ALL_DEPS)
$(PDFLATEX) --halt-on-error --interaction=nonstopmode --jobname "final-figure$*" "\def\tikzexternalrealjob{final}\input{final}"

# Commit and push all tracked changes once a minute, forever.
#
# The commit message is "auto: " followed by the changed paths reported by
# `git status --porcelain` (untracked files excluded), joined with spaces.
# `git push` runs only when the commit succeeded.
#
# screenTitle updates the terminal title when $TERM contains "screen". It uses
# an `if` so that it returns success on other terminals as well. With the
# former `grep && printf` form it returned failure there, which cut the
# `commit && screenTitle && push` chain short and skipped the push.
# The `;` after the first screenTitle call is required: without it the
# following `while true; do` is parsed as arguments and the shell reports a
# syntax error at `do`.
autocommit:
	@screenTitle() { if echo $$TERM|grep -q screen; then printf '\ek%s\e\\' "$$*"; fi; }; \
	screenTitle 'autocommit'; \
	while true; do \
	  git commit -am "auto: `git status --porcelain|grep -v '^??'|cut -c4-|paste -s -d' '`" && \
	  screenTitle 'committed' && \
	  git push && \
	  screenTitle 'committed & pushed'; \
	  sleep 5s; \
	  screenTitle 'autocommit'; \
	  sleep 55s; \
	done

# Targets that name an action or an alias rather than a file. Declaring them
# phony keeps a stray file of the same name from making them look up to date.
# Pattern targets (%-loop, final-figure%-loop) cannot be listed here.
.PHONY: all help devel draft final mobile upload loop-internal draft-loop final-loop clean autocommit
131 changes: 131 additions & 0 deletions P3287_exploration_of_namespaces_for_simd/alt1.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
\subsection{Alternative 1: every function is a non-member with \code{simd} prefix}

\medskip\begin{lstlisting}[style=Vc]
template<class V, class G>
V
simd_generate(G&& gen);

template<class V = void, class It, class... Flags>
conditional_t<is_same_v<V, void>, simd<iter_value_t<It>>, V>
simd_copy_from(It first, simd_flags<Flags...> f = {});

template<class Rg, std::integral Idx, class AbiIdx, class... Flags>
simd<ranges::range_value_t<Rg>, basic_simd<Idx, AbiIdx>::size()>
simd_gather_from(const Rg& in, const basic_simd<Idx, AbiIdx>& indexes,
simd_flags<Flags...> f = {});

template<size_t SizeSelector = 0, class T, class Abi, class PermuteGenerator>
simd<T, output-size>
simd_permute(const basic_simd<T, Abi>& v, PermuteGenerator&& fn);

template<size_t Bytes, class Abi, class T, class U>
auto
simd_select(const basic_simd_mask<Bytes, Abi>& c, const T& a, const U& b)
-> decltype(simd-select-impl(c, a, b));

template<class T, class Abi>
basic_simd<T, Abi>
simd_exp(const basic_simd<T, Abi>& x);

template<class T, class Abi>
basic_simd<T, Abi>
simd_min(const basic_simd<T, Abi>& x, const basic_simd<T, Abi>& y);

template<size_t Bs, class Abi>
bool
simd_all_of(const basic_simd_mask<Bs, Abi>&);

template<class T>
concept simd_integral = /*...*/;

template<class T>
concept simd_generic_integral = integral<T> or simd_integral<T>;
\end{lstlisting}

Usage example:
\medskip\begin{lstlisting}[style=Vc]
void f(std::simd<float> vf, const std::vector<int>& data) {
auto iota = std::simd_generate<std::simd<int>>([](int i) { return i; });
auto chunk = std::simd_copy_from(data.begin());
auto chunk_swapped = std::simd_gather_from(data, iota ^ 1);
auto chunk_swapped2 = std::simd_permute(chunk, [](int i) { return i ^ 1; });
assert(std::simd_all_of(chunk_swapped == chunk_swapped2));

vf = std::simd_select(vf > 1.f, 1.f, vf);
vf = std::simd_exp(vf);
auto lo = std::simd_min(iota, chunk);
}
\end{lstlisting}

There is little variation possible for the above code.
The most important variation is using unqualified calls, relying on ADL:
\medskip\begin{lstlisting}[style=Vc]
void f(std::simd<float> vf, const std::vector<int>& data) {
auto iota = std::simd_generate<std::simd<int>>([](int i) { return i; });
auto chunk = std::simd_copy_from(data.begin());
auto chunk_swapped = simd_gather_from(data, iota ^ 1);
auto chunk_swapped2 = simd_permute(chunk, [](int i) { return i ^ 1; });
assert(simd_all_of(chunk_swapped == chunk_swapped2));

vf = simd_select(vf > 1.f, 1.f, vf);
vf = simd_exp(vf);
auto lo = simd_min(iota, chunk);
}
\end{lstlisting}

For \simdgeneric programming a trivial example looks like this:
\medskip\begin{lstlisting}[style=Vc]
template<std::integral T>
T scalar_only(T a, T b) {
return 2 * std::min(a, b);
}

template<std::simd_integral T>
T simd_only(T a, T b) {
return 2 * std::simd_min(a, b);
}

template<std::simd_generic_integral T>
T generic(T a, T b) {
if constexpr (std::simd_integral<T>)
return 2 * std::simd_min(a, b);
else
return 2 * std::min(a, b);
}
\end{lstlisting}

The ability to constrain a function like this fills a gap in the TS that I
ran into while using \stdx\code{simd} in the core of the GNU Radio framework.
The TS could not have proposed any concepts, since it predates them.
Consequently, constraining a function in any of the three ways shown above
required an ad-hoc solution in GNU Radio.

However, looking at the implementation of the \code{generic} function above,
having to wrap every call in a constexpr-if branch can't be what we want.

\begin{description}
\item[pros]
\begin{itemize}
\item Consistent.
\item[$\Rightarrow$] Users don't need to remember which functions don't
need a \code{simd} prefix.

\item Consistent naming scheme for SIMD and \simdgeneric concepts.
\end{itemize}

\item[cons]
\begin{itemize}
\item Verbose.
\item[$\Rightarrow$] There's a lot of “simd” spelled out in the code.
It is not adding information (IOW: it's noise) -- at least in this
code.

\item \simdgeneric programming is barely possible (because it requires
too many constexpr-if branches).
\end{itemize}
\end{description}

\myrating{unacceptable for lack of \simdgeneric programming;
too verbose without opt-out of the verbosity;
there must be a better alternative}
118 changes: 118 additions & 0 deletions P3287_exploration_of_namespaces_for_simd/alt2.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
\subsection{Alternative 2: every function is a non-member without \code{simd} prefix}

\medskip\begin{lstlisting}[style=Vc]
template<class V, class G>
V
generate(G&& gen);

template<class V = void, class It, class... Flags>
conditional_t<is_same_v<V, void>, simd<iter_value_t<It>>, V>
copy_from(It first, simd_flags<Flags...> f = {});

template<class Rg, std::integral Idx, class AbiIdx, class... Flags>
simd<ranges::range_value_t<Rg>, basic_simd<Idx, AbiIdx>::size()>
gather_from(const Rg& in, const basic_simd<Idx, AbiIdx>& indexes,
simd_flags<Flags...> f = {});

template<size_t SizeSelector = 0, class T, class Abi, class PermuteGenerator>
simd<T, output-size>
permute(const basic_simd<T, Abi>& v, PermuteGenerator&& fn);

template<size_t Bytes, class Abi, class T, class U>
auto
select(const basic_simd_mask<Bytes, Abi>& c, const T& a, const U& b)
-> decltype(simd-select-impl(c, a, b));

template<class T, class Abi>
basic_simd<T, Abi>
exp(const basic_simd<T, Abi>& x);

template<class T, class Abi>
basic_simd<T, Abi>
min(const basic_simd<T, Abi>& x, const basic_simd<T, Abi>& y);

template<size_t Bs, class Abi>
bool
all_of(const basic_simd_mask<Bs, Abi>&);

// no way around a prefix:
template<class T>
concept simd_integral = /*...*/;

template<class T>
concept simd_generic_integral = integral<T> or simd_integral<T>;
\end{lstlisting}

Usage example:
\medskip\begin{lstlisting}[style=Vc]
void f(std::simd<float> vf, const std::vector<int>& data) {
auto iota = std::generate<std::simd<int>>([](int i) { return i; });
auto chunk = std::copy_from(data.begin());
auto chunk_swapped = std::gather_from(data, iota ^ 1);
auto chunk_swapped2 = std::permute(chunk, [](int i) { return i ^ 1; });
assert(std::all_of(chunk_swapped == chunk_swapped2));

vf = std::select(vf > 1.f, 1.f, vf);
vf = std::exp(vf);
auto lo = std::min(iota, chunk);
}
\end{lstlisting}

There is little variation possible for the above code.
The most important variation is using unqualified calls, relying on ADL:
\medskip\begin{lstlisting}[style=Vc]
void f(std::simd<float> vf, const std::vector<int>& data) {
auto iota = std::generate<std::simd<int>>([](int i) { return i; });
auto chunk = std::copy_from(data.begin());
auto chunk_swapped = gather_from(data, iota ^ 1);
auto chunk_swapped2 = permute(chunk, [](int i) { return i ^ 1; });
assert(all_of(chunk_swapped == chunk_swapped2));

vf = select(vf > 1.f, 1.f, vf);
vf = exp(vf);
auto lo = min(iota, chunk);
}
\end{lstlisting}

For \simdgeneric programming the example now looks like this:
\medskip\begin{lstlisting}[style=Vc]
template<std::integral T>
T scalar_only(T a, T b) {
return 2 * std::min(a, b);
}

template<std::simd_integral T>
T simd_only(T a, T b) {
return 2 * std::min(a, b);
}

template<std::simd_generic_integral T>
T generic(T a, T b) {
return 2 * std::min(a, b);
}
\end{lstlisting}

\begin{description}
\item[pros]
\begin{itemize}
\item Consistent.
\item[$\Rightarrow$] Simple to remember.
\item \simdgeneric interfaces can easily be provided.
\end{itemize}

\item[cons]
\begin{itemize}
\item Nothing in e.g. \code{auto x = std::copy_from(data.begin())} hints
at the creation of a \simd object.
\item Non-\code{simd} overloads for the same names become questionable
as soon as the functionality isn't equivalent. (huge “name grab”)

\item If we ever need to disambiguate an inconsistently overloaded term,
then it will need a \code{simd_} prefix.
\Eg the \code{simd_integral} concept would be such a term.
This could be considered less consistent than what we'd like to aim
for.
\end{itemize}
\end{description}

\myrating{unacceptable “name grab” and potentially confusing overloads}
Loading

0 comments on commit 0c41051

Please sign in to comment.