-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
D3287R0 (exploration of simd namespaces) draft ready for review
ChangeLog: * P3287_exploration_of_namespaces_for_simd/Makefile: New file. * P3287_exploration_of_namespaces_for_simd/alt1.tex: New file. * P3287_exploration_of_namespaces_for_simd/alt2.tex: New file. * P3287_exploration_of_namespaces_for_simd/alt3.tex: New file. * P3287_exploration_of_namespaces_for_simd/alt4.tex: New file. * P3287_exploration_of_namespaces_for_simd/alt5.tex: New file. * P3287_exploration_of_namespaces_for_simd/alt6.tex: New file. * P3287_exploration_of_namespaces_for_simd/alt7.tex: New file. * P3287_exploration_of_namespaces_for_simd/changelog.tex: New file. * P3287_exploration_of_namespaces_for_simd/main.tex: New file. * P3287_exploration_of_namespaces_for_simd/strawpolls.tex: New file. * listingscpp.sty: * wg21.sty:
- Loading branch information
Showing
13 changed files
with
1,462 additions
and
20 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
# Document number: the text between the braces of the \newcommand\wgDocumentNumber{...} line in main.tex. | ||
NAME := $(shell grep 'newcommand.wgDocumentNumber' main.tex|cut -f2 -d'{'|cut -f1 -d'}') | ||
# Base name of the draft PDF. | ||
DRAFT := $(NAME)_draft | ||
# TeX search path: parent directory first, then this directory (the trailing ':' keeps TeX's default path). | ||
TEXINPUTS := ..:.: | ||
# lualatex invocation; `flock .` holds a lock on the current directory for the duration of each run. | ||
PDFLATEX := TEXINPUTS=$(TEXINPUTS) flock . lualatex --shell-escape --halt-on-error --file-line-error --interaction nonstopmode | ||
BIBER := biber --input-directory .. --output-directory . | ||
MAKEINDEX := makeindex | ||
SHELL := /bin/bash | ||
# Extra prerequisites: style files from this and the parent directory, plus the ._dummy.tex placeholder. | ||
EXTRA_DEPS := $(wildcard *.sty ../*.sty) ._dummy.tex | ||
# CREATEMD5 records checksums of every .aux/.bbl/.toc/.tex/.ind file below this directory in ._aux-md5sums; | ||
# CHECKMD5 fails when any of them no longer matches (the PDF rules use this to decide whether to rerun LaTeX). | ||
CREATEMD5 := md5sum `find . -regex '.*\.\(aux\|bbl\|toc\|tex\|ind\)'` > ._aux-md5sums | ||
CHECKMD5 := md5sum --quiet -c ._aux-md5sums | ||
# Upload file name: "D" followed by characters 2-5 of NAME (a NAME of P3287R0 gives D3287.pdf). | ||
UPLOADNAME := D$(shell echo $(NAME)|cut -c2-5).pdf | ||
|
||
# Default target: build the final PDF. | ||
all: final | ||
|
||
# Empty placeholder file. It is listed in EXTRA_DEPS and passed as an additional file to the egrep calls | ||
# that scan for \input/\include (presumably so egrep always gets more than one file name -- confirm). | ||
._dummy.tex: | ||
touch $@ | ||
|
||
# Print the names of the user-facing targets, one per line. | ||
help: | ||
@echo "all" | ||
@echo "devel" | ||
@echo "draft-loop" | ||
@echo "final-loop" | ||
@echo "final-figure<number>-loop" | ||
@echo "draft" | ||
@echo "final" | ||
@echo "mobile" | ||
@echo "upload" | ||
@echo "clean" | ||
@echo "autocommit" | ||
|
||
# Dependency discovery: scan the TeX sources for the files they pull in. | ||
# TEX_INPUTS: files named by \input{..}/\include{..} lines that have no '%' before the command. | ||
# Three passes (main.tex, the files found there, the files found in those) follow the nesting to that depth; | ||
# the second and third pass append ".tex" only to names without a dot. The final list also contains main.tex. | ||
TEX_INPUTS := $(shell egrep '^[^%]*\\in(put|clude){.*?}' main.tex|sed 's@^.*{\(.*\)}.*$$@\1.tex@') | ||
TEX_INPUTS2 := $(shell test -n "$(TEX_INPUTS)" && egrep '^[^%]*\\in(put|clude){.*?}' $(TEX_INPUTS) ._dummy.tex|sed -e 's@^.*{\([^.]*\)}.*$$@\1.tex@' -e 's@^.*{\(.*\)}.*$$@\1@') $(TEX_INPUTS) | ||
TEX_INPUTS := $(shell test -n "$(TEX_INPUTS2)" && egrep '^[^%]*\\in(put|clude){.*?}' $(TEX_INPUTS2) ._dummy.tex|sed -e 's@^.*{\([^.]*\)}.*$$@\1.tex@' -e 's@^.*{\(.*\)}.*$$@\1@') $(TEX_INPUTS2) $(TEX_INPUTS) main.tex | ||
# TEX_LISTINGS: file arguments of \lstinputlisting[..]{..}, including option lists that span several lines. | ||
TEX_LISTINGS := $(shell test -n "$(TEX_INPUTS)" && awk -F '{|}' -- '/lstinputlisting\[[^\]]*$$/ { getline; while($$0 !~ /\] *{.*\..*}$$/) getline; sub(/^.*\]/, ""); print } /lstinputlisting\[.*\] *{.*}/ { print }' $(TEX_INPUTS) |sed 's=.*\(\[[^]]*\]\)\?{\([^}]*\)}.*=\2='| sort -u) | ||
# TEX_PLOTDATA: file arguments of "\addplot ... file {..}". | ||
TEX_PLOTDATA := $(shell grep '\\addplot.* file' $(TEX_INPUTS)|sed 's=^.*\\addplot.* file *{\(.*\)}.*$$=\1=') | ||
# GRAPHICS: \includegraphics arguments (lines containing "newcommand" are skipped), each resolved against | ||
# the directories of TEXINPUTS in order; the first directory that contains the file wins. | ||
GRAPHICS := $(shell grep 'includegraphics' main.tex $(TEX_INPUTS)|grep -v newcommand|sed -e 's=.*\\includegraphics[^{]*{\([^}]*\)}.*=\1=' |sort -u|while read f; do for d in $(subst :, ,$(TEXINPUTS)); do test -f $$d/$$f && echo $$d/$$f && break; done; done) | ||
# NOTE(review): ALL_DEPS is assigned again before the PDF rules, without $(GRAPHICS); that later | ||
# assignment is the one the rules and recipes actually see. | ||
ALL_DEPS := $(TEX_INPUTS) $(TEX_LISTINGS) $(TEX_PLOTDATA) $(GRAPHICS) $(EXTRA_DEPS) | ||
|
||
# devel: alias for the continuous rebuild loop of the final document. | ||
devel: final-loop | ||
|
||
# Short aliases for the three PDF flavours. | ||
draft: $(DRAFT).pdf | ||
final: $(NAME).pdf | ||
mobile: $(NAME)_mobile.pdf | ||
|
||
# Copy the final PDF to lxpool.gsi.de:web-docs/$(UPLOADNAME) via scp in batch mode (-B). | ||
# Each attempt is limited to 120 s by timeout; failed attempts print a dot and are retried until one succeeds. | ||
upload: $(NAME).pdf | ||
@echo -n "Uploading $< ." | ||
@while ! timeout 120s scp -B $< lxpool.gsi.de:web-docs/$(UPLOADNAME); do echo -n .; done | ||
@echo . | ||
|
||
# Watch loop behind the *-loop targets; expects loopinternaltarget and MORE_DEPS on the command line. | ||
# Inner loop: once per second ask `make -q` whether $(loopinternaltarget).pdf is out of date and rebuild | ||
# it (at idle I/O priority) if so. A failed build leaves the inner loop, signals the failure via the | ||
# terminal title, a kdialog popup and the bell, and then waits until the newest entry of `ls -lt` over | ||
# the dependencies changes before building again. | ||
# ._stamp and ._stamp.new must be computed from the same file list ($(ALL_DEPS) $(MORE_DEPS)); otherwise | ||
# the stamps differ immediately whenever a MORE_DEPS file is the newest one and the failing build is | ||
# retried without waiting for an edit. | ||
loop-internal: | ||
@screenTitle() { case "$$TERM" in screen*|tmux*) printf '\ek%s\e\\' "$$*";; *) printf '\e]1;%s:q\a' "$$*";; esac }; \ | ||
while true; do \ | ||
while true; do \ | ||
screenTitle 'LaTeX waiting: $(loopinternaltarget)'; \ | ||
if ! $(MAKE) -q $(loopinternaltarget).pdf>/dev/null; then \ | ||
screenTitle 'LaTeX build: $(loopinternaltarget)'; \ | ||
ls -lt $(ALL_DEPS) $(MORE_DEPS) | head -n1 > ._stamp; \ | ||
nice ionice -c idle $(MAKE) $(loopinternaltarget).pdf || break; \ | ||
fi; \ | ||
sleep 1s; \ | ||
done; \ | ||
screenTitle 'failed'; \ | ||
kdialog --passivepopup "LaTeX ($(NAME)) failed" 1; \ | ||
echo -e '\a'; \ | ||
ls -lt $(ALL_DEPS) $(MORE_DEPS) | head -n1 > ._stamp.new; \ | ||
while diff -q ._stamp ._stamp.new; do \ | ||
sleep 1s; \ | ||
ls -lt $(ALL_DEPS) $(MORE_DEPS) | head -n1 > ._stamp.new; \ | ||
done; \ | ||
done | ||
|
||
# Continuous rebuild of the draft PDF; ../draft.tex is watched in addition to the discovered dependencies. | ||
draft-loop: | ||
$(MAKE) loopinternaltarget=$(DRAFT) MORE_DEPS=../draft.tex loop-internal | ||
|
||
# Continuous rebuild of the final PDF; ../final.tex is watched in addition to the discovered dependencies. | ||
final-loop: | ||
$(MAKE) loopinternaltarget=$(NAME) MORE_DEPS=../final.tex loop-internal | ||
|
||
# Generic variant: "<x>-loop" continuously rebuilds $(NAME)_<x>.pdf from ../<x>.tex. | ||
%-loop: | ||
$(MAKE) loopinternaltarget=$(NAME)_$* MORE_DEPS=../$*.tex loop-internal | ||
|
||
# Rebuild loop for a single figure PDF: check once per second and rebuild when out of date. | ||
# The sleep is unconditional so that a failing build is retried once per second instead of in a tight loop. | ||
final-figure%-loop: | ||
@while true; do $(MAKE) -q final-figure$*.pdf || $(MAKE) final-figure$*.pdf; sleep 1s; done | ||
|
||
# Remove the final and draft PDFs, the LaTeX/biber intermediates and the checksum/stamp bookkeeping files. | ||
# $(NAME)_<x>.pdf outputs and ._dummy.tex are not in the list and are left in place. | ||
clean: | ||
rm -f $(NAME).pdf $(DRAFT).pdf *.aux *.bbl *.blg *.brf *.lof *.log *.lol *.lot *.out *.toc */*.aux *.auxlock ._aux-md5sums *.bcf *.run.xml ._stamp ._stamp.new | ||
|
||
# NOTE(review): this reassignment replaces the earlier ALL_DEPS definition and leaves out $(GRAPHICS), | ||
# so included graphics are not prerequisites of the PDF rules below -- confirm whether that is intended. | ||
ALL_DEPS=$(TEX_INPUTS) $(TEX_LISTINGS) $(EXTRA_DEPS) $(TEX_PLOTDATA) | ||
|
||
# Draft PDF: a single lualatex pass over ../draft.tex, renamed to $(DRAFT).pdf if a non-empty PDF was produced. | ||
# biber runs afterwards and its failure is ignored; its output is only picked up by the next build. | ||
$(DRAFT).pdf: ../draft.tex $(ALL_DEPS) | ||
$(PDFLATEX) ../draft | ||
@test -s draft.pdf && mv draft.pdf $@ | ||
$(BIBER) draft || true | ||
|
||
# Final PDF: snapshot the checksums of the auxiliary files, run lualatex and biber (biber failure is ignored), | ||
# then rerun lualatex until the checksums no longer change between runs. | ||
# Finally, when $(USER) is "mkretz", the result is copied to lxpool.gsi.de in the background (20 s timeout). | ||
$(NAME).pdf: ../final.tex $(ALL_DEPS) | ||
@$(CREATEMD5) | ||
$(PDFLATEX) ../final | ||
@test -s final.pdf && mv final.pdf $@ | ||
$(BIBER) final || true | ||
@while ! $(CHECKMD5); do \ | ||
$(CREATEMD5); \ | ||
$(PDFLATEX) ../final; \ | ||
test -s final.pdf && mv final.pdf $@; \ | ||
done | ||
test "$(USER)" = "mkretz" && timeout 20s scp -B $@ lxpool.gsi.de:web-docs/$(UPLOADNAME) & | ||
|
||
# Generic flavour: $(NAME)_<x>.pdf is built from ../<x>.tex. | ||
# biber runs only when <x>.bcf exists and makeindex only when <x>.ind exists; failures of either are ignored. | ||
# lualatex is then rerun until the checksums of the auxiliary files no longer change between runs. | ||
$(NAME)_%.pdf: ../%.tex $(ALL_DEPS) | ||
@$(CREATEMD5) | ||
$(PDFLATEX) ../$* | ||
@test -s $*.pdf && mv $*.pdf $@ | ||
test -f $*.bcf && $(BIBER) $* || true | ||
test -f $*.ind && $(MAKEINDEX) $* || true | ||
while ! $(CHECKMD5); do \ | ||
$(CREATEMD5); \ | ||
$(PDFLATEX) ../$*; \ | ||
test -s $*.pdf && mv $*.pdf $@; \ | ||
test -f $*.ind && $(MAKEINDEX) $*; \ | ||
done | ||
|
||
# Build one figure as its own PDF: lualatex runs with job name "final-figure<n>" and | ||
# \tikzexternalrealjob set to "final" before \input{final}. | ||
# The prerequisite is ../final.tex, the same path the other rules use; TeX itself finds the file through TEXINPUTS. | ||
final-figure%.pdf: ../final.tex $(ALL_DEPS) | ||
$(PDFLATEX) --halt-on-error --interaction=nonstopmode --jobname "final-figure$*" "\def\tikzexternalrealjob{final}\input{final}" | ||
|
||
# Commit all changes to tracked files and push, about once per minute, until interrupted. | ||
# The commit message is "auto: " plus the changed tracked files reported by `git status --porcelain`. | ||
# screenTitle sets the terminal title only when TERM contains "screen" and always succeeds, so it can | ||
# sit inside the && chain without preventing `git push`. | ||
autocommit: | ||
@screenTitle() { case "$$TERM" in *screen*) printf '\ek%s\e\\' "$$*";; esac; }; \ | ||
screenTitle 'autocommit'; \ | ||
while true; do \ | ||
git commit -am "auto: `git status --porcelain|grep -v '^??'|cut -c4-|paste -s -d' '`" && \ | ||
screenTitle 'committed' && \ | ||
git push && \ | ||
screenTitle 'committed & pushed'; \ | ||
sleep 5s; \ | ||
screenTitle 'autocommit'; \ | ||
sleep 55s; \ | ||
done | ||
|
||
.PHONY: all help devel draft final mobile upload loop-internal draft-loop final-loop clean autocommit |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
\subsection{Alternative 1: every function is a non-member with \code{simd} prefix} | ||
|
||
\medskip\begin{lstlisting}[style=Vc] | ||
template<class V, class G> | ||
V | ||
simd_generate(G&& gen); | ||
|
||
template<class V = void, class It, class... Flags> | ||
conditional_t<is_same_v<V, void>, simd<iter_value_t<It>>, V> | ||
simd_copy_from(It first, simd_flags<Flags...> f = {}); | ||
|
||
// Rg&& is a forwarding reference, so lvalue ranges (as in the usage example) are accepted. | ||
template<class Rg, std::integral Idx, class AbiIdx, class... Flags> | ||
simd<ranges::range_value_t<Rg>, basic_simd<Idx, AbiIdx>::size()> | ||
simd_gather_from(Rg&& in, const basic_simd<Idx, AbiIdx>& indexes, | ||
simd_flags<Flags...> f = {}); | ||
|
||
template<size_t SizeSelector = 0, class T, class Abi, class PermuteGenerator> | ||
simd<T, output-size> | ||
simd_permute(const basic_simd<T, Abi>& v, PermuteGenerator&& fn); | ||
|
||
template<size_t Bytes, class Abi, class T, class U> | ||
auto | ||
simd_select(const basic_simd_mask<Bytes, Abi>& c, const T& a, const U& b) | ||
-> decltype(simd-select-impl(c, a, b)); | ||
|
||
template<class T, class Abi> | ||
basic_simd<T, Abi> | ||
simd_exp(const basic_simd<T, Abi>& x); | ||
|
||
template<class T, class Abi> | ||
basic_simd<T, Abi> | ||
simd_min(const basic_simd<T, Abi>& x, const basic_simd<T, Abi>& y); | ||
|
||
template<size_t Bs, class Abi> | ||
bool | ||
simd_all_of(const basic_simd_mask<Bs, Abi>&); | ||
|
||
template<class T> | ||
concept simd_integral = /*...*/; | ||
|
||
template<class T> | ||
concept simd_generic_integral = integral<T> or simd_integral<T>; | ||
\end{lstlisting} | ||
|
||
Usage example: | ||
\medskip\begin{lstlisting}[style=Vc] | ||
void f(std::simd<float> vf, const std::vector<int>& data) { | ||
auto iota = std::simd_generate<std::simd<int>>([](int i) { return i; }); | ||
auto chunk = std::simd_copy_from(data.begin()); | ||
auto chunk_swapped = std::simd_gather_from(data, iota ^ 1); | ||
auto chunk_swapped2 = std::simd_permute(chunk, [](int i) { return i ^ 1; }); | ||
assert(std::simd_all_of(chunk_swapped == chunk_swapped2)); | ||
|
||
vf = std::simd_select(vf > 1.f, 1.f, vf); | ||
vf = std::simd_exp(vf); | ||
auto lo = std::simd_min(iota, chunk); | ||
} | ||
\end{lstlisting} | ||
|
||
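There is little variation possible for the above code. | ||
The most important variation is using unqualified calls, relying on ADL | ||
(the first two calls stay qualified because none of their arguments has a | ||
\code{simd} type, so ADL cannot be relied upon to find them): | ||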
\medskip\begin{lstlisting}[style=Vc] | ||
void f(std::simd<float> vf, const std::vector<int>& data) { | ||
auto iota = std::simd_generate<std::simd<int>>([](int i) { return i; }); | ||
auto chunk = std::simd_copy_from(data.begin()); | ||
auto chunk_swapped = simd_gather_from(data, iota ^ 1); | ||
auto chunk_swapped2 = simd_permute(chunk, [](int i) { return i ^ 1; }); | ||
assert(simd_all_of(chunk_swapped == chunk_swapped2)); | ||
|
||
vf = simd_select(vf > 1.f, 1.f, vf); | ||
vf = simd_exp(vf); | ||
auto lo = simd_min(iota, chunk); | ||
} | ||
\end{lstlisting} | ||
|
||
For \simdgeneric programming a trivial example looks like this: | ||
\medskip\begin{lstlisting}[style=Vc] | ||
template<std::integral T> | ||
T scalar_only(T a, T b) { | ||
return 2 * std::min(a, b); | ||
} | ||
|
||
template<std::simd_integral T> | ||
T simd_only(T a, T b) { | ||
return 2 * std::simd_min(a, b); | ||
} | ||
|
||
template<std::simd_generic_integral T> | ||
T generic(T a, T b) { | ||
if constexpr (std::simd_integral<T>) | ||
return 2 * std::simd_min(a, b); | ||
else | ||
return 2 * std::min(a, b); | ||
} | ||
\end{lstlisting} | ||
|
||
The ability to constrain a function like this fills a gap in the TS that I ran | ||
into while using \stdx\code{simd} in the core of the GNU Radio framework. | ||
Obviously, the TS couldn't have proposed any concepts. | ||
Consequently, constraining a function in any of the three ways shown above | ||
required an ad-hoc solution in GNU Radio. | ||
|
||
However, looking at the implementation of the \code{generic} function above, | ||
this can't be what we want. | ||
|
||
\begin{description} | ||
\item[pros] | ||
\begin{itemize} | ||
\item Consistent. | ||
\item[$\Rightarrow$] Users don't need to remember which functions don't | ||
need a \code{simd} prefix. | ||
|
||
\item Consistent naming scheme for SIMD and \simdgeneric concepts. | ||
\end{itemize} | ||
|
||
\item[cons] | ||
\begin{itemize} | ||
\item Verbose. | ||
\item[$\Rightarrow$] There's a lot of “simd” spelled out in the code. | ||
It does not add information (in other words: it's noise) -- at least in | ||
this code. | ||
|
||
\item \simdgeneric programming is barely possible (because it requires | ||
too many constexpr-if branches). | ||
\end{itemize} | ||
\end{description} | ||
|
||
\myrating{unacceptable for lack of \simdgeneric programming; | ||
too verbose without opt-out of the verbosity; | ||
there must be a better alternative} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
\subsection{Alternative 2: every function is a non-member without \code{simd} prefix} | ||
|
||
\medskip\begin{lstlisting}[style=Vc] | ||
template<class V, class G> | ||
V | ||
generate(G&& gen); | ||
|
||
template<class V = void, class It, class... Flags> | ||
conditional_t<is_same_v<V, void>, simd<iter_value_t<It>>, V> | ||
copy_from(It first, simd_flags<Flags...> f = {}); | ||
|
||
// Rg&& is a forwarding reference, so lvalue ranges (as in the usage example) are accepted. | ||
template<class Rg, std::integral Idx, class AbiIdx, class... Flags> | ||
simd<ranges::range_value_t<Rg>, basic_simd<Idx, AbiIdx>::size()> | ||
gather_from(Rg&& in, const basic_simd<Idx, AbiIdx>& indexes, | ||
simd_flags<Flags...> f = {}); | ||
|
||
template<size_t SizeSelector = 0, class T, class Abi, class PermuteGenerator> | ||
simd<T, output-size> | ||
permute(const basic_simd<T, Abi>& v, PermuteGenerator&& fn); | ||
|
||
template<size_t Bytes, class Abi, class T, class U> | ||
auto | ||
select(const basic_simd_mask<Bytes, Abi>& c, const T& a, const U& b) | ||
-> decltype(simd-select-impl(c, a, b)); | ||
|
||
template<class T, class Abi> | ||
basic_simd<T, Abi> | ||
exp(const basic_simd<T, Abi>& x); | ||
|
||
template<class T, class Abi> | ||
basic_simd<T, Abi> | ||
min(const basic_simd<T, Abi>& x, const basic_simd<T, Abi>& y); | ||
|
||
template<size_t Bs, class Abi> | ||
bool | ||
all_of(const basic_simd_mask<Bs, Abi>&); | ||
|
||
// no way around a prefix: | ||
template<class T> | ||
concept simd_integral = /*...*/; | ||
|
||
template<class T> | ||
concept simd_generic_integral = integral<T> or simd_integral<T>; | ||
\end{lstlisting} | ||
|
||
Usage example: | ||
\medskip\begin{lstlisting}[style=Vc] | ||
void f(std::simd<float> vf, const std::vector<int>& data) { | ||
auto iota = std::generate<std::simd<int>>([](int i) { return i; }); | ||
auto chunk = std::copy_from(data.begin()); | ||
auto chunk_swapped = std::gather_from(data, iota ^ 1); | ||
auto chunk_swapped2 = std::permute(chunk, [](int i) { return i ^ 1; }); | ||
assert(std::all_of(chunk_swapped == chunk_swapped2)); | ||
|
||
vf = std::select(vf > 1.f, 1.f, vf); | ||
vf = std::exp(vf); | ||
auto lo = std::min(iota, chunk); | ||
} | ||
\end{lstlisting} | ||
|
||
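There is little variation possible for the above code. | ||
The most important variation is using unqualified calls, relying on ADL | ||
(the first two calls stay qualified because none of their arguments has a | ||
\code{simd} type, so ADL cannot be relied upon to find them): | ||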
\medskip\begin{lstlisting}[style=Vc] | ||
void f(std::simd<float> vf, const std::vector<int>& data) { | ||
auto iota = std::generate<std::simd<int>>([](int i) { return i; }); | ||
auto chunk = std::copy_from(data.begin()); | ||
auto chunk_swapped = gather_from(data, iota ^ 1); | ||
auto chunk_swapped2 = permute(chunk, [](int i) { return i ^ 1; }); | ||
assert(all_of(chunk_swapped == chunk_swapped2)); | ||
|
||
vf = select(vf > 1.f, 1.f, vf); | ||
vf = exp(vf); | ||
auto lo = min(iota, chunk); | ||
} | ||
\end{lstlisting} | ||
|
||
For \simdgeneric programming the example now looks like this: | ||
\medskip\begin{lstlisting}[style=Vc] | ||
template<std::integral T> | ||
T scalar_only(T a, T b) { | ||
return 2 * std::min(a, b); | ||
} | ||
|
||
template<std::simd_integral T> | ||
T simd_only(T a, T b) { | ||
return 2 * std::min(a, b); | ||
} | ||
|
||
template<std::simd_generic_integral T> | ||
T generic(T a, T b) { | ||
return 2 * std::min(a, b); | ||
} | ||
\end{lstlisting} | ||
|
||
\begin{description} | ||
\item[pros] | ||
\begin{itemize} | ||
\item Consistent. | ||
\item[$\Rightarrow$] Simple to remember. | ||
\item \simdgeneric interfaces can easily be provided. | ||
\end{itemize} | ||
|
||
\item[cons] | ||
\begin{itemize} | ||
\item Nothing in e.g. \code{auto x = std::copy_from(data.begin())} hints | ||
at the creation of a \simd object. | ||
\item Non-\code{simd} overloads for the same names become questionable | ||
as soon as the functionality isn't equivalent. (huge “name grab”) | ||
|
||
\item If we ever need to disambiguate an inconsistently overloaded term, | ||
then it will need a \code{simd_} prefix. | ||
\Eg the \code{simd_integral} concept would be such a term. | ||
This could be considered less consistent than what we'd like to aim | ||
for. | ||
\end{itemize} | ||
\end{description} | ||
|
||
\myrating{unacceptable “name grab” and potentially confusing overloads} |
Oops, something went wrong.