-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
ChangeLog: * P3319_iota/Makefile: New file. * P3319_iota/changelog.tex: New file. * P3319_iota/main.tex: New file. * P3319_iota/strawpolls.tex: New file.
- Loading branch information
Showing
4 changed files
with
283 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
# Document number, taken from the text between '{' and '}' on the \newcommand\wgDocumentNumber line of main.tex. | ||
NAME := $(shell grep 'newcommand.wgDocumentNumber' main.tex|cut -f2 -d'{'|cut -f1 -d'}') | ||
# Base name of the draft PDF. | ||
DRAFT := $(NAME)_draft | ||
# Search path passed to LaTeX: parent directory, then this directory. | ||
# NOTE(review): the trailing ':' presumably keeps the TeX default search path - confirm. | ||
TEXINPUTS := ..:.: | ||
# LaTeX command line: lualatex wrapped in `flock .`, with TEXINPUTS set in its environment. | ||
PDFLATEX := TEXINPUTS=$(TEXINPUTS) flock . lualatex --shell-escape --halt-on-error --file-line-error --interaction nonstopmode | ||
# biber reads its input from the parent directory and writes its output here. | ||
BIBER := biber --input-directory .. --output-directory . | ||
MAKEINDEX := makeindex | ||
# Recipes use bash features (e.g. `echo -n`, `echo -e`), so bash is selected explicitly. | ||
SHELL := /bin/bash | ||
# Style files in this and the parent directory, plus the placeholder ._dummy.tex. | ||
EXTRA_DEPS := $(wildcard *.sty ../*.sty) ._dummy.tex | ||
# Shell snippet: write md5sums of all .aux/.bbl/.toc/.tex/.ind files found under '.' to ._aux-md5sums. | ||
CREATEMD5 := md5sum `find . -regex '.*\.\(aux\|bbl\|toc\|tex\|ind\)'` > ._aux-md5sums | ||
# Shell snippet: verify the files against ._aux-md5sums (used as the "rerun LaTeX?" test). | ||
CHECKMD5 := md5sum --quiet -c ._aux-md5sums | ||
# Upload file name: "D" + characters 2-5 of NAME + ".pdf". | ||
UPLOADNAME := D$(shell echo $(NAME)|cut -c2-5).pdf | ||
|
||
# First rule in the file, hence the default goal: build the final PDF. | ||
all: final | ||
|
||
# Create the placeholder file that is listed in EXTRA_DEPS and passed as an extra file to the include scans. | ||
._dummy.tex: | ||
touch $@ | ||
|
||
# Print the names of the user-facing targets, one per line. | ||
help: | ||
@echo "all" | ||
@echo "devel" | ||
@echo "draft-loop" | ||
@echo "final-loop" | ||
@echo "final-figure<number>-loop" | ||
@echo "draft" | ||
@echo "final" | ||
@echo "clean" | ||
@echo "autocommit" | ||
|
||
# Dependency discovery, evaluated once at parse time. | ||
# Level 1: files named by \input{...} / \include{...} on lines of main.tex that have no '%' before the command; ".tex" is appended. | ||
# (`grep -E` replaces the obsolescent `egrep`, which newer GNU grep versions warn about.) | ||
TEX_INPUTS := $(shell grep -E '^[^%]*\\in(put|clude){.*?}' main.tex|sed 's@^.*{\(.*\)}.*$$@\1.tex@') | ||
# Level 2: the same scan over the level-1 files; ".tex" is appended only to names without a dot. The level-1 list is kept at the end. | ||
TEX_INPUTS2 := $(shell test -n "$(TEX_INPUTS)" && grep -E '^[^%]*\\in(put|clude){.*?}' $(TEX_INPUTS) ._dummy.tex|sed -e 's@^.*{\([^.]*\)}.*$$@\1.tex@' -e 's@^.*{\(.*\)}.*$$@\1@') $(TEX_INPUTS) | ||
# Level 3: scan the level-2 list once more; the final list also contains levels 1 and 2 and main.tex itself. | ||
TEX_INPUTS := $(shell test -n "$(TEX_INPUTS2)" && grep -E '^[^%]*\\in(put|clude){.*?}' $(TEX_INPUTS2) ._dummy.tex|sed -e 's@^.*{\([^.]*\)}.*$$@\1.tex@' -e 's@^.*{\(.*\)}.*$$@\1@') $(TEX_INPUTS2) $(TEX_INPUTS) main.tex | ||
# File arguments of \lstinputlisting[...]{file} (options may span several lines), de-duplicated. | ||
TEX_LISTINGS := $(shell test -n "$(TEX_INPUTS)" && awk -F '{|}' -- '/lstinputlisting\[[^\]]*$$/ { getline; while($$0 !~ /\] *{.*\..*}$$/) getline; sub(/^.*\]/, ""); print } /lstinputlisting\[.*\] *{.*}/ { print }' $(TEX_INPUTS) |sed 's=.*\(\[[^]]*\]\)\?{\([^}]*\)}.*=\2='| sort -u) | ||
# Data files referenced as "\addplot ... file {name}". | ||
TEX_PLOTDATA := $(shell grep '\\addplot.* file' $(TEX_INPUTS)|sed 's=^.*\\addplot.* file *{\(.*\)}.*$$=\1=') | ||
# \includegraphics arguments (lines containing "newcommand" are skipped), resolved to the first TEXINPUTS directory that contains the file. | ||
GRAPHICS := $(shell grep 'includegraphics' main.tex $(TEX_INPUTS)|grep -v newcommand|sed -e 's=.*\\includegraphics[^{]*{\([^}]*\)}.*=\1=' |sort -u|while read f; do for d in $(subst :, ,$(TEXINPUTS)); do test -f $$d/$$f && echo $$d/$$f && break; done; done) | ||
# Everything the PDFs are rebuilt for. | ||
ALL_DEPS := $(TEX_INPUTS) $(TEX_LISTINGS) $(TEX_PLOTDATA) $(GRAPHICS) $(EXTRA_DEPS) | ||
|
||
# Short aliases for the loop target and the PDF files. | ||
devel: final-loop | ||
|
||
draft: $(DRAFT).pdf | ||
final: $(NAME).pdf | ||
mobile: $(NAME)_mobile.pdf | ||
|
||
# Copy the final PDF to lxpool.gsi.de:web-docs/$(UPLOADNAME); each scp attempt is limited to 120s and | ||
# the copy is retried (printing a dot per failed attempt) until it succeeds. | ||
upload: $(NAME).pdf | ||
@echo -n "Uploading $< ." | ||
@while ! timeout 120s scp -B $< lxpool.gsi.de:web-docs/$(UPLOADNAME); do echo -n .; done | ||
@echo . | ||
|
||
# Watch-and-rebuild loop; expects loopinternaltarget (PDF base name) and MORE_DEPS on the command line. | ||
# Inner loop: once per second, rebuild $(loopinternaltarget).pdf whenever `make -q` reports it out of date. | ||
# On a failed build: set the terminal title to 'failed', show a kdialog popup, ring the bell, then wait | ||
# until the newest watched file differs from the one recorded in ._stamp before trying again. | ||
# ._stamp is taken from the same file list ($(ALL_DEPS) $(MORE_DEPS)) as ._stamp.new; otherwise the two | ||
# can never match when the MORE_DEPS file is the newest one, and the failing build is retried without waiting. | ||
loop-internal: | ||
@screenTitle() { case "$$TERM" in screen*|tmux*) printf '\ek%s\e\\' "$$*";; *) printf '\e]1;%s:q\a' "$$*";; esac }; \ | ||
while true; do \ | ||
while true; do \ | ||
screenTitle 'LaTeX waiting: $(loopinternaltarget)'; \ | ||
if ! $(MAKE) -q $(loopinternaltarget).pdf>/dev/null; then \ | ||
screenTitle 'LaTeX build: $(loopinternaltarget)'; \ | ||
ls -lt $(ALL_DEPS) $(MORE_DEPS) | head -n1 > ._stamp; \ | ||
nice ionice -c idle $(MAKE) $(loopinternaltarget).pdf || break; \ | ||
fi; \ | ||
sleep 1s; \ | ||
done; \ | ||
screenTitle 'failed'; \ | ||
kdialog --passivepopup "LaTeX ($(NAME)) failed" 1; \ | ||
echo -e '\a'; \ | ||
ls -lt $(ALL_DEPS) $(MORE_DEPS) | head -n1 > ._stamp.new; \ | ||
while diff -q ._stamp ._stamp.new; do \ | ||
sleep 1s; \ | ||
ls -lt $(ALL_DEPS) $(MORE_DEPS) | head -n1 > ._stamp.new; \ | ||
done; \ | ||
done | ||
|
||
# Run the watch loop for the draft PDF ($(DRAFT).pdf), additionally watching ../draft.tex. | ||
draft-loop: | ||
$(MAKE) loopinternaltarget=$(DRAFT) MORE_DEPS=../draft.tex loop-internal | ||
|
||
# Run the watch loop for the final PDF ($(NAME).pdf), additionally watching ../final.tex. | ||
final-loop: | ||
$(MAKE) loopinternaltarget=$(NAME) MORE_DEPS=../final.tex loop-internal | ||
|
||
# Generic form: "<stem>-loop" runs the watch loop for $(NAME)_<stem>.pdf, additionally watching ../<stem>.tex. | ||
%-loop: | ||
$(MAKE) loopinternaltarget=$(NAME)_$* MORE_DEPS=../$*.tex loop-internal | ||
|
||
# Rebuild final-figure<stem>.pdf whenever `make -q` reports it out of date, polling once per second. | ||
# The sleep is unconditional: with `A || B && sleep` the shell groups it as `(A || B) && sleep`, so a | ||
# failing build would skip the pause and be retried immediately. | ||
final-figure%-loop: | ||
@while true; do $(MAKE) -q final-figure$*.pdf || $(MAKE) final-figure$*.pdf; sleep 1s; done | ||
|
||
# Remove the final and draft PDFs, LaTeX/biber by-products, and the checksum/stamp files written by this Makefile. | ||
clean: | ||
rm -f $(NAME).pdf $(DRAFT).pdf *.aux *.bbl *.blg *.brf *.lof *.log *.lol *.lot *.out *.toc */*.aux *.auxlock ._aux-md5sums *.bcf *.run.xml ._stamp ._stamp.new | ||
|
||
# NOTE(review): this recursive assignment replaces the earlier ALL_DEPS definition and leaves out $(GRAPHICS) - confirm that is intended. | ||
ALL_DEPS=$(TEX_INPUTS) $(TEX_LISTINGS) $(EXTRA_DEPS) $(TEX_PLOTDATA) | ||
|
||
# Draft PDF: a single LaTeX run on ../draft; a non-empty draft.pdf is renamed to the target, | ||
# then biber is run with its exit status ignored. | ||
$(DRAFT).pdf: ../draft.tex $(ALL_DEPS) | ||
$(PDFLATEX) ../draft | ||
@test -s draft.pdf && mv draft.pdf $@ | ||
$(BIBER) draft || true | ||
|
||
# Final PDF: record checksums, run LaTeX on ../final, rename a non-empty final.pdf to the target and run | ||
# biber (exit status ignored). LaTeX is then rerun until $(CHECKMD5) passes, i.e. until the checksummed | ||
# files no longer change between runs. | ||
# The last line starts a background scp (20s timeout) of the result, but only when USER is "mkretz". | ||
$(NAME).pdf: ../final.tex $(ALL_DEPS) | ||
@$(CREATEMD5) | ||
$(PDFLATEX) ../final | ||
@test -s final.pdf && mv final.pdf $@ | ||
$(BIBER) final || true | ||
@while ! $(CHECKMD5); do \ | ||
$(CREATEMD5); \ | ||
$(PDFLATEX) ../final; \ | ||
test -s final.pdf && mv final.pdf $@; \ | ||
done | ||
test "$(USER)" = "mkretz" && timeout 20s scp -B $@ lxpool.gsi.de:web-docs/$(UPLOADNAME) & | ||
|
||
# Generic variant: $(NAME)_<stem>.pdf is built from ../<stem>.tex. | ||
# biber runs only when <stem>.bcf exists and makeindex only when <stem>.ind exists; failures of either | ||
# are ignored on the first pass. LaTeX (and makeindex) are rerun until $(CHECKMD5) passes. | ||
$(NAME)_%.pdf: ../%.tex $(ALL_DEPS) | ||
@$(CREATEMD5) | ||
$(PDFLATEX) ../$* | ||
@test -s $*.pdf && mv $*.pdf $@ | ||
test -f $*.bcf && $(BIBER) $* || true | ||
test -f $*.ind && $(MAKEINDEX) $* || true | ||
while ! $(CHECKMD5); do \ | ||
$(CREATEMD5); \ | ||
$(PDFLATEX) ../$*; \ | ||
test -s $*.pdf && mv $*.pdf $@; \ | ||
test -f $*.ind && $(MAKEINDEX) $*; \ | ||
done | ||
|
||
# Build one figure PDF by running LaTeX on "final" with job name final-figure<stem> and | ||
# \tikzexternalrealjob defined as "final" (presumably a TikZ-externalized figure - confirm). | ||
# NOTE(review): the prerequisite is final.tex in this directory while the other rules use ../final.tex - confirm. | ||
# NOTE(review): --halt-on-error and --interaction are already part of $(PDFLATEX), so they are passed twice. | ||
final-figure%.pdf: final.tex $(ALL_DEPS) | ||
$(PDFLATEX) --halt-on-error --interaction=nonstopmode --jobname "final-figure$*" "\def\tikzexternalrealjob{final}\input{final}" | ||
|
||
# Once a minute: commit all tracked changes (message "auto: <changed files>") and push. | ||
# Inside screen, the terminal title shows the current state. | ||
# Two fixes over the previous recipe: | ||
#  - `screenTitle 'autocommit'` is terminated with ';' so it is no longer fused with the `while` line | ||
#    (which made the shell fail with a syntax error at `do`); | ||
#  - screenTitle uses `if ... fi`, so it returns success outside screen as well; before, its non-zero | ||
#    status cut the `&&` chain short and `git push` never ran there. | ||
autocommit: | ||
@screenTitle() { if echo $$TERM|grep -q screen; then printf '\ek%s\e\\' "$$*"; fi; }; \ | ||
screenTitle 'autocommit'; \ | ||
while true; do \ | ||
git commit -am "auto: `git status --porcelain|grep -v '^??'|cut -c4-|paste -s -d' '`" && \ | ||
screenTitle 'committed' && \ | ||
git push && \ | ||
screenTitle 'committed & pushed'; \ | ||
sleep 5s; \ | ||
screenTitle 'autocommit'; \ | ||
sleep 55s; \ | ||
done | ||
|
||
# Command-style targets that never produce a file of the same name. | ||
.PHONY: all help devel draft final mobile upload loop-internal draft-loop final-loop clean autocommit |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
\section{Changelog} | ||
(placeholder) | ||
%\begin{revision} | ||
% \todo Everything | ||
%\end{revision} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
\newcommand\wgTitle{Add an iota object for simd (and more)} | ||
\newcommand\wgName{Matthias Kretz <[email protected]>} | ||
\newcommand\wgDocumentNumber{P3319R0} | ||
\newcommand\wgGroup{LEWG} | ||
\newcommand\wgTarget{\CC{}26} | ||
%\newcommand\wgAcknowledgements{Daniel Towner and Ruslan Arutyunyan contributed to this paper via discussions / reviews. Thanks also to Jeff Garland for reviewing.} | ||
|
||
\usepackage{mymacros} | ||
\usepackage{wg21} | ||
\setcounter{tocdepth}{2} % show sections and subsections in TOC | ||
\hypersetup{bookmarksdepth=5} | ||
\usepackage{changelog} | ||
\usepackage{underscore} | ||
\usepackage{multirow} | ||
|
||
\addbibresource{extra.bib} | ||
|
||
\newcommand\simd[1][]{\type{ba\-sic\_simd#1}\xspace} | ||
\newcommand\simdT{\type{ba\-sic\_simd\MayBreak<\MayBreak{}T>}\xspace} | ||
\newcommand\valuetype{\type{val\-ue\_type}\xspace} | ||
\newcommand\referencetype{\type{ref\-er\-ence}\xspace} | ||
\newcommand\mask[1][]{\type{ba\-sic\_simd\_mask#1}\xspace} | ||
\newcommand\maskT{\type{ba\-sic\_simd\_mask\MayBreak<\MayBreak{}T>}\xspace} | ||
\newcommand\wglink[1]{\href{https://wg21.link/#1}{#1}} | ||
|
||
\newcommand\nativeabi{\UNSP{native-abi}} | ||
\newcommand\deducet{\UNSP{deduce-t}} | ||
\newcommand\simdsizev{\UNSP{simd-size-v}} | ||
\newcommand\simdsizetype{\UNSP{simd-size-type}} | ||
\newcommand\simdselect{\UNSP{simd-select-impl}} | ||
\newcommand\maskelementsize{\UNSP{mask-element-size}} | ||
\newcommand\integerfrom{\UNSP{integer-from}} | ||
\newcommand\constexprwrapperlike{\UNSP{constexpr-wrapper-like}} | ||
|
||
\renewcommand{\lst}[1]{Listing~\ref{#1}} | ||
\renewcommand{\sect}[1]{Section~\ref{#1}} | ||
\renewcommand{\ttref}[1]{Tony~Table~\ref{#1}} | ||
\renewcommand{\tabref}[1]{Table~\ref{#1}} | ||
|
||
\begin{document} | ||
\selectlanguage{american} | ||
\begin{wgTitlepage} | ||
There is one important constant in SIMD programming: \code{{0, 1, 2, 3, | ||
...}}. In the standard library we have an algorithm called \code{iota} that | ||
can initialize a range with such values. For \code{simd} we want to have | ||
simple to spell constants that scale with the SIMD width. This paper proposes | ||
a simple facility that can be generalized. | ||
\end{wgTitlepage} | ||
|
||
\pagestyle{scrheadings} | ||
|
||
\input{changelog} | ||
\input{strawpolls} | ||
|
||
\section{Motivation} | ||
The 90\% use case for simd generator constructors is a simd with values 0, 1, | ||
2, 3, \ldots{} potentially with scaling and offset applied. | ||
However, it would often be easier and more readable to use an “iota” | ||
\code{simd} object instead. | ||
|
||
\begingroup | ||
\smaller[1] | ||
\begin{tabular}{p{.45\textwidth}|p{.45\textwidth}} | ||
generator ctor & iota \\ | ||
\hline | ||
\begin{lstlisting} | ||
std::simd<int> a([](int i) { return i; }); | ||
|
||
std::simd<int> b([](int i) { return 2 + 3 * i; }); | ||
\end{lstlisting} | ||
& | ||
\begin{lstlisting} | ||
auto a = std::iota_v<std::simd<int>>; | ||
|
||
auto b = 2 + 3 * std::iota_v<std::simd<int>>; | ||
\end{lstlisting} | ||
\end{tabular} | ||
\endgroup | ||
|
||
The minimal definition I propose for \simd can look like this: | ||
\medskip\begin{lstlisting}[style=Vc] | ||
template <class T> | ||
inline constexpr T | ||
iota_v; | ||
|
||
template <class T> | ||
requires(std::is_arithmetic_v<T>) | ||
inline constexpr T | ||
iota_v<T> = T(); | ||
|
||
template <detail::simd_type T> | ||
inline constexpr T | ||
iota_v<T> = T([](auto i) { return static_cast<typename T::value_type>(i); }); | ||
\end{lstlisting} | ||
|
||
|
||
\section{Generalization} | ||
If we define a (\code{constexpr}) variable template \std\code{iota_v<T>} where | ||
\code{T} must be a \simd type, we're simply filling a sequence of values. | ||
We can create such an object for any type with static extent. | ||
This is especially interesting for the degenerate case in SIMD-generic | ||
programming, where \code{T} could \eg be an \code{int}. | ||
A \std\code{iota_v<int>} is nothing other than an \code{int} object with value | ||
\code{0}. | ||
We can easily generalize to \code{iota_v<std::array<T, N>>} and | ||
\code{iota_v<T[N]>}. | ||
And the next step then is to allow any type that | ||
\begin{itemize} | ||
\item has a static extent, | ||
\item has a \code{value_type} member type, | ||
\item can be list-initialized with \code{N} numbers of type | ||
\code{value_type}, where \code{N} equals the static extent of the type, and | ||
\item where \code{value_type() + 1} is a constant expression and convertible to \code{value_type}. | ||
\end{itemize} | ||
|
||
Consequently you could write | ||
\medskip\begin{lstlisting}[style=Vc] | ||
auto x = std::iota_v<float[5]>; | ||
auto y = std::iota_v<std::array<my_fixed_point, 8>>; | ||
// ... | ||
\end{lstlisting} | ||
|
||
\section{Relation to list-initialization of \code{simd}} | ||
If we add a constructor to \simd that enables list-initialization, then many | ||
users might use that in place of a generator constructor. | ||
This leads to code that doesn't scale with the vector width anymore. | ||
Therefore we should provide a simple facility that works better and is more | ||
portable. | ||
|
||
\section{Proposed polls} | ||
|
||
\wgPoll{We want an iota facility for \simd}{&&&&} | ||
|
||
\wgPoll{The iota facility should be generalized to scalars}{&&&&} | ||
|
||
\wgPoll{The iota facility should be generalized to any sequence of static extent}{&&&&} | ||
|
||
\section{Wording}\label{sec:wording} | ||
|
||
TBD after deciding on the preferred solution. | ||
|
||
\end{document} | ||
% vim: sw=2 sts=2 ai et tw=0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
\section{Straw Polls} | ||
(placeholder) |