From 55c94aeae8fc58d26bf90cc6352e3bfec754397d Mon Sep 17 00:00:00 2001 From: WATANABE Yuki Date: Sat, 18 Jan 2025 19:21:46 +0900 Subject: [PATCH] Position specifier (%n$) in printf built-in This commit adds support for position specifiers in the printf built-in as required by POSIX. A position specifier is a dollar sign ($) preceded by a positive decimal integer. The integer specifies the index of the value to be formatted. For example, in the format string '%2$s %1$s', the second value is formatted first and the first value is formatted next. Closes https://github.com/magicant/yash/issues/102 --- NEWS | 2 + NEWS.ja | 2 + builtins/printf.c | 132 ++++++++++++++++++++++++++++++++------------- doc/_printf.txt | 27 ++++++++-- doc/ja/_printf.txt | 18 ++++++- tests/POSIX | 3 ++ tests/printf-y.tst | 58 ++++++++++++++++++++ 7 files changed, 199 insertions(+), 43 deletions(-) diff --git a/NEWS b/NEWS index 67421970..9645eb26 100644 --- a/NEWS +++ b/NEWS @@ -22,6 +22,8 @@ arguments to the `export`, `local`, `readonly`, and `typeset` built-ins are now expanded in the same way as assignments are expanded. + - The `printf` built-in now supports position specifiers in format + strings as in `printf '%2$s %1$s\n' foo bar`. - After the `bg` built-in resumed a job, the `!` special parameter expands to the process ID of the job. 
- An interactive shell no longer exits on an error in the `exec` diff --git a/NEWS.ja b/NEWS.ja index a993bfda..a53838e7 100644 --- a/NEWS.ja +++ b/NEWS.ja @@ -19,6 +19,8 @@ - 宣言ユーティリティに対応した。`export`, `local`, `readonly`, `typeset` 組込みの引数は、それが変数代入の形式をしているならば 変数代入と同様のやり方で展開されるようになった + - `printf` 組込みの変換指定で位置 (値を出力する順序) を指定できる + ようになった。(例: `printf '%2$s %1$s\n' foo bar`) - `bg` 組込みでジョブを再開した後は `!` 特殊パラメータはジョブの プロセス ID に展開されるようになった - POSIX 準拠モードであっても、対話シェルが `exec` 組込みで失敗した diff --git a/builtins/printf.c b/builtins/printf.c index e8ebbefc..8eefe9fb 100644 --- a/builtins/printf.c +++ b/builtins/printf.c @@ -1,6 +1,6 @@ /* Yash: yet another shell */ /* printf.c: the echo/printf built-ins */ -/* (C) 2007-2019 magicant */ +/* (C) 2007-2025 magicant */ /* This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -55,17 +55,21 @@ struct format_T { char *value; size_t length; } raw; - char *convspec; + struct { + char *spec; + int position; // zero-based position of value to format + } conv; struct { bool left; unsigned long width, max; + int position; // zero-based position of value to format } echo; } value; }; /* The FT_NONE format type corresponds to the "%%" conversion specification. * The FT_RAW format type is used for literal strings that are not conversion * specifications. The format types of FT_STRING, FT_CHAR, FT_INT, FT_UINT, and - * FT_FLOAT are used for various types of conversion specifications (`convspec') + * FT_FLOAT are used for various types of conversion specifications (`conv') * that require a value of the corresponding type. * The FT_ECHO format type is used for the "b" conversion specification. 
*/ /* FT_STRING -> wchar_t * @@ -74,6 +78,13 @@ struct format_T { * FT_UINT -> uintmax_t * FT_FLOAT -> long double */ +/* state to track consumption of formatted values */ +struct args_T { + int argc; // number of values in `args' + int toconsume; // number of values to consume from `args' + void *const *argv; // array of pointers to wide strings to be formatted +}; + enum printf_result_T { PR_OK, PR_OK_END, PR_ERROR, }; static enum printf_result_T echo_parse_escape(const wchar_t *restrict s, xstrbuf_T *restrict buf, mbstate_t *restrict st) @@ -82,13 +93,16 @@ static bool printf_parse_format( const wchar_t *format, struct format_T **resultp) __attribute__((nonnull)); static struct format_T **printf_parse_percent( - const wchar_t **formatp, struct format_T **resultp) + const wchar_t **formatp, int *nextposition, struct format_T **resultp) __attribute__((nonnull,warn_unused_result)); -static struct format_T *printf_parse_percent_b(xstrbuf_T *convspec) +static struct format_T *printf_parse_percent_b( + int position, xstrbuf_T *convspec) __attribute__((nonnull,malloc,warn_unused_result)); +static const wchar_t *nth_arg(struct args_T *args, int position) + __attribute__((nonnull)); static enum printf_result_T printf_printf( - const struct format_T *format, const wchar_t *arg, xstrbuf_T *buf) - __attribute__((nonnull(1,3))); + const struct format_T *format, struct args_T *args, xstrbuf_T *buf) + __attribute__((nonnull)); static uintmax_t printf_parse_integer(const wchar_t *arg, bool is_signed); static enum printf_result_T printf_print_escape( const struct format_T *format, const wchar_t *arg, xstrbuf_T *buf) @@ -300,19 +314,25 @@ int printf_builtin(int argc, void **argv) xoptind++; /* format the operands */ - int oldoptind; + struct args_T args = { + .argc = argc - xoptind, + .toconsume = 0, + .argv = &argv[xoptind], + }; xstrbuf_T buf; sb_init(&buf); do { - oldoptind = xoptind; + args.toconsume = 0; for (struct format_T *f = format; f != NULL; f = f->next) { - switch 
(printf_printf(f, ARGV(xoptind), &buf)) { + switch (printf_printf(f, &args, &buf)) { case PR_OK: break; case PR_OK_END: goto print; case PR_ERROR: goto error; } } - } while (xoptind < argc && xoptind != oldoptind); + args.argc -= args.toconsume; + args.argv += args.toconsume; + } while (args.argc > 0 && args.toconsume > 0); print: freeformat(format); @@ -359,14 +379,16 @@ bool printf_parse_format(const wchar_t *format, struct format_T **resultp) xstrbuf_T buf; mbstate_t state; + int position; sb_init(&buf); memset(&state, 0, sizeof state); + position = 0; while (*format != L'\0') { switch (*format) { case L'%': MAKE_STRING; - resultp = printf_parse_percent(&format, resultp); + resultp = printf_parse_percent(&format, &position, resultp); if (resultp == NULL) return false; sb_init(&buf); @@ -426,16 +448,20 @@ bool printf_parse_format(const wchar_t *format, struct format_T **resultp) /* Parses the conversion specification that starts with L'%' pointed to by * `*formatp'. + * `*nextposition` is the default position used when the conversion + * specification does not specify a position. `*nextposition` is updated to the + * next position after the conversion specification. * If successful, a pointer to the character to parse next is assigned to * `*formatp', a pointer to the result is assigned to `*resultp', and the next * `resultp' value is returned. * If unsuccessful, an error message is printed and NULL is returned. A pointer * to a partial result may be assigned to `*resultp'. 
*/ struct format_T **printf_parse_percent( - const wchar_t **formatp, struct format_T **resultp) + const wchar_t **formatp, int *nextposition, struct format_T **resultp) { const wchar_t *format = *formatp; xstrbuf_T buf; + int position = *nextposition; bool hashflag = false, zeroflag = false; enum formattype_T type; struct format_T *result; @@ -452,6 +478,18 @@ struct format_T **printf_parse_percent( sb_init(&buf); sb_ccat(&buf, '%'); + /* parse position */ + if (iswdigit(*format)) { + wchar_t *end; + long value = wcstol(format, &end, 10); + if (value > INT_MAX) + value = INT_MAX; + if (value > 0 && *end == L'$') { + format = &end[1]; + position = (int) value - 1; + } + } + /* parse flags */ for (;;) { switch (*format) { @@ -511,7 +549,7 @@ struct format_T **printf_parse_percent( case L'b': if (hashflag || zeroflag) goto flag_error; format++; - result = printf_parse_percent_b(&buf); + result = printf_parse_percent_b(position, &buf); goto end; case L'%': if (buf.length != 1) goto flag_error; @@ -534,6 +572,7 @@ struct format_T **printf_parse_percent( } BUFCAT(*format++); + /* create the result */ result = xmalloc(sizeof *result); result->next = NULL; result->type = type; @@ -545,11 +584,13 @@ struct format_T **printf_parse_percent( case FT_ECHO: assert(false); default: - result->value.convspec = sb_tostr(&buf); + result->value.conv.spec = sb_tostr(&buf); + result->value.conv.position = position; +end: + *nextposition = position + (position < INT_MAX); break; } -end: *formatp = format; *resultp = result; return &result->next; @@ -560,7 +601,7 @@ struct format_T **printf_parse_percent( /* Parses the conversion specification given in buffer `convspec'. * The specification in the buffer must not have the conversion specifier, which * is assumed to be 'b'. The buffer is destroyed in this function. 
*/ -struct format_T *printf_parse_percent_b(xstrbuf_T *convspec) +struct format_T *printf_parse_percent_b(int position, xstrbuf_T *convspec) { size_t index = 0; struct format_T *result = xmalloc(sizeof *result); @@ -597,16 +638,33 @@ struct format_T *printf_parse_percent_b(xstrbuf_T *convspec) result->value.echo.max = ULONG_MAX; } + result->value.echo.position = position; + assert(index == convspec->length); sb_destroy(convspec); return result; } +/* Finds the value from `args' at the specified position, updating the + * `args->toconsume' field if necessary to make sure the selected value is + * consumed. */ +const wchar_t *nth_arg(struct args_T *args, int position) +{ + if (position >= args->argc) { + args->toconsume = args->argc; + return NULL; + } + if (position >= args->toconsume) + args->toconsume = position + 1; + return args->argv[position]; +} + /* Formats the specified string. The result is appended to buffer `buf'. - * Increases `xoptind' if `arg' is used. Otherwise, `arg' is ignored. */ + * Updates `args->toconsume' if a value from `args` is used. 
*/ enum printf_result_T printf_printf( - const struct format_T *format, const wchar_t *arg, xstrbuf_T *buf) + const struct format_T *format, struct args_T *args, xstrbuf_T *buf) { + const wchar_t *arg; switch (format->type) { case FT_NONE: sb_ccat(buf, '%'); @@ -616,27 +674,29 @@ enum printf_result_T printf_printf( format->value.raw.value, format->value.raw.length); return PR_OK; case FT_STRING: - if (arg != NULL) - xoptind++; - else + arg = nth_arg(args, format->value.conv.position); + if (arg == NULL) arg = L""; - if (sb_printf(buf, format->value.convspec, arg) < 0) + if (sb_printf(buf, format->value.conv.spec, arg) < 0) return PR_ERROR; return PR_OK; case FT_CHAR: + arg = nth_arg(args, format->value.conv.position); if (arg != NULL && arg[0] != L'\0') { - xoptind++; - if (sb_printf(buf, format->value.convspec, (wint_t) arg[0]) < 0) + if (sb_printf(buf, format->value.conv.spec, (wint_t) arg[0]) + < 0) return PR_ERROR; } return PR_OK; case FT_INT: - if (sb_printf(buf, format->value.convspec, + arg = nth_arg(args, format->value.conv.position); + if (sb_printf(buf, format->value.conv.spec, printf_parse_integer(arg, true)) < 0) return PR_ERROR; return PR_OK; case FT_UINT: - if (sb_printf(buf, format->value.convspec, + arg = nth_arg(args, format->value.conv.position); + if (sb_printf(buf, format->value.conv.spec, printf_parse_integer(arg, false)) < 0) return PR_ERROR; return PR_OK; @@ -645,9 +705,8 @@ enum printf_result_T printf_printf( long double value; wchar_t *end; - if (arg != NULL) - xoptind++; - else + arg = nth_arg(args, format->value.conv.position); + if (arg == NULL) arg = L"0"; errno = 0; #if HAVE_WCSTOLD @@ -658,14 +717,13 @@ enum printf_result_T printf_printf( value = wcstod(arg, &end); if (errno || arg[0] == L'\0' || *end != L'\0') xerror(errno, Ngt("`%ls' is not a valid number"), arg); - if (sb_printf(buf, format->value.convspec, value) < 0) + if (sb_printf(buf, format->value.conv.spec, value) < 0) return PR_ERROR; return PR_OK; } case FT_ECHO: - if (arg 
!= NULL) - xoptind++; - else + arg = nth_arg(args, format->value.echo.position); + if (arg == NULL) arg = L""; return printf_print_escape(format, arg, buf); } @@ -678,9 +736,7 @@ uintmax_t printf_parse_integer(const wchar_t *arg, bool is_signed) uintmax_t value; wchar_t *end; - if (arg != NULL) - xoptind++; - else + if (arg == NULL) arg = L"0"; if (arg[0] == L'"' || arg[0] == L'\'') { value = (uintmax_t) arg[1]; @@ -746,7 +802,7 @@ void freeformat(struct format_T *f) case FT_ECHO: break; default: - free(f->value.convspec); + free(f->value.conv.spec); break; } free(f); diff --git a/doc/_printf.txt b/doc/_printf.txt index b4b5b3a9..c18bbb33 100644 --- a/doc/_printf.txt +++ b/doc/_printf.txt @@ -32,8 +32,9 @@ A conversion specification starts with a percent sign (+%+). A conversion specification except +%%+ consumes a {{value}}, which is formatted according to the specification and printed. -Each conversion specification consumes one {{value}} in the order of -appearance. +By default, the {{value}}s are consumed in the order of their appearance, but +you can specify which {{value}} to consume by using the +<<convspec-position,position specifier>>. If there are more {{value}}s than conversion specifications, the entire {{format}} is re-processed until all the {{value}}s are consumed. If a {{value}} to be consumed is missing, it is assumed to be an empty string @@ -66,7 +67,22 @@ the exponent part is between -5 and the precision (exclusive); +%e+ or +%E+ otherwise. In a conversion specification except +%%+, the leading percent sign may be -followed by flags, field width, and/or precision in this order. +followed by position, flags, field width, and/or precision in this order. + +[[convspec-position]] +==== Position + +A position specifier is a dollar sign (+$+) preceded by a positive decimal +integer. +The integer specifies the index of the {{value}} to be consumed. +For example, in the format string +%2$d %1$d+, the second {{value}} is +consumed first and the first {{value}} is consumed next. 
+ +More than one position specifier can specify the same {{value}}. + +It is also possible that a {{value}} is not selected by any position +specifier. If a position refers to a {{value}}, any preceding {{value}}s that +are not selected by any other position specifier are silently consumed. [[convspec-flags]] ==== Flags @@ -209,4 +225,9 @@ If the shell is not in the link:posix.html[POSIXly-correct mode] and the then ``long double'' is used for floating-point conversion specifications. Otherwise, ``double'' is used. +POSIX leaves the behavior unspecified when a {{format}} contains conversion +specifications with a position and others without a position. +You should not rely on the selection order of {{value}}s in such cases. +Future versions of yash may treat such cases as errors. + // vim: set filetype=asciidoc textwidth=78 expandtab: diff --git a/doc/ja/_printf.txt b/doc/ja/_printf.txt index f62b6876..c9027221 100644 --- a/doc/ja/_printf.txt +++ b/doc/ja/_printf.txt @@ -22,7 +22,7 @@ Printf コマンドはオペランドで指定した{{書式}}に従って{{値} 変換指定はパーセント記号 (+%+) から始まります。 -+%%+ 以外の変換指定は、対応する値をとります。変換指定は、値を特定の形式に整形して出力します。変換指定と値は与えられた順番に対応付けられます。値が余った場合は、全ての値を処理し終わるまで書式の整形・出力を繰り返します。値が足りない場合は、空文字列 (文字列に関する変換指定の場合) または 0 (数値に関する変換指定の場合) を仮定します。値が一つも与えられていない場合は、書式は一度だけ出力されます。 ++%%+ 以外の変換指定は、対応する値をとります。変換指定は、値を特定の形式に整形して出力します。デフォルトでは変換指定と値は与えられた順番に対応付けられますが、変換指定に<<convspec-position,位置>>を含めることで任意の順序で対応させることもできます。値が余った場合は、全ての値を処理し終わるまで書式の整形・出力を繰り返します。値が足りない場合は、空文字列 (文字列に関する変換指定の場合) または 0 (数値に関する変換指定の場合) を仮定します。値が一つも与えられていない場合は、書式は一度だけ出力されます。 利用可能な変換指定は以下の通りです。 @@ -46,7 +46,18 @@ Printf コマンドはオペランドで指定した{{書式}}に従って{{値} +%g+ と +%G+ では、小数の指数部が -5 以上精度以下の時に +%f+ または +%F+ を、それ以外の時に +%e+ または +%E+ を使用します。 -+%%+ 以外の変換指定では、最初の +%+ の直後に変換指定フラグ・フィールド幅・精度をこの順で指定できます。これらを指定することで出力の形式を細かく調整できます。 ++%%+ 以外の変換指定では、最初の +%+ の直後に位置・変換指定フラグ・フィールド幅・精度をこの順で指定できます。これらを指定することで出力の形式を細かく調整できます。 + +[[convspec-position]] +==== 位置 + +位置は、正の整数 (十進法) の直後に +$+ を置いたものの形で指定します。 +この整数は、オペランドで指定された値の中から何番目の値を使用するかを指定します。 +例えば、書式が +%2$d %1$d+ の場合、2 
番目の値が最初に、1 番目の値が次に出力されます。 + +複数の変換指定で同じ位置を指定することもできます。 + +ある変換指定がある位置の値を使用する場合、その位置より前の値は他の変換指定から使われない限り無視されます。 [[convspec-flags]] ==== 変換指定フラグ @@ -151,4 +162,7 @@ POSIX では、マルチバイト文字の扱いについて厳密に定義し シェルが非 link:posix.html[POSIX 準拠モード]で、システム上で ``long double'' 浮動小数点数が使用可能な場合は、実数の変換指定は ``long double'' で処理されます。それ以外の場合は ``double'' で処理されます。 +POSIX では、位置を含む変換指定と含まない変換指定を混在させることはできません。 +このような{{書式}}を使用すると、シェルの種類やバージョンによって値の選択順序が変わったりエラーになったりする可能性があります。 + // vim: set filetype=asciidoc expandtab: diff --git a/tests/POSIX b/tests/POSIX index f68a41cf..c73effd2 100644 --- a/tests/POSIX +++ b/tests/POSIX @@ -239,6 +239,9 @@ kill * kill3-p.tst * kill4-p.tst +printf + * printf-y.tst + read * read-p.tst diff --git a/tests/printf-y.tst b/tests/printf-y.tst index 7efa9a00..670fe0d2 100644 --- a/tests/printf-y.tst +++ b/tests/printf-y.tst @@ -479,6 +479,40 @@ S !1 __OUT__ +test_oE 'position' +printf '%3$d %4$d %2$d\n' 10 20 30 42 +printf '%2$d\n' 1 2 3 4 5 +__IN__ +30 42 20 +2 +4 +0 +__OUT__ + +# In yash, %b is implemented separately from other conversion specifiers, +# so we test it separately, too. 
+test_oE 'position in %b' +printf '%3$b %4$b %1$b\n' A BB CCC ddd +printf '%2$b\n' a b c d e +__IN__ +CCC ddd A +b +d + +__OUT__ + +test_oE 'position with flag' +printf '%1$03d\n' 1 +__IN__ +001 +__OUT__ + +test_oE 'mixing % and %n$' +printf '%d %5$.3s %5$s %c %d\n' 42 A B C formatted words +__IN__ +42 for formatted w 0 +__OUT__ + test_oE 'percent' printf '%%\n' printf '+%%+%%%%+\n' @@ -493,6 +527,30 @@ __IN__ 1%2 __OUT__ +test_Oe -e n 'unsupported conversion %y' +printf '%y' 1 +__IN__ +printf: `y' is not a valid conversion specifier +__ERR__ +#' +#` + +test_Oe -e n 'missing number before $' +printf '%$d' 1 +__IN__ +printf: `$' is not a valid conversion specifier +__ERR__ +#' +#` + +test_Oe -e n 'position 0' +printf '%0$d' 42 +__IN__ +printf: `$' is not a valid conversion specifier +__ERR__ +#' +#` + test_o -d -e n 'operands in invalid format' printf '%d\n' not_a_integer 32_trailing_characters __IN__