From c371cc9c2afba5600f8749c4ebcd93b4c9dc6a4e Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Sat, 29 Jun 2024 10:58:42 +0100 Subject: [PATCH 1/3] gh-121130: Fix f-string format specifiers with debug expressions --- Lib/test/test_fstring.py | 6 ++++++ .../2024-06-29-10-46-14.gh-issue-121130.Rj66Xs.rst | 2 ++ Parser/lexer/lexer.c | 14 ++++++++++++-- Parser/lexer/state.c | 1 + Parser/lexer/state.h | 1 + 5 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-06-29-10-46-14.gh-issue-121130.Rj66Xs.rst diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py index 49c6f761e5b4f0..558af76ddf4b66 100644 --- a/Lib/test/test_fstring.py +++ b/Lib/test/test_fstring.py @@ -1601,6 +1601,10 @@ def f(a): self.assertEqual(f'{f(a=4)}', '3=') self.assertEqual(x, 4) + # Check debug expressions in format spec + y = 20 + self.assertEqual(f"{2:{y=}}", "yyyyyyyyyyyyyyyyyyy2") + # Make sure __format__ is being called. class C: def __format__(self, s): @@ -1614,9 +1618,11 @@ def __repr__(self): self.assertEqual(f'{C()=: }', 'C()=FORMAT- ') self.assertEqual(f'{C()=:x}', 'C()=FORMAT-x') self.assertEqual(f'{C()=!r:*^20}', 'C()=********REPR********') + self.assertEqual(f"{C():{20=}}", 'FORMAT-20=20') self.assertRaises(SyntaxError, eval, "f'{C=]'") + # Make sure leading and following text works. x = 'foo' self.assertEqual(f'X{x=}Y', 'Xx='+repr(x)+'Y') diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-06-29-10-46-14.gh-issue-121130.Rj66Xs.rst b/Misc/NEWS.d/next/Core and Builtins/2024-06-29-10-46-14.gh-issue-121130.Rj66Xs.rst new file mode 100644 index 00000000000000..7084f0cbebbb73 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-06-29-10-46-14.gh-issue-121130.Rj66Xs.rst @@ -0,0 +1,2 @@ +Fix f-strings with debug expressions in format specifiers. Patch by Pablo +Galindo diff --git a/Parser/lexer/lexer.c b/Parser/lexer/lexer.c index 82b0e4ee352d62..5366ab949e4483 100644 --- a/Parser/lexer/lexer.c +++ b/Parser/lexer/lexer.c @@ -989,6 +989,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t the_current_tok->last_expr_buffer = NULL; the_current_tok->last_expr_size = 0; the_current_tok->last_expr_end = -1; + the_current_tok->in_format_spec = 0; the_current_tok->f_string_debug = 0; switch (*tok->start) { @@ -1137,15 +1138,20 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t * by the `{` case, so for ensuring that we are on the 0th level, we need * to adjust it manually */ int cursor = current_tok->curly_bracket_depth - (c != '{'); - if (cursor == 0 && !_PyLexer_update_fstring_expr(tok, c)) { + int in_format_spec = current_tok->in_format_spec; + int cursor_in_format_with_debug = + cursor == 1 && (current_tok->f_string_debug || in_format_spec); + int cursor_valid = cursor == 0 || cursor_in_format_with_debug; + if (cursor_valid && !_PyLexer_update_fstring_expr(tok, c)) { return MAKE_TOKEN(ENDMARKER); } - if (cursor == 0 && c != '{' && set_fstring_expr(tok, token, c)) { + if (cursor_valid && c != '{' && set_fstring_expr(tok, token, c)) { return MAKE_TOKEN(ERRORTOKEN); } if (c == ':' && cursor == current_tok->curly_bracket_expr_start_depth) { current_tok->kind = TOK_FSTRING_MODE; + current_tok->in_format_spec = 1; p_start = tok->start; p_end = tok->cur; return MAKE_TOKEN(_PyToken_OneChar(c)); @@ -1235,6 +1241,7 @@ tok_get_normal_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct t if (c == '}' && current_tok->curly_bracket_depth == current_tok->curly_bracket_expr_start_depth) { current_tok->curly_bracket_expr_start_depth--; current_tok->kind = TOK_FSTRING_MODE; + current_tok->in_format_spec = 0; current_tok->f_string_debug = 0; } } @@ -1337,6 +1344,7 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct if (in_format_spec && c == '\n') { tok_backup(tok, c); TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE; + current_tok->in_format_spec = 0; p_start = tok->start; p_end = tok->cur; return MAKE_TOKEN(FSTRING_MIDDLE); @@ -1387,6 +1395,7 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct return MAKE_TOKEN(_PyTokenizer_syntaxerror(tok, "f-string: expressions nested too deeply")); } TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE; + current_tok->in_format_spec = 0; p_start = tok->start; p_end = tok->cur; } else { @@ -1413,6 +1422,7 @@ tok_get_fstring_mode(struct tok_state *tok, tokenizer_mode* current_tok, struct tok_backup(tok, peek); tok_backup(tok, c); TOK_GET_MODE(tok)->kind = TOK_REGULAR_MODE; + current_tok->in_format_spec = 0; p_start = tok->start; p_end = tok->cur; } diff --git a/Parser/lexer/state.c b/Parser/lexer/state.c index 653ddafd411095..647f291911564c 100644 --- a/Parser/lexer/state.c +++ b/Parser/lexer/state.c @@ -74,6 +74,7 @@ free_fstring_expressions(struct tok_state *tok) mode->last_expr_buffer = NULL; mode->last_expr_size = 0; mode->last_expr_end = -1; + mode->in_format_spec = 0; } } } diff --git a/Parser/lexer/state.h b/Parser/lexer/state.h index 61d090d6d2fe21..9ed3babfdbfbf1 100644 --- a/Parser/lexer/state.h +++ b/Parser/lexer/state.h @@ -58,6 +58,7 @@ typedef struct _tokenizer_mode { Py_ssize_t last_expr_end; char* last_expr_buffer; int f_string_debug; + int in_format_spec; } tokenizer_mode; /* Tokenizer state */ From 89bd65ac7150763654f2ea5766aca4dfbfbe4850 Mon Sep 17 00:00:00 2001 From: Pablo Galindo Date: Sat, 29 Jun 2024 11:56:13 +0100 Subject: [PATCH 2/3] Fix unparsing of fstring with format specs --- Parser/action_helpers.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Parser/action_helpers.c b/Parser/action_helpers.c index 91b7e2f1058423..03876ebfffc5a3 100644 --- a/Parser/action_helpers.c +++ b/Parser/action_helpers.c @@ -1444,8 +1444,16 @@ expr_ty _PyPegen_formatted_value(Parser *p, expr_ty expression, Token *debug, Re conversion_val = (int)'r'; } + expr_ty format_expr = format ? (expr_ty) format->result : NULL; + if (format_expr && format_expr->kind == JoinedStr_kind && asdl_seq_LEN(format_expr->v.JoinedStr.values) == 1) { + expr_ty format_value = asdl_seq_GET(format_expr->v.JoinedStr.values, 0); + if (format_value->kind == JoinedStr_kind) { + format_expr = format_value; + } + } + expr_ty formatted_value = _PyAST_FormattedValue( - expression, conversion_val, format ? (expr_ty) format->result : NULL, + expression, conversion_val, format_expr, lineno, col_offset, end_lineno, end_col_offset, arena ); From 2bbb9d1d32b943e8504bc0d5b714f4445ed4ac9b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 1 Jul 2024 19:06:49 +0000 Subject: [PATCH 3/3] build(deps): bump hypothesis from 6.100.2 to 6.104.2 in /Tools Bumps [hypothesis](https://github.com/HypothesisWorks/hypothesis) from 6.100.2 to 6.104.2. - [Release notes](https://github.com/HypothesisWorks/hypothesis/releases) - [Commits](https://github.com/HypothesisWorks/hypothesis/compare/hypothesis-python-6.100.2...hypothesis-python-6.104.2) --- updated-dependencies: - dependency-name: hypothesis dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- Tools/requirements-hypothesis.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Tools/requirements-hypothesis.txt b/Tools/requirements-hypothesis.txt index 9d5a18c881bf36..ab3f39ac6ee087 100644 --- a/Tools/requirements-hypothesis.txt +++ b/Tools/requirements-hypothesis.txt @@ -1,4 +1,4 @@ # Requirements file for hypothesis that # we use to run our property-based tests in CI. -hypothesis==6.100.2 +hypothesis==6.104.2