diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 4e87d9d6228b..0837ae7f5c1d 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -379,7 +379,11 @@ manager will later use a regex to expand these into links. =item * -XXX +Regexes which include both a sub-pattern (e.g. C<(??{...})> or C<(?&FOO)>) +and a cut (i.e. C<< (?>...) >>) could sometimes cause a premature scope +exit in other code during a match. For example in something like +C<(?{ local $x = ... })>, the C might have been unwound before the +pattern has finished matching. [GH #16197] =back diff --git a/regexec.c b/regexec.c index e52c48513d2a..c854666f1c39 100644 --- a/regexec.c +++ b/regexec.c @@ -320,7 +320,12 @@ S_regcppush(pTHX_ const regexp *rex, I32 parenfloor, U32 maxopenparen comma_pDEP /* REGCP_UNWIND(cp): unwind savestack back to marked index, * but without restoring regcppush()ed data (leave_scope() treats - * SAVEt_REGCONTEXT as a NOOP) + * SAVEt_REGCONTEXT as a NOOP). + * + * Note that the stack is normally only unwound on failure and + * backtracking. Successful matches involve accumulating many savestack + * entries which are all freed in *one go* during the final exit from + * S_regmatch(). */ #define REGCP_UNWIND(cp) \ @@ -4403,13 +4408,9 @@ S_set_reg_curpm(pTHX_ REGEXP *rx, regmatch_info *reginfo) STATIC bool /* 0 failure, 1 success */ S_regtry(pTHX_ regmatch_info *reginfo, char **startposp) { - CHECKPOINT lastcp; REGEXP *const rx = reginfo->prog; regexp *const prog = ReANY(rx); SSize_t result; -#ifdef DEBUGGING - U32 depth = 0; /* used by REGCP_SET */ -#endif RXi_GET_DECL(prog,progi); DECLARE_AND_GET_RE_DEBUG_FLAGS; @@ -4453,7 +4454,6 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startposp) } } #endif - REGCP_SET(lastcp); result = regmatch(reginfo, *startposp, progi->program + 1); if (result != -1) { RXp_OFFSp(prog)[0].end = result; @@ -4461,7 +4461,6 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startposp) } if (reginfo->cutpoint) *startposp= reginfo->cutpoint; - REGCP_UNWIND(lastcp); return 0; } @@ -6683,7 +6682,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) during a successful match */ U32 lastopen = 0; /* last open we saw */ bool has_cutgroup = RXp_HAS_CUTGROUP(rex) ? 1 : 0; - SV* const oreplsv = GvSVn(PL_replgv); /* these three flags are set by various ops to signal information to * the very next op. They have a useful lifetime of exactly one loop * iteration, and are not preserved or restored by state pushes/pops @@ -6708,7 +6706,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) char_class_number_ classnum; bool is_utf8_pat = reginfo->is_utf8_pat; bool match = false; - I32 orig_savestack_ix = PL_savestack_ix; + I32 orig_savestack_ix; U8 * script_run_begin = NULL; char *match_end= NULL; /* where a match MUST end to be considered successful */ bool is_accepted = false; /* have we hit an ACCEPT opcode? */ @@ -6726,9 +6724,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) DECLARE_AND_GET_RE_DEBUG_FLAGS; #endif - /* protect against undef(*^R) */ - SAVEFREESV(SvREFCNT_inc_simple_NN(oreplsv)); - /* shut up 'may be used uninitialized' compiler warnings for dMULTICALL */ multicall_oldcatch = 0; PERL_UNUSED_VAR(multicall_cop); @@ -6746,6 +6741,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) Perl_re_printf( aTHX_ "regmatch start\n" ); })); + REGCP_SET(orig_savestack_ix); + while (scan != NULL) { next = scan + NEXT_OFF(scan); if (next == scan) @@ -8754,18 +8751,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) S_set_reg_curpm(aTHX_ rex_sv, reginfo); rex = ReANY(rex_sv); rexi = RXi_GET(rex); - { - /* preserve $^R across LEAVE's. See Bug 121070. */ - SV *save_sv= GvSV(PL_replgv); - SV *replsv; - SvREFCNT_inc(save_sv); - REGCP_UNWIND(ST.cp); /* LEAVE in disguise */ - /* don't move this initialization up */ - replsv = GvSV(PL_replgv); - sv_setsv(replsv, save_sv); - SvSETMAGIC(replsv); - SvREFCNT_dec(save_sv); - } cur_eval = ST.prev_eval; cur_curlyx = ST.prev_curlyx; @@ -10325,20 +10310,21 @@ NULL DEBUG_EXECUTE_r(Perl_re_printf( aTHX_ "%sMatch successful!%s\n", PL_colors[4], PL_colors[5])); - if (reginfo->info_aux_eval) { - /* each successfully executed (?{...}) block does the equivalent of - * local $^R = do {...} - * When popping the save stack, all these locals would be undone; - * bypass this by setting the outermost saved $^R to the latest - * value */ - /* I don't know if this is needed or works properly now. - * see code related to PL_replgv elsewhere in this file. - * Yves + if (reginfo->info_aux_eval && orig_savestack_ix < PL_savestack_ix) { + /* After exiting a match, $^R should still hold the value of the + * latest (?{...}). E.g. + * /(?{42})/ and print $^R; + * will print 42. However, each successfully executed (?{...}) + * block does the equivalent of 'local $^R = ...'. In addition, + * there may be an explicit 'local $^R' or similar code. When + * popping the savestack at the end, all these locals would be + * undone. Avoid this issue by making a copy of the final value + * just *before* the final savestack unwind. After the unwind, + * we set $^R to that value. + * This temporary copy is stored in the aux_eval struct so that + * it will get freed even if we die during savestack unwind. */ - if (oreplsv != GvSV(PL_replgv)) { - sv_setsv(oreplsv, GvSV(PL_replgv)); - SvSETMAGIC(oreplsv); - } + reginfo->info_aux_eval->final_replsv = newSVsv(GvSV(PL_replgv)); } result = 1; goto final_exit; @@ -10403,12 +10389,25 @@ NULL if (last_pushed_cv) { dSP; - /* see "Some notes about MULTICALL" above */ + /* see "Some notes about MULTICALL" above, especially how + * the POP_MULTICALL does the equivalent of the LEAVE_SCOPE + * for us, so no need to do it explicitly. */ POP_MULTICALL; PERL_UNUSED_VAR(SP); } - else - LEAVE_SCOPE(orig_savestack_ix); + else { + REGCP_UNWIND(orig_savestack_ix); + } + + if ( reginfo->info_aux_eval + && reginfo->info_aux_eval->final_replsv) + { + /* '/(?{42})/; print $^R' should print 42; now that the + * savestack has been popped, set the final value */ + SV *replsv = GvSV(PL_replgv); + sv_setsv(replsv, reginfo->info_aux_eval->final_replsv); + SvSETMAGIC(replsv); + } assert(!result || locinput - reginfo->strbeg >= 0); return result ? locinput - reginfo->strbeg : -1; @@ -11503,6 +11502,8 @@ S_setup_eval_state(pTHX_ regmatch_info *const reginfo) RXp_SUBOFFSET(rex) = 0; RXp_SUBCOFFSET(rex) = 0; RXp_SUBLEN(rex) = reginfo->strend - reginfo->strbeg; + + eval_state->final_replsv = NULL; /* the final value of $^R. */ } @@ -11553,6 +11554,7 @@ S_cleanup_regmatch_info_aux(pTHX_ void *arg) PM_SETRE(eval_state->old_op, eval_state->old_op_val); SvREFCNT_dec(old_rx); } + SvREFCNT_dec(eval_state->final_replsv); } PL_regmatch_state = aux->old_regmatch_state; diff --git a/regexp.h b/regexp.h index 1db56f9c6750..fbe89eb49100 100644 --- a/regexp.h +++ b/regexp.h @@ -791,6 +791,7 @@ typedef struct { SV *sv; /* $_ during (?{}) */ MAGIC *pos_magic; /* pos() magic attached to $_ */ SSize_t pos; /* the original value of pos() in pos_magic */ + SV *final_replsv; /* the final value of $^R. */ U8 pos_flags; /* flags to be restored; currently only MGf_BYTES*/ } regmatch_info_aux_eval; diff --git a/t/op/svleak.t b/t/op/svleak.t index da7bd6e2b052..9b0df185037c 100644 --- a/t/op/svleak.t +++ b/t/op/svleak.t @@ -15,7 +15,7 @@ BEGIN { use Config; -plan tests => 157; +plan tests => 158; # run some code N times. If the number of SVs at the end of loop N is # greater than (N-1)*delta at the end of loop 1, we've got a leak @@ -710,3 +710,31 @@ package myconcat { 'overloaded pattern with code block' ); } + +# When making a final copy of $^R before unwinding the savestack, +# make sure that the copy doesn't leak if we die during that unwinding. +# Dying in STORE triggers that. + +package GH16197 { + + sub TIESCALAR { bless [ 0 ]; } + sub FETCH { $_[0][0] } + sub STORE { my $v = $_[1]; + # die when undoing the 'local'; the previous val was real, + # new value is undef. + if ($_[0][-1] and !$v) { + @{$_[0]} = (); + die; + } + push @{$_[0]}, $v; + } + + our $x99; + local $x99; + tie $x99, 'GH16197'; + + ::leak(5, 0, + sub { eval { "" =~ /(?{ local $x99; $x99 = 9 })/; }; }, + "no leak in \$^R copy during stack unwind", + ); +} diff --git a/t/re/pat_re_eval.t b/t/re/pat_re_eval.t index 8193eea5cc92..7d1152edb6aa 100644 --- a/t/re/pat_re_eval.t +++ b/t/re/pat_re_eval.t @@ -25,7 +25,7 @@ BEGIN { our @global; -plan tests => 528; # Update this when adding/deleting tests. +plan tests => 552; # Update this when adding/deleting tests. run_tests() unless caller; @@ -1446,6 +1446,53 @@ sub run_tests { is($got, "[A-[X-,X-,XY,XY],A-,AB,AB]", "GH22869"); } + # GH #16197 + # A subpattern, i.e. (??{...}) or (?&...), when combined with + # a cut, (?>...), caused the savestack to be prematurely unwound, + # resulting in localisations within (?{...}) being undone before + # the end of the match. + + { + my ($match, @vals); + + # The first six permutations do no backtracking, but are: with or + # without a cut; and match a digit via either a simple \d, + # or via a sub-pattern - (??{}) or (?&). + # The next six permutations add some backtracking. + # + # Previously the combination of a cut and a subpattern returned + # "101 101 101 101 101". + + for my $bt ('', '[a-z]*') { # trigger a backtrack? + for my $cut ('', '?>') { + for my $subpat (q{\d}, q{(??{ '\d' })}, q{(?&DIGIT)} ) { + + my $desc = "bt=$bt cut=$cut subp=$subpat"; + @vals = (); + local our $x99 = 100; + + use re 'eval'; + $match = + "a1b2c3d4e5" =~ + /^ ( + [a-z] + ($cut + $subpat + (?{ local $x99 = $x99 + 1; push @vals, $x99 }) + ) + $bt + ){5} + + (?(DEFINE) (? \d ) ) + /x; + ok($match, "GH 16197: match; $desc"); + is("@vals", "101 102 103 104 105", + "GH 16197: local vals; $desc"); + } + } + } + } + } # End of sub run_tests 1;