Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion pod/perldelta.pod
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,11 @@ manager will later use a regex to expand these into links.

=item *

XXX
Regexes which include both a sub-pattern (e.g. C<(??{...})> or C<(?&FOO)>)
and a cut (i.e. C<< (?>...) >>) could sometimes cause a premature scope
exit in other code during a match. For example in something like
C<(?{ local $x = ... })>, the C<local> might have been unwound before the
pattern has finished matching. [GH #16197]

=back

Expand Down
82 changes: 42 additions & 40 deletions regexec.c
Original file line number Diff line number Diff line change
Expand Up @@ -320,7 +320,12 @@ S_regcppush(pTHX_ const regexp *rex, I32 parenfloor, U32 maxopenparen comma_pDEP

/* REGCP_UNWIND(cp): unwind savestack back to marked index,
* but without restoring regcppush()ed data (leave_scope() treats
* SAVEt_REGCONTEXT as a NOOP)
* SAVEt_REGCONTEXT as a NOOP).
*
* Note that the stack is normally only unwound on failure and
* backtracking. Successful matches involve accumulating many savestack
* entries which are all freed in *one go* during the final exit from
* S_regmatch().
*/

#define REGCP_UNWIND(cp) \
Expand Down Expand Up @@ -4403,13 +4408,9 @@ S_set_reg_curpm(pTHX_ REGEXP *rx, regmatch_info *reginfo)
STATIC bool /* 0 failure, 1 success */
S_regtry(pTHX_ regmatch_info *reginfo, char **startposp)
{
CHECKPOINT lastcp;
REGEXP *const rx = reginfo->prog;
regexp *const prog = ReANY(rx);
SSize_t result;
#ifdef DEBUGGING
U32 depth = 0; /* used by REGCP_SET */
#endif
RXi_GET_DECL(prog,progi);
DECLARE_AND_GET_RE_DEBUG_FLAGS;

Expand Down Expand Up @@ -4453,15 +4454,13 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startposp)
}
}
#endif
REGCP_SET(lastcp);
result = regmatch(reginfo, *startposp, progi->program + 1);
if (result != -1) {
RXp_OFFSp(prog)[0].end = result;
return 1;
}
if (reginfo->cutpoint)
*startposp= reginfo->cutpoint;
REGCP_UNWIND(lastcp);
return 0;
}

Expand Down Expand Up @@ -6683,7 +6682,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
during a successful match */
U32 lastopen = 0; /* last open we saw */
bool has_cutgroup = RXp_HAS_CUTGROUP(rex) ? 1 : 0;
SV* const oreplsv = GvSVn(PL_replgv);
/* these three flags are set by various ops to signal information to
* the very next op. They have a useful lifetime of exactly one loop
* iteration, and are not preserved or restored by state pushes/pops
Expand All @@ -6708,7 +6706,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
char_class_number_ classnum;
bool is_utf8_pat = reginfo->is_utf8_pat;
bool match = false;
I32 orig_savestack_ix = PL_savestack_ix;
I32 orig_savestack_ix;
U8 * script_run_begin = NULL;
char *match_end= NULL; /* where a match MUST end to be considered successful */
bool is_accepted = false; /* have we hit an ACCEPT opcode? */
Expand All @@ -6726,9 +6724,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
DECLARE_AND_GET_RE_DEBUG_FLAGS;
#endif

/* protect against undef(*^R) */
SAVEFREESV(SvREFCNT_inc_simple_NN(oreplsv));

/* shut up 'may be used uninitialized' compiler warnings for dMULTICALL */
multicall_oldcatch = 0;
PERL_UNUSED_VAR(multicall_cop);
Expand All @@ -6746,6 +6741,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
Perl_re_printf( aTHX_ "regmatch start\n" );
}));

REGCP_SET(orig_savestack_ix);

while (scan != NULL) {
next = scan + NEXT_OFF(scan);
if (next == scan)
Expand Down Expand Up @@ -8754,18 +8751,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
S_set_reg_curpm(aTHX_ rex_sv, reginfo);
rex = ReANY(rex_sv);
rexi = RXi_GET(rex);
{
/* preserve $^R across LEAVE's. See Bug 121070. */
SV *save_sv= GvSV(PL_replgv);
SV *replsv;
SvREFCNT_inc(save_sv);
REGCP_UNWIND(ST.cp); /* LEAVE in disguise */
/* don't move this initialization up */
replsv = GvSV(PL_replgv);
sv_setsv(replsv, save_sv);
SvSETMAGIC(replsv);
SvREFCNT_dec(save_sv);
}
cur_eval = ST.prev_eval;
cur_curlyx = ST.prev_curlyx;

Expand Down Expand Up @@ -10325,20 +10310,21 @@ NULL
DEBUG_EXECUTE_r(Perl_re_printf( aTHX_ "%sMatch successful!%s\n",
PL_colors[4], PL_colors[5]));

if (reginfo->info_aux_eval) {
/* each successfully executed (?{...}) block does the equivalent of
* local $^R = do {...}
* When popping the save stack, all these locals would be undone;
* bypass this by setting the outermost saved $^R to the latest
* value */
/* I don't know if this is needed or works properly now.
* see code related to PL_replgv elsewhere in this file.
* Yves
if (reginfo->info_aux_eval && orig_savestack_ix < PL_savestack_ix) {
/* After exiting a match, $^R should still hold the value of the
* latest (?{...}). E.g.
* /(?{42})/ and print $^R;
* will print 42. However, each successfully executed (?{...})
* block does the equivalent of 'local $^R = ...'. In addition,
* there may be an explicit 'local $^R' or similar code. When
* popping the savestack at the end, all these locals would be
* undone. Avoid this issue by making a copy of the final value
* just *before* the final savestack unwind. After the unwind,
* we set $^R to that value.
* This temporary copy is stored in the aux_eval struct so that
* it will get freed even if we die during savestack unwind.
*/
if (oreplsv != GvSV(PL_replgv)) {
sv_setsv(oreplsv, GvSV(PL_replgv));
SvSETMAGIC(oreplsv);
}
reginfo->info_aux_eval->final_replsv = newSVsv(GvSV(PL_replgv));
}
result = 1;
goto final_exit;
Expand Down Expand Up @@ -10403,12 +10389,25 @@ NULL

if (last_pushed_cv) {
dSP;
/* see "Some notes about MULTICALL" above */
/* see "Some notes about MULTICALL" above, especially how
* the POP_MULTICALL does the equivalent of the LEAVE_SCOPE
* for us, so no need to do it explicitly. */
POP_MULTICALL;
PERL_UNUSED_VAR(SP);
}
else
LEAVE_SCOPE(orig_savestack_ix);
else {
REGCP_UNWIND(orig_savestack_ix);
}

if ( reginfo->info_aux_eval
&& reginfo->info_aux_eval->final_replsv)
{
/* '/(?{42})/; print $^R' should print 42; now that the
* savestack has been popped, set the final value */
SV *replsv = GvSV(PL_replgv);
sv_setsv(replsv, reginfo->info_aux_eval->final_replsv);
SvSETMAGIC(replsv);
}

assert(!result || locinput - reginfo->strbeg >= 0);
return result ? locinput - reginfo->strbeg : -1;
Expand Down Expand Up @@ -11503,6 +11502,8 @@ S_setup_eval_state(pTHX_ regmatch_info *const reginfo)
RXp_SUBOFFSET(rex) = 0;
RXp_SUBCOFFSET(rex) = 0;
RXp_SUBLEN(rex) = reginfo->strend - reginfo->strbeg;

eval_state->final_replsv = NULL; /* the final value of $^R. */
}


Expand Down Expand Up @@ -11553,6 +11554,7 @@ S_cleanup_regmatch_info_aux(pTHX_ void *arg)
PM_SETRE(eval_state->old_op, eval_state->old_op_val);
SvREFCNT_dec(old_rx);
}
SvREFCNT_dec(eval_state->final_replsv);
}

PL_regmatch_state = aux->old_regmatch_state;
Expand Down
1 change: 1 addition & 0 deletions regexp.h
Original file line number Diff line number Diff line change
Expand Up @@ -791,6 +791,7 @@ typedef struct {
SV *sv; /* $_ during (?{}) */
MAGIC *pos_magic; /* pos() magic attached to $_ */
SSize_t pos; /* the original value of pos() in pos_magic */
SV *final_replsv; /* the final value of $^R. */
U8 pos_flags; /* flags to be restored; currently only MGf_BYTES*/
} regmatch_info_aux_eval;

Expand Down
30 changes: 29 additions & 1 deletion t/op/svleak.t
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ BEGIN {

use Config;

plan tests => 157;
plan tests => 158;

# run some code N times. If the number of SVs at the end of loop N is
# greater than (N-1)*delta at the end of loop 1, we've got a leak
Expand Down Expand Up @@ -710,3 +710,31 @@ package myconcat {
'overloaded pattern with code block'
);
}

# When making a final copy of $^R before unwinding the savestack,
# make sure that the copy doesn't leak if we die during that unwinding.
# Dying in STORE triggers that.

package GH16197 {

sub TIESCALAR { bless [ 0 ]; }
sub FETCH { $_[0][0] }
sub STORE { my $v = $_[1];
# die when undoing the 'local'; the previous val was real,
# new value is undef.
if ($_[0][-1] and !$v) {
@{$_[0]} = ();
die;
}
push @{$_[0]}, $v;
}

our $x99;
local $x99;
tie $x99, 'GH16197';

::leak(5, 0,
sub { eval { "" =~ /(?{ local $x99; $x99 = 9 })/; }; },
"no leak in \$^R copy during stack unwind",
);
}
49 changes: 48 additions & 1 deletion t/re/pat_re_eval.t
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ BEGIN {
our @global;


plan tests => 528; # Update this when adding/deleting tests.
plan tests => 552; # Update this when adding/deleting tests.

run_tests() unless caller;

Expand Down Expand Up @@ -1446,6 +1446,53 @@ sub run_tests {
is($got, "[A-[X-,X-,XY,XY],A-,AB,AB]", "GH22869");
}

# GH #16197
# A subpattern, i.e. (??{...}) or (?&...), when combined with
# a cut, (?>...), caused the savestack to be prematurely unwound,
# resulting in localisations within (?{...}) being undone before
# the end of the match.

{
my ($match, @vals);

# The first six permutations do no backtracking, but are: with or
# without a cut; and match a digit via either a simple \d,
# or via a sub-pattern - (??{}) or (?&).
# The next six permutations add some backtracking.
#
# Previously the combination of a cut and a subpattern returned
# "101 101 101 101 101".

for my $bt ('', '[a-z]*') { # trigger a backtrack?
for my $cut ('', '?>') {
for my $subpat (q{\d}, q{(??{ '\d' })}, q{(?&DIGIT)} ) {

my $desc = "bt=$bt cut=$cut subp=$subpat";
@vals = ();
local our $x99 = 100;

use re 'eval';
$match =
"a1b2c3d4e5" =~
/^ (
[a-z]
($cut
$subpat
(?{ local $x99 = $x99 + 1; push @vals, $x99 })
)
$bt
){5}

(?(DEFINE) (?<DIGIT> \d ) )
/x;
ok($match, "GH 16197: match; $desc");
is("@vals", "101 102 103 104 105",
"GH 16197: local vals; $desc");
}
}
}
}

} # End of sub run_tests

1;
Loading