Merge pull request #3403 from hadfl/perl_r38

perl: patch regcomp.c to address CVE-2023-47038
omniosorg · Nov 29, 2023 · ea43957 · ea43957
2 parents 1d08538 + e59dc50
commit ea43957
Show file tree

Hide file tree

Showing 3 changed files with 119 additions and 1 deletion.
diff --git a/build/perl/build.sh b/build/perl/build.sh
@@ -20,7 +20,7 @@
 PROG=perl
 PKG=runtime/perl
 VER=5.32.1
-DASHREV=1
+DASHREV=2
 MAJVER=${VER%.*}
 SUMMARY="Perl $MAJVER Programming Language"
 DESC="A highly capable, feature-rich programming language"

diff --git a/build/perl/patches/CVE-2023-47038.patch b/build/perl/patches/CVE-2023-47038.patch
@@ -0,0 +1,117 @@
+From 12c313ce49b36160a7ca2e9b07ad5bd92ee4a010 Mon Sep 17 00:00:00 2001
+From: Karl Williamson <[email protected]>
+Date: Sat, 9 Sep 2023 11:59:09 -0600
+Subject: [PATCH] Fix read/write past buffer end: perl-security#140
+
+A package name may be specified in a \p{...} regular expression
+construct.  If unspecified, "utf8::" is assumed, which is the package
+all official Unicode properties are in.  By specifying a different
+package, one can create a user-defined property with the same
+unqualified name as a Unicode one.  Such a property is defined by a sub
+whose name begins with "Is" or "In", and if the sub wishes to refer to
+an official Unicode property, it must explicitly specify the "utf8::".
+S_parse_uniprop_string() is used to parse the interior of both \p{} and
+the user-defined sub lines.
+
+In S_parse_uniprop_string(), it parses the input "name" parameter,
+creating a modified copy, "lookup_name", malloc'ed with the same size as
+"name".  The modifications are essentially to create a canonicalized
+version of the input, with such things as extraneous white-space
+stripped off.  I found it convenient to strip off the package specifier
+"utf8::".  To to so, the code simply pretends "lookup_name" begins just
+after the "utf8::", and adjusts various other values to compensate.
+However, it missed the adjustment of one required one.
+
+This is only a problem when the property name begins with "perl" and
+isn't "perlspace" nor "perlword".  All such ones are undocumented
+internal properties.
+
+What happens in this case is that the input is reparsed with slightly
+different rules in effect as to what is legal versus illegal.  The
+problem is that "lookup_name" no longer is pointing to its initial
+value, but "name" is.  Thus the space allocated for filling "lookup_name"
+is now shorter than "name", and as this shortened "lookup_name" is
+filled by copying suitable portions of "name", the write can be to
+unallocated space.
+
+The solution is to skip the "utf8::" when reparsing "name".  Then both
+"lookup_name" and "name" are effectively shortened by the same amount,
+and there is no going off the end.
+
+This commit also does white-space adjustment so that things align
+vertically for readability.
+
+This can be easily backported to earlier Perl releases.
+---
+ regcomp.c           | 17 +++++++++++------
+ t/re/pat_advanced.t |  8 ++++++++
+ 2 files changed, 19 insertions(+), 6 deletions(-)
+
+diff --git a/regcomp.c b/regcomp.c
+index 9c6ccc2c1b34..833f8644f754 100644
+--- a/regcomp.c
++++ b/regcomp.c
+@@ -23879,7 +23879,7 @@ S_parse_uniprop_string(pTHX_
+      * compile perl to know about them) */
+     bool is_nv_type = FALSE;
+
+-    unsigned int i, j = 0;
++    unsigned int i = 0, i_zero = 0, j = 0;
+     int equals_pos = -1;    /* Where the '=' is found, or negative if none */
+     int slash_pos  = -1;    /* Where the '/' is found, or negative if none */
+     int table_index = 0;    /* The entry number for this property in the table
+@@ -24013,9 +24013,13 @@ S_parse_uniprop_string(pTHX_
+      * all of them are considered to be for that package.  For the purposes of
+      * parsing the rest of the property, strip it off */
+     if (non_pkg_begin == STRLENs("utf8::") && memBEGINPs(name, name_len, "utf8::")) {
+-        lookup_name +=  STRLENs("utf8::");
+-        j -=  STRLENs("utf8::");
+-        equals_pos -=  STRLENs("utf8::");
++        lookup_name += STRLENs("utf8::");
++        j           -= STRLENs("utf8::");
++        equals_pos  -= STRLENs("utf8::");
++        i_zero       = STRLENs("utf8::");   /* When resetting 'i' to reparse
++                                               from the beginning, it has to be
++                                               set past what we're stripping
++                                               off */
+         stripped_utf8_pkg = TRUE;
+     }
+
+@@ -24420,7 +24424,8 @@ S_parse_uniprop_string(pTHX_
+
+             /* We set the inputs back to 0 and the code below will reparse,
+              * using strict */
+-            i = j = 0;
++            i = i_zero;
++            j = 0;
+         }
+     }
+
+@@ -24441,7 +24446,7 @@ S_parse_uniprop_string(pTHX_
+          * separates two digits */
+         if (cur == '_') {
+             if (    stricter
+-                && (     i == 0 || (int) i == equals_pos || i == name_len- 1
++                && (   i == i_zero || (int) i == equals_pos || i == name_len- 1
+                     || ! isDIGIT_A(name[i-1]) || ! isDIGIT_A(name[i+1])))
+             {
+                 lookup_name[j++] = '_';
+diff --git a/t/re/pat_advanced.t b/t/re/pat_advanced.t
+index 6152c7b85c1b..1db317fff941 100644
+--- a/t/re/pat_advanced.t
++++ b/t/re/pat_advanced.t
+@@ -2688,6 +2688,14 @@ EOF_DEBUG_OUT
+                       {}, "Related to Github Issue #19350, forward \\g{x} pattern segv under use re Debug => 'PARSE'");
+     }
+
++    {   # perl-security#140, read/write past buffer end
++        fresh_perl_like('qr/\p{utf8::perl x}/',
++                        qr/Illegal user-defined property name "utf8::perl x" in regex/,
++                        {}, "perl-security#140");
++        fresh_perl_is('qr/\p{utf8::_perl_surrogate}/', "",
++                        {}, "perl-security#140");
++    }
++
+
+     # !!! NOTE that tests that aren't at all likely to crash perl should go
+     # a ways above, above these last ones.  There's a comment there that, like
diff --git a/build/perl/patches/series b/build/perl/patches/series
@@ -1,3 +1,4 @@
 t_op_getppid.t.patch
 dtrace.patch
 CVE-2021-36770.patch
+CVE-2023-47038.patch