Fix a crash in optimizing split()

We also added av_count() in-line function. It's a dependency and it
toke a big traction in the upstream. It will ease backporting future
fixes.
This commit is contained in:
Petr Písař 2020-11-19 18:03:47 +01:00
parent 3fab80b10b
commit 1c329f55ed
5 changed files with 520 additions and 1 deletions

View File

@ -0,0 +1,175 @@
From bafffe7f2ca587960177ed03216e2d5692fe6143 Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Wed, 19 Aug 2020 11:57:17 -0600
Subject: [PATCH] Add av_count()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This returns the number of elements in an array in a clearly named
function.
av_top_index(), av_tindex() are clearly named, but are less than ideal,
and came about because no one back then thought of this one, until now
Paul Evans did.
Petr Písař: Port 87306e0674dfe3af29804b4641347cd5ac9b0521 to 5.32.0.
Signed-off-by: Petr Písař <ppisar@redhat.com>
---
av.c | 17 ++++++++++++++---
av.h | 3 ++-
embed.fnc | 3 ++-
embed.h | 2 +-
inline.h | 16 ++++++++++++----
proto.h | 11 ++++++++---
6 files changed, 39 insertions(+), 13 deletions(-)
diff --git a/av.c b/av.c
index 27b2f12..b5ddaca 100644
--- a/av.c
+++ b/av.c
@@ -814,9 +814,10 @@ The Perl equivalent for this is C<$#myarray>.
=for apidoc av_len
Same as L</av_top_index>. Note that, unlike what the name implies, it returns
-the highest index in the array, so to get the size of the array you need to use
-S<C<av_len(av) + 1>>. This is unlike L</sv_len>, which returns what you would
-expect.
+the highest index in the array. This is unlike L</sv_len>, which returns what
+you would expect.
+
+B<To get the true number of elements in the array, instead use C<L</av_count>>>.
=cut
*/
@@ -1089,6 +1090,16 @@ Perl_av_nonelem(pTHX_ AV *av, SSize_t ix) {
return sv;
}
+SSize_t
+Perl_av_top_index(pTHX_ AV *av)
+{
+ PERL_ARGS_ASSERT_AV_TOP_INDEX;
+ assert(SvTYPE(av) == SVt_PVAV);
+
+ return AvFILL(av);
+}
+
+
/*
* ex: set ts=8 sts=4 sw=4 et:
*/
diff --git a/av.h b/av.h
index 5e39c42..90ebfff 100644
--- a/av.h
+++ b/av.h
@@ -81,7 +81,8 @@ Same as C<av_top_index()>.
#define AvFILL(av) ((SvRMAGICAL((const SV *) (av))) \
? mg_size(MUTABLE_SV(av)) : AvFILLp(av))
-#define av_tindex(av) av_top_index(av)
+#define av_top_index(av) AvFILL(av)
+#define av_tindex(av) av_top_index(av)
/* Note that it doesn't make sense to do this:
* SvGETMAGIC(av); IV x = av_tindex_nomg(av);
diff --git a/embed.fnc b/embed.fnc
index 589ab1a..789cd3c 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -541,7 +541,8 @@ Apd |void |av_push |NN AV *av|NN SV *val
EXp |void |av_reify |NN AV *av
ApdR |SV* |av_shift |NN AV *av
Apd |SV** |av_store |NN AV *av|SSize_t key|NULLOK SV *val
-AidRp |SSize_t|av_top_index |NN AV *av
+AMdRp |SSize_t|av_top_index |NN AV *av
+AidRp |Size_t |av_count |NN AV *av
AmdR |SSize_t|av_tindex |NN AV *av
Apd |void |av_undef |NN AV *av
Apdoex |SV** |av_create_and_unshift_one|NN AV **const avp|NN SV *const val
diff --git a/embed.h b/embed.h
index 182b12a..329ac40 100644
--- a/embed.h
+++ b/embed.h
@@ -48,6 +48,7 @@
#define atfork_lock Perl_atfork_lock
#define atfork_unlock Perl_atfork_unlock
#define av_clear(a) Perl_av_clear(aTHX_ a)
+#define av_count(a) Perl_av_count(aTHX_ a)
#define av_delete(a,b,c) Perl_av_delete(aTHX_ a,b,c)
#define av_exists(a,b) Perl_av_exists(aTHX_ a,b)
#define av_extend(a,b) Perl_av_extend(aTHX_ a,b)
@@ -59,7 +60,6 @@
#define av_push(a,b) Perl_av_push(aTHX_ a,b)
#define av_shift(a) Perl_av_shift(aTHX_ a)
#define av_store(a,b,c) Perl_av_store(aTHX_ a,b,c)
-#define av_top_index(a) Perl_av_top_index(aTHX_ a)
#define av_undef(a) Perl_av_undef(aTHX_ a)
#define av_unshift(a,b) Perl_av_unshift(aTHX_ a,b)
#define block_end(a,b) Perl_block_end(aTHX_ a,b)
diff --git a/inline.h b/inline.h
index 27005d2..35af18a 100644
--- a/inline.h
+++ b/inline.h
@@ -39,13 +39,21 @@ SOFTWARE.
/* ------------------------------- av.h ------------------------------- */
-PERL_STATIC_INLINE SSize_t
-Perl_av_top_index(pTHX_ AV *av)
+/*
+=for apidoc av_count
+Returns the number of elements in the array C<av>. This is the true length of
+the array, including any undefined elements. It is always the same as
+S<C<av_top_index(av) + 1>>.
+
+=cut
+*/
+PERL_STATIC_INLINE Size_t
+Perl_av_count(pTHX_ AV *av)
{
- PERL_ARGS_ASSERT_AV_TOP_INDEX;
+ PERL_ARGS_ASSERT_AV_COUNT;
assert(SvTYPE(av) == SVt_PVAV);
- return AvFILL(av);
+ return AvFILL(av) + 1;
}
/* ------------------------------- cv.h ------------------------------- */
diff --git a/proto.h b/proto.h
index 02ef4ed..83ba098 100644
--- a/proto.h
+++ b/proto.h
@@ -219,6 +219,13 @@ PERL_CALLCONV SV** Perl_av_arylen_p(pTHX_ AV *av);
PERL_CALLCONV void Perl_av_clear(pTHX_ AV *av);
#define PERL_ARGS_ASSERT_AV_CLEAR \
assert(av)
+#ifndef PERL_NO_INLINE_FUNCTIONS
+PERL_STATIC_INLINE Size_t Perl_av_count(pTHX_ AV *av)
+ __attribute__warn_unused_result__;
+#define PERL_ARGS_ASSERT_AV_COUNT \
+ assert(av)
+#endif
+
PERL_CALLCONV void Perl_av_create_and_push(pTHX_ AV **const avp, SV *const val);
#define PERL_ARGS_ASSERT_AV_CREATE_AND_PUSH \
assert(avp); assert(val)
@@ -284,12 +291,10 @@ PERL_CALLCONV SV** Perl_av_store(pTHX_ AV *av, SSize_t key, SV *val);
__attribute__warn_unused_result__; */
#define PERL_ARGS_ASSERT_AV_TINDEX
-#ifndef PERL_NO_INLINE_FUNCTIONS
-PERL_STATIC_INLINE SSize_t Perl_av_top_index(pTHX_ AV *av)
+PERL_CALLCONV SSize_t Perl_av_top_index(pTHX_ AV *av)
__attribute__warn_unused_result__;
#define PERL_ARGS_ASSERT_AV_TOP_INDEX \
assert(av)
-#endif
PERL_CALLCONV void Perl_av_undef(pTHX_ AV *av);
#define PERL_ARGS_ASSERT_AV_UNDEF \
--
2.25.4

View File

@ -0,0 +1,196 @@
From d7504df2a5d8985f2a8b04f17acff5e324572c39 Mon Sep 17 00:00:00 2001
From: Richard Leach <richardleach@users.noreply.github.com>
Date: Sun, 11 Oct 2020 12:26:27 +0100
Subject: [PATCH] pp_split: no SWITCHSTACK in @ary = split(...) optimisation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Petr Písař: 607eaf26a99ff76ab48877e68f1d7b005dc51575 ported to 5.32.0.
Signed-off-by: Petr Písař <ppisar@redhat.com>
---
pp.c | 89 +++++++++++++++++++++++++++++-----------------------
t/op/split.t | 23 +++++++++++++-
2 files changed, 72 insertions(+), 40 deletions(-)
diff --git a/pp.c b/pp.c
index df80830..e4863d3 100644
--- a/pp.c
+++ b/pp.c
@@ -5985,6 +5985,7 @@ PP(pp_split)
/* handle @ary = split(...) optimisation */
if (PL_op->op_private & OPpSPLIT_ASSIGN) {
+ realarray = 1;
if (!(PL_op->op_flags & OPf_STACKED)) {
if (PL_op->op_private & OPpSPLIT_LEX) {
if (PL_op->op_private & OPpLVAL_INTRO)
@@ -6007,26 +6008,10 @@ PP(pp_split)
oldsave = PL_savestack_ix;
}
- realarray = 1;
- PUTBACK;
- av_extend(ary,0);
- (void)sv_2mortal(SvREFCNT_inc_simple_NN(sv));
- av_clear(ary);
- SPAGAIN;
if ((mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied))) {
PUSHMARK(SP);
XPUSHs(SvTIED_obj(MUTABLE_SV(ary), mg));
- }
- else {
- if (!AvREAL(ary)) {
- I32 i;
- AvREAL_on(ary);
- AvREIFY_off(ary);
- for (i = AvFILLp(ary); i >= 0; i--)
- AvARRAY(ary)[i] = &PL_sv_undef; /* don't free mere refs */
- }
- /* temporarily switch stacks */
- SAVESWITCHSTACK(PL_curstack, ary);
+ } else {
make_mortal = 0;
}
}
@@ -6358,29 +6343,56 @@ PP(pp_split)
LEAVE_SCOPE(oldsave); /* may undo an earlier SWITCHSTACK */
SPAGAIN;
if (realarray) {
- if (!mg) {
- if (SvSMAGICAL(ary)) {
- PUTBACK;
+ if (!mg) {
+ PUTBACK;
+ if(AvREAL(ary)) {
+ if (av_count(ary) > 0)
+ av_clear(ary);
+ } else {
+ AvREAL_on(ary);
+ AvREIFY_off(ary);
+
+ if (AvMAX(ary) > -1) {
+ /* don't free mere refs */
+ Zero(AvARRAY(ary), AvMAX(ary), SV*);
+ }
+ }
+ if(AvMAX(ary) < iters)
+ av_extend(ary,iters);
+ SPAGAIN;
+
+ /* Need to copy the SV*s from the stack into ary */
+ Copy(SP + 1 - iters, AvARRAY(ary), iters, SV*);
+ AvFILLp(ary) = iters - 1;
+
+ if (SvSMAGICAL(ary)) {
+ PUTBACK;
mg_set(MUTABLE_SV(ary));
SPAGAIN;
- }
- if (gimme == G_ARRAY) {
- EXTEND(SP, iters);
- Copy(AvARRAY(ary), SP + 1, iters, SV*);
- SP += iters;
- RETURN;
- }
+ }
+
+ if (gimme != G_ARRAY) {
+ /* SP points to the final SV* pushed to the stack. But the SV* */
+ /* are not going to be used from the stack. Point SP to below */
+ /* the first of these SV*. */
+ SP -= iters;
+ PUTBACK;
+ }
}
else {
- PUTBACK;
- ENTER_with_name("call_PUSH");
- call_sv(SV_CONST(PUSH),G_SCALAR|G_DISCARD|G_METHOD_NAMED);
- LEAVE_with_name("call_PUSH");
- SPAGAIN;
+ PUTBACK;
+ av_extend(ary,iters);
+ av_clear(ary);
+
+ ENTER_with_name("call_PUSH");
+ call_sv(SV_CONST(PUSH),G_SCALAR|G_DISCARD|G_METHOD_NAMED);
+ LEAVE_with_name("call_PUSH");
+ SPAGAIN;
+
if (gimme == G_ARRAY) {
SSize_t i;
/* EXTEND should not be needed - we just popped them */
- EXTEND(SP, iters);
+ EXTEND_SKIP(SP, iters);
for (i=0; i < iters; i++) {
SV **svp = av_fetch(ary, i, FALSE);
PUSHs((svp) ? *svp : &PL_sv_undef);
@@ -6389,13 +6401,12 @@ PP(pp_split)
}
}
}
- else {
- if (gimme == G_ARRAY)
- RETURN;
- }
- GETTARGET;
- XPUSHi(iters);
+ if (gimme != G_ARRAY) {
+ GETTARGET;
+ XPUSHi(iters);
+ }
+
RETURN;
}
diff --git a/t/op/split.t b/t/op/split.t
index 14f9158..7f37512 100644
--- a/t/op/split.t
+++ b/t/op/split.t
@@ -7,7 +7,7 @@ BEGIN {
set_up_inc('../lib');
}
-plan tests => 176;
+plan tests => 182;
$FS = ':';
@@ -648,6 +648,19 @@ is "@a", '1 2 3', 'assignment to split-to-array (stacked)';
is (+@a, 0, "empty utf8 string");
}
+# correct stack adjustments (gh#18232)
+{
+ sub foo { return @_ }
+ my @a = foo(1, scalar split " ", "a b");
+ is(join('', @a), "12", "Scalar split to a sub parameter");
+}
+
+{
+ sub foo { return @_ }
+ my @a = foo(1, scalar(@x = split " ", "a b"));
+ is(join('', @a), "12", "Split to @x then use scalar result as a sub parameter");
+}
+
fresh_perl_is(<<'CODE', '', {}, "scalar split stack overflow");
map{int"";split//.0>60for"0000000000000000"}split// for"00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"
CODE
@@ -667,3 +680,11 @@ CODE
ok(eq_array(\@result,['a','b']), "Resulting in ('a','b')");
}
}
+
+# check that the (@ary = split) optimisation survives @ary being modified
+
+fresh_perl_is('my @ary; @ary = split(/\w(?{ @ary[1000] = 1 })/, "abc");',
+ '',{},'(@ary = split ...) survives @ary being Renew()ed');
+fresh_perl_is('my @ary; @ary = split(/\w(?{ undef @ary })/, "abc");',
+ '',{},'(@ary = split ...) survives an (undef @ary)');
+
--
2.25.4

View File

@ -0,0 +1,77 @@
From bd5fa06648085e8c17efd55abeb6424aeeb1018e Mon Sep 17 00:00:00 2001
From: Karl Williamson <khw@cpan.org>
Date: Tue, 29 Sep 2020 00:48:19 -0600
Subject: [PATCH] Remove Perl_av_top_index
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
I created this in 87306e0674dfe3af29804b4641347cd5ac9b0521, thinking it
was needed to preserve backward compatibility if someone were using this
instead of the macro. But it turned out that there never was such a
function, it was inlined, and the name was S_av_top_index, so there is
no reason to create a new function that no one has ever been able to
call. So just remove it, and let all accesses go through the macro
Signed-off-by: Petr Písař <ppisar@redhat.com>
---
av.c | 10 ----------
embed.fnc | 2 +-
proto.h | 7 +++----
3 files changed, 4 insertions(+), 15 deletions(-)
diff --git a/av.c b/av.c
index ada09cde9a..ad2429f90d 100644
--- a/av.c
+++ b/av.c
@@ -1095,16 +1095,6 @@ Perl_av_nonelem(pTHX_ AV *av, SSize_t ix) {
return sv;
}
-SSize_t
-Perl_av_top_index(pTHX_ AV *av)
-{
- PERL_ARGS_ASSERT_AV_TOP_INDEX;
- assert(SvTYPE(av) == SVt_PVAV);
-
- return AvFILL(av);
-}
-
-
/*
* ex: set ts=8 sts=4 sw=4 et:
*/
diff --git a/embed.fnc b/embed.fnc
index a6b4d0350f..f5c5b29c2d 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -637,7 +637,7 @@ Apd |void |av_push |NN AV *av|NN SV *val
EXp |void |av_reify |NN AV *av
ApdR |SV* |av_shift |NN AV *av
Apd |SV** |av_store |NN AV *av|SSize_t key|NULLOK SV *val
-AMdRp |SSize_t|av_top_index |NN AV *av
+AmdR |SSize_t|av_top_index |NN AV *av
AidRp |Size_t |av_count |NN AV *av
AmdR |SSize_t|av_tindex |NN AV *av
Apd |void |av_undef |NN AV *av
diff --git a/proto.h b/proto.h
index c4490fc46e..2da1a07761 100644
--- a/proto.h
+++ b/proto.h
@@ -291,10 +291,9 @@ PERL_CALLCONV SV** Perl_av_store(pTHX_ AV *av, SSize_t key, SV *val);
__attribute__warn_unused_result__; */
#define PERL_ARGS_ASSERT_AV_TINDEX
-PERL_CALLCONV SSize_t Perl_av_top_index(pTHX_ AV *av)
- __attribute__warn_unused_result__;
-#define PERL_ARGS_ASSERT_AV_TOP_INDEX \
- assert(av)
+/* PERL_CALLCONV SSize_t av_top_index(pTHX_ AV *av)
+ __attribute__warn_unused_result__; */
+#define PERL_ARGS_ASSERT_AV_TOP_INDEX
PERL_CALLCONV void Perl_av_undef(pTHX_ AV *av);
#define PERL_ARGS_ASSERT_AV_UNDEF \
--
2.25.4

View File

@ -0,0 +1,54 @@
From ab307de390c3459badcc89b3d77542b5b871b2e8 Mon Sep 17 00:00:00 2001
From: Richard Leach <richardleach@users.noreply.github.com>
Date: Tue, 20 Oct 2020 18:16:38 +0100
Subject: [PATCH 2/2] pp_split: add TonyC's stack-not-refcounted-suggestion and
tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Petr Písař <ppisar@redhat.com>
---
pp.c | 5 ++++-
t/op/split.t | 5 +++++
2 files changed, 9 insertions(+), 1 deletion(-)
diff --git a/pp.c b/pp.c
index ce16c56e63..5b5e163011 100644
--- a/pp.c
+++ b/pp.c
@@ -6034,6 +6034,9 @@ PP(pp_split)
oldsave = PL_savestack_ix;
}
+ /* Some defence against stack-not-refcounted bugs */
+ (void)sv_2mortal(SvREFCNT_inc_simple_NN(ary));
+
if ((mg = SvTIED_mg((const SV *)ary, PERL_MAGIC_tied))) {
PUSHMARK(SP);
XPUSHs(SvTIED_obj(MUTABLE_SV(ary), mg));
@@ -6356,7 +6359,7 @@ PP(pp_split)
}
PUTBACK;
- LEAVE_SCOPE(oldsave); /* may undo an earlier SWITCHSTACK */
+ LEAVE_SCOPE(oldsave);
SPAGAIN;
if (realarray) {
if (!mg) {
diff --git a/t/op/split.t b/t/op/split.t
index 1d78a45bde..7a321645ac 100644
--- a/t/op/split.t
+++ b/t/op/split.t
@@ -703,3 +703,8 @@ fresh_perl_is('my @ary; @ary = split(/\w(?{ @ary[1000] = 1 })/, "abc");',
fresh_perl_is('my @ary; @ary = split(/\w(?{ undef @ary })/, "abc");',
'',{},'(@ary = split ...) survives an (undef @ary)');
+# check the (@ary = split) optimisation survives stack-not-refcounted bugs
+fresh_perl_is('our @ary; @ary = split(/\w(?{ *ary = 0 })/, "abc");',
+ '',{},'(@ary = split ...) survives @ary destruction via typeglob');
+fresh_perl_is('my $ary = []; @$ary = split(/\w(?{ $ary = [] })/, "abc");',
+ '',{},'(@ary = split ...) survives @ary destruction via reassignment');
--
2.25.4

View File

@ -100,7 +100,7 @@ License: GPL+ or Artistic
Epoch: %{perl_epoch}
Version: %{perl_version}
# release number must be even higher, because dual-lived modules will be broken otherwise
Release: 466%{?dist}
Release: 467%{?dist}
Summary: Practical Extraction and Report Language
Url: https://www.perl.org/
Source0: https://www.cpan.org/src/5.0/perl-%{perl_version}.tar.xz
@ -266,6 +266,12 @@ Patch42: perl-5.33.3-t-op-inc.t-t-op-hexfp.t-t-op-sprintf2.t-Add-missing-
# in upstream after 5.33.3
Patch43: perl-5.33.3-fetch-magic-on-the-first-stacked-filetest-not-the-la.patch
# Fix a crash in optimizing split(), GH#18232, in upstream after 5.33.3
Patch44: perl-5.32.0-Add-av_count.patch
Patch45: perl-5.33.2-Remove-Perl_av_top_index.patch
Patch46: perl-5.32.0-pp_split-no-SWITCHSTACK-in-ary-split-.-optimisation.patch
Patch47: perl-5.33.3-pp_split-add-TonyC-s-stack-not-refcounted-suggestion.patch
# Link XS modules to libperl.so with EU::CBuilder on Linux, bug #960048
Patch200: perl-5.16.3-Link-XS-modules-to-libperl.so-with-EU-CBuilder-on-Li.patch
@ -4311,6 +4317,10 @@ you're not running VMS, this module does nothing.
%patch41 -p1
%patch42 -p1
%patch43 -p1
%patch44 -p1
%patch45 -p1
%patch46 -p1
%patch47 -p1
%patch200 -p1
%patch201 -p1
@ -4361,6 +4371,10 @@ perl -x patchlevel.h \
'Fedora Patch41: Fix un undefined behavior in Perl_custom_op_get_field()' \
'Fedora Patch42: Fix Config variable names in in t/op tests' \
'Fedora Patch43: Fix fetching a magic on the stacked file test operators' \
'Fedora Patch44: Fix a crash in optimizing split() (GH#18232)' \
'Fedora Patch45: Fix a crash in optimizing split() (GH#18232)' \
'Fedora Patch46: Fix a crash in optimizing split() (GH#18232)' \
'Fedora Patch47: Fix a crash in optimizing split() (GH#18232)' \
'Fedora Patch200: Link XS modules to libperl.so with EU::CBuilder on Linux' \
'Fedora Patch201: Link XS modules to libperl.so with EU::MM on Linux' \
%{nil}
@ -7079,6 +7093,9 @@ popd
# Old changelog entries are preserved in CVS.
%changelog
* Thu Nov 19 2020 Petr Pisar <ppisar@redhat.com> - 4:5.32.0-467
- Fix a crash in optimizing split() (GH#18232)
* Thu Nov 12 2020 Petr Pisar <ppisar@redhat.com> - 4:5.32.0-466
- Fix un undefined behavior in Perl_custom_op_get_field()
- Fix Config variable names in in t/op tests