From d3b09ae0076981fb5ef8a979fa387105278a7234 Mon Sep 17 00:00:00 2001 From: Jitka Plesnikova Date: Wed, 11 May 2022 11:01:46 +0200 Subject: [PATCH] Upgrade to 2.184 --- Dumper.pm | 51 +++++++++++++++++++-------------------------------- Dumper.xs | 10 ++++------ t/dumper.t | 52 ++++++++++++++++------------------------------------ 3 files changed, 39 insertions(+), 74 deletions(-) diff --git a/Dumper.pm b/Dumper.pm index 3b1bb75..ba61ffe 100644 --- a/Dumper.pm +++ b/Dumper.pm @@ -29,7 +29,7 @@ our ( $Indent, $Trailingcomma, $Purity, $Pad, $Varname, $Useqq, $Terse, $Freezer our ( @ISA, @EXPORT, @EXPORT_OK, $VERSION ); BEGIN { - $VERSION = '2.183'; # Don't forget to set version and release + $VERSION = '2.184'; # Don't forget to set version and release # date in POD below! @ISA = qw(Exporter); @@ -740,15 +740,15 @@ my %esc = ( "\e" => "\\e", ); -my $low_controls = ($IS_ASCII) - - # This includes \177, because traditionally it has been - # output as octal, even though it isn't really a "low" - # control - ? qr/[\0-\x1f\177]/ - - # EBCDIC low controls. - : qr/[\0-\x3f]/; +# The low controls are considered to be everything below SPACE, plus the +# outlier \c? control (but that wasn't properly in existence in early perls, +# so reconstruct its value here. This abandons EBCDIC support for this +# character for perls below 5.8) +my $low_controls = join "", map { quotemeta chr $_ } 0.. (ord(" ") - 1); +$low_controls .= ($] < 5.008 || $IS_ASCII) + ? "\x7f" + : chr utf8::unicode_to_native(0x9F); +my $low_controls_re = qr/[$low_controls]/; # put a string value in double quotes sub qquote { @@ -758,19 +758,10 @@ sub qquote { # This efficiently changes the high ordinal characters to \x{} if the utf8 # flag is on. On ASCII platforms, the high ordinals are all the # non-ASCII's. On EBCDIC platforms, we don't include in these the non-ASCII - # controls whose ordinals are less than SPACE, excluded below by the range - # \0-\x3f. On ASCII platforms this range just compiles as part of :ascii:. - # On EBCDIC platforms, there is just one outlier high ordinal control, and - # it gets output as \x{}. + # controls. my $bytes; { use bytes; $bytes = length } - s/([^[:ascii:]\0-\x3f])/sprintf("\\x{%x}",ord($1))/ge - if $bytes > length - - # The above doesn't get the EBCDIC outlier high ordinal control when - # the string is UTF-8 but there are no UTF-8 variant characters in it. - # We want that to come out as \x{} anyway. We need is_utf8() to do - # this. - || (! $IS_ASCII && utf8::is_utf8($_)); + s/([^[:ascii:]$low_controls])/sprintf("\\x{%x}",ord($1))/ge + if $bytes > length; return qq("$_") unless /[[:^print:]]/; # fast exit if only printables @@ -779,21 +770,17 @@ sub qquote { s/([\a\b\t\n\f\r\e])/$esc{$1}/g; # no need for 3 digits in escape for octals not followed by a digit. - s/($low_controls)(?!\d)/'\\'.sprintf('%o',ord($1))/eg; + s/($low_controls_re)(?!\d)/'\\'.sprintf('%o',ord($1))/eg; # But otherwise use 3 digits - s/($low_controls)/'\\'.sprintf('%03o',ord($1))/eg; + s/($low_controls_re)/'\\'.sprintf('%03o',ord($1))/eg; # all but last branch below not supported --BEHAVIOR SUBJECT TO CHANGE-- my $high = shift || ""; if ($high eq "iso8859") { # Doesn't escape the Latin1 printables - if ($IS_ASCII) { - s/([\200-\240])/'\\'.sprintf('%o',ord($1))/eg; - } - else { - my $high_control = utf8::unicode_to_native(0x9F); - s/$high_control/sprintf('\\%o',ord($1))/eg; - } + # Could use /u and [:cntrl:] etc, if khw were confident it worked in + # early early perls + s/([\200-\240])/'\\'.sprintf('%o',ord($1))/eg if $IS_ASCII; } elsif ($high eq "utf8") { # Some discussion of what to do here is in # https://rt.perl.org/Ticket/Display.html?id=113088 @@ -1461,7 +1448,7 @@ modify it under the same terms as Perl itself. =head1 VERSION -Version 2.183 +Version 2.184 =head1 SEE ALSO diff --git a/Dumper.xs b/Dumper.xs index 0eaa6c9..8bd6397 100644 --- a/Dumper.xs +++ b/Dumper.xs @@ -287,14 +287,13 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq) * outputs the raw char */ normal++; } - else { /* Is qq, low ordinal, non-printable. Output escape - * sequences */ + else { /* Is qq, non-printable. Output escape sequences */ if ( k == '\a' || k == '\b' || k == '\t' || k == '\n' || k == '\r' || k == '\f' || k == ESC_NATIVE) { grow += 2; /* 1 char plus backslash */ } - else /* The other low ordinals are output as an octal escape + else /* The other non-printable controls are output as an octal escape * sequence */ if (s + 1 >= send || isDIGIT(*(s+1))) { /* When the following character is a digit, use 3 octal digits @@ -341,9 +340,8 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq) } /* Here 1) isn't UTF-8; or - * 2) the current character is ASCII; or - * 3) it is an EBCDIC platform and is a low ordinal - * non-ASCII control. + * 2) the current character is represented as the same single + * byte regardless of the string's UTF-8ness * In each case the character occupies just one byte */ k = *(U8*)s; increment = 1; diff --git a/t/dumper.t b/t/dumper.t index 3cd86a6..80b2c8e 100644 --- a/t/dumper.t +++ b/t/dumper.t @@ -77,8 +77,8 @@ sub convert_to_native { $index = utf8::unicode_to_native(ord eval "\"$2\""); # But low hex numbers are always in octal. These are all - # controls. - my $format = ($index < ord(" ")) + # controls. The outlier \c? control is also in octal. + my $format = ($index < ord(" ") || $index == ord("\c?")) ? "\\%o" : "\\x{%x}"; $replacement = sprintf($format, $index); @@ -1659,8 +1659,8 @@ EOW # "\\x{41f}", # qr/\x{8b80}/, # qr/\x{41f}/, -# qr/\x{e4}/, -# '\xE4' +# qr/\x{b6}/, +# '\xb6' #]; EOW if ($] lt '5.010001') { @@ -1671,9 +1671,9 @@ EOW $want =~ s{/(,?)$}{/u$1}mg; } my $want_xs = $want; - $want_xs =~ s/'\xE4'/"\\x{e4}"/; - $want_xs =~ s<([^\0-\177])> ge; - TEST_BOTH(qq(Data::Dumper->Dumpxs([ [qq/\x{41f}/, qr/\x{8b80}/, qr/\x{41f}/, qr/\x{e4}/, "\xE4"] ])), + $want_xs =~ s/'\xb6'/"\\x{b6}"/; + $want_xs =~ s<([[:^ascii:]])> ge; + TEST_BOTH(qq(Data::Dumper->Dumpxs([ [qq/\x{41f}/, qr/\x{8b80}/, qr/\x{41f}/, qr/\x{b6}/, "\xb6"] ])), "string with Unicode + regexp with Unicode", $want, $want_xs); } @@ -1715,7 +1715,7 @@ EOW # qr/ \x{203d}\\/ /, # qr/ \\\x{203d}\\/ /, # qr/ \\\x{203d}$bs:\\/ /, -# '\xA3' +# '\xB6' #]; EOW if ($] lt '5.010001') { @@ -1726,9 +1726,9 @@ EOW $want =~ s{/(,?)$}{/u$1}mg; } my $want_xs = $want; - $want_xs =~ s/'\x{A3}'/"\\x{a3}"/; + $want_xs =~ s/'\x{B6}'/"\\x{b6}"/; $want_xs =~ s/\x{203D}/\\x{203d}/g; - TEST_BOTH(qq(Data::Dumper->Dumpxs([ [ '\x{2e18}', qr! \x{203d}/ !, qr! \\\x{203d}/ !, qr! \\\x{203d}$bs:/ !, "\xa3"] ])), + TEST_BOTH(qq(Data::Dumper->Dumpxs([ [ '\x{2e18}', qr! \x{203d}/ !, qr! \\\x{203d}/ !, qr! \\\x{203d}$bs:/ !, "\xb6"] ])), "github #18614, github #18764, perl #58608 corner cases", $want, $want_xs); } @@ -1743,13 +1743,13 @@ EOW # qr/^\$/, # qr/${dollar}foo/, # qr/\\\$foo/, -# qr/$dollar \x{A3} /u, +# qr/$dollar \x{B6} /u, # qr/$dollar \x{203d} /u, # qr/\\\$ \x{203d} /u, # qr/\\\\$dollar \x{203d} /u, # qr/ \$| \x{203d} /u, # qr/ (\$) \x{203d} /u, -# '\xA3' +# '\xB6' #]; EOW if ($] lt '5.014') { @@ -1760,8 +1760,8 @@ EOW $want =~ s!/,!)/,!g; } my $want_xs = $want; - $want_xs =~ s/'\x{A3}'/"\\x{a3}"/; - $want_xs =~ s/\x{A3}/\\x{a3}/; + $want_xs =~ s/'\x{B6}'/"\\x{b6}"/; + $want_xs =~ s/\x{B6}/\\x{b6}/; $want_xs =~ s/\x{203D}/\\x{203d}/g; my $have = <<"EOT"; Data::Dumper->Dumpxs([ [ @@ -1770,13 +1770,13 @@ Data::Dumper->Dumpxs([ [ qr'^\$', qr'\$foo', qr/\\\$foo/, - qr'\$ \x{A3} ', + qr'\$ \x{B6} ', qr'\$ \x{203d} ', qr/\\\$ \x{203d} /, qr'\\\\\$ \x{203d} ', qr/ \$| \x{203d} /, qr/ (\$) \x{203d} /, - '\xA3' + '\xB6' ] ]); EOT TEST_BOTH($have, "CPAN #84569", $want, $want_xs); @@ -1808,26 +1808,6 @@ EOW "name of code in *foo", $want); } -############# - -{ - # There is special code to handle the single control that in EBCDIC is - # not in the block with all the other controls, when it is UTF-8 and - # there are no variants in it (All controls in EBCDIC are invariant.) - # This tests that. There is no harm in testing this works on ASCII, - # and is better to not have split code paths. - my $outlier = chr utf8::unicode_to_native(0x9F); - my $outlier_hex = sprintf "%x", ord $outlier; - my $want = <