perl-Data-Dumper/Data-Dumper-2.154-Upgrade-t...

1348 lines
43 KiB
Diff

From deda932ecee93bbd318efaaaf66d2860f01ccd44 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= <ppisar@redhat.com>
Date: Wed, 6 May 2015 12:49:40 +0200
Subject: [PATCH] Upgrade to 2.158
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: Petr Písař <ppisar@redhat.com>
---
Changes | 18 +---
Dumper.pm | 89 ++++++++++++------
Dumper.xs | 256 +++++++++++++++++++++++++++++++++-----------------
t/dumper.t | 297 ++++++++++++++++++++++++++++++++++------------------------
t/quotekeys.t | 18 +++-
5 files changed, 421 insertions(+), 257 deletions(-)
diff --git a/Changes b/Changes
index 628ef6a..eca3bb9 100644
--- a/Changes
+++ b/Changes
@@ -6,22 +6,6 @@ Changes - public release history for Data::Dumper
=over 8
-=item 2.154 (Sep 18 2014)
-
-Most notably, this release fixes CVE-2014-4330:
-
- Don't recurse infinitely in Data::Dumper
-
- Add a configuration variable/option to limit recursion when dumping
- deep data structures.
- [...]
- This patch addresses CVE-2014-4330. This bug was found and
- reported by: LSE Leading Security Experts GmbH employee Markus
- Vervier.
-
-On top of that, there are several minor big fixes and improvements,
-see "git log" if the core perl distribution for details.
-
=item 2.151 (Mar 7 2014)
A "useqq" implementation for the XS version of Data::Dumper.
@@ -344,7 +328,7 @@ C<require 5.002>.
MLDBM example removed (as its own module, it has a separate CPAN
reality now).
-Fixed bugs in handling keys with wierd characters. Perl can be
+Fixed bugs in handling keys with weird characters. Perl can be
tripped up in its implicit quoting of the word before '=>'. The
fix: C<Data::Dumper::Purity>, when set, always triggers quotes
around hash keys.
diff --git a/Dumper.pm b/Dumper.pm
index 520dfd4..e884298 100644
--- a/Dumper.pm
+++ b/Dumper.pm
@@ -10,7 +10,7 @@
package Data::Dumper;
BEGIN {
- $VERSION = '2.154'; # Don't forget to set version and release
+ $VERSION = '2.158'; # Don't forget to set version and release
} # date in POD below!
#$| = 1;
@@ -37,6 +37,8 @@ BEGIN {
or $Useperl = 1;
}
+my $IS_ASCII = ord 'A' == 65;
+
# module vars and their defaults
$Indent = 2 unless defined $Indent;
$Purity = 0 unless defined $Purity;
@@ -222,8 +224,11 @@ sub DESTROY {}
sub Dump {
return &Dumpxs
- unless $Data::Dumper::Useperl || (ref($_[0]) && $_[0]->{useperl}) ||
- $Data::Dumper::Deparse || (ref($_[0]) && $_[0]->{deparse});
+ unless $Data::Dumper::Useperl || (ref($_[0]) && $_[0]->{useperl})
+ || $Data::Dumper::Deparse || (ref($_[0]) && $_[0]->{deparse})
+
+ # Use pure perl version on earlier releases on EBCDIC platforms
+ || (! $IS_ASCII && $] lt 5.021_010);
return &Dumpperl;
}
@@ -724,41 +729,71 @@ my %esc = (
"\e" => "\\e",
);
+my $low_controls = ($IS_ASCII)
+
+ # This includes \177, because traditionally it has been
+ # output as octal, even though it isn't really a "low"
+ # control
+ ? qr/[\0-\x1f\177]/
+
+ # EBCDIC low controls.
+ : qr/[\0-\x3f]/;
+
# put a string value in double quotes
sub qquote {
local($_) = shift;
s/([\\\"\@\$])/\\$1/g;
+
+ # This efficiently changes the high ordinal characters to \x{} if the utf8
+ # flag is on. On ASCII platforms, the high ordinals are all the
+ # non-ASCII's. On EBCDIC platforms, we don't include in these the non-ASCII
+ # controls whose ordinals are less than SPACE, excluded below by the range
+ # \0-\x3f. On ASCII platforms this range just compiles as part of :ascii:.
+ # On EBCDIC platforms, there is just one outlier high ordinal control, and
+ # it gets output as \x{}.
my $bytes; { use bytes; $bytes = length }
- s/([[:^ascii:]])/'\x{'.sprintf("%x",ord($1)).'}'/ge if $bytes > length;
- return qq("$_") unless
- /[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~]/; # fast exit
+ s/([^[:ascii:]\0-\x3f])/sprintf("\\x{%x}",ord($1))/ge
+ if $bytes > length
- my $high = shift || "";
+ # The above doesn't get the EBCDIC outlier high ordinal control when
+ # the string is UTF-8 but there are no UTF-8 variant characters in it.
+ # We want that to come out as \x{} anyway. We need is_utf8() to do
+ # this.
+ || (! $IS_ASCII && $] ge 5.008_001 && utf8::is_utf8($_));
+
+ return qq("$_") unless /[[:^print:]]/; # fast exit if only printables
+
+ # Here, there is at least one non-printable to output. First, translate the
+ # escapes.
s/([\a\b\t\n\f\r\e])/$esc{$1}/g;
- if (ord('^')==94) { # ascii
- # no need for 3 digits in escape for these
- s/([\0-\037])(?!\d)/'\\'.sprintf('%o',ord($1))/eg;
- s/([\0-\037\177])/'\\'.sprintf('%03o',ord($1))/eg;
+ # no need for 3 digits in escape for octals not followed by a digit.
+ s/($low_controls)(?!\d)/'\\'.sprintf('%o',ord($1))/eg;
+
+ # But otherwise use 3 digits
+ s/($low_controls)/'\\'.sprintf('%03o',ord($1))/eg;
+
# all but last branch below not supported --BEHAVIOR SUBJECT TO CHANGE--
- if ($high eq "iso8859") {
- s/([\200-\240])/'\\'.sprintf('%o',ord($1))/eg;
+ my $high = shift || "";
+ if ($high eq "iso8859") { # Doesn't escape the Latin1 printables
+ if ($IS_ASCII) {
+ s/([\200-\240])/'\\'.sprintf('%o',ord($1))/eg;
+ }
+ elsif ($] ge 5.007_003) {
+ my $high_control = utf8::unicode_to_native(0x9F);
+ s/$high_control/sprintf('\\%o',ord($1))/eg;
+ }
} elsif ($high eq "utf8") {
+# Some discussion of what to do here is in
+# https://rt.perl.org/Ticket/Display.html?id=113088
# use utf8;
# $str =~ s/([^\040-\176])/sprintf "\\x{%04x}", ord($1)/ge;
} elsif ($high eq "8bit") {
# leave it as it is
} else {
- s/([\200-\377])/'\\'.sprintf('%03o',ord($1))/eg;
- s/([^\040-\176])/sprintf "\\x{%04x}", ord($1)/ge;
+ s/([[:^ascii:]])/'\\'.sprintf('%03o',ord($1))/eg;
+ #s/([^\040-\176])/sprintf "\\x{%04x}", ord($1)/ge;
}
- }
- else { # ebcdic
- s{([^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~])(?!\d)}
- {my $v = ord($1); '\\'.sprintf(($v <= 037 ? '%o' : '%03o'), $v)}eg;
- s{([^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~])}
- {'\\'.sprintf('%03o',ord($1))}eg;
- }
return qq("$_");
}
@@ -1025,9 +1060,7 @@ $Data::Dumper::Useqq I<or> I<$OBJ>->Useqq(I<[NEWVAL]>)
When set, enables the use of double quotes for representing string values.
Whitespace other than space will be represented as C<[\n\t\r]>, "unsafe"
characters will be backslashed, and unprintable characters will be output as
-quoted octal integers. Since setting this variable imposes a performance
-penalty, the default is 0. C<Dump()> will run slower if this flag is set,
-since the fast XSUB implementation doesn't support it yet.
+quoted octal integers. The default is 0.
=item *
@@ -1391,8 +1424,8 @@ to have, you can use the C<Seen> method to pre-seed the internal reference
table and make the dumped output point to them, instead. See L</EXAMPLES>
above.
-The C<Useqq> and C<Deparse> flags makes Dump() run slower, since the
-XSUB implementation does not support them.
+The C<Deparse> flag makes Dump() run slower, since the XSUB
+implementation does not support it.
SCALAR objects have the weirdest looking C<bless> workaround.
@@ -1421,7 +1454,7 @@ modify it under the same terms as Perl itself.
=head1 VERSION
-Version 2.154 (September 18 2014)
+Version 2.158 (March 13 2015)
=head1 SEE ALSO
diff --git a/Dumper.xs b/Dumper.xs
index 2ffa867..97277f4 100644
--- a/Dumper.xs
+++ b/Dumper.xs
@@ -12,8 +12,33 @@
# define DD_USE_OLD_ID_FORMAT
#endif
+/* These definitions are ASCII only. But the pure-perl .pm avoids
+ * calling this .xs file for releases where they aren't defined */
+
+#ifndef isASCII
+# define isASCII(c) (((UV) (c)) < 128)
+#endif
+
+#ifndef ESC_NATIVE /* \e */
+# define ESC_NATIVE 27
+#endif
+
+#ifndef isPRINT
+# define isPRINT(c) (((UV) (c)) >= ' ' && ((UV) (c)) < 127)
+#endif
+
+#ifndef isALPHA
+# define isALPHA(c) ( (((UV) (c)) >= 'a' && ((UV) (c)) <= 'z') \
+ || (((UV) (c)) <= 'Z' && ((UV) (c)) >= 'A'))
+#endif
+
+#ifndef isIDFIRST
+# define isIDFIRST(c) (isALPHA(c) || (c) == '_')
+#endif
+
#ifndef isWORDCHAR
-# define isWORDCHAR(c) isALNUM(c)
+# define isWORDCHAR(c) (isIDFIRST(c) \
+ || (((UV) (c)) >= '0' && ((UV) (c)) <= '9'))
#endif
static I32 num_q (const char *s, STRLEN slen);
@@ -40,12 +65,6 @@ static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
#if PERL_VERSION <= 6 /* Perl 5.6 and earlier */
-# ifdef EBCDIC
-# define UNI_TO_NATIVE(ch) (((ch) > 255) ? (ch) : ASCII_TO_NATIVE(ch))
-# else
-# define UNI_TO_NATIVE(ch) (ch)
-# endif
-
UV
Perl_utf8_to_uvchr_buf(pTHX_ U8 *s, U8 *send, STRLEN *retlen)
{
@@ -72,8 +91,7 @@ Perl_utf8_to_uvchr_buf(pTHX_ U8 *s, U8 *send, STRLEN *retlen)
* end of the buffer if there is a malformation that indicates the
* character is longer than the space available */
- const UV uv = utf8_to_uvchr(s, retlen);
- return UNI_TO_NATIVE(uv);
+ return utf8_to_uvchr(s, retlen);
}
# if !defined(PERL_IMPLICIT_CONTEXT)
@@ -234,55 +252,90 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq)
STRLEN qq_escapables = 0; /* " $ @ will need a \ in "" strings. */
STRLEN normal = 0;
int increment;
- UV next;
-
- /* this will need EBCDICification */
- for (s = src; s < send; do_utf8 ? s += increment : s++) {
- const UV k = do_utf8 ? utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL) : *(U8*)s;
- /* check for invalid utf8 */
- increment = (k == 0 && *s != '\0') ? 1 : UTF8SKIP(s);
+ for (s = src; s < send; s += increment) { /* Sizing pass */
+ UV k = *(U8*)s;
- /* this is only used to check if the next character is an
- * ASCII digit, which are invariant, so if the following collects
- * a UTF-8 start byte it does no harm
- */
- next = (s + increment >= send ) ? 0 : *(U8*)(s+increment);
+ increment = 1; /* Will override if necessary for utf-8 */
-#ifdef EBCDIC
- if (!isprint(k) || k > 256) {
-#else
- if (k > 127) {
-#endif
- /* 4: \x{} then count the number of hex digits. */
- grow += 4 + (k <= 0xFF ? 2 : k <= 0xFFF ? 3 : k <= 0xFFFF ? 4 :
+ if (isPRINT(k)) {
+ if (k == '\\') {
+ backslashes++;
+ } else if (k == '\'') {
+ single_quotes++;
+ } else if (k == '"' || k == '$' || k == '@') {
+ qq_escapables++;
+ } else {
+ normal++;
+ }
+ }
+ else if (! isASCII(k) && k > ' ') {
+ /* High ordinal non-printable code point. (The test that k is
+ * above SPACE should be optimized out by the compiler on
+ * non-EBCDIC platforms; otherwise we could put an #ifdef around
+ * it, but it's better to have just a single code path when
+ * possible. All but one of the non-ASCII EBCDIC controls are low
+ * ordinal; that one is the only one above SPACE.)
+ *
+ * If UTF-8, output as hex, regardless of useqq. This means there
+ * is an overhead of 4 chars '\x{}'. Then count the number of hex
+ * digits. */
+ if (do_utf8) {
+ k = utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL);
+
+ /* treat invalid utf8 byte by byte. This loop iteration gets the
+ * first byte */
+ increment = (k == 0 && *s != '\0') ? 1 : UTF8SKIP(s);
+
+ grow += 4 + (k <= 0xFF ? 2 : k <= 0xFFF ? 3 : k <= 0xFFFF ? 4 :
#if UVSIZE == 4
- 8 /* We may allocate a bit more than the minimum here. */
+ 8 /* We may allocate a bit more than the minimum here. */
#else
- k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
-#endif
- );
-#ifndef EBCDIC
- } else if (useqq &&
- /* we can't use the short form like '\0' if followed by a digit */
- (((k >= 7 && k <= 10) || k == 12 || k == 13 || k == 27)
- || (k < 8 && (next < '0' || next > '9')))) {
- grow += 2;
- } else if (useqq && k <= 31 && (next < '0' || next > '9')) {
- grow += 3;
- } else if (useqq && (k <= 31 || k >= 127)) {
- grow += 4;
+ k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
#endif
- } else if (k == '\\') {
- backslashes++;
- } else if (k == '\'') {
- single_quotes++;
- } else if (k == '"' || k == '$' || k == '@') {
- qq_escapables++;
- } else {
+ );
+ }
+ else if (useqq) { /* Not utf8, must be <= 0xFF, hence 2 hex
+ * digits. */
+ grow += 4 + 2;
+ }
+ else { /* Non-qq generates 3 octal digits plus backslash */
+ grow += 4;
+ }
+ } /* End of high-ordinal non-printable */
+ else if (! useqq) { /* Low ordinal, non-printable, non-qq just
+ * outputs the raw char */
normal++;
}
- }
+ else { /* Is qq, low ordinal, non-printable. Output escape
+ * sequences */
+ if ( k == '\a' || k == '\b' || k == '\t' || k == '\n' || k == '\r'
+ || k == '\f' || k == ESC_NATIVE)
+ {
+ grow += 2; /* 1 char plus backslash */
+ }
+ else /* The other low ordinals are output as an octal escape
+ * sequence */
+ if (s + 1 >= send || ( *(U8*)(s+1) >= '0'
+ && *(U8*)(s+1) <= '9'))
+ {
+ /* When the following character is a digit, use 3 octal digits
+ * plus backslash, as using fewer digits would concatenate the
+ * following char into this one */
+ grow += 4;
+ }
+ else if (k <= 7) {
+ grow += 2; /* 1 octal digit, plus backslash */
+ }
+ else if (k <= 077) {
+ grow += 3; /* 2 octal digits plus backslash */
+ }
+ else {
+ grow += 4; /* 3 octal digits plus backslash */
+ }
+ }
+ } /* End of size-calculating loop */
+
if (grow || useqq) {
/* We have something needing hex. 3 is ""\0 */
sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
@@ -291,38 +344,78 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq)
*r++ = '"';
- for (s = src; s < send; do_utf8 ? s += UTF8SKIP(s) : s++) {
- const UV k = do_utf8 ? utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL) : *(U8*)s;
+ for (s = src; s < send; s += increment) {
+ UV k;
+
+ if (do_utf8
+ && ! isASCII(*s)
+ /* Exclude non-ASCII low ordinal controls. This should be
+ * optimized out by the compiler on ASCII platforms; if not
+ * could wrap it in a #ifdef EBCDIC, but better to avoid
+ * #if's if possible */
+ && *(U8*)s > ' '
+ ) {
+
+ /* When in UTF-8, we output all non-ascii chars as \x{}
+ * reqardless of useqq, except for the low ordinal controls on
+ * EBCDIC platforms */
+ k = utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL);
+
+ /* treat invalid utf8 byte by byte. This loop iteration gets the
+ * first byte */
+ increment = (k == 0 && *s != '\0') ? 1 : UTF8SKIP(s);
+
+#if PERL_VERSION < 10
+ sprintf(r, "\\x{%"UVxf"}", k);
+ r += strlen(r);
+ /* my_sprintf is not supported by ppport.h */
+#else
+ r = r + my_sprintf(r, "\\x{%"UVxf"}", k);
+#endif
+ continue;
+ }
+
+ /* Here 1) isn't UTF-8; or
+ * 2) the current character is ASCII; or
+ * 3) it is an EBCDIC platform and is a low ordinal
+ * non-ASCII control.
+ * In each case the character occupies just one byte */
+ k = *(U8*)s;
+ increment = 1;
+
+ if (isPRINT(k)) {
+ /* These need a backslash escape */
+ if (k == '"' || k == '\\' || k == '$' || k == '@') {
+ *r++ = '\\';
+ }
- if (k == '"' || k == '\\' || k == '$' || k == '@') {
- *r++ = '\\';
*r++ = (char)k;
}
- else
-#ifdef EBCDIC
- if (isprint(k) && k < 256)
-#else
- if (useqq && (k <= 31 || k == 127 || (!do_utf8 && k > 127))) {
+ else if (! useqq) { /* non-qq, non-printable, low-ordinal is
+ * output raw */
+ *r++ = (char)k;
+ }
+ else { /* Is qq means use escape sequences */
bool next_is_digit;
*r++ = '\\';
switch (k) {
- case 7: *r++ = 'a'; break;
- case 8: *r++ = 'b'; break;
- case 9: *r++ = 't'; break;
- case 10: *r++ = 'n'; break;
- case 12: *r++ = 'f'; break;
- case 13: *r++ = 'r'; break;
- case 27: *r++ = 'e'; break;
+ case '\a': *r++ = 'a'; break;
+ case '\b': *r++ = 'b'; break;
+ case '\t': *r++ = 't'; break;
+ case '\n': *r++ = 'n'; break;
+ case '\f': *r++ = 'f'; break;
+ case '\r': *r++ = 'r'; break;
+ case ESC_NATIVE: *r++ = 'e'; break;
default:
- increment = (k == 0 && *s != '\0') ? 1 : UTF8SKIP(s);
/* only ASCII digits matter here, which are invariant,
* since we only encode characters \377 and under, or
* \x177 and under for a unicode string
*/
- next = (s+increment < send) ? *(U8*)(s+increment) : 0;
- next_is_digit = next >= '0' && next <= '9';
+ next_is_digit = (s + 1 >= send )
+ ? FALSE
+ : (*(U8*)(s+1) >= '0' && *(U8*)(s+1) <= '9');
/* faster than
* r = r + my_sprintf(r, "%o", k);
@@ -339,18 +432,6 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq)
}
}
}
- else if (k < 0x80)
-#endif
- *r++ = (char)k;
- else {
-#if PERL_VERSION < 10
- sprintf(r, "\\x{%"UVxf"}", k);
- r += strlen(r);
- /* my_sprintf is not supported by ppport.h */
-#else
- r = r + my_sprintf(r, "\\x{%"UVxf"}", k);
-#endif
- }
}
*r++ = '"';
} else {
@@ -440,7 +521,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
if (!val)
return 0;
- /* If the ouput buffer has less than some arbitrary amount of space
+ /* If the output buffer has less than some arbitrary amount of space
remaining, then enlarge it. For the test case (25M of output),
*1.1 was slower, *2.0 was the same, so the first guess of 1.5 is
deemed to be good enough. */
@@ -798,7 +879,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
else if (realtype == SVt_PVHV) {
SV *totpad, *newapad;
SV *sname;
- HE *entry;
+ HE *entry = NULL;
char *key;
I32 klen;
SV *hval;
@@ -1106,8 +1187,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
len = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"IVdf, SvIV(val));
if (SvPOK(val)) {
/* Need to check to see if this is a string such as " 0".
- I'm assuming from sprintf isn't going to clash with utf8.
- Is this valid on EBCDIC? */
+ I'm assuming from sprintf isn't going to clash with utf8. */
STRLEN pvlen;
const char * const pv = SvPV(val, pvlen);
if (pvlen != len || memNE(pv, tmpbuf, len))
@@ -1270,7 +1350,7 @@ MODULE = Data::Dumper PACKAGE = Data::Dumper PREFIX = Data_Dumper_
#
# This is the exact equivalent of Dump. Well, almost. The things that are
# different as of now (due to Laziness):
-# * doesn't deparse yet.'
+# * doesn't do deparse yet.'
#
void
@@ -1292,7 +1372,7 @@ Data_Dumper_Dumpxs(href, ...)
I32 purity, deepcopy, quotekeys, maxdepth = 0;
IV maxrecurse = 1000;
char tmpbuf[1024];
- I32 gimme = GIMME;
+ I32 gimme = GIMME_V;
int use_sparse_seen_hash = 0;
if (!SvROK(href)) { /* call new to get an object first */
@@ -1504,7 +1584,7 @@ Data_Dumper_Dumpxs(href, ...)
}
else
croak("Call to new() method failed to return HASH ref");
- if (gimme == G_SCALAR)
+ if (gimme != G_ARRAY)
XPUSHs(sv_2mortal(retval));
}
diff --git a/t/dumper.t b/t/dumper.t
index f452ad2..643160a 100644
--- a/t/dumper.t
+++ b/t/dumper.t
@@ -16,7 +16,6 @@ local $Data::Dumper::Sortkeys = 1;
use Data::Dumper;
use Config;
-my $Is_ebcdic = defined($Config{'ebcdic'}) && $Config{'ebcdic'} eq 'define';
$Data::Dumper::Pad = "#";
my $TMAX;
@@ -24,6 +23,61 @@ my $XS;
my $TNUM = 0;
my $WANT = '';
+sub convert_to_native($) {
+ my $input = shift;
+
+ # unicode_to_native() not available before this release; hence won't work
+ # on EBCDIC platforms for earlier.
+ return $input if $] lt 5.007_003;
+
+ my @output;
+
+ # The input should always be one of the following constructs
+ while ($input =~ m/ ( \\ [0-7]+ )
+ | ( \\ x \{ [[:xdigit:]]+ } )
+ | ( \\ . )
+ | ( . ) /gx)
+ {
+ #print STDERR __LINE__, ": ", $&, "\n";
+ my $index;
+ my $replacement;
+ if (defined $4) { # Literal
+ $index = ord $4;
+ $replacement = $4;
+ }
+ elsif (defined $3) { # backslash escape
+ $index = ord eval "\"$3\"";
+ $replacement = $3;
+ }
+ elsif (defined $2) { # Hex
+ $index = utf8::unicode_to_native(ord eval "\"$2\"");
+
+ # But low hex numbers are always in octal. These are all
+ # controls.
+ my $format = ($index < ord(" "))
+ ? "\\%o"
+ : "\\x{%x}";
+ $replacement = sprintf($format, $index);
+ }
+ elsif (defined $1) { # Octal
+ $index = utf8::unicode_to_native(ord eval "\"$1\"");
+ $replacement = sprintf("\\%o", $index);
+ }
+ else {
+ die "Unexpected match in convert_to_native()";
+ }
+
+ if (defined $output[$index]) {
+ print STDERR "ordinal $index already has '$output[$index]'; skipping '$replacement'\n";
+ next;
+ }
+
+ $output[$index] = $replacement;
+ }
+
+ return join "", grep { defined } @output;
+}
+
sub TEST {
my $string = shift;
my $name = shift;
@@ -31,42 +85,19 @@ sub TEST {
++$TNUM;
$t =~ s/([A-Z]+)\(0x[0-9a-f]+\)/$1(0xdeadbeef)/g
if ($WANT =~ /deadbeef/);
- if ($Is_ebcdic) {
- # these data need massaging with non ascii character sets
- # because of hashing order differences
- $WANT = join("\n",sort(split(/\n/,$WANT)));
- $WANT =~ s/\,$//mg;
- $t = join("\n",sort(split(/\n/,$t)));
- $t =~ s/\,$//mg;
- }
$name = $name ? " - $name" : '';
print( ($t eq $WANT and not $@) ? "ok $TNUM$name\n"
: "not ok $TNUM$name\n--Expected--\n$WANT\n--Got--\n$@$t\n");
++$TNUM;
- if ($Is_ebcdic) { # EBCDIC.
- if ($TNUM == 311 || $TNUM == 314) {
- eval $string;
- } else {
- eval $t;
- }
- } else {
- eval "$t";
- }
- print $@ ? "not ok $TNUM\n# \$@ says: $@\n" : "ok $TNUM\n";
+ eval "$t";
+ print $@ ? "not ok $TNUM\n# \$@ says: $@\n" : "ok $TNUM - no eval error\n";
$t = eval $string;
++$TNUM;
$t =~ s/([A-Z]+)\(0x[0-9a-f]+\)/$1(0xdeadbeef)/g
if ($WANT =~ /deadbeef/);
- if ($Is_ebcdic) {
- # here too there are hashing order differences
- $WANT = join("\n",sort(split(/\n/,$WANT)));
- $WANT =~ s/\,$//mg;
- $t = join("\n",sort(split(/\n/,$t)));
- $t =~ s/\,$//mg;
- }
- print( ($t eq $WANT and not $@) ? "ok $TNUM\n"
+ print( ($t eq $WANT and not $@) ? "ok $TNUM - works a 2nd time after intervening eval\n"
: "not ok $TNUM\n--Expected--\n$WANT\n--Got--\n$@$t\n");
}
@@ -77,17 +108,20 @@ sub SKIP_TEST {
++$TNUM; print "ok $TNUM # skip $reason\n";
}
+$TMAX = 450;
+
# Force Data::Dumper::Dump to use perl. We test Dumpxs explicitly by calling
# it direct. Out here it lets us knobble the next if to test that the perl
# only tests do work (and count correctly)
$Data::Dumper::Useperl = 1;
if (defined &Data::Dumper::Dumpxs) {
print "### XS extension loaded, will run XS tests\n";
- $TMAX = 438; $XS = 1;
+ $XS = 1;
}
else {
print "### XS extensions not loaded, will NOT run XS tests\n";
- $TMAX = 219; $XS = 0;
+ $TMAX /= 2;
+ $XS = 0;
}
print "1..$TMAX\n";
@@ -104,7 +138,7 @@ $b->{a} = $a;
$b->{b} = $a->[1];
$b->{c} = $a->[2];
-############# 1
+#############
##
$WANT = <<'EOT';
#$a = [
@@ -138,7 +172,7 @@ SCOPE: {
}
-############# 7
+#############
##
$WANT = <<'EOT';
#@a = (
@@ -174,7 +208,7 @@ SCOPE: {
if $XS;
}
-############# 13
+#############
##
$WANT = <<'EOT';
#%b = (
@@ -200,7 +234,7 @@ TEST (q(Data::Dumper->Dumpxs([$b, $a], [qw(*b a)])),
'basic test with dereferenced hash: Dumpxs()')
if $XS;
-############# 19
+#############
##
$WANT = <<'EOT';
#$a = [
@@ -236,7 +270,7 @@ if ($XS) {
}
-############# 25
+#############
##
$WANT = <<'EOT';
#$a = [
@@ -266,7 +300,7 @@ TEST (q( $d->Reset; $d->Dumpxs ),
'Indent(3): Purity(0)->Quotekeys(0): Dumpxs()')
if $XS;
-############# 31
+#############
##
$WANT = <<'EOT';
#$VAR1 = [
@@ -288,7 +322,7 @@ EOT
TEST (q(Dumper($a)), 'Dumper');
TEST (q(Data::Dumper::DumperX($a)), 'DumperX') if $XS;
-############# 37
+#############
##
$WANT = <<'EOT';
#[
@@ -316,7 +350,7 @@ EOT
}
-############# 43
+#############
##
$WANT = <<'EOT';
#$VAR1 = {
@@ -348,7 +382,7 @@ $foo = { "abc\000\'\efg" => "mno\000",
$foo{d} = \%foo;
$foo[2] = \%foo;
-############# 49
+#############
##
$WANT = <<'EOT';
#$foo = \*::foo;
@@ -383,7 +417,7 @@ EOT
'Purity 1: Indent 3: Dumpxs()')
if $XS;
-############# 55
+#############
##
$WANT = <<'EOT';
#$foo = \*::foo;
@@ -414,7 +448,7 @@ EOT
'Purity 1: Indent 1: Dumpxs()')
if $XS;
-############# 61
+#############
##
$WANT = <<'EOT';
#@bar = (
@@ -444,7 +478,7 @@ EOT
'array|hash|glob dereferenced: Dumpxs()')
if $XS;
-############# 67
+#############
##
$WANT = <<'EOT';
#$bar = [
@@ -474,7 +508,7 @@ EOT
'array|hash|glob: not dereferenced: Dumpxs()')
if $XS;
-############# 73
+#############
##
$WANT = <<'EOT';
#$foo = \*::foo;
@@ -499,7 +533,7 @@ EOT
'Purity 0: Quotekeys 0: dereferenced: Dumpxs')
if $XS;
-############# 79
+#############
##
$WANT = <<'EOT';
#$foo = \*::foo;
@@ -537,7 +571,7 @@ EOT
$mutts = \%kennel;
$mutts = $mutts; # avoid warning
-############# 85
+#############
##
$WANT = <<'EOT';
#%kennels = (
@@ -567,7 +601,7 @@ EOT
'constructor: hash|array|scalar: Dumpxs()');
}
-############# 91
+#############
##
$WANT = <<'EOT';
#%kennels = %kennels;
@@ -578,7 +612,7 @@ EOT
TEST q($d->Dump), 'object call: Dump';
TEST q($d->Dumpxs), 'object call: Dumpxs' if $XS;
-############# 97
+#############
##
$WANT = <<'EOT';
#%kennels = (
@@ -598,7 +632,7 @@ EOT
TEST (q($d->Reset; $d->Dumpxs), 'Reset and Dumpxs separate calls');
}
-############# 103
+#############
##
$WANT = <<'EOT';
#@dogs = (
@@ -628,14 +662,14 @@ EOT
'constructor: array|hash|scalar: Dumpxs()');
}
-############# 109
+#############
##
TEST q($d->Reset->Dump), 'Reset Dump chained';
if ($XS) {
TEST q($d->Reset->Dumpxs), 'Reset Dumpxs chained';
}
-############# 115
+#############
##
$WANT = <<'EOT';
#@dogs = (
@@ -673,7 +707,7 @@ EOT
sub z { print "foo\n" }
$c = [ \&z ];
-############# 121
+#############
##
$WANT = <<'EOT';
#$a = $b;
@@ -688,7 +722,7 @@ TEST (q(Data::Dumper->new([\&z,$c],['a','c'])->Seen({'b' => \&z})->Dumpxs;),
'Seen: scalar: Dumpxs')
if $XS;
-############# 127
+#############
##
$WANT = <<'EOT';
#$a = \&b;
@@ -703,7 +737,7 @@ TEST (q(Data::Dumper->new([\&z,$c],['a','c'])->Seen({'*b' => \&z})->Dumpxs;),
'Seen: glob: Dumpxs')
if $XS;
-############# 133
+#############
##
$WANT = <<'EOT';
#*a = \&b;
@@ -725,7 +759,7 @@ TEST (q(Data::Dumper->new([\&z,$c],['*a','*c'])->Seen({'*b' =>
$a = [];
$a->[1] = \$a->[0];
-############# 139
+#############
##
$WANT = <<'EOT';
#@a = (
@@ -746,7 +780,7 @@ TEST (q(Data::Dumper->new([$a],['*a'])->Purity(1)->Dumpxs;),
$a = \\\\\'foo';
$b = $$$a;
-############# 145
+#############
##
$WANT = <<'EOT';
#$a = \\\\\'foo';
@@ -764,7 +798,7 @@ TEST (q(Data::Dumper->new([$a,$b],['a','b'])->Purity(1)->Dumpxs;),
$a = [{ a => \$b }, { b => undef }];
$b = [{ c => \$b }, { d => \$a }];
-############# 151
+#############
##
$WANT = <<'EOT';
#$a = [
@@ -799,7 +833,7 @@ TEST (q(Data::Dumper->new([$a,$b],['a','b'])->Purity(1)->Dumpxs;),
$b = $a->[0][0];
$c = $${$b->[0][0]};
-############# 157
+#############
##
$WANT = <<'EOT';
#$a = [
@@ -830,7 +864,7 @@ TEST (q(Data::Dumper->new([$a,$b,$c],['a','b','c'])->Purity(1)->Dumpxs;),
$b = { 'c' => $c };
$a = { 'b' => $b };
-############# 163
+#############
##
$WANT = <<'EOT';
#$a = {
@@ -852,7 +886,7 @@ TEST (q(Data::Dumper->new([$a,$b,$c],['a','b','c'])->Maxdepth(4)->Dumpxs;),
'Maxdepth(4): Dumpxs()')
if $XS;
-############# 169
+#############
##
$WANT = <<'EOT';
#$a = {
@@ -875,7 +909,7 @@ TEST (q(Data::Dumper->new([$a,$b,$c],['a','b','c'])->Maxdepth(1)->Dumpxs;),
$a = \$a;
$b = [$a];
-############# 175
+#############
##
$WANT = <<'EOT';
#$b = [
@@ -889,7 +923,7 @@ TEST (q(Data::Dumper->new([$b],['b'])->Purity(0)->Dumpxs;),
'Purity(0): Dumpxs()')
if $XS;
-############# 181
+#############
##
$WANT = <<'EOT';
#$b = [
@@ -908,7 +942,7 @@ TEST (q(Data::Dumper->new([$b],['b'])->Purity(1)->Dumpxs;),
{
$a = "\x{09c10}";
-############# 187
+#############
## XS code was adding an extra \0
$WANT = <<'EOT';
#$a = "\x{9c10}";
@@ -927,7 +961,7 @@ EOT
$i = 0;
$a = { map { ("$_$_$_", ++$i) } 'I'..'Q' };
-############# 193
+#############
##
$WANT = <<'EOT';
#$VAR1 = {
@@ -959,7 +993,7 @@ TEST (q(Data::Dumper->new([$a])->Dumpxs;),
return [ sort { $b <=> $a } keys %$hash ];
}
-############# 199
+#############
##
$WANT = <<'EOT';
#$VAR1 = {
@@ -993,7 +1027,7 @@ TEST q(Data::Dumper->new([$c])->Dumpxs;), "sortkeys sub (XS)"
];
}
-############# 205
+#############
##
$WANT = <<'EOT';
#$VAR1 = [
@@ -1033,7 +1067,7 @@ TEST q(Data::Dumper->new([[$c, $d]])->Dumpxs;), "more sortkeys sub (XS)"
local $Data::Dumper::Deparse = 1;
local $Data::Dumper::Indent = 2;
-############# 211
+#############
##
$WANT = <<'EOT';
#$VAR1 = {
@@ -1051,7 +1085,7 @@ EOT
}
}
-############# 214
+#############
##
# This is messy.
@@ -1293,7 +1327,7 @@ if ($XS) {
{
$a = "1\n";
-############# 310
+#############
## Perl code was using /...$/ and hence missing the \n.
$WANT = <<'EOT';
my $VAR1 = '42
@@ -1322,7 +1356,7 @@ EOT
-2147483648,
-2147483649,
);
-############# 316
+#############
## Perl code flips over at 10 digits.
$WANT = <<'EOT';
#$VAR1 = 999999999;
@@ -1379,42 +1413,27 @@ EOT
}
}
-#XXX}
{
- if ($Is_ebcdic) {
$b = "Bad. XS didn't escape dollar sign";
-############# 322
- $WANT = <<"EOT"; # Careful. This is '' string written inside '' here doc
-#\$VAR1 = '\$b\"\@\\\\\xB1';
-EOT
- $a = "\$b\"\@\\\xB1\x{100}";
- chop $a;
- TEST q(Data::Dumper->Dump([$a])), "utf8 flag with \" and \$";
- if ($XS) {
- $WANT = <<'EOT'; # While this is "" string written inside "" here doc
-#$VAR1 = "\$b\"\@\\\x{b1}";
-EOT
- TEST q(Data::Dumper->Dumpxs([$a])), "XS utf8 flag with \" and \$";
- }
- } else {
- $b = "Bad. XS didn't escape dollar sign";
-############# 322
- $WANT = <<"EOT"; # Careful. This is '' string written inside '' here doc
-#\$VAR1 = '\$b\"\@\\\\\xA3';
+#############
+ # B6 is chosen because it is UTF-8 variant on ASCII and all 3 EBCDIC
+ # platforms that Perl currently purports to work on. It also is the only
+ # such code point that has the same meaning on all 4, the paragraph sign.
+ $WANT = <<"EOT"; # Careful. This is '' string written inside "" here doc
+#\$VAR1 = '\$b\"\@\\\\\xB6';
EOT
- $a = "\$b\"\@\\\xA3\x{100}";
- chop $a;
- TEST q(Data::Dumper->Dump([$a])), "utf8 flag with \" and \$";
- if ($XS) {
- $WANT = <<'EOT'; # While this is "" string written inside "" here doc
-#$VAR1 = "\$b\"\@\\\x{a3}";
+ $a = "\$b\"\@\\\xB6\x{100}";
+ chop $a;
+ TEST q(Data::Dumper->Dump([$a])), "utf8 flag with \" and \$";
+ if ($XS) {
+ $WANT = <<'EOT'; # While this is "" string written inside "" here doc
+#$VAR1 = "\$b\"\@\\\x{b6}";
EOT
- TEST q(Data::Dumper->Dumpxs([$a])), "XS utf8 flag with \" and \$";
- }
- }
+ TEST q(Data::Dumper->Dumpxs([$a])), "XS utf8 flag with \" and \$";
+ }
# XS used to produce "$b\"' which is 4 chars, not 3. [ie wrongly qq(\$b\\\")]
-############# 328
+#############
$WANT = <<'EOT';
#$VAR1 = '$b"';
EOT
@@ -1429,7 +1448,7 @@ EOT
# XS used to produce 'D'oh!' which is well, D'oh!
# Andreas found this one, which in turn discovered the previous two.
-############# 334
+#############
$WANT = <<'EOT';
#$VAR1 = 'D\'oh!';
EOT
@@ -1492,7 +1511,7 @@ EOT
TEST q(Data::Dumper->Dumpxs([\\%foo])),
"XS quotekeys == 0 for utf8 flagged ASCII" if $XS;
}
-############# 358
+#############
{
$WANT = <<'EOT';
#$VAR1 = [
@@ -1507,7 +1526,7 @@ EOT
TEST q(Data::Dumper->Dumpxs([\@foo])), 'Richard Clamp, Message-Id: <20030104005247.GA27685@mirth.demon.co.uk>: Dumpxs()'if $XS;
}
-############# 364
+#############
# Make sure $obj->Dumpxs returns the right thing in list context. This was
# broken by the initial attempt to fix [perl #74170].
$WANT = <<'EOT';
@@ -1517,11 +1536,13 @@ TEST q(join " ", new Data::Dumper [[]],[] =>->Dumpxs),
'$obj->Dumpxs in list context'
if $XS;
-############# 366
+#############
{
- $WANT = <<'EOT';
-#$VAR1 = [
-# "\0\1\2\3\4\5\6\a\b\t\n\13\f\r\16\17\20\21\22\23\24\25\26\27\30\31\32\e\34\35\36\37 !\"#\$%&'()*+,-./0123456789:;<=>?\@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377"
+ $WANT = '\0\1\2\3\4\5\6\a\b\t\n\13\f\r\16\17\20\21\22\23\24\25\26\27\30\31\32\e\34\35\36\37 !\"#\$%&\'()*+,-./0123456789:;<=>?\@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377';
+ $WANT = convert_to_native($WANT);
+ $WANT = <<EOT;
+#\$VAR1 = [
+# "$WANT"
#];
EOT
@@ -1531,11 +1552,13 @@ EOT
TEST (q(Data::Dumper::DumperX($foo)), 'All latin1 characters: DumperX') if $XS;
}
-############# 372
+#############
{
- $WANT = <<'EOT';
-#$VAR1 = [
-# "\0\1\2\3\4\5\6\a\b\t\n\13\f\r\16\17\20\21\22\23\24\25\26\27\30\31\32\e\34\35\36\37 !\"#\$%&'()*+,-./0123456789:;<=>?\@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\177\x{80}\x{81}\x{82}\x{83}\x{84}\x{85}\x{86}\x{87}\x{88}\x{89}\x{8a}\x{8b}\x{8c}\x{8d}\x{8e}\x{8f}\x{90}\x{91}\x{92}\x{93}\x{94}\x{95}\x{96}\x{97}\x{98}\x{99}\x{9a}\x{9b}\x{9c}\x{9d}\x{9e}\x{9f}\x{a0}\x{a1}\x{a2}\x{a3}\x{a4}\x{a5}\x{a6}\x{a7}\x{a8}\x{a9}\x{aa}\x{ab}\x{ac}\x{ad}\x{ae}\x{af}\x{b0}\x{b1}\x{b2}\x{b3}\x{b4}\x{b5}\x{b6}\x{b7}\x{b8}\x{b9}\x{ba}\x{bb}\x{bc}\x{bd}\x{be}\x{bf}\x{c0}\x{c1}\x{c2}\x{c3}\x{c4}\x{c5}\x{c6}\x{c7}\x{c8}\x{c9}\x{ca}\x{cb}\x{cc}\x{cd}\x{ce}\x{cf}\x{d0}\x{d1}\x{d2}\x{d3}\x{d4}\x{d5}\x{d6}\x{d7}\x{d8}\x{d9}\x{da}\x{db}\x{dc}\x{dd}\x{de}\x{df}\x{e0}\x{e1}\x{e2}\x{e3}\x{e4}\x{e5}\x{e6}\x{e7}\x{e8}\x{e9}\x{ea}\x{eb}\x{ec}\x{ed}\x{ee}\x{ef}\x{f0}\x{f1}\x{f2}\x{f3}\x{f4}\x{f5}\x{f6}\x{f7}\x{f8}\x{f9}\x{fa}\x{fb}\x{fc}\x{fd}\x{fe}\x{ff}\x{20ac}"
+ $WANT = '\0\1\2\3\4\5\6\a\b\t\n\13\f\r\16\17\20\21\22\23\24\25\26\27\30\31\32\e\34\35\36\37 !\"#\$%&\'()*+,-./0123456789:;<=>?\@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\177\x{80}\x{81}\x{82}\x{83}\x{84}\x{85}\x{86}\x{87}\x{88}\x{89}\x{8a}\x{8b}\x{8c}\x{8d}\x{8e}\x{8f}\x{90}\x{91}\x{92}\x{93}\x{94}\x{95}\x{96}\x{97}\x{98}\x{99}\x{9a}\x{9b}\x{9c}\x{9d}\x{9e}\x{9f}\x{a0}\x{a1}\x{a2}\x{a3}\x{a4}\x{a5}\x{a6}\x{a7}\x{a8}\x{a9}\x{aa}\x{ab}\x{ac}\x{ad}\x{ae}\x{af}\x{b0}\x{b1}\x{b2}\x{b3}\x{b4}\x{b5}\x{b6}\x{b7}\x{b8}\x{b9}\x{ba}\x{bb}\x{bc}\x{bd}\x{be}\x{bf}\x{c0}\x{c1}\x{c2}\x{c3}\x{c4}\x{c5}\x{c6}\x{c7}\x{c8}\x{c9}\x{ca}\x{cb}\x{cc}\x{cd}\x{ce}\x{cf}\x{d0}\x{d1}\x{d2}\x{d3}\x{d4}\x{d5}\x{d6}\x{d7}\x{d8}\x{d9}\x{da}\x{db}\x{dc}\x{dd}\x{de}\x{df}\x{e0}\x{e1}\x{e2}\x{e3}\x{e4}\x{e5}\x{e6}\x{e7}\x{e8}\x{e9}\x{ea}\x{eb}\x{ec}\x{ed}\x{ee}\x{ef}\x{f0}\x{f1}\x{f2}\x{f3}\x{f4}\x{f5}\x{f6}\x{f7}\x{f8}\x{f9}\x{fa}\x{fb}\x{fc}\x{fd}\x{fe}\x{ff}\x{20ac}';
+ $WANT = convert_to_native($WANT);
+ $WANT = <<EOT;
+#\$VAR1 = [
+# "$WANT"
#];
EOT
@@ -1553,7 +1576,7 @@ EOT
if $XS;
}
-############# 378
+#############
{
# If XS cannot load, the pure-Perl version cannot deparse vstrings with
# underscores properly. In 5.8.0, vstrings are just strings.
@@ -1563,11 +1586,12 @@ EOT
#$c = \'ABC';
#$d = \'ABC';
NOVSTRINGS
- my $vstrings_corr = <<'VSTRINGS_CORRECT';
-#$a = \v65.66.67;
-#$b = \v65.66.067;
-#$c = \v65.66.6_7;
-#$d = \'ABC';
+my $ABC_native = chr(65) . chr(66) . chr(67);
+ my $vstrings_corr = <<VSTRINGS_CORRECT;
+#\$a = \\v65.66.67;
+#\$b = \\v65.66.067;
+#\$c = \\v65.66.6_7;
+#\$d = \\'$ABC_native';
VSTRINGS_CORRECT
$WANT = $] <= 5.0080001
? $no_vstrings
@@ -1591,7 +1615,7 @@ VSTRINGS_CORRECT
}
}
-############# 384
+#############
{
# [perl #107372] blessed overloaded globs
$WANT = <<'EOW';
@@ -1606,7 +1630,7 @@ EOW
TEST q(Data::Dumper->Dumpxs([\*finkle])), 'blessed overloaded globs (xs)'
if $XS;
}
-############# 390
+#############
{
# [perl #74798] uncovered behaviour
$WANT = <<'EOW';
@@ -1653,7 +1677,7 @@ EOW
"numbers and number-like scalars"
if $XS;
}
-############# 426
+#############
{
# [perl #82948]
# re::regexp_pattern was moved to universal.c in v5.10.0-252-g192c1e2
@@ -1669,7 +1693,7 @@ OLD
TEST q(Data::Dumper->Dumpxs([ qr/abc/, qr/abc/i ])), "qr// xs"
if $XS;
}
-############# 432
+#############
{
sub foo {}
@@ -1682,4 +1706,37 @@ EOW
TEST q(Data::Dumper->new([ \&foo, \\&foo ], [ "*a", "b" ])->Dumpxs), "name of code in *foo xs"
if $XS;
}
-############# 436
+#############
+
+{
+ if($] lt 5.007_003) {
+ SKIP_TEST "Test is only problematic for EBCDIC, which only works for >= 5.8";
+ SKIP_TEST "Test is only problematic for EBCDIC, which only works for >= 5.8";
+ }
+ else {
+ # There is special code to handle the single control that in EBCDIC is
+ # not in the block with all the other controls, when it is UTF-8 and
+ # there are no variants in it (All controls in EBCDIC are invariant.)
+ # This tests that. There is no harm in testing this works on ASCII,
+ # and is better to not have split code paths.
+ my $outlier = chr utf8::unicode_to_native(0x9F);
+ my $outlier_hex = sprintf "%x", ord $outlier;
+ $WANT = <<EOT;
+#\$VAR1 = \"\\x{$outlier_hex}\";
+EOT
+ $foo = "$outlier\x{100}";
+ chop $foo;
+ local $Data::Dumper::Useqq = 1;
+ TEST (q(Dumper($foo)), 'EBCDIC outlier control');
+ TEST (q(Data::Dumper::DumperX($foo)), 'EBCDIC outlier control: DumperX') if $XS;
+ }
+}
+############# [perl #124091]
+{
+ $WANT = <<'EOT';
+#$VAR1 = "\n";
+EOT
+ local $Data::Dumper::Useqq = 1;
+ TEST (qq(Dumper("\n")), '\n alone');
+ TEST (qq(Data::Dumper::DumperX("\n")), '\n alone') if $XS;
+}
diff --git a/t/quotekeys.t b/t/quotekeys.t
index a858828..0f6313a 100644
--- a/t/quotekeys.t
+++ b/t/quotekeys.t
@@ -26,6 +26,8 @@ my %d = (
alpha => 'a',
);
+my $is_ascii = ord("A") == 65;
+
run_tests_for_quotekeys();
SKIP: {
skip "XS version was unavailable, so we already ran with pure Perl", 5
@@ -110,25 +112,33 @@ sub run_tests_for_quotekeys {
);
is(Dumper(\%qkdata),
- q($VAR1 = {'0' => 1,'012345' => 1,'12' => 1,'123456789' => 1,'1234567890' => 1,"1\x{660}" => 1,'::de::fg' => 1,'ab' => 1,'hi::12' => 1};),
+ (($is_ascii) # Sort order is different on EBCDIC platforms
+ ? q($VAR1 = {'0' => 1,'012345' => 1,'12' => 1,'123456789' => 1,'1234567890' => 1,"1\x{660}" => 1,'::de::fg' => 1,'ab' => 1,'hi::12' => 1};)
+ : q($VAR1 = {'::de::fg' => 1,'ab' => 1,'hi::12' => 1,'0' => 1,'012345' => 1,'12' => 1,'123456789' => 1,'1234567890' => 1,"1\x{660}" => 1};)),
"always quote when quotekeys true");
{
local $Data::Dumper::Useqq = 1;
is(Dumper(\%qkdata),
- q($VAR1 = {"0" => 1,"012345" => 1,"12" => 1,"123456789" => 1,"1234567890" => 1,"1\x{660}" => 1,"::de::fg" => 1,"ab" => 1,"hi::12" => 1};),
+ (($is_ascii)
+ ? q($VAR1 = {"0" => 1,"012345" => 1,"12" => 1,"123456789" => 1,"1234567890" => 1,"1\x{660}" => 1,"::de::fg" => 1,"ab" => 1,"hi::12" => 1};)
+ : q($VAR1 = {"::de::fg" => 1,"ab" => 1,"hi::12" => 1,"0" => 1,"012345" => 1,"12" => 1,"123456789" => 1,"1234567890" => 1,"1\x{660}" => 1};)),
"always quote when quotekeys true (useqq)");
}
local $Data::Dumper::Quotekeys = 0;
is(Dumper(\%qkdata),
- q($VAR1 = {0 => 1,'012345' => 1,12 => 1,123456789 => 1,'1234567890' => 1,"1\x{660}" => 1,'::de::fg' => 1,ab => 1,'hi::12' => 1};),
+ (($is_ascii)
+ ? q($VAR1 = {0 => 1,'012345' => 1,12 => 1,123456789 => 1,'1234567890' => 1,"1\x{660}" => 1,'::de::fg' => 1,ab => 1,'hi::12' => 1};)
+ : q($VAR1 = {'::de::fg' => 1,ab => 1,'hi::12' => 1,0 => 1,'012345' => 1,12 => 1,123456789 => 1,'1234567890' => 1,"1\x{660}" => 1};)),
"avoid quotes when quotekeys false");
{
local $Data::Dumper::Useqq = 1;
is(Dumper(\%qkdata),
- q($VAR1 = {0 => 1,"012345" => 1,12 => 1,123456789 => 1,"1234567890" => 1,"1\x{660}" => 1,"::de::fg" => 1,ab => 1,"hi::12" => 1};),
+ (($is_ascii)
+ ? q($VAR1 = {0 => 1,"012345" => 1,12 => 1,123456789 => 1,"1234567890" => 1,"1\x{660}" => 1,"::de::fg" => 1,ab => 1,"hi::12" => 1};)
+ : q($VAR1 = {"::de::fg" => 1,ab => 1,"hi::12" => 1,0 => 1,"012345" => 1,12 => 1,123456789 => 1,"1234567890" => 1,"1\x{660}" => 1};)),
"avoid quotes when quotekeys false (useqq)");
}
}
--
2.1.0