1348 lines
43 KiB
Diff
1348 lines
43 KiB
Diff
From deda932ecee93bbd318efaaaf66d2860f01ccd44 Mon Sep 17 00:00:00 2001
|
|
From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= <ppisar@redhat.com>
|
|
Date: Wed, 6 May 2015 12:49:40 +0200
|
|
Subject: [PATCH] Upgrade to 2.158
|
|
MIME-Version: 1.0
|
|
Content-Type: text/plain; charset=UTF-8
|
|
Content-Transfer-Encoding: 8bit
|
|
|
|
Signed-off-by: Petr Písař <ppisar@redhat.com>
|
|
---
|
|
Changes | 18 +---
|
|
Dumper.pm | 89 ++++++++++++------
|
|
Dumper.xs | 256 +++++++++++++++++++++++++++++++++-----------------
|
|
t/dumper.t | 297 ++++++++++++++++++++++++++++++++++------------------------
|
|
t/quotekeys.t | 18 +++-
|
|
5 files changed, 421 insertions(+), 257 deletions(-)
|
|
|
|
diff --git a/Changes b/Changes
|
|
index 628ef6a..eca3bb9 100644
|
|
--- a/Changes
|
|
+++ b/Changes
|
|
@@ -6,22 +6,6 @@ Changes - public release history for Data::Dumper
|
|
|
|
=over 8
|
|
|
|
-=item 2.154 (Sep 18 2014)
|
|
-
|
|
-Most notably, this release fixes CVE-2014-4330:
|
|
-
|
|
- Don't recurse infinitely in Data::Dumper
|
|
-
|
|
- Add a configuration variable/option to limit recursion when dumping
|
|
- deep data structures.
|
|
- [...]
|
|
- This patch addresses CVE-2014-4330. This bug was found and
|
|
- reported by: LSE Leading Security Experts GmbH employee Markus
|
|
- Vervier.
|
|
-
|
|
-On top of that, there are several minor big fixes and improvements,
|
|
-see "git log" if the core perl distribution for details.
|
|
-
|
|
=item 2.151 (Mar 7 2014)
|
|
|
|
A "useqq" implementation for the XS version of Data::Dumper.
|
|
@@ -344,7 +328,7 @@ C<require 5.002>.
|
|
MLDBM example removed (as its own module, it has a separate CPAN
|
|
reality now).
|
|
|
|
-Fixed bugs in handling keys with wierd characters. Perl can be
|
|
+Fixed bugs in handling keys with weird characters. Perl can be
|
|
tripped up in its implicit quoting of the word before '=>'. The
|
|
fix: C<Data::Dumper::Purity>, when set, always triggers quotes
|
|
around hash keys.
|
|
diff --git a/Dumper.pm b/Dumper.pm
|
|
index 520dfd4..e884298 100644
|
|
--- a/Dumper.pm
|
|
+++ b/Dumper.pm
|
|
@@ -10,7 +10,7 @@
|
|
package Data::Dumper;
|
|
|
|
BEGIN {
|
|
- $VERSION = '2.154'; # Don't forget to set version and release
|
|
+ $VERSION = '2.158'; # Don't forget to set version and release
|
|
} # date in POD below!
|
|
|
|
#$| = 1;
|
|
@@ -37,6 +37,8 @@ BEGIN {
|
|
or $Useperl = 1;
|
|
}
|
|
|
|
+my $IS_ASCII = ord 'A' == 65;
|
|
+
|
|
# module vars and their defaults
|
|
$Indent = 2 unless defined $Indent;
|
|
$Purity = 0 unless defined $Purity;
|
|
@@ -222,8 +224,11 @@ sub DESTROY {}
|
|
|
|
sub Dump {
|
|
return &Dumpxs
|
|
- unless $Data::Dumper::Useperl || (ref($_[0]) && $_[0]->{useperl}) ||
|
|
- $Data::Dumper::Deparse || (ref($_[0]) && $_[0]->{deparse});
|
|
+ unless $Data::Dumper::Useperl || (ref($_[0]) && $_[0]->{useperl})
|
|
+ || $Data::Dumper::Deparse || (ref($_[0]) && $_[0]->{deparse})
|
|
+
|
|
+ # Use pure perl version on earlier releases on EBCDIC platforms
|
|
+ || (! $IS_ASCII && $] lt 5.021_010);
|
|
return &Dumpperl;
|
|
}
|
|
|
|
@@ -724,41 +729,71 @@ my %esc = (
|
|
"\e" => "\\e",
|
|
);
|
|
|
|
+my $low_controls = ($IS_ASCII)
|
|
+
|
|
+ # This includes \177, because traditionally it has been
|
|
+ # output as octal, even though it isn't really a "low"
|
|
+ # control
|
|
+ ? qr/[\0-\x1f\177]/
|
|
+
|
|
+ # EBCDIC low controls.
|
|
+ : qr/[\0-\x3f]/;
|
|
+
|
|
# put a string value in double quotes
|
|
sub qquote {
|
|
local($_) = shift;
|
|
s/([\\\"\@\$])/\\$1/g;
|
|
+
|
|
+ # This efficiently changes the high ordinal characters to \x{} if the utf8
|
|
+ # flag is on. On ASCII platforms, the high ordinals are all the
|
|
+ # non-ASCII's. On EBCDIC platforms, we don't include in these the non-ASCII
|
|
+ # controls whose ordinals are less than SPACE, excluded below by the range
|
|
+ # \0-\x3f. On ASCII platforms this range just compiles as part of :ascii:.
|
|
+ # On EBCDIC platforms, there is just one outlier high ordinal control, and
|
|
+ # it gets output as \x{}.
|
|
my $bytes; { use bytes; $bytes = length }
|
|
- s/([[:^ascii:]])/'\x{'.sprintf("%x",ord($1)).'}'/ge if $bytes > length;
|
|
- return qq("$_") unless
|
|
- /[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~]/; # fast exit
|
|
+ s/([^[:ascii:]\0-\x3f])/sprintf("\\x{%x}",ord($1))/ge
|
|
+ if $bytes > length
|
|
|
|
- my $high = shift || "";
|
|
+ # The above doesn't get the EBCDIC outlier high ordinal control when
|
|
+ # the string is UTF-8 but there are no UTF-8 variant characters in it.
|
|
+ # We want that to come out as \x{} anyway. We need is_utf8() to do
|
|
+ # this.
|
|
+ || (! $IS_ASCII && $] ge 5.008_001 && utf8::is_utf8($_));
|
|
+
|
|
+ return qq("$_") unless /[[:^print:]]/; # fast exit if only printables
|
|
+
|
|
+ # Here, there is at least one non-printable to output. First, translate the
|
|
+ # escapes.
|
|
s/([\a\b\t\n\f\r\e])/$esc{$1}/g;
|
|
|
|
- if (ord('^')==94) { # ascii
|
|
- # no need for 3 digits in escape for these
|
|
- s/([\0-\037])(?!\d)/'\\'.sprintf('%o',ord($1))/eg;
|
|
- s/([\0-\037\177])/'\\'.sprintf('%03o',ord($1))/eg;
|
|
+ # no need for 3 digits in escape for octals not followed by a digit.
|
|
+ s/($low_controls)(?!\d)/'\\'.sprintf('%o',ord($1))/eg;
|
|
+
|
|
+ # But otherwise use 3 digits
|
|
+ s/($low_controls)/'\\'.sprintf('%03o',ord($1))/eg;
|
|
+
|
|
# all but last branch below not supported --BEHAVIOR SUBJECT TO CHANGE--
|
|
- if ($high eq "iso8859") {
|
|
- s/([\200-\240])/'\\'.sprintf('%o',ord($1))/eg;
|
|
+ my $high = shift || "";
|
|
+ if ($high eq "iso8859") { # Doesn't escape the Latin1 printables
|
|
+ if ($IS_ASCII) {
|
|
+ s/([\200-\240])/'\\'.sprintf('%o',ord($1))/eg;
|
|
+ }
|
|
+ elsif ($] ge 5.007_003) {
|
|
+ my $high_control = utf8::unicode_to_native(0x9F);
|
|
+ s/$high_control/sprintf('\\%o',ord($1))/eg;
|
|
+ }
|
|
} elsif ($high eq "utf8") {
|
|
+# Some discussion of what to do here is in
|
|
+# https://rt.perl.org/Ticket/Display.html?id=113088
|
|
# use utf8;
|
|
# $str =~ s/([^\040-\176])/sprintf "\\x{%04x}", ord($1)/ge;
|
|
} elsif ($high eq "8bit") {
|
|
# leave it as it is
|
|
} else {
|
|
- s/([\200-\377])/'\\'.sprintf('%03o',ord($1))/eg;
|
|
- s/([^\040-\176])/sprintf "\\x{%04x}", ord($1)/ge;
|
|
+ s/([[:^ascii:]])/'\\'.sprintf('%03o',ord($1))/eg;
|
|
+ #s/([^\040-\176])/sprintf "\\x{%04x}", ord($1)/ge;
|
|
}
|
|
- }
|
|
- else { # ebcdic
|
|
- s{([^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~])(?!\d)}
|
|
- {my $v = ord($1); '\\'.sprintf(($v <= 037 ? '%o' : '%03o'), $v)}eg;
|
|
- s{([^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~])}
|
|
- {'\\'.sprintf('%03o',ord($1))}eg;
|
|
- }
|
|
|
|
return qq("$_");
|
|
}
|
|
@@ -1025,9 +1060,7 @@ $Data::Dumper::Useqq I<or> I<$OBJ>->Useqq(I<[NEWVAL]>)
|
|
When set, enables the use of double quotes for representing string values.
|
|
Whitespace other than space will be represented as C<[\n\t\r]>, "unsafe"
|
|
characters will be backslashed, and unprintable characters will be output as
|
|
-quoted octal integers. Since setting this variable imposes a performance
|
|
-penalty, the default is 0. C<Dump()> will run slower if this flag is set,
|
|
-since the fast XSUB implementation doesn't support it yet.
|
|
+quoted octal integers. The default is 0.
|
|
|
|
=item *
|
|
|
|
@@ -1391,8 +1424,8 @@ to have, you can use the C<Seen> method to pre-seed the internal reference
|
|
table and make the dumped output point to them, instead. See L</EXAMPLES>
|
|
above.
|
|
|
|
-The C<Useqq> and C<Deparse> flags makes Dump() run slower, since the
|
|
-XSUB implementation does not support them.
|
|
+The C<Deparse> flag makes Dump() run slower, since the XSUB
|
|
+implementation does not support it.
|
|
|
|
SCALAR objects have the weirdest looking C<bless> workaround.
|
|
|
|
@@ -1421,7 +1454,7 @@ modify it under the same terms as Perl itself.
|
|
|
|
=head1 VERSION
|
|
|
|
-Version 2.154 (September 18 2014)
|
|
+Version 2.158 (March 13 2015)
|
|
|
|
=head1 SEE ALSO
|
|
|
|
diff --git a/Dumper.xs b/Dumper.xs
|
|
index 2ffa867..97277f4 100644
|
|
--- a/Dumper.xs
|
|
+++ b/Dumper.xs
|
|
@@ -12,8 +12,33 @@
|
|
# define DD_USE_OLD_ID_FORMAT
|
|
#endif
|
|
|
|
+/* These definitions are ASCII only. But the pure-perl .pm avoids
|
|
+ * calling this .xs file for releases where they aren't defined */
|
|
+
|
|
+#ifndef isASCII
|
|
+# define isASCII(c) (((UV) (c)) < 128)
|
|
+#endif
|
|
+
|
|
+#ifndef ESC_NATIVE /* \e */
|
|
+# define ESC_NATIVE 27
|
|
+#endif
|
|
+
|
|
+#ifndef isPRINT
|
|
+# define isPRINT(c) (((UV) (c)) >= ' ' && ((UV) (c)) < 127)
|
|
+#endif
|
|
+
|
|
+#ifndef isALPHA
|
|
+# define isALPHA(c) ( (((UV) (c)) >= 'a' && ((UV) (c)) <= 'z') \
|
|
+ || (((UV) (c)) <= 'Z' && ((UV) (c)) >= 'A'))
|
|
+#endif
|
|
+
|
|
+#ifndef isIDFIRST
|
|
+# define isIDFIRST(c) (isALPHA(c) || (c) == '_')
|
|
+#endif
|
|
+
|
|
#ifndef isWORDCHAR
|
|
-# define isWORDCHAR(c) isALNUM(c)
|
|
+# define isWORDCHAR(c) (isIDFIRST(c) \
|
|
+ || (((UV) (c)) >= '0' && ((UV) (c)) <= '9'))
|
|
#endif
|
|
|
|
static I32 num_q (const char *s, STRLEN slen);
|
|
@@ -40,12 +65,6 @@ static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,
|
|
|
|
#if PERL_VERSION <= 6 /* Perl 5.6 and earlier */
|
|
|
|
-# ifdef EBCDIC
|
|
-# define UNI_TO_NATIVE(ch) (((ch) > 255) ? (ch) : ASCII_TO_NATIVE(ch))
|
|
-# else
|
|
-# define UNI_TO_NATIVE(ch) (ch)
|
|
-# endif
|
|
-
|
|
UV
|
|
Perl_utf8_to_uvchr_buf(pTHX_ U8 *s, U8 *send, STRLEN *retlen)
|
|
{
|
|
@@ -72,8 +91,7 @@ Perl_utf8_to_uvchr_buf(pTHX_ U8 *s, U8 *send, STRLEN *retlen)
|
|
* end of the buffer if there is a malformation that indicates the
|
|
* character is longer than the space available */
|
|
|
|
- const UV uv = utf8_to_uvchr(s, retlen);
|
|
- return UNI_TO_NATIVE(uv);
|
|
+ return utf8_to_uvchr(s, retlen);
|
|
}
|
|
|
|
# if !defined(PERL_IMPLICIT_CONTEXT)
|
|
@@ -234,55 +252,90 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq)
|
|
STRLEN qq_escapables = 0; /* " $ @ will need a \ in "" strings. */
|
|
STRLEN normal = 0;
|
|
int increment;
|
|
- UV next;
|
|
-
|
|
- /* this will need EBCDICification */
|
|
- for (s = src; s < send; do_utf8 ? s += increment : s++) {
|
|
- const UV k = do_utf8 ? utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL) : *(U8*)s;
|
|
|
|
- /* check for invalid utf8 */
|
|
- increment = (k == 0 && *s != '\0') ? 1 : UTF8SKIP(s);
|
|
+ for (s = src; s < send; s += increment) { /* Sizing pass */
|
|
+ UV k = *(U8*)s;
|
|
|
|
- /* this is only used to check if the next character is an
|
|
- * ASCII digit, which are invariant, so if the following collects
|
|
- * a UTF-8 start byte it does no harm
|
|
- */
|
|
- next = (s + increment >= send ) ? 0 : *(U8*)(s+increment);
|
|
+ increment = 1; /* Will override if necessary for utf-8 */
|
|
|
|
-#ifdef EBCDIC
|
|
- if (!isprint(k) || k > 256) {
|
|
-#else
|
|
- if (k > 127) {
|
|
-#endif
|
|
- /* 4: \x{} then count the number of hex digits. */
|
|
- grow += 4 + (k <= 0xFF ? 2 : k <= 0xFFF ? 3 : k <= 0xFFFF ? 4 :
|
|
+ if (isPRINT(k)) {
|
|
+ if (k == '\\') {
|
|
+ backslashes++;
|
|
+ } else if (k == '\'') {
|
|
+ single_quotes++;
|
|
+ } else if (k == '"' || k == '$' || k == '@') {
|
|
+ qq_escapables++;
|
|
+ } else {
|
|
+ normal++;
|
|
+ }
|
|
+ }
|
|
+ else if (! isASCII(k) && k > ' ') {
|
|
+ /* High ordinal non-printable code point. (The test that k is
|
|
+ * above SPACE should be optimized out by the compiler on
|
|
+ * non-EBCDIC platforms; otherwise we could put an #ifdef around
|
|
+ * it, but it's better to have just a single code path when
|
|
+ * possible. All but one of the non-ASCII EBCDIC controls are low
|
|
+ * ordinal; that one is the only one above SPACE.)
|
|
+ *
|
|
+ * If UTF-8, output as hex, regardless of useqq. This means there
|
|
+ * is an overhead of 4 chars '\x{}'. Then count the number of hex
|
|
+ * digits. */
|
|
+ if (do_utf8) {
|
|
+ k = utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL);
|
|
+
|
|
+ /* treat invalid utf8 byte by byte. This loop iteration gets the
|
|
+ * first byte */
|
|
+ increment = (k == 0 && *s != '\0') ? 1 : UTF8SKIP(s);
|
|
+
|
|
+ grow += 4 + (k <= 0xFF ? 2 : k <= 0xFFF ? 3 : k <= 0xFFFF ? 4 :
|
|
#if UVSIZE == 4
|
|
- 8 /* We may allocate a bit more than the minimum here. */
|
|
+ 8 /* We may allocate a bit more than the minimum here. */
|
|
#else
|
|
- k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
|
|
-#endif
|
|
- );
|
|
-#ifndef EBCDIC
|
|
- } else if (useqq &&
|
|
- /* we can't use the short form like '\0' if followed by a digit */
|
|
- (((k >= 7 && k <= 10) || k == 12 || k == 13 || k == 27)
|
|
- || (k < 8 && (next < '0' || next > '9')))) {
|
|
- grow += 2;
|
|
- } else if (useqq && k <= 31 && (next < '0' || next > '9')) {
|
|
- grow += 3;
|
|
- } else if (useqq && (k <= 31 || k >= 127)) {
|
|
- grow += 4;
|
|
+ k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
|
|
#endif
|
|
- } else if (k == '\\') {
|
|
- backslashes++;
|
|
- } else if (k == '\'') {
|
|
- single_quotes++;
|
|
- } else if (k == '"' || k == '$' || k == '@') {
|
|
- qq_escapables++;
|
|
- } else {
|
|
+ );
|
|
+ }
|
|
+ else if (useqq) { /* Not utf8, must be <= 0xFF, hence 2 hex
|
|
+ * digits. */
|
|
+ grow += 4 + 2;
|
|
+ }
|
|
+ else { /* Non-qq generates 3 octal digits plus backslash */
|
|
+ grow += 4;
|
|
+ }
|
|
+ } /* End of high-ordinal non-printable */
|
|
+ else if (! useqq) { /* Low ordinal, non-printable, non-qq just
|
|
+ * outputs the raw char */
|
|
normal++;
|
|
}
|
|
- }
|
|
+ else { /* Is qq, low ordinal, non-printable. Output escape
|
|
+ * sequences */
|
|
+ if ( k == '\a' || k == '\b' || k == '\t' || k == '\n' || k == '\r'
|
|
+ || k == '\f' || k == ESC_NATIVE)
|
|
+ {
|
|
+ grow += 2; /* 1 char plus backslash */
|
|
+ }
|
|
+ else /* The other low ordinals are output as an octal escape
|
|
+ * sequence */
|
|
+ if (s + 1 >= send || ( *(U8*)(s+1) >= '0'
|
|
+ && *(U8*)(s+1) <= '9'))
|
|
+ {
|
|
+ /* When the following character is a digit, use 3 octal digits
|
|
+ * plus backslash, as using fewer digits would concatenate the
|
|
+ * following char into this one */
|
|
+ grow += 4;
|
|
+ }
|
|
+ else if (k <= 7) {
|
|
+ grow += 2; /* 1 octal digit, plus backslash */
|
|
+ }
|
|
+ else if (k <= 077) {
|
|
+ grow += 3; /* 2 octal digits plus backslash */
|
|
+ }
|
|
+ else {
|
|
+ grow += 4; /* 3 octal digits plus backslash */
|
|
+ }
|
|
+ }
|
|
+ } /* End of size-calculating loop */
|
|
+
|
|
if (grow || useqq) {
|
|
/* We have something needing hex. 3 is ""\0 */
|
|
sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
|
|
@@ -291,38 +344,78 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq)
|
|
|
|
*r++ = '"';
|
|
|
|
- for (s = src; s < send; do_utf8 ? s += UTF8SKIP(s) : s++) {
|
|
- const UV k = do_utf8 ? utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL) : *(U8*)s;
|
|
+ for (s = src; s < send; s += increment) {
|
|
+ UV k;
|
|
+
|
|
+ if (do_utf8
|
|
+ && ! isASCII(*s)
|
|
+ /* Exclude non-ASCII low ordinal controls. This should be
|
|
+ * optimized out by the compiler on ASCII platforms; if not
|
|
+ * could wrap it in a #ifdef EBCDIC, but better to avoid
|
|
+ * #if's if possible */
|
|
+ && *(U8*)s > ' '
|
|
+ ) {
|
|
+
|
|
+ /* When in UTF-8, we output all non-ascii chars as \x{}
|
|
+ * reqardless of useqq, except for the low ordinal controls on
|
|
+ * EBCDIC platforms */
|
|
+ k = utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL);
|
|
+
|
|
+ /* treat invalid utf8 byte by byte. This loop iteration gets the
|
|
+ * first byte */
|
|
+ increment = (k == 0 && *s != '\0') ? 1 : UTF8SKIP(s);
|
|
+
|
|
+#if PERL_VERSION < 10
|
|
+ sprintf(r, "\\x{%"UVxf"}", k);
|
|
+ r += strlen(r);
|
|
+ /* my_sprintf is not supported by ppport.h */
|
|
+#else
|
|
+ r = r + my_sprintf(r, "\\x{%"UVxf"}", k);
|
|
+#endif
|
|
+ continue;
|
|
+ }
|
|
+
|
|
+ /* Here 1) isn't UTF-8; or
|
|
+ * 2) the current character is ASCII; or
|
|
+ * 3) it is an EBCDIC platform and is a low ordinal
|
|
+ * non-ASCII control.
|
|
+ * In each case the character occupies just one byte */
|
|
+ k = *(U8*)s;
|
|
+ increment = 1;
|
|
+
|
|
+ if (isPRINT(k)) {
|
|
+ /* These need a backslash escape */
|
|
+ if (k == '"' || k == '\\' || k == '$' || k == '@') {
|
|
+ *r++ = '\\';
|
|
+ }
|
|
|
|
- if (k == '"' || k == '\\' || k == '$' || k == '@') {
|
|
- *r++ = '\\';
|
|
*r++ = (char)k;
|
|
}
|
|
- else
|
|
-#ifdef EBCDIC
|
|
- if (isprint(k) && k < 256)
|
|
-#else
|
|
- if (useqq && (k <= 31 || k == 127 || (!do_utf8 && k > 127))) {
|
|
+ else if (! useqq) { /* non-qq, non-printable, low-ordinal is
|
|
+ * output raw */
|
|
+ *r++ = (char)k;
|
|
+ }
|
|
+ else { /* Is qq means use escape sequences */
|
|
bool next_is_digit;
|
|
|
|
*r++ = '\\';
|
|
switch (k) {
|
|
- case 7: *r++ = 'a'; break;
|
|
- case 8: *r++ = 'b'; break;
|
|
- case 9: *r++ = 't'; break;
|
|
- case 10: *r++ = 'n'; break;
|
|
- case 12: *r++ = 'f'; break;
|
|
- case 13: *r++ = 'r'; break;
|
|
- case 27: *r++ = 'e'; break;
|
|
+ case '\a': *r++ = 'a'; break;
|
|
+ case '\b': *r++ = 'b'; break;
|
|
+ case '\t': *r++ = 't'; break;
|
|
+ case '\n': *r++ = 'n'; break;
|
|
+ case '\f': *r++ = 'f'; break;
|
|
+ case '\r': *r++ = 'r'; break;
|
|
+ case ESC_NATIVE: *r++ = 'e'; break;
|
|
default:
|
|
- increment = (k == 0 && *s != '\0') ? 1 : UTF8SKIP(s);
|
|
|
|
/* only ASCII digits matter here, which are invariant,
|
|
* since we only encode characters \377 and under, or
|
|
* \x177 and under for a unicode string
|
|
*/
|
|
- next = (s+increment < send) ? *(U8*)(s+increment) : 0;
|
|
- next_is_digit = next >= '0' && next <= '9';
|
|
+ next_is_digit = (s + 1 >= send )
|
|
+ ? FALSE
|
|
+ : (*(U8*)(s+1) >= '0' && *(U8*)(s+1) <= '9');
|
|
|
|
/* faster than
|
|
* r = r + my_sprintf(r, "%o", k);
|
|
@@ -339,18 +432,6 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq)
|
|
}
|
|
}
|
|
}
|
|
- else if (k < 0x80)
|
|
-#endif
|
|
- *r++ = (char)k;
|
|
- else {
|
|
-#if PERL_VERSION < 10
|
|
- sprintf(r, "\\x{%"UVxf"}", k);
|
|
- r += strlen(r);
|
|
- /* my_sprintf is not supported by ppport.h */
|
|
-#else
|
|
- r = r + my_sprintf(r, "\\x{%"UVxf"}", k);
|
|
-#endif
|
|
- }
|
|
}
|
|
*r++ = '"';
|
|
} else {
|
|
@@ -440,7 +521,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
|
|
if (!val)
|
|
return 0;
|
|
|
|
- /* If the ouput buffer has less than some arbitrary amount of space
|
|
+ /* If the output buffer has less than some arbitrary amount of space
|
|
remaining, then enlarge it. For the test case (25M of output),
|
|
*1.1 was slower, *2.0 was the same, so the first guess of 1.5 is
|
|
deemed to be good enough. */
|
|
@@ -798,7 +879,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
|
|
else if (realtype == SVt_PVHV) {
|
|
SV *totpad, *newapad;
|
|
SV *sname;
|
|
- HE *entry;
|
|
+ HE *entry = NULL;
|
|
char *key;
|
|
I32 klen;
|
|
SV *hval;
|
|
@@ -1106,8 +1187,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
|
|
len = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"IVdf, SvIV(val));
|
|
if (SvPOK(val)) {
|
|
/* Need to check to see if this is a string such as " 0".
|
|
- I'm assuming from sprintf isn't going to clash with utf8.
|
|
- Is this valid on EBCDIC? */
|
|
+ I'm assuming from sprintf isn't going to clash with utf8. */
|
|
STRLEN pvlen;
|
|
const char * const pv = SvPV(val, pvlen);
|
|
if (pvlen != len || memNE(pv, tmpbuf, len))
|
|
@@ -1270,7 +1350,7 @@ MODULE = Data::Dumper PACKAGE = Data::Dumper PREFIX = Data_Dumper_
|
|
#
|
|
# This is the exact equivalent of Dump. Well, almost. The things that are
|
|
# different as of now (due to Laziness):
|
|
-# * doesn't deparse yet.'
|
|
+# * doesn't do deparse yet.'
|
|
#
|
|
|
|
void
|
|
@@ -1292,7 +1372,7 @@ Data_Dumper_Dumpxs(href, ...)
|
|
I32 purity, deepcopy, quotekeys, maxdepth = 0;
|
|
IV maxrecurse = 1000;
|
|
char tmpbuf[1024];
|
|
- I32 gimme = GIMME;
|
|
+ I32 gimme = GIMME_V;
|
|
int use_sparse_seen_hash = 0;
|
|
|
|
if (!SvROK(href)) { /* call new to get an object first */
|
|
@@ -1504,7 +1584,7 @@ Data_Dumper_Dumpxs(href, ...)
|
|
}
|
|
else
|
|
croak("Call to new() method failed to return HASH ref");
|
|
- if (gimme == G_SCALAR)
|
|
+ if (gimme != G_ARRAY)
|
|
XPUSHs(sv_2mortal(retval));
|
|
}
|
|
|
|
diff --git a/t/dumper.t b/t/dumper.t
|
|
index f452ad2..643160a 100644
|
|
--- a/t/dumper.t
|
|
+++ b/t/dumper.t
|
|
@@ -16,7 +16,6 @@ local $Data::Dumper::Sortkeys = 1;
|
|
|
|
use Data::Dumper;
|
|
use Config;
|
|
-my $Is_ebcdic = defined($Config{'ebcdic'}) && $Config{'ebcdic'} eq 'define';
|
|
|
|
$Data::Dumper::Pad = "#";
|
|
my $TMAX;
|
|
@@ -24,6 +23,61 @@ my $XS;
|
|
my $TNUM = 0;
|
|
my $WANT = '';
|
|
|
|
+sub convert_to_native($) {
|
|
+ my $input = shift;
|
|
+
|
|
+ # unicode_to_native() not available before this release; hence won't work
|
|
+ # on EBCDIC platforms for earlier.
|
|
+ return $input if $] lt 5.007_003;
|
|
+
|
|
+ my @output;
|
|
+
|
|
+ # The input should always be one of the following constructs
|
|
+ while ($input =~ m/ ( \\ [0-7]+ )
|
|
+ | ( \\ x \{ [[:xdigit:]]+ } )
|
|
+ | ( \\ . )
|
|
+ | ( . ) /gx)
|
|
+ {
|
|
+ #print STDERR __LINE__, ": ", $&, "\n";
|
|
+ my $index;
|
|
+ my $replacement;
|
|
+ if (defined $4) { # Literal
|
|
+ $index = ord $4;
|
|
+ $replacement = $4;
|
|
+ }
|
|
+ elsif (defined $3) { # backslash escape
|
|
+ $index = ord eval "\"$3\"";
|
|
+ $replacement = $3;
|
|
+ }
|
|
+ elsif (defined $2) { # Hex
|
|
+ $index = utf8::unicode_to_native(ord eval "\"$2\"");
|
|
+
|
|
+ # But low hex numbers are always in octal. These are all
|
|
+ # controls.
|
|
+ my $format = ($index < ord(" "))
|
|
+ ? "\\%o"
|
|
+ : "\\x{%x}";
|
|
+ $replacement = sprintf($format, $index);
|
|
+ }
|
|
+ elsif (defined $1) { # Octal
|
|
+ $index = utf8::unicode_to_native(ord eval "\"$1\"");
|
|
+ $replacement = sprintf("\\%o", $index);
|
|
+ }
|
|
+ else {
|
|
+ die "Unexpected match in convert_to_native()";
|
|
+ }
|
|
+
|
|
+ if (defined $output[$index]) {
|
|
+ print STDERR "ordinal $index already has '$output[$index]'; skipping '$replacement'\n";
|
|
+ next;
|
|
+ }
|
|
+
|
|
+ $output[$index] = $replacement;
|
|
+ }
|
|
+
|
|
+ return join "", grep { defined } @output;
|
|
+}
|
|
+
|
|
sub TEST {
|
|
my $string = shift;
|
|
my $name = shift;
|
|
@@ -31,42 +85,19 @@ sub TEST {
|
|
++$TNUM;
|
|
$t =~ s/([A-Z]+)\(0x[0-9a-f]+\)/$1(0xdeadbeef)/g
|
|
if ($WANT =~ /deadbeef/);
|
|
- if ($Is_ebcdic) {
|
|
- # these data need massaging with non ascii character sets
|
|
- # because of hashing order differences
|
|
- $WANT = join("\n",sort(split(/\n/,$WANT)));
|
|
- $WANT =~ s/\,$//mg;
|
|
- $t = join("\n",sort(split(/\n/,$t)));
|
|
- $t =~ s/\,$//mg;
|
|
- }
|
|
$name = $name ? " - $name" : '';
|
|
print( ($t eq $WANT and not $@) ? "ok $TNUM$name\n"
|
|
: "not ok $TNUM$name\n--Expected--\n$WANT\n--Got--\n$@$t\n");
|
|
|
|
++$TNUM;
|
|
- if ($Is_ebcdic) { # EBCDIC.
|
|
- if ($TNUM == 311 || $TNUM == 314) {
|
|
- eval $string;
|
|
- } else {
|
|
- eval $t;
|
|
- }
|
|
- } else {
|
|
- eval "$t";
|
|
- }
|
|
- print $@ ? "not ok $TNUM\n# \$@ says: $@\n" : "ok $TNUM\n";
|
|
+ eval "$t";
|
|
+ print $@ ? "not ok $TNUM\n# \$@ says: $@\n" : "ok $TNUM - no eval error\n";
|
|
|
|
$t = eval $string;
|
|
++$TNUM;
|
|
$t =~ s/([A-Z]+)\(0x[0-9a-f]+\)/$1(0xdeadbeef)/g
|
|
if ($WANT =~ /deadbeef/);
|
|
- if ($Is_ebcdic) {
|
|
- # here too there are hashing order differences
|
|
- $WANT = join("\n",sort(split(/\n/,$WANT)));
|
|
- $WANT =~ s/\,$//mg;
|
|
- $t = join("\n",sort(split(/\n/,$t)));
|
|
- $t =~ s/\,$//mg;
|
|
- }
|
|
- print( ($t eq $WANT and not $@) ? "ok $TNUM\n"
|
|
+ print( ($t eq $WANT and not $@) ? "ok $TNUM - works a 2nd time after intervening eval\n"
|
|
: "not ok $TNUM\n--Expected--\n$WANT\n--Got--\n$@$t\n");
|
|
}
|
|
|
|
@@ -77,17 +108,20 @@ sub SKIP_TEST {
|
|
++$TNUM; print "ok $TNUM # skip $reason\n";
|
|
}
|
|
|
|
+$TMAX = 450;
|
|
+
|
|
# Force Data::Dumper::Dump to use perl. We test Dumpxs explicitly by calling
|
|
# it direct. Out here it lets us knobble the next if to test that the perl
|
|
# only tests do work (and count correctly)
|
|
$Data::Dumper::Useperl = 1;
|
|
if (defined &Data::Dumper::Dumpxs) {
|
|
print "### XS extension loaded, will run XS tests\n";
|
|
- $TMAX = 438; $XS = 1;
|
|
+ $XS = 1;
|
|
}
|
|
else {
|
|
print "### XS extensions not loaded, will NOT run XS tests\n";
|
|
- $TMAX = 219; $XS = 0;
|
|
+ $TMAX /= 2;
|
|
+ $XS = 0;
|
|
}
|
|
|
|
print "1..$TMAX\n";
|
|
@@ -104,7 +138,7 @@ $b->{a} = $a;
|
|
$b->{b} = $a->[1];
|
|
$b->{c} = $a->[2];
|
|
|
|
-############# 1
|
|
+#############
|
|
##
|
|
$WANT = <<'EOT';
|
|
#$a = [
|
|
@@ -138,7 +172,7 @@ SCOPE: {
|
|
}
|
|
|
|
|
|
-############# 7
|
|
+#############
|
|
##
|
|
$WANT = <<'EOT';
|
|
#@a = (
|
|
@@ -174,7 +208,7 @@ SCOPE: {
|
|
if $XS;
|
|
}
|
|
|
|
-############# 13
|
|
+#############
|
|
##
|
|
$WANT = <<'EOT';
|
|
#%b = (
|
|
@@ -200,7 +234,7 @@ TEST (q(Data::Dumper->Dumpxs([$b, $a], [qw(*b a)])),
|
|
'basic test with dereferenced hash: Dumpxs()')
|
|
if $XS;
|
|
|
|
-############# 19
|
|
+#############
|
|
##
|
|
$WANT = <<'EOT';
|
|
#$a = [
|
|
@@ -236,7 +270,7 @@ if ($XS) {
|
|
}
|
|
|
|
|
|
-############# 25
|
|
+#############
|
|
##
|
|
$WANT = <<'EOT';
|
|
#$a = [
|
|
@@ -266,7 +300,7 @@ TEST (q( $d->Reset; $d->Dumpxs ),
|
|
'Indent(3): Purity(0)->Quotekeys(0): Dumpxs()')
|
|
if $XS;
|
|
|
|
-############# 31
|
|
+#############
|
|
##
|
|
$WANT = <<'EOT';
|
|
#$VAR1 = [
|
|
@@ -288,7 +322,7 @@ EOT
|
|
TEST (q(Dumper($a)), 'Dumper');
|
|
TEST (q(Data::Dumper::DumperX($a)), 'DumperX') if $XS;
|
|
|
|
-############# 37
|
|
+#############
|
|
##
|
|
$WANT = <<'EOT';
|
|
#[
|
|
@@ -316,7 +350,7 @@ EOT
|
|
}
|
|
|
|
|
|
-############# 43
|
|
+#############
|
|
##
|
|
$WANT = <<'EOT';
|
|
#$VAR1 = {
|
|
@@ -348,7 +382,7 @@ $foo = { "abc\000\'\efg" => "mno\000",
|
|
$foo{d} = \%foo;
|
|
$foo[2] = \%foo;
|
|
|
|
-############# 49
|
|
+#############
|
|
##
|
|
$WANT = <<'EOT';
|
|
#$foo = \*::foo;
|
|
@@ -383,7 +417,7 @@ EOT
|
|
'Purity 1: Indent 3: Dumpxs()')
|
|
if $XS;
|
|
|
|
-############# 55
|
|
+#############
|
|
##
|
|
$WANT = <<'EOT';
|
|
#$foo = \*::foo;
|
|
@@ -414,7 +448,7 @@ EOT
|
|
'Purity 1: Indent 1: Dumpxs()')
|
|
if $XS;
|
|
|
|
-############# 61
|
|
+#############
|
|
##
|
|
$WANT = <<'EOT';
|
|
#@bar = (
|
|
@@ -444,7 +478,7 @@ EOT
|
|
'array|hash|glob dereferenced: Dumpxs()')
|
|
if $XS;
|
|
|
|
-############# 67
|
|
+#############
|
|
##
|
|
$WANT = <<'EOT';
|
|
#$bar = [
|
|
@@ -474,7 +508,7 @@ EOT
|
|
'array|hash|glob: not dereferenced: Dumpxs()')
|
|
if $XS;
|
|
|
|
-############# 73
|
|
+#############
|
|
##
|
|
$WANT = <<'EOT';
|
|
#$foo = \*::foo;
|
|
@@ -499,7 +533,7 @@ EOT
|
|
'Purity 0: Quotekeys 0: dereferenced: Dumpxs')
|
|
if $XS;
|
|
|
|
-############# 79
|
|
+#############
|
|
##
|
|
$WANT = <<'EOT';
|
|
#$foo = \*::foo;
|
|
@@ -537,7 +571,7 @@ EOT
|
|
$mutts = \%kennel;
|
|
$mutts = $mutts; # avoid warning
|
|
|
|
-############# 85
|
|
+#############
|
|
##
|
|
$WANT = <<'EOT';
|
|
#%kennels = (
|
|
@@ -567,7 +601,7 @@ EOT
|
|
'constructor: hash|array|scalar: Dumpxs()');
|
|
}
|
|
|
|
-############# 91
|
|
+#############
|
|
##
|
|
$WANT = <<'EOT';
|
|
#%kennels = %kennels;
|
|
@@ -578,7 +612,7 @@ EOT
|
|
TEST q($d->Dump), 'object call: Dump';
|
|
TEST q($d->Dumpxs), 'object call: Dumpxs' if $XS;
|
|
|
|
-############# 97
|
|
+#############
|
|
##
|
|
$WANT = <<'EOT';
|
|
#%kennels = (
|
|
@@ -598,7 +632,7 @@ EOT
|
|
TEST (q($d->Reset; $d->Dumpxs), 'Reset and Dumpxs separate calls');
|
|
}
|
|
|
|
-############# 103
|
|
+#############
|
|
##
|
|
$WANT = <<'EOT';
|
|
#@dogs = (
|
|
@@ -628,14 +662,14 @@ EOT
|
|
'constructor: array|hash|scalar: Dumpxs()');
|
|
}
|
|
|
|
-############# 109
|
|
+#############
|
|
##
|
|
TEST q($d->Reset->Dump), 'Reset Dump chained';
|
|
if ($XS) {
|
|
TEST q($d->Reset->Dumpxs), 'Reset Dumpxs chained';
|
|
}
|
|
|
|
-############# 115
|
|
+#############
|
|
##
|
|
$WANT = <<'EOT';
|
|
#@dogs = (
|
|
@@ -673,7 +707,7 @@ EOT
|
|
sub z { print "foo\n" }
|
|
$c = [ \&z ];
|
|
|
|
-############# 121
|
|
+#############
|
|
##
|
|
$WANT = <<'EOT';
|
|
#$a = $b;
|
|
@@ -688,7 +722,7 @@ TEST (q(Data::Dumper->new([\&z,$c],['a','c'])->Seen({'b' => \&z})->Dumpxs;),
|
|
'Seen: scalar: Dumpxs')
|
|
if $XS;
|
|
|
|
-############# 127
|
|
+#############
|
|
##
|
|
$WANT = <<'EOT';
|
|
#$a = \&b;
|
|
@@ -703,7 +737,7 @@ TEST (q(Data::Dumper->new([\&z,$c],['a','c'])->Seen({'*b' => \&z})->Dumpxs;),
|
|
'Seen: glob: Dumpxs')
|
|
if $XS;
|
|
|
|
-############# 133
|
|
+#############
|
|
##
|
|
$WANT = <<'EOT';
|
|
#*a = \&b;
|
|
@@ -725,7 +759,7 @@ TEST (q(Data::Dumper->new([\&z,$c],['*a','*c'])->Seen({'*b' =>
|
|
$a = [];
|
|
$a->[1] = \$a->[0];
|
|
|
|
-############# 139
|
|
+#############
|
|
##
|
|
$WANT = <<'EOT';
|
|
#@a = (
|
|
@@ -746,7 +780,7 @@ TEST (q(Data::Dumper->new([$a],['*a'])->Purity(1)->Dumpxs;),
|
|
$a = \\\\\'foo';
|
|
$b = $$$a;
|
|
|
|
-############# 145
|
|
+#############
|
|
##
|
|
$WANT = <<'EOT';
|
|
#$a = \\\\\'foo';
|
|
@@ -764,7 +798,7 @@ TEST (q(Data::Dumper->new([$a,$b],['a','b'])->Purity(1)->Dumpxs;),
|
|
$a = [{ a => \$b }, { b => undef }];
|
|
$b = [{ c => \$b }, { d => \$a }];
|
|
|
|
-############# 151
|
|
+#############
|
|
##
|
|
$WANT = <<'EOT';
|
|
#$a = [
|
|
@@ -799,7 +833,7 @@ TEST (q(Data::Dumper->new([$a,$b],['a','b'])->Purity(1)->Dumpxs;),
|
|
$b = $a->[0][0];
|
|
$c = $${$b->[0][0]};
|
|
|
|
-############# 157
|
|
+#############
|
|
##
|
|
$WANT = <<'EOT';
|
|
#$a = [
|
|
@@ -830,7 +864,7 @@ TEST (q(Data::Dumper->new([$a,$b,$c],['a','b','c'])->Purity(1)->Dumpxs;),
|
|
$b = { 'c' => $c };
|
|
$a = { 'b' => $b };
|
|
|
|
-############# 163
|
|
+#############
|
|
##
|
|
$WANT = <<'EOT';
|
|
#$a = {
|
|
@@ -852,7 +886,7 @@ TEST (q(Data::Dumper->new([$a,$b,$c],['a','b','c'])->Maxdepth(4)->Dumpxs;),
|
|
'Maxdepth(4): Dumpxs()')
|
|
if $XS;
|
|
|
|
-############# 169
|
|
+#############
|
|
##
|
|
$WANT = <<'EOT';
|
|
#$a = {
|
|
@@ -875,7 +909,7 @@ TEST (q(Data::Dumper->new([$a,$b,$c],['a','b','c'])->Maxdepth(1)->Dumpxs;),
|
|
$a = \$a;
|
|
$b = [$a];
|
|
|
|
-############# 175
|
|
+#############
|
|
##
|
|
$WANT = <<'EOT';
|
|
#$b = [
|
|
@@ -889,7 +923,7 @@ TEST (q(Data::Dumper->new([$b],['b'])->Purity(0)->Dumpxs;),
|
|
'Purity(0): Dumpxs()')
|
|
if $XS;
|
|
|
|
-############# 181
|
|
+#############
|
|
##
|
|
$WANT = <<'EOT';
|
|
#$b = [
|
|
@@ -908,7 +942,7 @@ TEST (q(Data::Dumper->new([$b],['b'])->Purity(1)->Dumpxs;),
|
|
|
|
{
|
|
$a = "\x{09c10}";
|
|
-############# 187
|
|
+#############
|
|
## XS code was adding an extra \0
|
|
$WANT = <<'EOT';
|
|
#$a = "\x{9c10}";
|
|
@@ -927,7 +961,7 @@ EOT
|
|
$i = 0;
|
|
$a = { map { ("$_$_$_", ++$i) } 'I'..'Q' };
|
|
|
|
-############# 193
|
|
+#############
|
|
##
|
|
$WANT = <<'EOT';
|
|
#$VAR1 = {
|
|
@@ -959,7 +993,7 @@ TEST (q(Data::Dumper->new([$a])->Dumpxs;),
|
|
return [ sort { $b <=> $a } keys %$hash ];
|
|
}
|
|
|
|
-############# 199
|
|
+#############
|
|
##
|
|
$WANT = <<'EOT';
|
|
#$VAR1 = {
|
|
@@ -993,7 +1027,7 @@ TEST q(Data::Dumper->new([$c])->Dumpxs;), "sortkeys sub (XS)"
|
|
];
|
|
}
|
|
|
|
-############# 205
|
|
+#############
|
|
##
|
|
$WANT = <<'EOT';
|
|
#$VAR1 = [
|
|
@@ -1033,7 +1067,7 @@ TEST q(Data::Dumper->new([[$c, $d]])->Dumpxs;), "more sortkeys sub (XS)"
|
|
local $Data::Dumper::Deparse = 1;
|
|
local $Data::Dumper::Indent = 2;
|
|
|
|
-############# 211
|
|
+#############
|
|
##
|
|
$WANT = <<'EOT';
|
|
#$VAR1 = {
|
|
@@ -1051,7 +1085,7 @@ EOT
|
|
}
|
|
}
|
|
|
|
-############# 214
|
|
+#############
|
|
##
|
|
|
|
# This is messy.
|
|
@@ -1293,7 +1327,7 @@ if ($XS) {
|
|
|
|
{
|
|
$a = "1\n";
|
|
-############# 310
|
|
+#############
|
|
## Perl code was using /...$/ and hence missing the \n.
|
|
$WANT = <<'EOT';
|
|
my $VAR1 = '42
|
|
@@ -1322,7 +1356,7 @@ EOT
|
|
-2147483648,
|
|
-2147483649,
|
|
);
|
|
-############# 316
|
|
+#############
|
|
## Perl code flips over at 10 digits.
|
|
$WANT = <<'EOT';
|
|
#$VAR1 = 999999999;
|
|
@@ -1379,42 +1413,27 @@ EOT
|
|
}
|
|
}
|
|
|
|
-#XXX}
|
|
{
|
|
- if ($Is_ebcdic) {
|
|
$b = "Bad. XS didn't escape dollar sign";
|
|
-############# 322
|
|
- $WANT = <<"EOT"; # Careful. This is '' string written inside '' here doc
|
|
-#\$VAR1 = '\$b\"\@\\\\\xB1';
|
|
-EOT
|
|
- $a = "\$b\"\@\\\xB1\x{100}";
|
|
- chop $a;
|
|
- TEST q(Data::Dumper->Dump([$a])), "utf8 flag with \" and \$";
|
|
- if ($XS) {
|
|
- $WANT = <<'EOT'; # While this is "" string written inside "" here doc
|
|
-#$VAR1 = "\$b\"\@\\\x{b1}";
|
|
-EOT
|
|
- TEST q(Data::Dumper->Dumpxs([$a])), "XS utf8 flag with \" and \$";
|
|
- }
|
|
- } else {
|
|
- $b = "Bad. XS didn't escape dollar sign";
|
|
-############# 322
|
|
- $WANT = <<"EOT"; # Careful. This is '' string written inside '' here doc
|
|
-#\$VAR1 = '\$b\"\@\\\\\xA3';
|
|
+#############
|
|
+ # B6 is chosen because it is UTF-8 variant on ASCII and all 3 EBCDIC
|
|
+ # platforms that Perl currently purports to work on. It also is the only
|
|
+ # such code point that has the same meaning on all 4, the paragraph sign.
|
|
+ $WANT = <<"EOT"; # Careful. This is '' string written inside "" here doc
|
|
+#\$VAR1 = '\$b\"\@\\\\\xB6';
|
|
EOT
|
|
|
|
- $a = "\$b\"\@\\\xA3\x{100}";
|
|
- chop $a;
|
|
- TEST q(Data::Dumper->Dump([$a])), "utf8 flag with \" and \$";
|
|
- if ($XS) {
|
|
- $WANT = <<'EOT'; # While this is "" string written inside "" here doc
|
|
-#$VAR1 = "\$b\"\@\\\x{a3}";
|
|
+ $a = "\$b\"\@\\\xB6\x{100}";
|
|
+ chop $a;
|
|
+ TEST q(Data::Dumper->Dump([$a])), "utf8 flag with \" and \$";
|
|
+ if ($XS) {
|
|
+ $WANT = <<'EOT'; # While this is "" string written inside "" here doc
|
|
+#$VAR1 = "\$b\"\@\\\x{b6}";
|
|
EOT
|
|
- TEST q(Data::Dumper->Dumpxs([$a])), "XS utf8 flag with \" and \$";
|
|
- }
|
|
- }
|
|
+ TEST q(Data::Dumper->Dumpxs([$a])), "XS utf8 flag with \" and \$";
|
|
+ }
|
|
# XS used to produce "$b\"' which is 4 chars, not 3. [ie wrongly qq(\$b\\\")]
|
|
-############# 328
|
|
+#############
|
|
$WANT = <<'EOT';
|
|
#$VAR1 = '$b"';
|
|
EOT
|
|
@@ -1429,7 +1448,7 @@ EOT
|
|
|
|
# XS used to produce 'D'oh!' which is well, D'oh!
|
|
# Andreas found this one, which in turn discovered the previous two.
|
|
-############# 334
|
|
+#############
|
|
$WANT = <<'EOT';
|
|
#$VAR1 = 'D\'oh!';
|
|
EOT
|
|
@@ -1492,7 +1511,7 @@ EOT
|
|
TEST q(Data::Dumper->Dumpxs([\\%foo])),
|
|
"XS quotekeys == 0 for utf8 flagged ASCII" if $XS;
|
|
}
|
|
-############# 358
|
|
+#############
|
|
{
|
|
$WANT = <<'EOT';
|
|
#$VAR1 = [
|
|
@@ -1507,7 +1526,7 @@ EOT
|
|
TEST q(Data::Dumper->Dumpxs([\@foo])), 'Richard Clamp, Message-Id: <20030104005247.GA27685@mirth.demon.co.uk>: Dumpxs()'if $XS;
|
|
}
|
|
|
|
-############# 364
|
|
+#############
|
|
# Make sure $obj->Dumpxs returns the right thing in list context. This was
|
|
# broken by the initial attempt to fix [perl #74170].
|
|
$WANT = <<'EOT';
|
|
@@ -1517,11 +1536,13 @@ TEST q(join " ", new Data::Dumper [[]],[] =>->Dumpxs),
|
|
'$obj->Dumpxs in list context'
|
|
if $XS;
|
|
|
|
-############# 366
|
|
+#############
|
|
{
|
|
- $WANT = <<'EOT';
|
|
-#$VAR1 = [
|
|
-# "\0\1\2\3\4\5\6\a\b\t\n\13\f\r\16\17\20\21\22\23\24\25\26\27\30\31\32\e\34\35\36\37 !\"#\$%&'()*+,-./0123456789:;<=>?\@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377"
|
|
+ $WANT = '\0\1\2\3\4\5\6\a\b\t\n\13\f\r\16\17\20\21\22\23\24\25\26\27\30\31\32\e\34\35\36\37 !\"#\$%&\'()*+,-./0123456789:;<=>?\@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377';
|
|
+ $WANT = convert_to_native($WANT);
|
|
+ $WANT = <<EOT;
|
|
+#\$VAR1 = [
|
|
+# "$WANT"
|
|
#];
|
|
EOT
|
|
|
|
@@ -1531,11 +1552,13 @@ EOT
|
|
TEST (q(Data::Dumper::DumperX($foo)), 'All latin1 characters: DumperX') if $XS;
|
|
}
|
|
|
|
-############# 372
|
|
+#############
|
|
{
|
|
- $WANT = <<'EOT';
|
|
-#$VAR1 = [
|
|
-# "\0\1\2\3\4\5\6\a\b\t\n\13\f\r\16\17\20\21\22\23\24\25\26\27\30\31\32\e\34\35\36\37 !\"#\$%&'()*+,-./0123456789:;<=>?\@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\177\x{80}\x{81}\x{82}\x{83}\x{84}\x{85}\x{86}\x{87}\x{88}\x{89}\x{8a}\x{8b}\x{8c}\x{8d}\x{8e}\x{8f}\x{90}\x{91}\x{92}\x{93}\x{94}\x{95}\x{96}\x{97}\x{98}\x{99}\x{9a}\x{9b}\x{9c}\x{9d}\x{9e}\x{9f}\x{a0}\x{a1}\x{a2}\x{a3}\x{a4}\x{a5}\x{a6}\x{a7}\x{a8}\x{a9}\x{aa}\x{ab}\x{ac}\x{ad}\x{ae}\x{af}\x{b0}\x{b1}\x{b2}\x{b3}\x{b4}\x{b5}\x{b6}\x{b7}\x{b8}\x{b9}\x{ba}\x{bb}\x{bc}\x{bd}\x{be}\x{bf}\x{c0}\x{c1}\x{c2}\x{c3}\x{c4}\x{c5}\x{c6}\x{c7}\x{c8}\x{c9}\x{ca}\x{cb}\x{cc}\x{cd}\x{ce}\x{cf}\x{d0}\x{d1}\x{d2}\x{d3}\x{d4}\x{d5}\x{d6}\x{d7}\x{d8}\x{d9}\x{da}\x{db}\x{dc}\x{dd}\x{de}\x{df}\x{e0}\x{e1}\x{e2}\x{e3}\x{e4}\x{e5}\x{e6}\x{e7}\x{e8}\x{e9}\x{ea}\x{eb}\x{ec}\x{ed}\x{ee}\x{ef}\x{f0}\x{f1}\x{f2}\x{f3}\x{f4}\x{f5}\x{f6}\x{f7}\x{f8}\x{f9}\x{fa}\x{fb}\x{fc}\x{fd}\x{fe}\x{ff}\x{20ac}"
|
|
+ $WANT = '\0\1\2\3\4\5\6\a\b\t\n\13\f\r\16\17\20\21\22\23\24\25\26\27\30\31\32\e\34\35\36\37 !\"#\$%&\'()*+,-./0123456789:;<=>?\@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\177\x{80}\x{81}\x{82}\x{83}\x{84}\x{85}\x{86}\x{87}\x{88}\x{89}\x{8a}\x{8b}\x{8c}\x{8d}\x{8e}\x{8f}\x{90}\x{91}\x{92}\x{93}\x{94}\x{95}\x{96}\x{97}\x{98}\x{99}\x{9a}\x{9b}\x{9c}\x{9d}\x{9e}\x{9f}\x{a0}\x{a1}\x{a2}\x{a3}\x{a4}\x{a5}\x{a6}\x{a7}\x{a8}\x{a9}\x{aa}\x{ab}\x{ac}\x{ad}\x{ae}\x{af}\x{b0}\x{b1}\x{b2}\x{b3}\x{b4}\x{b5}\x{b6}\x{b7}\x{b8}\x{b9}\x{ba}\x{bb}\x{bc}\x{bd}\x{be}\x{bf}\x{c0}\x{c1}\x{c2}\x{c3}\x{c4}\x{c5}\x{c6}\x{c7}\x{c8}\x{c9}\x{ca}\x{cb}\x{cc}\x{cd}\x{ce}\x{cf}\x{d0}\x{d1}\x{d2}\x{d3}\x{d4}\x{d5}\x{d6}\x{d7}\x{d8}\x{d9}\x{da}\x{db}\x{dc}\x{dd}\x{de}\x{df}\x{e0}\x{e1}\x{e2}\x{e3}\x{e4}\x{e5}\x{e6}\x{e7}\x{e8}\x{e9}\x{ea}\x{eb}\x{ec}\x{ed}\x{ee}\x{ef}\x{f0}\x{f1}\x{f2}\x{f3}\x{f4}\x{f5}\x{f6}\x{f7}\x{f8}\x{f9}\x{fa}\x{fb}\x{fc}\x{fd}\x{fe}\x{ff}\x{20ac}';
|
|
+ $WANT = convert_to_native($WANT);
|
|
+ $WANT = <<EOT;
|
|
+#\$VAR1 = [
|
|
+# "$WANT"
|
|
#];
|
|
EOT
|
|
|
|
@@ -1553,7 +1576,7 @@ EOT
|
|
if $XS;
|
|
}
|
|
|
|
-############# 378
|
|
+#############
|
|
{
|
|
# If XS cannot load, the pure-Perl version cannot deparse vstrings with
|
|
# underscores properly. In 5.8.0, vstrings are just strings.
|
|
@@ -1563,11 +1586,12 @@ EOT
|
|
#$c = \'ABC';
|
|
#$d = \'ABC';
|
|
NOVSTRINGS
|
|
- my $vstrings_corr = <<'VSTRINGS_CORRECT';
|
|
-#$a = \v65.66.67;
|
|
-#$b = \v65.66.067;
|
|
-#$c = \v65.66.6_7;
|
|
-#$d = \'ABC';
|
|
+my $ABC_native = chr(65) . chr(66) . chr(67);
|
|
+ my $vstrings_corr = <<VSTRINGS_CORRECT;
|
|
+#\$a = \\v65.66.67;
|
|
+#\$b = \\v65.66.067;
|
|
+#\$c = \\v65.66.6_7;
|
|
+#\$d = \\'$ABC_native';
|
|
VSTRINGS_CORRECT
|
|
$WANT = $] <= 5.0080001
|
|
? $no_vstrings
|
|
@@ -1591,7 +1615,7 @@ VSTRINGS_CORRECT
|
|
}
|
|
}
|
|
|
|
-############# 384
|
|
+#############
|
|
{
|
|
# [perl #107372] blessed overloaded globs
|
|
$WANT = <<'EOW';
|
|
@@ -1606,7 +1630,7 @@ EOW
|
|
TEST q(Data::Dumper->Dumpxs([\*finkle])), 'blessed overloaded globs (xs)'
|
|
if $XS;
|
|
}
|
|
-############# 390
|
|
+#############
|
|
{
|
|
# [perl #74798] uncovered behaviour
|
|
$WANT = <<'EOW';
|
|
@@ -1653,7 +1677,7 @@ EOW
|
|
"numbers and number-like scalars"
|
|
if $XS;
|
|
}
|
|
-############# 426
|
|
+#############
|
|
{
|
|
# [perl #82948]
|
|
# re::regexp_pattern was moved to universal.c in v5.10.0-252-g192c1e2
|
|
@@ -1669,7 +1693,7 @@ OLD
|
|
TEST q(Data::Dumper->Dumpxs([ qr/abc/, qr/abc/i ])), "qr// xs"
|
|
if $XS;
|
|
}
|
|
-############# 432
|
|
+#############
|
|
|
|
{
|
|
sub foo {}
|
|
@@ -1682,4 +1706,37 @@ EOW
|
|
TEST q(Data::Dumper->new([ \&foo, \\&foo ], [ "*a", "b" ])->Dumpxs), "name of code in *foo xs"
|
|
if $XS;
|
|
}
|
|
-############# 436
|
|
+#############
|
|
+
|
|
+{
|
|
+ if($] lt 5.007_003) {
|
|
+ SKIP_TEST "Test is only problematic for EBCDIC, which only works for >= 5.8";
|
|
+ SKIP_TEST "Test is only problematic for EBCDIC, which only works for >= 5.8";
|
|
+ }
|
|
+ else {
|
|
+ # There is special code to handle the single control that in EBCDIC is
|
|
+ # not in the block with all the other controls, when it is UTF-8 and
|
|
+ # there are no variants in it (All controls in EBCDIC are invariant.)
|
|
+ # This tests that. There is no harm in testing this works on ASCII,
|
|
+ # and is better to not have split code paths.
|
|
+ my $outlier = chr utf8::unicode_to_native(0x9F);
|
|
+ my $outlier_hex = sprintf "%x", ord $outlier;
|
|
+ $WANT = <<EOT;
|
|
+#\$VAR1 = \"\\x{$outlier_hex}\";
|
|
+EOT
|
|
+ $foo = "$outlier\x{100}";
|
|
+ chop $foo;
|
|
+ local $Data::Dumper::Useqq = 1;
|
|
+ TEST (q(Dumper($foo)), 'EBCDIC outlier control');
|
|
+ TEST (q(Data::Dumper::DumperX($foo)), 'EBCDIC outlier control: DumperX') if $XS;
|
|
+ }
|
|
+}
|
|
+############# [perl #124091]
|
|
+{
|
|
+ $WANT = <<'EOT';
|
|
+#$VAR1 = "\n";
|
|
+EOT
|
|
+ local $Data::Dumper::Useqq = 1;
|
|
+ TEST (qq(Dumper("\n")), '\n alone');
|
|
+ TEST (qq(Data::Dumper::DumperX("\n")), '\n alone') if $XS;
|
|
+}
|
|
diff --git a/t/quotekeys.t b/t/quotekeys.t
|
|
index a858828..0f6313a 100644
|
|
--- a/t/quotekeys.t
|
|
+++ b/t/quotekeys.t
|
|
@@ -26,6 +26,8 @@ my %d = (
|
|
alpha => 'a',
|
|
);
|
|
|
|
+my $is_ascii = ord("A") == 65;
|
|
+
|
|
run_tests_for_quotekeys();
|
|
SKIP: {
|
|
skip "XS version was unavailable, so we already ran with pure Perl", 5
|
|
@@ -110,25 +112,33 @@ sub run_tests_for_quotekeys {
|
|
);
|
|
|
|
is(Dumper(\%qkdata),
|
|
- q($VAR1 = {'0' => 1,'012345' => 1,'12' => 1,'123456789' => 1,'1234567890' => 1,"1\x{660}" => 1,'::de::fg' => 1,'ab' => 1,'hi::12' => 1};),
|
|
+ (($is_ascii) # Sort order is different on EBCDIC platforms
|
|
+ ? q($VAR1 = {'0' => 1,'012345' => 1,'12' => 1,'123456789' => 1,'1234567890' => 1,"1\x{660}" => 1,'::de::fg' => 1,'ab' => 1,'hi::12' => 1};)
|
|
+ : q($VAR1 = {'::de::fg' => 1,'ab' => 1,'hi::12' => 1,'0' => 1,'012345' => 1,'12' => 1,'123456789' => 1,'1234567890' => 1,"1\x{660}" => 1};)),
|
|
"always quote when quotekeys true");
|
|
|
|
{
|
|
local $Data::Dumper::Useqq = 1;
|
|
is(Dumper(\%qkdata),
|
|
- q($VAR1 = {"0" => 1,"012345" => 1,"12" => 1,"123456789" => 1,"1234567890" => 1,"1\x{660}" => 1,"::de::fg" => 1,"ab" => 1,"hi::12" => 1};),
|
|
+ (($is_ascii)
|
|
+ ? q($VAR1 = {"0" => 1,"012345" => 1,"12" => 1,"123456789" => 1,"1234567890" => 1,"1\x{660}" => 1,"::de::fg" => 1,"ab" => 1,"hi::12" => 1};)
|
|
+ : q($VAR1 = {"::de::fg" => 1,"ab" => 1,"hi::12" => 1,"0" => 1,"012345" => 1,"12" => 1,"123456789" => 1,"1234567890" => 1,"1\x{660}" => 1};)),
|
|
"always quote when quotekeys true (useqq)");
|
|
}
|
|
|
|
local $Data::Dumper::Quotekeys = 0;
|
|
|
|
is(Dumper(\%qkdata),
|
|
- q($VAR1 = {0 => 1,'012345' => 1,12 => 1,123456789 => 1,'1234567890' => 1,"1\x{660}" => 1,'::de::fg' => 1,ab => 1,'hi::12' => 1};),
|
|
+ (($is_ascii)
|
|
+ ? q($VAR1 = {0 => 1,'012345' => 1,12 => 1,123456789 => 1,'1234567890' => 1,"1\x{660}" => 1,'::de::fg' => 1,ab => 1,'hi::12' => 1};)
|
|
+ : q($VAR1 = {'::de::fg' => 1,ab => 1,'hi::12' => 1,0 => 1,'012345' => 1,12 => 1,123456789 => 1,'1234567890' => 1,"1\x{660}" => 1};)),
|
|
"avoid quotes when quotekeys false");
|
|
{
|
|
local $Data::Dumper::Useqq = 1;
|
|
is(Dumper(\%qkdata),
|
|
- q($VAR1 = {0 => 1,"012345" => 1,12 => 1,123456789 => 1,"1234567890" => 1,"1\x{660}" => 1,"::de::fg" => 1,ab => 1,"hi::12" => 1};),
|
|
+ (($is_ascii)
|
|
+ ? q($VAR1 = {0 => 1,"012345" => 1,12 => 1,123456789 => 1,"1234567890" => 1,"1\x{660}" => 1,"::de::fg" => 1,ab => 1,"hi::12" => 1};)
|
|
+ : q($VAR1 = {"::de::fg" => 1,ab => 1,"hi::12" => 1,0 => 1,"012345" => 1,12 => 1,123456789 => 1,"1234567890" => 1,"1\x{660}" => 1};)),
|
|
"avoid quotes when quotekeys false (useqq)");
|
|
}
|
|
}
|
|
--
|
|
2.1.0
|
|
|