perl-Data-Dumper/Data-Dumper-2.154-Upgrade-t...

From deda932ecee93bbd318efaaaf66d2860f01ccd44 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Petr=20P=C3=ADsa=C5=99?= <ppisar@redhat.com>
Date: Wed, 6 May 2015 12:49:40 +0200
Subject: [PATCH] Upgrade to 2.158
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Petr Písař <ppisar@redhat.com>
---
 Changes       |  18 +---
 Dumper.pm     |  89 ++++++++++++------
 Dumper.xs     | 256 +++++++++++++++++++++++++++++++++-----------------
 t/dumper.t    | 297 ++++++++++++++++++++++++++++++++++------------------------
 t/quotekeys.t |  18 +++-
 5 files changed, 421 insertions(+), 257 deletions(-)

diff --git a/Changes b/Changes
index 628ef6a..eca3bb9 100644
--- a/Changes
+++ b/Changes
@@ -6,22 +6,6 @@ Changes - public release history for Data::Dumper

 =over 8

-=item 2.154 (Sep 18 2014)
-
-Most notably, this release fixes CVE-2014-4330:
-
-  Don't recurse infinitely in Data::Dumper
-
-  Add a configuration variable/option to limit recursion when dumping
-  deep data structures.
-  [...]
-  This patch addresses CVE-2014-4330.  This bug was found and
-  reported by: LSE Leading Security Experts GmbH employee Markus
-  Vervier.
-
-On top of that, there are several minor big fixes and improvements,
-see "git log" if the core perl distribution for details.
-
 =item 2.151 (Mar 7 2014)

 A "useqq" implementation for the XS version of Data::Dumper.
@@ -344,7 +328,7 @@ C<require 5.002>.
 MLDBM example removed (as its own module, it has a separate CPAN
 reality now).

-Fixed bugs in handling keys with wierd characters.  Perl can be
+Fixed bugs in handling keys with weird characters.  Perl can be
 tripped up in its implicit quoting of the word before '=>'.  The
 fix: C<Data::Dumper::Purity>, when set, always triggers quotes
 around hash keys.
diff --git a/Dumper.pm b/Dumper.pm
index 520dfd4..e884298 100644
--- a/Dumper.pm
+++ b/Dumper.pm
@@ -10,7 +10,7 @@
 package Data::Dumper;

 BEGIN {
-    $VERSION = '2.154'; # Don't forget to set version and release
+    $VERSION = '2.158'; # Don't forget to set version and release
 }               # date in POD below!

 #$| = 1;
@@ -37,6 +37,8 @@ BEGIN {
     or $Useperl = 1;
 }

+my $IS_ASCII  = ord 'A' ==  65;
+
 # module vars and their defaults
 $Indent     = 2         unless defined $Indent;
 $Purity     = 0         unless defined $Purity;
@@ -222,8 +224,11 @@ sub DESTROY {}

 sub Dump {
     return &Dumpxs
-    unless $Data::Dumper::Useperl || (ref($_[0]) && $_[0]->{useperl}) ||
-           $Data::Dumper::Deparse || (ref($_[0]) && $_[0]->{deparse});
+    unless $Data::Dumper::Useperl || (ref($_[0]) && $_[0]->{useperl})
+        || $Data::Dumper::Deparse || (ref($_[0]) && $_[0]->{deparse})
+
+            # Use pure perl version on earlier releases on EBCDIC platforms
+        || (! $IS_ASCII && $] lt 5.021_010);
     return &Dumpperl;
 }

@@ -724,41 +729,71 @@ my %esc = (
     "\e" => "\\e",
 );

+my $low_controls = ($IS_ASCII)
+
+                   # This includes \177, because traditionally it has been
+                   # output as octal, even though it isn't really a "low"
+                   # control
+                   ? qr/[\0-\x1f\177]/
+
+                     # EBCDIC low controls.
+                   : qr/[\0-\x3f]/;
+
 # put a string value in double quotes
 sub qquote {
   local($_) = shift;
   s/([\\\"\@\$])/\\$1/g;
+
+  # This efficiently changes the high ordinal characters to \x{} if the utf8
+  # flag is on.  On ASCII platforms, the high ordinals are all the
+  # non-ASCII's.  On EBCDIC platforms, we don't include in these the non-ASCII
+  # controls whose ordinals are less than SPACE, excluded below by the range
+  # \0-\x3f.  On ASCII platforms this range just compiles as part of :ascii:.
+  # On EBCDIC platforms, there is just one outlier high ordinal control, and
+  # it gets output as \x{}.
   my $bytes; { use bytes; $bytes = length }
-  s/([[:^ascii:]])/'\x{'.sprintf("%x",ord($1)).'}'/ge if $bytes > length;
-  return qq("$_") unless
-    /[^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~]/;  # fast exit
+  s/([^[:ascii:]\0-\x3f])/sprintf("\\x{%x}",ord($1))/ge
+    if $bytes > length

-  my $high = shift || "";
+       # The above doesn't get the EBCDIC outlier high ordinal control when
+       # the string is UTF-8 but there are no UTF-8 variant characters in it.
+       # We want that to come out as \x{} anyway.  We need is_utf8() to do
+       # this.
+       || (! $IS_ASCII && $] ge 5.008_001 && utf8::is_utf8($_));
+
+  return qq("$_") unless /[[:^print:]]/;  # fast exit if only printables
+
+  # Here, there is at least one non-printable to output.  First, translate the
+  # escapes.
   s/([\a\b\t\n\f\r\e])/$esc{$1}/g;

-  if (ord('^')==94)  { # ascii
-    # no need for 3 digits in escape for these
-    s/([\0-\037])(?!\d)/'\\'.sprintf('%o',ord($1))/eg;
-    s/([\0-\037\177])/'\\'.sprintf('%03o',ord($1))/eg;
+  # no need for 3 digits in escape for octals not followed by a digit.
+  s/($low_controls)(?!\d)/'\\'.sprintf('%o',ord($1))/eg;
+
+  # But otherwise use 3 digits
+  s/($low_controls)/'\\'.sprintf('%03o',ord($1))/eg;
+
     # all but last branch below not supported --BEHAVIOR SUBJECT TO CHANGE--
-    if ($high eq "iso8859") {
-      s/([\200-\240])/'\\'.sprintf('%o',ord($1))/eg;
+  my $high = shift || "";
+    if ($high eq "iso8859") {   # Doesn't escape the Latin1 printables
+      if ($IS_ASCII) {
+        s/([\200-\240])/'\\'.sprintf('%o',ord($1))/eg;
+      }
+      elsif ($] ge 5.007_003) {
+        my $high_control = utf8::unicode_to_native(0x9F);
+        s/$high_control/sprintf('\\%o',ord($1))/eg;
+      }
     } elsif ($high eq "utf8") {
+#     Some discussion of what to do here is in
+#       https://rt.perl.org/Ticket/Display.html?id=113088
 #     use utf8;
 #     $str =~ s/([^\040-\176])/sprintf "\\x{%04x}", ord($1)/ge;
     } elsif ($high eq "8bit") {
         # leave it as it is
     } else {
-      s/([\200-\377])/'\\'.sprintf('%03o',ord($1))/eg;
-      s/([^\040-\176])/sprintf "\\x{%04x}", ord($1)/ge;
+      s/([[:^ascii:]])/'\\'.sprintf('%03o',ord($1))/eg;
+      #s/([^\040-\176])/sprintf "\\x{%04x}", ord($1)/ge;
     }
-  }
-  else { # ebcdic
-      s{([^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~])(?!\d)}
-       {my $v = ord($1); '\\'.sprintf(($v <= 037 ? '%o' : '%03o'), $v)}eg;
-      s{([^ !"\#\$%&'()*+,\-.\/0-9:;<=>?\@A-Z[\\\]^_`a-z{|}~])}
-       {'\\'.sprintf('%03o',ord($1))}eg;
-  }

   return qq("$_");
 }
@@ -1025,9 +1060,7 @@ $Data::Dumper::Useqq  I<or>  I<$OBJ>->Useqq(I<[NEWVAL]>)
 When set, enables the use of double quotes for representing string values.
 Whitespace other than space will be represented as C<[\n\t\r]>, "unsafe"
 characters will be backslashed, and unprintable characters will be output as
-quoted octal integers.  Since setting this variable imposes a performance
-penalty, the default is 0.  C<Dump()> will run slower if this flag is set,
-since the fast XSUB implementation doesn't support it yet.
+quoted octal integers.  The default is 0.

 =item *

@@ -1391,8 +1424,8 @@ to have, you can use the C<Seen> method to pre-seed the internal reference
 table and make the dumped output point to them, instead.  See L</EXAMPLES>
 above.

-The C<Useqq> and C<Deparse> flags makes Dump() run slower, since the
-XSUB implementation does not support them.
+The C<Deparse> flag makes Dump() run slower, since the XSUB
+implementation does not support it.

 SCALAR objects have the weirdest looking C<bless> workaround.

@@ -1421,7 +1454,7 @@ modify it under the same terms as Perl itself.

 =head1 VERSION

-Version 2.154  (September 18 2014)
+Version 2.158  (March 13 2015)

 =head1 SEE ALSO

diff --git a/Dumper.xs b/Dumper.xs
index 2ffa867..97277f4 100644
--- a/Dumper.xs
+++ b/Dumper.xs
@@ -12,8 +12,33 @@
 #  define DD_USE_OLD_ID_FORMAT
 #endif

+/* These definitions are ASCII only.  But the pure-perl .pm avoids
+ * calling this .xs file for releases where they aren't defined */
+
+#ifndef isASCII
+#   define isASCII(c) (((UV) (c)) < 128)
+#endif
+
+#ifndef ESC_NATIVE          /* \e */
+#   define ESC_NATIVE 27
+#endif
+
+#ifndef isPRINT
+#   define isPRINT(c) (((UV) (c)) >= ' ' && ((UV) (c)) < 127)
+#endif
+
+#ifndef isALPHA
+#   define isALPHA(c) (   (((UV) (c)) >= 'a' && ((UV) (c)) <= 'z')          \
+                       || (((UV) (c)) <= 'Z' && ((UV) (c)) >= 'A'))
+#endif
+
+#ifndef isIDFIRST
+#   define isIDFIRST(c) (isALPHA(c) || (c) == '_')
+#endif
+
 #ifndef isWORDCHAR
-#   define isWORDCHAR(c) isALNUM(c)
+#   define isWORDCHAR(c) (isIDFIRST(c)                                      \
+                          || (((UV) (c)) >= '0' && ((UV) (c)) <= '9'))
 #endif

 static I32 num_q (const char *s, STRLEN slen);
@@ -40,12 +65,6 @@ static I32 DD_dump (pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval,

 #if PERL_VERSION <= 6 /* Perl 5.6 and earlier */

-# ifdef EBCDIC
-#  define UNI_TO_NATIVE(ch) (((ch) > 255) ? (ch) : ASCII_TO_NATIVE(ch))
-# else
-#  define UNI_TO_NATIVE(ch) (ch)
-# endif
-
 UV
 Perl_utf8_to_uvchr_buf(pTHX_ U8 *s, U8 *send, STRLEN *retlen)
 {
@@ -72,8 +91,7 @@ Perl_utf8_to_uvchr_buf(pTHX_ U8 *s, U8 *send, STRLEN *retlen)
      * end of the buffer if there is a malformation that indicates the
      * character is longer than the space available */

-    const UV uv = utf8_to_uvchr(s, retlen);
-    return UNI_TO_NATIVE(uv);
+    return utf8_to_uvchr(s, retlen);
 }

 # if !defined(PERL_IMPLICIT_CONTEXT)
@@ -234,55 +252,90 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq)
     STRLEN qq_escapables = 0;	/* " $ @ will need a \ in "" strings.  */
     STRLEN normal = 0;
     int increment;
-    UV next;
-
-    /* this will need EBCDICification */
-    for (s = src; s < send; do_utf8 ? s += increment : s++) {
-        const UV k = do_utf8 ? utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL) : *(U8*)s;

-        /* check for invalid utf8 */
-        increment = (k == 0 && *s != '\0') ? 1 : UTF8SKIP(s);
+    for (s = src; s < send; s += increment) { /* Sizing pass */
+        UV k = *(U8*)s;

-	/* this is only used to check if the next character is an
-	 * ASCII digit, which are invariant, so if the following collects
-	 * a UTF-8 start byte it does no harm
-	 */
-	next = (s + increment >= send ) ? 0 : *(U8*)(s+increment);
+        increment = 1;      /* Will override if necessary for utf-8 */

-#ifdef EBCDIC
-	if (!isprint(k) || k > 256) {
-#else
-	if (k > 127) {
-#endif
-            /* 4: \x{} then count the number of hex digits.  */
-            grow += 4 + (k <= 0xFF ? 2 : k <= 0xFFF ? 3 : k <= 0xFFFF ? 4 :
+        if (isPRINT(k)) {
+            if (k == '\\') {
+                backslashes++;
+            } else if (k == '\'') {
+                single_quotes++;
+            } else if (k == '"' || k == '$' || k == '@') {
+                qq_escapables++;
+            } else {
+                normal++;
+            }
+        }
+        else if (! isASCII(k) && k > ' ') {
+            /* High ordinal non-printable code point.  (The test that k is
+             * above SPACE should be optimized out by the compiler on
+             * non-EBCDIC platforms; otherwise we could put an #ifdef around
+             * it, but it's better to have just a single code path when
+             * possible.  All but one of the non-ASCII EBCDIC controls are low
+             * ordinal; that one is the only one above SPACE.)
+             *
+             * If UTF-8, output as hex, regardless of useqq.  This means there
+             * is an overhead of 4 chars '\x{}'.  Then count the number of hex
+             * digits.  */
+            if (do_utf8) {
+                k = utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL);
+
+                /* treat invalid utf8 byte by byte.  This loop iteration gets the
+                * first byte */
+                increment = (k == 0 && *s != '\0') ? 1 : UTF8SKIP(s);
+
+                grow += 4 + (k <= 0xFF ? 2 : k <= 0xFFF ? 3 : k <= 0xFFFF ? 4 :
 #if UVSIZE == 4
-                8 /* We may allocate a bit more than the minimum here.  */
+                    8 /* We may allocate a bit more than the minimum here.  */
 #else
-                k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
-#endif
-                );
-#ifndef EBCDIC
-	} else if (useqq &&
-	    /* we can't use the short form like '\0' if followed by a digit */
-                   (((k >= 7 && k <= 10) || k == 12 || k == 13 || k == 27)
-                 || (k < 8 && (next < '0' || next > '9')))) {
-	    grow += 2;
-	} else if (useqq && k <= 31 && (next < '0' || next > '9')) {
-	    grow += 3;
-	} else if (useqq && (k <= 31 || k >= 127)) {
-	    grow += 4;
+                    k <= 0xFFFFFFFF ? 8 : UVSIZE * 4
 #endif
-        } else if (k == '\\') {
-            backslashes++;
-        } else if (k == '\'') {
-            single_quotes++;
-        } else if (k == '"' || k == '$' || k == '@') {
-            qq_escapables++;
-        } else {
+                    );
+            }
+            else if (useqq) {   /* Not utf8, must be <= 0xFF, hence 2 hex
+                                 * digits. */
+                grow += 4 + 2;
+            }
+            else {  /* Non-qq generates 3 octal digits plus backslash */
+                grow += 4;
+            }
+	} /* End of high-ordinal non-printable */
+        else if (! useqq) { /* Low ordinal, non-printable, non-qq just
+                             * outputs the raw char */
             normal++;
         }
-    }
+        else {  /* Is qq, low ordinal, non-printable.  Output escape
+                 * sequences */
+            if (   k == '\a' || k == '\b' || k == '\t' || k == '\n' || k == '\r'
+                || k == '\f' || k == ESC_NATIVE)
+            {
+                grow += 2;  /* 1 char plus backslash */
+            }
+            else /* The other low ordinals are output as an octal escape
+                  * sequence */
+                 if (s + 1 >= send || (   *(U8*)(s+1) >= '0'
+                                       && *(U8*)(s+1) <= '9'))
+            {
+                /* When the following character is a digit, use 3 octal digits
+                 * plus backslash, as using fewer digits would concatenate the
+                 * following char into this one */
+                grow += 4;
+            }
+            else if (k <= 7) {
+                grow += 2;  /* 1 octal digit, plus backslash */
+            }
+            else if (k <= 077) {
+                grow += 3;  /* 2 octal digits plus backslash */
+            }
+            else {
+                grow += 4;  /* 3 octal digits plus backslash */
+            }
+        }
+    } /* End of size-calculating loop */
+
     if (grow || useqq) {
         /* We have something needing hex. 3 is ""\0 */
         sv_grow(sv, cur + 3 + grow + 2*backslashes + single_quotes
@@ -291,38 +344,78 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq)

         *r++ = '"';

-        for (s = src; s < send; do_utf8 ? s += UTF8SKIP(s) : s++) {
-            const UV k = do_utf8 ? utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL) : *(U8*)s;
+        for (s = src; s < send; s += increment) {
+            UV k;
+
+            if (do_utf8
+                && ! isASCII(*s)
+                    /* Exclude non-ASCII low ordinal controls.  This should be
+                     * optimized out by the compiler on ASCII platforms; if not
+                     * could wrap it in a #ifdef EBCDIC, but better to avoid
+                     * #if's if possible */
+                && *(U8*)s > ' '
+            ) {
+
+                /* When in UTF-8, we output all non-ascii chars as \x{}
+                 * reqardless of useqq, except for the low ordinal controls on
+                 * EBCDIC platforms */
+                k = utf8_to_uvchr_buf((U8*)s, (U8*) send, NULL);
+
+                /* treat invalid utf8 byte by byte.  This loop iteration gets the
+                * first byte */
+                increment = (k == 0 && *s != '\0') ? 1 : UTF8SKIP(s);
+
+#if PERL_VERSION < 10
+                sprintf(r, "\\x{%"UVxf"}", k);
+                r += strlen(r);
+                /* my_sprintf is not supported by ppport.h */
+#else
+                r = r + my_sprintf(r, "\\x{%"UVxf"}", k);
+#endif
+                continue;
+            }
+
+            /* Here 1) isn't UTF-8; or
+             *      2) the current character is ASCII; or
+             *      3) it is an EBCDIC platform and is a low ordinal
+             *         non-ASCII control.
+             * In each case the character occupies just one byte */
+            k = *(U8*)s;
+            increment = 1;
+
+            if (isPRINT(k)) {
+                /* These need a backslash escape */
+                if (k == '"' || k == '\\' || k == '$' || k == '@') {
+                    *r++ = '\\';
+                }

-            if (k == '"' || k == '\\' || k == '$' || k == '@') {
-                *r++ = '\\';
                 *r++ = (char)k;
             }
-            else
-#ifdef EBCDIC
-	      if (isprint(k) && k < 256)
-#else
-	      if (useqq && (k <= 31 || k == 127 || (!do_utf8 && k > 127))) {
+            else if (! useqq) { /* non-qq, non-printable, low-ordinal is
+                                 * output raw */
+                *r++ = (char)k;
+            }
+            else {  /* Is qq means use escape sequences */
 	        bool next_is_digit;

 		*r++ = '\\';
 		switch (k) {
-		case 7:  *r++ = 'a'; break;
-		case 8:  *r++ = 'b'; break;
-		case 9:  *r++ = 't'; break;
-		case 10: *r++ = 'n'; break;
-		case 12: *r++ = 'f'; break;
-		case 13: *r++ = 'r'; break;
-		case 27: *r++ = 'e'; break;
+		case '\a':  *r++ = 'a'; break;
+		case '\b':  *r++ = 'b'; break;
+		case '\t':  *r++ = 't'; break;
+		case '\n':  *r++ = 'n'; break;
+		case '\f':  *r++ = 'f'; break;
+		case '\r':  *r++ = 'r'; break;
+		case ESC_NATIVE: *r++ = 'e'; break;
 		default:
-		    increment = (k == 0 && *s != '\0') ? 1 : UTF8SKIP(s);

 		    /* only ASCII digits matter here, which are invariant,
 		     * since we only encode characters \377 and under, or
 		     * \x177 and under for a unicode string
 		     */
-		    next = (s+increment < send) ? *(U8*)(s+increment) : 0;
-		    next_is_digit = next >= '0' && next <= '9';
+                    next_is_digit = (s + 1 >= send )
+                                    ? FALSE
+                                    : (*(U8*)(s+1) >= '0' && *(U8*)(s+1) <= '9');

 		    /* faster than
 		     * r = r + my_sprintf(r, "%o", k);
@@ -339,18 +432,6 @@ esc_q_utf8(pTHX_ SV* sv, const char *src, STRLEN slen, I32 do_utf8, I32 useqq)
 		    }
 		}
 	    }
-	    else if (k < 0x80)
-#endif
-                *r++ = (char)k;
-            else {
-#if PERL_VERSION < 10
-                sprintf(r, "\\x{%"UVxf"}", k);
-                r += strlen(r);
-                /* my_sprintf is not supported by ppport.h */
-#else
-                r = r + my_sprintf(r, "\\x{%"UVxf"}", k);
-#endif
-            }
         }
         *r++ = '"';
     } else {
@@ -440,7 +521,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
     if (!val)
 	return 0;

-    /* If the ouput buffer has less than some arbitrary amount of space
+    /* If the output buffer has less than some arbitrary amount of space
        remaining, then enlarge it. For the test case (25M of output),
        *1.1 was slower, *2.0 was the same, so the first guess of 1.5 is
 	deemed to be good enough.  */
@@ -798,7 +879,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
 	else if (realtype == SVt_PVHV) {
 	    SV *totpad, *newapad;
 	    SV *sname;
-	    HE *entry;
+	    HE *entry = NULL;
 	    char *key;
 	    I32 klen;
 	    SV *hval;
@@ -1106,8 +1187,7 @@ DD_dump(pTHX_ SV *val, const char *name, STRLEN namelen, SV *retval, HV *seenhv,
 	      len = my_snprintf(tmpbuf, sizeof(tmpbuf), "%"IVdf, SvIV(val));
             if (SvPOK(val)) {
               /* Need to check to see if this is a string such as " 0".
-                 I'm assuming from sprintf isn't going to clash with utf8.
-                 Is this valid on EBCDIC?  */
+                 I'm assuming from sprintf isn't going to clash with utf8. */
               STRLEN pvlen;
               const char * const pv = SvPV(val, pvlen);
               if (pvlen != len || memNE(pv, tmpbuf, len))
@@ -1270,7 +1350,7 @@ MODULE = Data::Dumper		PACKAGE = Data::Dumper         PREFIX = Data_Dumper_
 #
 # This is the exact equivalent of Dump.  Well, almost. The things that are
 # different as of now (due to Laziness):
-#   * doesn't deparse yet.'
+#   * doesn't do deparse yet.'
 #

 void
@@ -1292,7 +1372,7 @@ Data_Dumper_Dumpxs(href, ...)
 	    I32 purity, deepcopy, quotekeys, maxdepth = 0;
 	    IV maxrecurse = 1000;
 	    char tmpbuf[1024];
-	    I32 gimme = GIMME;
+	    I32 gimme = GIMME_V;
             int use_sparse_seen_hash = 0;

 	    if (!SvROK(href)) {		/* call new to get an object first */
@@ -1504,7 +1584,7 @@ Data_Dumper_Dumpxs(href, ...)
 	    }
 	    else
 		croak("Call to new() method failed to return HASH ref");
-	    if (gimme == G_SCALAR)
+	    if (gimme != G_ARRAY)
 		XPUSHs(sv_2mortal(retval));
 	}

diff --git a/t/dumper.t b/t/dumper.t
index f452ad2..643160a 100644
--- a/t/dumper.t
+++ b/t/dumper.t
@@ -16,7 +16,6 @@ local $Data::Dumper::Sortkeys = 1;

 use Data::Dumper;
 use Config;
-my $Is_ebcdic = defined($Config{'ebcdic'}) && $Config{'ebcdic'} eq 'define';

 $Data::Dumper::Pad = "#";
 my $TMAX;
@@ -24,6 +23,61 @@ my $XS;
 my $TNUM = 0;
 my $WANT = '';

+sub convert_to_native($) {
+    my $input = shift;
+
+    # unicode_to_native() not available before this release; hence won't work
+    # on EBCDIC platforms for earlier.
+    return $input if $] lt 5.007_003;
+
+    my @output;
+
+    # The input should always be one of the following constructs
+    while ($input =~ m/ ( \\ [0-7]+ )
+                      | ( \\ x \{ [[:xdigit:]]+ } )
+                      | ( \\ . )
+                      | ( . ) /gx)
+    {
+        #print STDERR __LINE__, ": ", $&, "\n";
+        my $index;
+        my $replacement;
+        if (defined $4) {       # Literal
+            $index = ord $4;
+            $replacement = $4;
+        }
+        elsif (defined $3) {    # backslash escape
+            $index = ord eval "\"$3\"";
+            $replacement = $3;
+        }
+        elsif (defined $2) {    # Hex
+            $index = utf8::unicode_to_native(ord eval "\"$2\"");
+
+            # But low hex numbers are always in octal.  These are all
+            # controls.
+            my $format = ($index < ord(" "))
+                         ? "\\%o"
+                         : "\\x{%x}";
+            $replacement = sprintf($format, $index);
+        }
+        elsif (defined $1) {    # Octal
+            $index = utf8::unicode_to_native(ord eval "\"$1\"");
+            $replacement = sprintf("\\%o", $index);
+        }
+        else {
+            die "Unexpected match in convert_to_native()";
+        }
+
+        if (defined $output[$index]) {
+            print STDERR "ordinal $index already has '$output[$index]'; skipping '$replacement'\n";
+            next;
+        }
+
+        $output[$index] = $replacement;
+    }
+
+    return join "", grep { defined } @output;
+}
+
 sub TEST {
   my $string = shift;
   my $name = shift;
@@ -31,42 +85,19 @@ sub TEST {
   ++$TNUM;
   $t =~ s/([A-Z]+)\(0x[0-9a-f]+\)/$1(0xdeadbeef)/g
     if ($WANT =~ /deadbeef/);
-  if ($Is_ebcdic) {
-    # these data need massaging with non ascii character sets
-    # because of hashing order differences
-    $WANT = join("\n",sort(split(/\n/,$WANT)));
-    $WANT =~ s/\,$//mg;
-    $t    = join("\n",sort(split(/\n/,$t)));
-    $t    =~ s/\,$//mg;
-  }
   $name = $name ? " - $name" : '';
   print( ($t eq $WANT and not $@) ? "ok $TNUM$name\n"
     : "not ok $TNUM$name\n--Expected--\n$WANT\n--Got--\n$@$t\n");

   ++$TNUM;
-  if ($Is_ebcdic) { # EBCDIC.
-    if ($TNUM == 311 || $TNUM == 314) {
-      eval $string;
-    } else {
-      eval $t;
-    }
-  } else {
-    eval "$t";
-  }
-  print $@ ? "not ok $TNUM\n# \$@ says: $@\n" : "ok $TNUM\n";
+  eval "$t";
+  print $@ ? "not ok $TNUM\n# \$@ says: $@\n" : "ok $TNUM -   no eval error\n";

   $t = eval $string;
   ++$TNUM;
   $t =~ s/([A-Z]+)\(0x[0-9a-f]+\)/$1(0xdeadbeef)/g
     if ($WANT =~ /deadbeef/);
-  if ($Is_ebcdic) {
-    # here too there are hashing order differences
-    $WANT = join("\n",sort(split(/\n/,$WANT)));
-    $WANT =~ s/\,$//mg;
-    $t    = join("\n",sort(split(/\n/,$t)));
-    $t    =~ s/\,$//mg;
-  }
-  print( ($t eq $WANT and not $@) ? "ok $TNUM\n"
+  print( ($t eq $WANT and not $@) ? "ok $TNUM -   works a 2nd time after intervening eval\n"
     : "not ok $TNUM\n--Expected--\n$WANT\n--Got--\n$@$t\n");
 }

@@ -77,17 +108,20 @@ sub SKIP_TEST {
   ++$TNUM; print "ok $TNUM # skip $reason\n";
 }

+$TMAX = 450;
+
 # Force Data::Dumper::Dump to use perl. We test Dumpxs explicitly by calling
 # it direct. Out here it lets us knobble the next if to test that the perl
 # only tests do work (and count correctly)
 $Data::Dumper::Useperl = 1;
 if (defined &Data::Dumper::Dumpxs) {
   print "### XS extension loaded, will run XS tests\n";
-  $TMAX = 438; $XS = 1;
+  $XS = 1;
 }
 else {
   print "### XS extensions not loaded, will NOT run XS tests\n";
-  $TMAX = 219; $XS = 0;
+  $TMAX /= 2;
+  $XS = 0;
 }

 print "1..$TMAX\n";
@@ -104,7 +138,7 @@ $b->{a} = $a;
 $b->{b} = $a->[1];
 $b->{c} = $a->[2];

-############# 1
+#############
 ##
 $WANT = <<'EOT';
 #$a = [
@@ -138,7 +172,7 @@ SCOPE: {
 }


-############# 7
+#############
 ##
 $WANT = <<'EOT';
 #@a = (
@@ -174,7 +208,7 @@ SCOPE: {
     if $XS;
 }

-############# 13
+#############
 ##
 $WANT = <<'EOT';
 #%b = (
@@ -200,7 +234,7 @@ TEST (q(Data::Dumper->Dumpxs([$b, $a], [qw(*b a)])),
     'basic test with dereferenced hash: Dumpxs()')
     if $XS;

-############# 19
+#############
 ##
 $WANT = <<'EOT';
 #$a = [
@@ -236,7 +270,7 @@ if ($XS) {
 }


-############# 25
+#############
 ##
 $WANT = <<'EOT';
 #$a = [
@@ -266,7 +300,7 @@ TEST (q( $d->Reset; $d->Dumpxs ),
     'Indent(3): Purity(0)->Quotekeys(0): Dumpxs()')
     if $XS;

-############# 31
+#############
 ##
 $WANT = <<'EOT';
 #$VAR1 = [
@@ -288,7 +322,7 @@ EOT
 TEST (q(Dumper($a)), 'Dumper');
 TEST (q(Data::Dumper::DumperX($a)), 'DumperX') if $XS;

-############# 37
+#############
 ##
 $WANT = <<'EOT';
 #[
@@ -316,7 +350,7 @@ EOT
 }


-############# 43
+#############
 ##
 $WANT = <<'EOT';
 #$VAR1 = {
@@ -348,7 +382,7 @@ $foo = { "abc\000\'\efg" => "mno\000",
   $foo{d} = \%foo;
   $foo[2] = \%foo;

-############# 49
+#############
 ##
   $WANT = <<'EOT';
 #$foo = \*::foo;
@@ -383,7 +417,7 @@ EOT
     'Purity 1: Indent 3: Dumpxs()')
     if $XS;

-############# 55
+#############
 ##
   $WANT = <<'EOT';
 #$foo = \*::foo;
@@ -414,7 +448,7 @@ EOT
     'Purity 1: Indent 1: Dumpxs()')
     if $XS;

-############# 61
+#############
 ##
   $WANT = <<'EOT';
 #@bar = (
@@ -444,7 +478,7 @@ EOT
     'array|hash|glob dereferenced: Dumpxs()')
     if $XS;

-############# 67
+#############
 ##
   $WANT = <<'EOT';
 #$bar = [
@@ -474,7 +508,7 @@ EOT
     'array|hash|glob: not dereferenced: Dumpxs()')
     if $XS;

-############# 73
+#############
 ##
   $WANT = <<'EOT';
 #$foo = \*::foo;
@@ -499,7 +533,7 @@ EOT
     'Purity 0: Quotekeys 0: dereferenced: Dumpxs')
     if $XS;

-############# 79
+#############
 ##
   $WANT = <<'EOT';
 #$foo = \*::foo;
@@ -537,7 +571,7 @@ EOT
   $mutts = \%kennel;
   $mutts = $mutts;         # avoid warning

-############# 85
+#############
 ##
   $WANT = <<'EOT';
 #%kennels = (
@@ -567,7 +601,7 @@ EOT
       'constructor: hash|array|scalar: Dumpxs()');
   }

-############# 91
+#############
 ##
   $WANT = <<'EOT';
 #%kennels = %kennels;
@@ -578,7 +612,7 @@ EOT
   TEST q($d->Dump), 'object call: Dump';
   TEST q($d->Dumpxs), 'object call: Dumpxs' if $XS;

-############# 97
+#############
 ##
   $WANT = <<'EOT';
 #%kennels = (
@@ -598,7 +632,7 @@ EOT
     TEST (q($d->Reset; $d->Dumpxs), 'Reset and Dumpxs separate calls');
   }

-############# 103
+#############
 ##
   $WANT = <<'EOT';
 #@dogs = (
@@ -628,14 +662,14 @@ EOT
 	'constructor: array|hash|scalar: Dumpxs()');
   }

-############# 109
+#############
 ##
   TEST q($d->Reset->Dump), 'Reset Dump chained';
   if ($XS) {
     TEST q($d->Reset->Dumpxs), 'Reset Dumpxs chained';
   }

-############# 115
+#############
 ##
   $WANT = <<'EOT';
 #@dogs = (
@@ -673,7 +707,7 @@ EOT
 sub z { print "foo\n" }
 $c = [ \&z ];

-############# 121
+#############
 ##
   $WANT = <<'EOT';
 #$a = $b;
@@ -688,7 +722,7 @@ TEST (q(Data::Dumper->new([\&z,$c],['a','c'])->Seen({'b' => \&z})->Dumpxs;),
     'Seen: scalar: Dumpxs')
 	if $XS;

-############# 127
+#############
 ##
   $WANT = <<'EOT';
 #$a = \&b;
@@ -703,7 +737,7 @@ TEST (q(Data::Dumper->new([\&z,$c],['a','c'])->Seen({'*b' => \&z})->Dumpxs;),
     'Seen: glob: Dumpxs')
 	if $XS;

-############# 133
+#############
 ##
   $WANT = <<'EOT';
 #*a = \&b;
@@ -725,7 +759,7 @@ TEST (q(Data::Dumper->new([\&z,$c],['*a','*c'])->Seen({'*b' =>
   $a = [];
   $a->[1] = \$a->[0];

-############# 139
+#############
 ##
   $WANT = <<'EOT';
 #@a = (
@@ -746,7 +780,7 @@ TEST (q(Data::Dumper->new([$a],['*a'])->Purity(1)->Dumpxs;),
   $a = \\\\\'foo';
   $b = $$$a;

-############# 145
+#############
 ##
   $WANT = <<'EOT';
 #$a = \\\\\'foo';
@@ -764,7 +798,7 @@ TEST (q(Data::Dumper->new([$a,$b],['a','b'])->Purity(1)->Dumpxs;),
   $a = [{ a => \$b }, { b => undef }];
   $b = [{ c => \$b }, { d => \$a }];

-############# 151
+#############
 ##
   $WANT = <<'EOT';
 #$a = [
@@ -799,7 +833,7 @@ TEST (q(Data::Dumper->new([$a,$b],['a','b'])->Purity(1)->Dumpxs;),
   $b = $a->[0][0];
   $c = $${$b->[0][0]};

-############# 157
+#############
 ##
   $WANT = <<'EOT';
 #$a = [
@@ -830,7 +864,7 @@ TEST (q(Data::Dumper->new([$a,$b,$c],['a','b','c'])->Purity(1)->Dumpxs;),
     $b = { 'c' => $c };
     $a = { 'b' => $b };

-############# 163
+#############
 ##
   $WANT = <<'EOT';
 #$a = {
@@ -852,7 +886,7 @@ TEST (q(Data::Dumper->new([$a,$b,$c],['a','b','c'])->Maxdepth(4)->Dumpxs;),
     'Maxdepth(4): Dumpxs()')
 	if $XS;

-############# 169
+#############
 ##
   $WANT = <<'EOT';
 #$a = {
@@ -875,7 +909,7 @@ TEST (q(Data::Dumper->new([$a,$b,$c],['a','b','c'])->Maxdepth(1)->Dumpxs;),
     $a = \$a;
     $b = [$a];

-############# 175
+#############
 ##
   $WANT = <<'EOT';
 #$b = [
@@ -889,7 +923,7 @@ TEST (q(Data::Dumper->new([$b],['b'])->Purity(0)->Dumpxs;),
     'Purity(0): Dumpxs()')
 	if $XS;

-############# 181
+#############
 ##
   $WANT = <<'EOT';
 #$b = [
@@ -908,7 +942,7 @@ TEST (q(Data::Dumper->new([$b],['b'])->Purity(1)->Dumpxs;),

 {
   $a = "\x{09c10}";
-############# 187
+#############
 ## XS code was adding an extra \0
   $WANT = <<'EOT';
 #$a = "\x{9c10}";
@@ -927,7 +961,7 @@ EOT
   $i = 0;
   $a = { map { ("$_$_$_", ++$i) } 'I'..'Q' };

-############# 193
+#############
 ##
   $WANT = <<'EOT';
 #$VAR1 = {
@@ -959,7 +993,7 @@ TEST (q(Data::Dumper->new([$a])->Dumpxs;),
     return [ sort { $b <=> $a } keys %$hash ];
   }

-############# 199
+#############
 ##
   $WANT = <<'EOT';
 #$VAR1 = {
@@ -993,7 +1027,7 @@ TEST q(Data::Dumper->new([$c])->Dumpxs;), "sortkeys sub (XS)"
     ];
   }

-############# 205
+#############
 ##
   $WANT = <<'EOT';
 #$VAR1 = [
@@ -1033,7 +1067,7 @@ TEST q(Data::Dumper->new([[$c, $d]])->Dumpxs;), "more sortkeys sub (XS)"
   local $Data::Dumper::Deparse = 1;
   local $Data::Dumper::Indent = 2;

-############# 211
+#############
 ##
   $WANT = <<'EOT';
 #$VAR1 = {
@@ -1051,7 +1085,7 @@ EOT
   }
 }

-############# 214
+#############
 ##

 # This is messy.
@@ -1293,7 +1327,7 @@ if ($XS) {

 {
   $a = "1\n";
-############# 310
+#############
 ## Perl code was using /...$/ and hence missing the \n.
   $WANT = <<'EOT';
 my $VAR1 = '42
@@ -1322,7 +1356,7 @@ EOT
         -2147483648,
         -2147483649,
         );
-############# 316
+#############
 ## Perl code flips over at 10 digits.
   $WANT = <<'EOT';
 #$VAR1 = 999999999;
@@ -1379,42 +1413,27 @@ EOT
   }
 }

-#XXX}
 {
-    if ($Is_ebcdic) {
 	$b = "Bad. XS didn't escape dollar sign";
-############# 322
-	$WANT = <<"EOT"; # Careful. This is '' string written inside '' here doc
-#\$VAR1 = '\$b\"\@\\\\\xB1';
-EOT
-        $a = "\$b\"\@\\\xB1\x{100}";
-	chop $a;
-	TEST q(Data::Dumper->Dump([$a])), "utf8 flag with \" and \$";
-	if ($XS) {
-	    $WANT = <<'EOT'; # While this is "" string written inside "" here doc
-#$VAR1 = "\$b\"\@\\\x{b1}";
-EOT
-            TEST q(Data::Dumper->Dumpxs([$a])), "XS utf8 flag with \" and \$";
-	}
-    } else {
-	$b = "Bad. XS didn't escape dollar sign";
-############# 322
-	$WANT = <<"EOT"; # Careful. This is '' string written inside '' here doc
-#\$VAR1 = '\$b\"\@\\\\\xA3';
+#############
+    # B6 is chosen because it is UTF-8 variant on ASCII and all 3 EBCDIC
+    # platforms that Perl currently purports to work on.  It also is the only
+    # such code point that has the same meaning on all 4, the paragraph sign.
+    $WANT = <<"EOT"; # Careful. This is '' string written inside "" here doc
+#\$VAR1 = '\$b\"\@\\\\\xB6';
 EOT

-        $a = "\$b\"\@\\\xA3\x{100}";
-	chop $a;
-	TEST q(Data::Dumper->Dump([$a])), "utf8 flag with \" and \$";
-	if ($XS) {
-	    $WANT = <<'EOT'; # While this is "" string written inside "" here doc
-#$VAR1 = "\$b\"\@\\\x{a3}";
+    $a = "\$b\"\@\\\xB6\x{100}";
+    chop $a;
+    TEST q(Data::Dumper->Dump([$a])), "utf8 flag with \" and \$";
+    if ($XS) {
+        $WANT = <<'EOT'; # While this is "" string written inside "" here doc
+#$VAR1 = "\$b\"\@\\\x{b6}";
 EOT
-            TEST q(Data::Dumper->Dumpxs([$a])), "XS utf8 flag with \" and \$";
-	}
-  }
+        TEST q(Data::Dumper->Dumpxs([$a])), "XS utf8 flag with \" and \$";
+    }
   # XS used to produce "$b\"' which is 4 chars, not 3. [ie wrongly qq(\$b\\\")]
-############# 328
+#############
   $WANT = <<'EOT';
 #$VAR1 = '$b"';
 EOT
@@ -1429,7 +1448,7 @@ EOT

   # XS used to produce 'D'oh!' which is well, D'oh!
   # Andreas found this one, which in turn discovered the previous two.
-############# 334
+#############
   $WANT = <<'EOT';
 #$VAR1 = 'D\'oh!';
 EOT
@@ -1492,7 +1511,7 @@ EOT
   TEST q(Data::Dumper->Dumpxs([\\%foo])),
     "XS quotekeys == 0 for utf8 flagged ASCII" if $XS;
 }
-############# 358
+#############
 {
   $WANT = <<'EOT';
 #$VAR1 = [
@@ -1507,7 +1526,7 @@ EOT
     TEST q(Data::Dumper->Dumpxs([\@foo])), 'Richard Clamp, Message-Id: <20030104005247.GA27685@mirth.demon.co.uk>: Dumpxs()'if $XS;
 }

-############# 364
+#############
 # Make sure $obj->Dumpxs returns the right thing in list context. This was
 # broken by the initial attempt to fix [perl #74170].
 $WANT = <<'EOT';
@@ -1517,11 +1536,13 @@ TEST q(join " ", new Data::Dumper [[]],[] =>->Dumpxs),
     '$obj->Dumpxs in list context'
  if $XS;

-############# 366
+#############
 {
-  $WANT = <<'EOT';
-#$VAR1 = [
-#  "\0\1\2\3\4\5\6\a\b\t\n\13\f\r\16\17\20\21\22\23\24\25\26\27\30\31\32\e\34\35\36\37 !\"#\$%&'()*+,-./0123456789:;<=>?\@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377"
+  $WANT = '\0\1\2\3\4\5\6\a\b\t\n\13\f\r\16\17\20\21\22\23\24\25\26\27\30\31\32\e\34\35\36\37 !\"#\$%&\'()*+,-./0123456789:;<=>?\@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\177\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377';
+  $WANT = convert_to_native($WANT);
+  $WANT = <<EOT;
+#\$VAR1 = [
+#  "$WANT"
 #];
 EOT

@@ -1531,11 +1552,13 @@ EOT
   TEST (q(Data::Dumper::DumperX($foo)), 'All latin1 characters: DumperX') if $XS;
 }

-############# 372
+#############
 {
-  $WANT = <<'EOT';
-#$VAR1 = [
-#  "\0\1\2\3\4\5\6\a\b\t\n\13\f\r\16\17\20\21\22\23\24\25\26\27\30\31\32\e\34\35\36\37 !\"#\$%&'()*+,-./0123456789:;<=>?\@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\177\x{80}\x{81}\x{82}\x{83}\x{84}\x{85}\x{86}\x{87}\x{88}\x{89}\x{8a}\x{8b}\x{8c}\x{8d}\x{8e}\x{8f}\x{90}\x{91}\x{92}\x{93}\x{94}\x{95}\x{96}\x{97}\x{98}\x{99}\x{9a}\x{9b}\x{9c}\x{9d}\x{9e}\x{9f}\x{a0}\x{a1}\x{a2}\x{a3}\x{a4}\x{a5}\x{a6}\x{a7}\x{a8}\x{a9}\x{aa}\x{ab}\x{ac}\x{ad}\x{ae}\x{af}\x{b0}\x{b1}\x{b2}\x{b3}\x{b4}\x{b5}\x{b6}\x{b7}\x{b8}\x{b9}\x{ba}\x{bb}\x{bc}\x{bd}\x{be}\x{bf}\x{c0}\x{c1}\x{c2}\x{c3}\x{c4}\x{c5}\x{c6}\x{c7}\x{c8}\x{c9}\x{ca}\x{cb}\x{cc}\x{cd}\x{ce}\x{cf}\x{d0}\x{d1}\x{d2}\x{d3}\x{d4}\x{d5}\x{d6}\x{d7}\x{d8}\x{d9}\x{da}\x{db}\x{dc}\x{dd}\x{de}\x{df}\x{e0}\x{e1}\x{e2}\x{e3}\x{e4}\x{e5}\x{e6}\x{e7}\x{e8}\x{e9}\x{ea}\x{eb}\x{ec}\x{ed}\x{ee}\x{ef}\x{f0}\x{f1}\x{f2}\x{f3}\x{f4}\x{f5}\x{f6}\x{f7}\x{f8}\x{f9}\x{fa}\x{fb}\x{fc}\x{fd}\x{fe}\x{ff}\x{20ac}"
+  $WANT = '\0\1\2\3\4\5\6\a\b\t\n\13\f\r\16\17\20\21\22\23\24\25\26\27\30\31\32\e\34\35\36\37 !\"#\$%&\'()*+,-./0123456789:;<=>?\@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\177\x{80}\x{81}\x{82}\x{83}\x{84}\x{85}\x{86}\x{87}\x{88}\x{89}\x{8a}\x{8b}\x{8c}\x{8d}\x{8e}\x{8f}\x{90}\x{91}\x{92}\x{93}\x{94}\x{95}\x{96}\x{97}\x{98}\x{99}\x{9a}\x{9b}\x{9c}\x{9d}\x{9e}\x{9f}\x{a0}\x{a1}\x{a2}\x{a3}\x{a4}\x{a5}\x{a6}\x{a7}\x{a8}\x{a9}\x{aa}\x{ab}\x{ac}\x{ad}\x{ae}\x{af}\x{b0}\x{b1}\x{b2}\x{b3}\x{b4}\x{b5}\x{b6}\x{b7}\x{b8}\x{b9}\x{ba}\x{bb}\x{bc}\x{bd}\x{be}\x{bf}\x{c0}\x{c1}\x{c2}\x{c3}\x{c4}\x{c5}\x{c6}\x{c7}\x{c8}\x{c9}\x{ca}\x{cb}\x{cc}\x{cd}\x{ce}\x{cf}\x{d0}\x{d1}\x{d2}\x{d3}\x{d4}\x{d5}\x{d6}\x{d7}\x{d8}\x{d9}\x{da}\x{db}\x{dc}\x{dd}\x{de}\x{df}\x{e0}\x{e1}\x{e2}\x{e3}\x{e4}\x{e5}\x{e6}\x{e7}\x{e8}\x{e9}\x{ea}\x{eb}\x{ec}\x{ed}\x{ee}\x{ef}\x{f0}\x{f1}\x{f2}\x{f3}\x{f4}\x{f5}\x{f6}\x{f7}\x{f8}\x{f9}\x{fa}\x{fb}\x{fc}\x{fd}\x{fe}\x{ff}\x{20ac}';
+  $WANT = convert_to_native($WANT);
+  $WANT = <<EOT;
+#\$VAR1 = [
+#  "$WANT"
 #];
 EOT

@@ -1553,7 +1576,7 @@ EOT
     if $XS;
 }

-############# 378
+#############
 {
   # If XS cannot load, the pure-Perl version cannot deparse vstrings with
   # underscores properly.  In 5.8.0, vstrings are just strings.
@@ -1563,11 +1586,12 @@ EOT
 #$c = \'ABC';
 #$d = \'ABC';
 NOVSTRINGS
-  my $vstrings_corr = <<'VSTRINGS_CORRECT';
-#$a = \v65.66.67;
-#$b = \v65.66.067;
-#$c = \v65.66.6_7;
-#$d = \'ABC';
+my $ABC_native = chr(65) . chr(66) . chr(67);
+  my $vstrings_corr = <<VSTRINGS_CORRECT;
+#\$a = \\v65.66.67;
+#\$b = \\v65.66.067;
+#\$c = \\v65.66.6_7;
+#\$d = \\'$ABC_native';
 VSTRINGS_CORRECT
   $WANT = $] <= 5.0080001
           ? $no_vstrings
@@ -1591,7 +1615,7 @@ VSTRINGS_CORRECT
   }
 }

-############# 384
+#############
 {
   # [perl #107372] blessed overloaded globs
   $WANT = <<'EOW';
@@ -1606,7 +1630,7 @@ EOW
   TEST q(Data::Dumper->Dumpxs([\*finkle])), 'blessed overloaded globs (xs)'
     if $XS;
 }
-############# 390
+#############
 {
   # [perl #74798] uncovered behaviour
   $WANT = <<'EOW';
@@ -1653,7 +1677,7 @@ EOW
     "numbers and number-like scalars"
     if $XS;
 }
-############# 426
+#############
 {
   # [perl #82948]
   # re::regexp_pattern was moved to universal.c in v5.10.0-252-g192c1e2
@@ -1669,7 +1693,7 @@ OLD
   TEST q(Data::Dumper->Dumpxs([ qr/abc/, qr/abc/i ])), "qr// xs"
     if $XS;
 }
-############# 432
+#############

 {
   sub foo {}
@@ -1682,4 +1706,37 @@ EOW
   TEST q(Data::Dumper->new([ \&foo, \\&foo ], [ "*a", "b" ])->Dumpxs), "name of code in *foo xs"
     if $XS;
 }
-############# 436
+#############
+
+{
+    if($] lt 5.007_003) {
+        SKIP_TEST "Test is only problematic for EBCDIC, which only works for >= 5.8";
+        SKIP_TEST "Test is only problematic for EBCDIC, which only works for >= 5.8";
+    }
+    else {
+        # There is special code to handle the single control that in EBCDIC is
+        # not in the block with all the other controls, when it is UTF-8 and
+        # there are no variants in it (All controls in EBCDIC are invariant.)
+        # This tests that.  There is no harm in testing this works on ASCII,
+        # and is better to not have split code paths.
+        my $outlier = chr utf8::unicode_to_native(0x9F);
+        my $outlier_hex = sprintf "%x", ord $outlier;
+        $WANT = <<EOT;
+#\$VAR1 = \"\\x{$outlier_hex}\";
+EOT
+        $foo = "$outlier\x{100}";
+        chop $foo;
+        local $Data::Dumper::Useqq = 1;
+        TEST (q(Dumper($foo)), 'EBCDIC outlier control');
+        TEST (q(Data::Dumper::DumperX($foo)), 'EBCDIC outlier control: DumperX') if $XS;
+    }
+}
+############# [perl #124091]
+{
+        $WANT = <<'EOT';
+#$VAR1 = "\n";
+EOT
+        local $Data::Dumper::Useqq = 1;
+        TEST (qq(Dumper("\n")), '\n alone');
+        TEST (qq(Data::Dumper::DumperX("\n")), '\n alone') if $XS;
+}
diff --git a/t/quotekeys.t b/t/quotekeys.t
index a858828..0f6313a 100644
--- a/t/quotekeys.t
+++ b/t/quotekeys.t
@@ -26,6 +26,8 @@ my %d = (
     alpha   => 'a',
 );

+my $is_ascii = ord("A") == 65;
+
 run_tests_for_quotekeys();
 SKIP: {
     skip "XS version was unavailable, so we already ran with pure Perl", 5
@@ -110,25 +112,33 @@ sub run_tests_for_quotekeys {
       );

     is(Dumper(\%qkdata),
-       q($VAR1 = {'0' => 1,'012345' => 1,'12' => 1,'123456789' => 1,'1234567890' => 1,"1\x{660}" => 1,'::de::fg' => 1,'ab' => 1,'hi::12' => 1};),
+       (($is_ascii) # Sort order is different on EBCDIC platforms
+        ? q($VAR1 = {'0' => 1,'012345' => 1,'12' => 1,'123456789' => 1,'1234567890' => 1,"1\x{660}" => 1,'::de::fg' => 1,'ab' => 1,'hi::12' => 1};)
+        : q($VAR1 = {'::de::fg' => 1,'ab' => 1,'hi::12' => 1,'0' => 1,'012345' => 1,'12' => 1,'123456789' => 1,'1234567890' => 1,"1\x{660}" => 1};)),
        "always quote when quotekeys true");

     {
         local $Data::Dumper::Useqq = 1;
         is(Dumper(\%qkdata),
-	   q($VAR1 = {"0" => 1,"012345" => 1,"12" => 1,"123456789" => 1,"1234567890" => 1,"1\x{660}" => 1,"::de::fg" => 1,"ab" => 1,"hi::12" => 1};),
+           (($is_ascii)
+	    ? q($VAR1 = {"0" => 1,"012345" => 1,"12" => 1,"123456789" => 1,"1234567890" => 1,"1\x{660}" => 1,"::de::fg" => 1,"ab" => 1,"hi::12" => 1};)
+            : q($VAR1 = {"::de::fg" => 1,"ab" => 1,"hi::12" => 1,"0" => 1,"012345" => 1,"12" => 1,"123456789" => 1,"1234567890" => 1,"1\x{660}" => 1};)),
 	   "always quote when quotekeys true (useqq)");
     }

     local $Data::Dumper::Quotekeys = 0;

     is(Dumper(\%qkdata),
-       q($VAR1 = {0 => 1,'012345' => 1,12 => 1,123456789 => 1,'1234567890' => 1,"1\x{660}" => 1,'::de::fg' => 1,ab => 1,'hi::12' => 1};),
+        (($is_ascii)
+         ? q($VAR1 = {0 => 1,'012345' => 1,12 => 1,123456789 => 1,'1234567890' => 1,"1\x{660}" => 1,'::de::fg' => 1,ab => 1,'hi::12' => 1};)
+         : q($VAR1 = {'::de::fg' => 1,ab => 1,'hi::12' => 1,0 => 1,'012345' => 1,12 => 1,123456789 => 1,'1234567890' => 1,"1\x{660}" => 1};)),
 	      "avoid quotes when quotekeys false");
     {
         local $Data::Dumper::Useqq = 1;
 	is(Dumper(\%qkdata),
-	   q($VAR1 = {0 => 1,"012345" => 1,12 => 1,123456789 => 1,"1234567890" => 1,"1\x{660}" => 1,"::de::fg" => 1,ab => 1,"hi::12" => 1};),
+            (($is_ascii)
+	     ? q($VAR1 = {0 => 1,"012345" => 1,12 => 1,123456789 => 1,"1234567890" => 1,"1\x{660}" => 1,"::de::fg" => 1,ab => 1,"hi::12" => 1};)
+             : q($VAR1 = {"::de::fg" => 1,ab => 1,"hi::12" => 1,0 => 1,"012345" => 1,12 => 1,123456789 => 1,"1234567890" => 1,"1\x{660}" => 1};)),
 	      "avoid quotes when quotekeys false (useqq)");
     }
 }
--
2.1.0