From 583621ac98fd8e435a33f4e6e42520ba0c3bdd01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=A0t=C4=9Bp=C3=A1n=20Kasal?= Date: Tue, 7 Apr 2009 09:57:20 +0000 Subject: [PATCH] - fix CGI::escape for all strings (#472571) - perl-CGI-t-util-58.patch: Do not distort lib/CGI/t/util-58.t http://rt.perl.org/rt3/Ticket/Display.html?id=64502 --- perl-CGI-escape.patch | 94 ++++++++++++++++++++++++++++++++++++++++ perl-CGI-t-util-58.patch | 22 ++++++++++ perl.spec | 23 +++++++++- 3 files changed, 137 insertions(+), 2 deletions(-) create mode 100644 perl-CGI-escape.patch create mode 100644 perl-CGI-t-util-58.patch diff --git a/perl-CGI-escape.patch b/perl-CGI-escape.patch new file mode 100644 index 0000000..413014a --- /dev/null +++ b/perl-CGI-escape.patch @@ -0,0 +1,94 @@ +2009-04-06 Stepan Kasal + + * t/util-58.t: Add tests reflecting common usage. + * CGI/Util.pm (encode): State what conversions are needed, in + accordance to the common usage mentioned above; and code it. + +diff -ur perl-5.10.0/lib/CGI/Util.pm perl-5.10.0/lib/CGI/Util.pm +--- perl-5.10.0/lib/CGI/Util.pm 2008-09-08 15:58:52.000000000 +0200 ++++ perl-5.10.0/lib/CGI/Util.pm 2009-04-04 16:30:29.000000000 +0200 +@@ -210,7 +210,6 @@ + my $todecode = shift; + return undef unless defined($todecode); + $todecode =~ tr/+/ /; # pluses become spaces +- $EBCDIC = "\t" ne "\011"; + if ($EBCDIC) { + $todecode =~ s/%([0-9a-fA-F]{2})/chr $A2E[hex($1)]/ge; + } else { +@@ -232,16 +231,24 @@ + } + + # URL-encode data ++# ++# We cannot use the %u escapes, they were rejected by W3C, so the official ++# way is %XX-escaped utf-8 encoding. ++# Naturally, Unicode strings have to be converted to their utf-8 byte ++# representation. (No action is required on 5.6.) ++# Byte strings were traditionally used directly as a sequence of octets. ++# This worked if they actually represented binary data (i.e. in CGI::Compress). 
++# This also worked if these byte strings were actually utf-8 encoded; e.g., ++# when the source file used utf-8 without the appropriate "use utf8;". ++# This fails if the byte string is actually a Latin 1 encoded string, but it ++# was always so and cannot be fixed without breaking the binary data case. ++# -- Stepan Kasal ++# + sub escape { + shift() if @_ > 1 and ( ref($_[0]) || (defined $_[1] && $_[0] eq $CGI::DefaultClass)); + my $toencode = shift; + return undef unless defined($toencode); +- $toencode = eval { pack("C*", unpack("U0C*", $toencode))} || pack("C*", unpack("C*", $toencode)); +- +- # force bytes while preserving backward compatibility -- dankogai +- # but commented out because it was breaking CGI::Compress -- lstein +- # $toencode = eval { pack("U*", unpack("U0C*", $toencode))} || pack("C*", unpack("C*", $toencode)); +- ++ utf8::encode($toencode) if ($] > 5.007 && utf8::is_utf8($toencode)); + if ($EBCDIC) { + $toencode=~s/([^a-zA-Z0-9_.~-])/uc sprintf("%%%02x",$E2A[ord($1)])/eg; + } else { +diff -ur perl-5.10.0/lib/CGI/t/util-58.t perl-5.10.0/lib/CGI/t/util-58.t +--- perl-5.10.0/lib/CGI/t/util-58.t 2003-04-14 20:32:22.000000000 +0200 ++++ perl-5.10.0/lib/CGI/t/util-58.t 2009-04-06 16:49:42.000000000 +0200 +@@ -1,16 +1,29 @@ ++# test CGI::Util::escape ++use Test::More tests => 4; ++use_ok("CGI::Util"); ++ ++# Byte strings should be escaped byte by byte: ++# 1) not a valid utf-8 sequence: ++my $uri = "pe\x{f8}\x{ed}\x{e8}ko.ogg"; ++is(CGI::Util::escape($uri), "pe%F8%ED%E8ko.ogg", "Escape a Latin-2 string"); ++ ++# 2) is a valid utf-8 sequence, but not an UTF-8-flagged string ++# This happens often: people write utf-8 strings to source, but forget ++# to tell perl about it by "use utf8;"--this is obviously wrong, but we ++# have to handle it gracefully, for compatibility with CGI.pm under ++# perl-5.8.x + # +-# This tests CGI::Util::escape() when fed with UTF-8-flagged string +-# -- dankogai +-BEGIN { +- if ($] < 5.008) { +- print "1..0 # \$] == $] < 
5.008\n"; +- exit(0); +- } +-} ++$uri = "pe\x{c5}\x{99}\x{c3}\x{ad}\x{c4}\x{8d}ko.ogg"; ++is(CGI::Util::escape($uri), "pe%C5%99%C3%AD%C4%8Dko.ogg", ++ "Escape an utf-8 byte string"); + +-use Test::More tests => 2; +-use_ok("CGI::Util"); +-my $uri = "\x{5c0f}\x{98fc} \x{5f3e}.txt"; # KOGAI, Dan, in Kanji +-is(CGI::Util::escape($uri), "%E5%B0%8F%E9%A3%BC%20%E5%BC%BE.txt", +- "# Escape string with UTF-8 flag"); ++SKIP: ++{ ++ # This tests CGI::Util::escape() when fed with UTF-8-flagged string ++ # -- dankogai ++ skip("Unicode strings not available in $]", 1) if ($] < 5.008); ++ $uri = "\x{5c0f}\x{98fc} \x{5f3e}.txt"; # KOGAI, Dan, in Kanji ++ is(CGI::Util::escape($uri), "%E5%B0%8F%E9%A3%BC%20%E5%BC%BE.txt", ++ "Escape string with UTF-8 flag"); ++} + __END__ diff --git a/perl-CGI-t-util-58.patch b/perl-CGI-t-util-58.patch new file mode 100644 index 0000000..22ca6e0 --- /dev/null +++ b/perl-CGI-t-util-58.patch @@ -0,0 +1,22 @@ +2009-04-06 Stepan Kasal + + * lib/CGI/t/util-58.t: return to the upstream version, do not + hide bugs. + +diff -ur perl-5.10.0.orig/lib/CGI/t/util-58.t perl-5.10.0/lib/CGI/t/util-58.t +--- perl-5.10.0.orig/lib/CGI/t/util-58.t 2007-12-18 11:47:07.000000000 +0100 ++++ perl-5.10.0/lib/CGI/t/util-58.t 2009-04-06 18:28:07.000000000 +0200 +@@ -11,11 +11,6 @@ + use Test::More tests => 2; + use_ok("CGI::Util"); + my $uri = "\x{5c0f}\x{98fc} \x{5f3e}.txt"; # KOGAI, Dan, in Kanji +-if (ord('A') == 193) { # EBCDIC. 
+- is(CGI::Util::escape($uri), "%FC%C3%A0%EE%F9%E5%E7%F8%20%FC%C3%C7%CA.txt", +- "# Escape string with UTF-8 (UTF-EBCDIC) flag"); +-} else { +- is(CGI::Util::escape($uri), "%E5%B0%8F%E9%A3%BC%20%E5%BC%BE.txt", +- "# Escape string with UTF-8 flag"); +-} ++is(CGI::Util::escape($uri), "%E5%B0%8F%E9%A3%BC%20%E5%BC%BE.txt", ++ "# Escape string with UTF-8 flag"); + __END__ diff --git a/perl.spec b/perl.spec index d306fd8..3fbfc03 100644 --- a/perl.spec +++ b/perl.spec @@ -7,7 +7,7 @@ Name: perl Version: %{perl_version} -Release: 65%{?dist} +Release: 66%{?dist} Epoch: %{perl_epoch} Summary: Practical Extraction and Report Language Group: Development/Languages @@ -97,8 +97,13 @@ Patch35: perl-5.10.0-reorderINC.patch # Fix from Archive::Extract maintainer to only look at stdout # We need this because we're using tar >= 1.21 +# included upstream in 0.31_03 Patch36: perl-5.10.0-Archive-Extract-onlystdout.patch +# Do not distort lib/CGI/t/util-58.t +# http://rt.perl.org/rt3/Ticket/Display.html?id=64502 +Patch37: perl-CGI-t-util-58.patch + ### Debian Patches ### # Fix issue with (nested) definition lists in lib/Pod/Html.pm @@ -160,7 +165,7 @@ Patch52: 31_fix_attributes_unknown_error Patch53: 32_fix_fork_rand # Fix memory leak with qr//. -# Adapted from upstream changhe 34506. +# Adapted from upstream change 34506. Patch54: 34_fix_qr-memory-leak-2 # CVE-2005-0448 revisited: File::Path::rmtree no longer allows creating of setuid files. @@ -224,6 +229,11 @@ Patch118: perl-update-autodie.patch # patches File-Fetch and CPAN Patch201: perl-5.10.0-links.patch +# Fix CGI::escape to work with all strings, started as #472571, +# brought upstream as http://rt.cpan.org/Public/Bug/Display.html?id=34528, +# accepted there for CGI.pm-3.43 +Patch202: perl-CGI-escape.patch + BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX) BuildRequires: tcsh, dos2unix, man, groff BuildRequires: gdbm-devel, db4-devel, zlib-devel @@ -952,6 +962,7 @@ upstream tarball from perl.org. 
%patch34 -p1 %patch35 -p1 %patch36 -p1 +%patch37 -p1 ### Debian patches ### %patch40 -p1 @@ -993,6 +1004,7 @@ upstream tarball from perl.org. %patch117 -p1 %patch118 -p1 %patch201 -p1 +%patch202 -p1 # # Candidates for doc recoding (need case by case review): @@ -1218,6 +1230,7 @@ perl -x patchlevel.h \ '34507 Fix memory leak in single-char character class optimization' \ 'Fedora Patch35: Reorder @INC, based on b9ba2fadb18b54e35e5de54f945111a56cbcb249' \ 'Fedora Patch36: Fix from Archive::Extract maintainer to only look at stdout from tar' \ + 'Fedora Patch37: Do not distort lib/CGI/t/util-58.t' \ '32727 Fix issue with (nested) definition lists in lib/Pod/Html.pm' \ '33287 Fix NULLOK items' \ '33554 Fix a typo in the predefined common protocols to make "udp" resolve without netbase' \ @@ -1256,6 +1269,7 @@ perl -x patchlevel.h \ 'Fedora Patch117: Update Digest::SHA to %{Digest_SHA_version}' \ 'Fedora Patch117: Update module autodie to %{autodie_version}' \ 'Fedora Patch201: Fedora uses links instead of lynx' \ + 'Fedora Patch202: Fix CGI::escape to work with all strings' \ %{nil} rm patchlevel.bak @@ -1880,6 +1894,11 @@ TMPDIR="$PWD/tmp" make test # Old changelog entries are preserved in CVS. %changelog +* Tue Apr 7 2009 Stepan Kasal - 4:5.10.0-66 +- fix CGI::escape for all strings (#472571) +- perl-CGI-t-util-58.patch: Do not distort lib/CGI/t/util-58.t + http://rt.perl.org/rt3/Ticket/Display.html?id=64502 + * Fri Mar 27 2009 Stepan Kasal - 4:5.10.0-65 - Move the gargantuan Changes* collection to -devel (#492605)