extend URL parser to support IPv6 zone identifiers (#680996)

This commit is contained in:
Kamil Dudka 2014-04-02 15:25:45 +02:00
parent e4d3e72cd6
commit 3f180252f8
2 changed files with 372 additions and 1 deletions

View File

@ -0,0 +1,364 @@
From 3d1fa5aee501d0a8ca82c5d7b4964648f0092523 Mon Sep 17 00:00:00 2001
From: Paul Marks <pmarks@google.com>
Date: Sun, 30 Mar 2014 07:50:37 +0200
Subject: [PATCH 1/4] curl: stop interpreting IPv6 literals as glob patterns.
This makes it possible to fetch from an IPv6 literal without specifying
the -g option. Globbing remains available elsewhere in the URL.
For example:
curl http://[::1]/file[1-3].txt
This creates no ambiguity, because there is no overlap between the
syntax of valid globs and valid IPv6 literals. Globs contain hyphens
and at most 1 colon, while IPv6 literals have no hyphens, and at least 2
colons.
The peek_ipv6() parser simply whitelists a set of characters and counts
colons, because the real validation happens later on. The character set
includes A-Z, in case someone decides to implement support for scopes
like [fe80::1%25eth0] in the future.
Signed-off-by: Paul Marks <pmarks@google.com>
[upstream commit 0bc4938eecccefdf8906bf9c488e4cd9c8467e99]
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
---
src/tool_urlglob.c | 48 +++++++++++++++++++++++++++++++++++++++++++++---
tests/data/test1230 | 2 +-
2 files changed, 46 insertions(+), 4 deletions(-)
diff --git a/src/tool_urlglob.c b/src/tool_urlglob.c
index ec5014b..943e0ab 100644
--- a/src/tool_urlglob.c
+++ b/src/tool_urlglob.c
@@ -5,7 +5,7 @@
* | (__| |_| | _ <| |___
* \___|\___/|_| \_\_____|
*
- * Copyright (C) 1998 - 2013, Daniel Stenberg, <daniel@haxx.se>, et al.
+ * Copyright (C) 1998 - 2014, Daniel Stenberg, <daniel@haxx.se>, et al.
*
* This software is licensed as described in the file COPYING, which
* you should have received as part of this distribution. The terms
@@ -302,6 +302,36 @@ static GlobCode glob_range(URLGlob *glob, char **patternp,
return GLOB_OK;
}
+static bool peek_ipv6(const char *str, size_t *skip)
+{
+ /*
+ * Scan for a potential IPv6 literal; at ']' *skip gets the bytes scanned.
+ * - Valid globs contain a hyphen and <= 1 colon.
+ * - IPv6 literals contain no hyphens and >= 2 colons.
+ */
+ size_t i = 0;
+ size_t colons = 0;
+ if(str[i++] != '[') {
+ return FALSE;
+ }
+ for(;;) {
+ const char c = str[i++];
+ if(ISALNUM(c) || c == '.' || c == '%') {
+ /* ok: permitted inside the brackets, keep scanning */
+ }
+ else if(c == ':') {
+ colons++;
+ }
+ else if(c == ']') {
+ *skip = i;
+ return colons >= 2;
+ }
+ else {
+ return FALSE;
+ }
+ }
+}
+
static GlobCode glob_parse(URLGlob *glob, char *pattern,
size_t pos, unsigned long *amount)
{
@@ -315,8 +345,20 @@ static GlobCode glob_parse(URLGlob *glob, char *pattern,
while(*pattern && !res) {
char *buf = glob->glob_buffer;
- int sublen = 0;
- while(*pattern && *pattern != '{' && *pattern != '[') {
+ size_t sublen = 0;
+ while(*pattern && *pattern != '{') {
+ if(*pattern == '[') {
+ /* Skip over potential IPv6 literals. */
+ size_t skip;
+ if(peek_ipv6(pattern, &skip)) {
+ memcpy(buf, pattern, skip);
+ buf += skip;
+ pattern += skip;
+ sublen += skip;
+ continue;
+ }
+ break;
+ }
if(*pattern == '}' || *pattern == ']')
return GLOBERROR("unmatched close brace/bracket", pos, GLOB_ERROR);
diff --git a/tests/data/test1230 b/tests/data/test1230
index b16269d..3c1d3d4 100644
--- a/tests/data/test1230
+++ b/tests/data/test1230
@@ -56,7 +56,7 @@ HTTP CONNECT to IPv6 numerical address
</name>
# 0x4ce == 1230, the test number
<command>
--g http://[1234:1234:1234::4ce]:%HTTPPORT/wanted/page/1230 -p -x %HOSTIP:%HTTPPORT
+http://[1234:1234:1234::4ce]:%HTTPPORT/wanted/page/1230 -p -x %HOSTIP:%HTTPPORT
</command>
</client>
--
1.7.1
From 38c0e09f4a020fdcdcfeb149d89d8551e534143f Mon Sep 17 00:00:00 2001
From: Till Maas <opensource@till.name>
Date: Sat, 15 Mar 2014 22:42:50 +0100
Subject: [PATCH 2/4] URL parser: IPv6 zone identifiers are now supported
[upstream commit 9317eced98408c7fefa6dd5f1559050e1ec8a3b7]
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
---
docs/KNOWN_BUGS | 11 +--------
docs/MANUAL | 6 ++--
lib/url.c | 69 +++++++++++++++++++++++++++++++++++++++++++++---------
3 files changed, 61 insertions(+), 25 deletions(-)
diff --git a/docs/KNOWN_BUGS b/docs/KNOWN_BUGS
index ad997a0..c8ad032 100644
--- a/docs/KNOWN_BUGS
+++ b/docs/KNOWN_BUGS
@@ -180,16 +180,7 @@ may have been fixed since this was written!
--cflags suffers from the same effects with CFLAGS/CPPFLAGS.
30. You need to use -g to the command line tool in order to use RFC2732-style
- IPv6 numerical addresses in URLs.
-
-29. IPv6 URLs with zone ID is not nicely supported.
- http://www.ietf.org/internet-drafts/draft-fenner-literal-zone-02.txt (expired)
- specifies the use of a plus sign instead of a percent when specifying zone
- IDs in URLs to get around the problem of percent signs being
- special. According to the reporter, Firefox deals with the URL _with_ a
- percent letter (which seems like a blatant URL spec violation).
- libcurl supports zone IDs where the percent sign is URL-escaped (i.e. %25):
- http://curl.haxx.se/bug/view.cgi?id=555
+ or RFC6874-style IPv6 numerical addresses in URLs.
26. NTLM authentication using SSPI (on Windows) when (lib)curl is running in
"system context" will make it use wrong(?) user name - at least when compared
diff --git a/docs/MANUAL b/docs/MANUAL
index 4ad2e13..da8f602 100644
--- a/docs/MANUAL
+++ b/docs/MANUAL
@@ -956,9 +956,9 @@ IPv6
When this style is used, the -g option must be given to stop curl from
interpreting the square brackets as special globbing characters. Link local
and site local addresses including a scope identifier, such as fe80::1234%1,
- may also be used, but the scope portion must be numeric and the percent
- character must be URL escaped. The previous example in an SFTP URL might
- look like:
+ may also be used, but the scope portion must be numeric or match an existing
+ network interface on Linux and the percent character must be URL escaped. The
+ previous example in an SFTP URL might look like:
sftp://[fe80::1234%251]/
diff --git a/lib/url.c b/lib/url.c
index 0e420c7..40751cc 100644
--- a/lib/url.c
+++ b/lib/url.c
@@ -3951,23 +3951,59 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data,
if(result != CURLE_OK)
return result;
- if(conn->host.name[0] == '[') {
+ if(conn->host.name[0] == '[' && !data->state.this_is_a_follow) {
/* This looks like an IPv6 address literal. See if there is an address
- scope. */
- char *percent = strstr (conn->host.name, "%25");
+ scope if there is no location header */
+ char *percent = strchr(conn->host.name, '%');
if(percent) {
+ unsigned int identifier_offset = 3;
char *endp;
- unsigned long scope = strtoul (percent + 3, &endp, 10);
+ unsigned long scope;
+ if(strncmp("%25", percent, 3) != 0) {
+ infof(data,
+ "Please URL encode %% as %%25, see RFC 6874.\n");
+ identifier_offset = 1;
+ }
+ scope = strtoul(percent + identifier_offset, &endp, 10);
if(*endp == ']') {
/* The address scope was well formed. Knock it out of the
hostname. */
memmove(percent, endp, strlen(endp)+1);
- if(!data->state.this_is_a_follow)
- /* Don't honour a scope given in a Location: header */
- conn->scope = (unsigned int)scope;
+ conn->scope = (unsigned int)scope;
+ }
+ else {
+ /* Zone identifier is not numeric */
+#ifdef HAVE_NET_IF_H
+ char ifname[IFNAMSIZ + 2];
+ char *square_bracket;
+ unsigned int scopeidx = 0;
+ strncpy(ifname, percent + identifier_offset, IFNAMSIZ + 2);
+ /* Ensure nullbyte termination */
+ ifname[IFNAMSIZ + 1] = '\0';
+ square_bracket = strchr(ifname, ']');
+ if(square_bracket) {
+ /* Remove ']' */
+ *square_bracket = '\0';
+ scopeidx = if_nametoindex(ifname);
+ if(scopeidx == 0) {
+ infof(data, "Invalid network interface: %s; %s\n", ifname,
+ strerror(errno));
+ }
+ }
+ if(scopeidx > 0) {
+ /* Remove zone identifier from hostname */
+ memmove(percent,
+ percent + identifier_offset + strlen(ifname),
+ identifier_offset + strlen(ifname));
+ conn->scope = scopeidx;
+ }
+ else {
+#endif /* HAVE_NET_IF_H */
+ infof(data, "Invalid IPv6 address format\n");
+#ifdef HAVE_NET_IF_H
+ }
+#endif /* HAVE_NET_IF_H */
}
- else
- infof(data, "Invalid IPv6 address format\n");
}
}
@@ -4350,12 +4386,21 @@ static CURLcode parse_proxy(struct SessionHandle *data,
/* start scanning for port number at this point */
portptr = proxyptr;
- /* detect and extract RFC2732-style IPv6-addresses */
+ /* detect and extract RFC6874-style IPv6-addresses */
if(*proxyptr == '[') {
char *ptr = ++proxyptr; /* advance beyond the initial bracket */
- while(*ptr && (ISXDIGIT(*ptr) || (*ptr == ':') || (*ptr == '%') ||
- (*ptr == '.')))
+ while(*ptr && (ISXDIGIT(*ptr) || (*ptr == ':') || (*ptr == '.')))
+ ptr++;
+ if(*ptr == '%') {
+ /* There might be a zone identifier */
+ if(strncmp("%25", ptr, 3))
+ infof(data, "Please URL encode %% as %%25, see RFC 6874.\n");
ptr++;
+ /* Allow unreserved characters as defined in RFC 3986 */
+ while(*ptr && (ISALPHA(*ptr) || ISXDIGIT(*ptr) || (*ptr == '-') ||
+ (*ptr == '.') || (*ptr == '_') || (*ptr == '~')))
+ ptr++;
+ }
if(*ptr == ']')
/* yeps, it ended nicely with a bracket as well */
*ptr++ = 0;
--
1.7.1
From 5894ce84ce36fb460df0580754cab17142430f00 Mon Sep 17 00:00:00 2001
From: Dan Fandrich <dan@coneharvesters.com>
Date: Mon, 31 Mar 2014 09:02:55 +0200
Subject: [PATCH 3/4] docs: Removed mention of -g hack when using IPv6 literals
This limitation was removed in commit 0bc4938e
[upstream commit ed4972ffdb11fc62a8bae33ff4eafbd73973ad9f]
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
---
docs/MANUAL | 2 +-
docs/TODO | 8 --------
2 files changed, 1 insertions(+), 9 deletions(-)
diff --git a/docs/MANUAL b/docs/MANUAL
index da8f602..11960e1 100644
--- a/docs/MANUAL
+++ b/docs/MANUAL
@@ -50,7 +50,7 @@ SIMPLE USAGE
Get the main page from an IPv6 web server:
- curl -g "http://[2001:1890:1112:1::20]/"
+ curl "http://[2001:1890:1112:1::20]/"
DOWNLOAD TO A FILE
diff --git a/docs/TODO b/docs/TODO
index 2b7ac96..871261a 100644
--- a/docs/TODO
+++ b/docs/TODO
@@ -88,7 +88,6 @@
15.4 simultaneous parallel transfers
15.5 provide formpost headers
15.6 warning when setting an option
- 15.7 IPv6 addresses with globbing
16. Build
16.1 roffit
@@ -489,13 +488,6 @@ to provide the data to send.
This can be useful to tell when support for a particular feature hasn't been
compiled into the library.
-15.7 IPv6 addresses with globbing
-
- Currently the command line client needs to get url globbing disabled (with
- -g) for it to support IPv6 numerical addresses. This is a rather silly flaw
- that should be corrected. It probably involves a smarter detection of the
- '[' and ']' letters.
-
16. Build
16.1 roffit
--
1.7.1
From 3e33d0d436d0d6817480172db89836b3d5ba9db5 Mon Sep 17 00:00:00 2001
From: Daniel Stenberg <daniel@haxx.se>
Date: Mon, 31 Mar 2014 09:35:32 +0200
Subject: [PATCH 4/4] ipv6: strip off zone identifiers in redirects too
Follow up to 9317eced984 makes test 1056 work again.
[upstream commit 13682d1a24bba5386530805d8fbcf987b19c3552]
Signed-off-by: Kamil Dudka <kdudka@redhat.com>
---
lib/url.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/lib/url.c b/lib/url.c
index 40751cc..ebd38cc 100644
--- a/lib/url.c
+++ b/lib/url.c
@@ -3951,7 +3951,7 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data,
if(result != CURLE_OK)
return result;
- if(conn->host.name[0] == '[' && !data->state.this_is_a_follow) {
+ if(conn->host.name[0] == '[') {
/* This looks like an IPv6 address literal. See if there is an address
scope if there is no location header */
char *percent = strchr(conn->host.name, '%');
--
1.7.1

View File

@ -1,7 +1,7 @@
Summary: A utility for getting files from remote servers (FTP, HTTP, and others)
Name: curl
Version: 7.36.0
Release: 1%{?dist}
Release: 2%{?dist}
License: MIT
Group: Applications/Internet
Source: http://curl.haxx.se/download/%{name}-%{version}.tar.lzma
@ -10,6 +10,9 @@ Source2: curlbuild.h
# adapt tests 815 and 816 such that they work with the fix for CVE-2014-0138
Patch1: 0001-curl-7.36.0-f82e0edc.patch
# extend URL parser to support IPv6 zone identifiers (#680996)
Patch2: 0002-curl-7.36.0-9317eced.patch
# patch making libcurl multilib ready
Patch101: 0101-curl-7.32.0-multilib.patch
@ -123,6 +126,7 @@ documentation of the library, too.
# upstream patches
%patch1 -p1
%patch2 -p1
# Fedora patches
%patch101 -p1
@ -244,6 +248,9 @@ rm -rf $RPM_BUILD_ROOT
%{_datadir}/aclocal/libcurl.m4
%changelog
* Wed Apr 02 2014 Kamil Dudka <kdudka@redhat.com> 7.36.0-2
- extend URL parser to support IPv6 zone identifiers (#680996)
* Wed Mar 26 2014 Kamil Dudka <kdudka@redhat.com> 7.36.0-1
- new upstream release (fixes CVE-2014-0138)