From 3d1fa5aee501d0a8ca82c5d7b4964648f0092523 Mon Sep 17 00:00:00 2001 From: Paul Marks Date: Sun, 30 Mar 2014 07:50:37 +0200 Subject: [PATCH 1/4] curl: stop interpreting IPv6 literals as glob patterns. This makes it possible to fetch from an IPv6 literal without specifying the -g option. Globbing remains available elsewhere in the URL. For example: curl http://[::1]/file[1-3].txt This creates no ambiguity, because there is no overlap between the syntax of valid globs and valid IPv6 literals. Globs contain hyphens and at most 1 colon, while IPv6 literals have no hyphens, and at least 2 colons. The peek_ipv6() parser simply whitelists a set of characters and counts colons, because the real validation happens later on. The character set includes A-Z, in case someone decides to implement support for scopes like [fe80::1%25eth0] in the future. Signed-off-by: Paul Marks [upstream commit 0bc4938eecccefdf8906bf9c488e4cd9c8467e99] Signed-off-by: Kamil Dudka --- src/tool_urlglob.c | 48 +++++++++++++++++++++++++++++++++++++++++++++--- tests/data/test1230 | 2 +- 2 files changed, 46 insertions(+), 4 deletions(-) diff --git a/src/tool_urlglob.c b/src/tool_urlglob.c index ec5014b..943e0ab 100644 --- a/src/tool_urlglob.c +++ b/src/tool_urlglob.c @@ -5,7 +5,7 @@ * | (__| |_| | _ <| |___ * \___|\___/|_| \_\_____| * - * Copyright (C) 1998 - 2013, Daniel Stenberg, , et al. + * Copyright (C) 1998 - 2014, Daniel Stenberg, , et al. * * This software is licensed as described in the file COPYING, which * you should have received as part of this distribution. The terms @@ -302,6 +302,36 @@ static GlobCode glob_range(URLGlob *glob, char **patternp, return GLOB_OK; } +static bool peek_ipv6(const char *str, size_t *skip) +{ + /* + * Scan for a potential IPv6 literal. + * - Valid globs contain a hyphen and <= 1 colon. + * - IPv6 literals contain no hyphens and >= 2 colons. 
+ */ + size_t i = 0; + size_t colons = 0; + if(str[i++] != '[') { + return FALSE; + } + for(;;) { + const char c = str[i++]; + if(ISALNUM(c) || c == '.' || c == '%') { + /* ok */ + } + else if(c == ':') { + colons++; + } + else if(c == ']') { + *skip = i; + return colons >= 2; + } + else { + return FALSE; + } + } +} + static GlobCode glob_parse(URLGlob *glob, char *pattern, size_t pos, unsigned long *amount) { @@ -315,8 +345,20 @@ static GlobCode glob_parse(URLGlob *glob, char *pattern, while(*pattern && !res) { char *buf = glob->glob_buffer; - int sublen = 0; - while(*pattern && *pattern != '{' && *pattern != '[') { + size_t sublen = 0; + while(*pattern && *pattern != '{') { + if(*pattern == '[') { + /* Skip over potential IPv6 literals. */ + size_t skip; + if(peek_ipv6(pattern, &skip)) { + memcpy(buf, pattern, skip); + buf += skip; + pattern += skip; + sublen += skip; + continue; + } + break; + } if(*pattern == '}' || *pattern == ']') return GLOBERROR("unmatched close brace/bracket", pos, GLOB_ERROR); diff --git a/tests/data/test1230 b/tests/data/test1230 index b16269d..3c1d3d4 100644 --- a/tests/data/test1230 +++ b/tests/data/test1230 @@ -56,7 +56,7 @@ HTTP CONNECT to IPv6 numerical address # 0x4ce == 1230, the test number --g http://[1234:1234:1234::4ce]:%HTTPPORT/wanted/page/1230 -p -x %HOSTIP:%HTTPPORT +http://[1234:1234:1234::4ce]:%HTTPPORT/wanted/page/1230 -p -x %HOSTIP:%HTTPPORT -- 1.7.1 From 38c0e09f4a020fdcdcfeb149d89d8551e534143f Mon Sep 17 00:00:00 2001 From: Till Maas Date: Sat, 15 Mar 2014 22:42:50 +0100 Subject: [PATCH 2/4] URL parser: IPv6 zone identifiers are now supported [upstream commit 9317eced98408c7fefa6dd5f1559050e1ec8a3b7] Signed-off-by: Kamil Dudka --- docs/KNOWN_BUGS | 11 +-------- docs/MANUAL | 6 ++-- lib/url.c | 69 +++++++++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 61 insertions(+), 25 deletions(-) diff --git a/docs/KNOWN_BUGS b/docs/KNOWN_BUGS index ad997a0..c8ad032 100644 --- a/docs/KNOWN_BUGS +++ 
b/docs/KNOWN_BUGS @@ -180,16 +180,7 @@ may have been fixed since this was written! --cflags suffers from the same effects with CFLAGS/CPPFLAGS. 30. You need to use -g to the command line tool in order to use RFC2732-style - IPv6 numerical addresses in URLs. - -29. IPv6 URLs with zone ID is not nicely supported. - http://www.ietf.org/internet-drafts/draft-fenner-literal-zone-02.txt (expired) - specifies the use of a plus sign instead of a percent when specifying zone - IDs in URLs to get around the problem of percent signs being - special. According to the reporter, Firefox deals with the URL _with_ a - percent letter (which seems like a blatant URL spec violation). - libcurl supports zone IDs where the percent sign is URL-escaped (i.e. %25): - http://curl.haxx.se/bug/view.cgi?id=555 + or RFC6874-style IPv6 numerical addresses in URLs. 26. NTLM authentication using SSPI (on Windows) when (lib)curl is running in "system context" will make it use wrong(?) user name - at least when compared diff --git a/docs/MANUAL b/docs/MANUAL index 4ad2e13..da8f602 100644 --- a/docs/MANUAL +++ b/docs/MANUAL @@ -956,9 +956,9 @@ IPv6 When this style is used, the -g option must be given to stop curl from interpreting the square brackets as special globbing characters. Link local and site local addresses including a scope identifier, such as fe80::1234%1, - may also be used, but the scope portion must be numeric and the percent - character must be URL escaped. The previous example in an SFTP URL might - look like: + may also be used, but the scope portion must be numeric or match an existing + network interface on Linux and the percent character must be URL escaped. 
The + previous example in an SFTP URL might look like: sftp://[fe80::1234%251]/ diff --git a/lib/url.c b/lib/url.c index 0e420c7..40751cc 100644 --- a/lib/url.c +++ b/lib/url.c @@ -3951,23 +3951,59 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data, if(result != CURLE_OK) return result; - if(conn->host.name[0] == '[') { + if(conn->host.name[0] == '[' && !data->state.this_is_a_follow) { /* This looks like an IPv6 address literal. See if there is an address - scope. */ - char *percent = strstr (conn->host.name, "%25"); + scope if there is no location header */ + char *percent = strchr(conn->host.name, '%'); if(percent) { + unsigned int identifier_offset = 3; char *endp; - unsigned long scope = strtoul (percent + 3, &endp, 10); + unsigned long scope; + if(strncmp("%25", percent, 3) != 0) { + infof(data, + "Please URL encode %% as %%25, see RFC 6874.\n"); + identifier_offset = 1; + } + scope = strtoul(percent + identifier_offset, &endp, 10); if(*endp == ']') { /* The address scope was well formed. Knock it out of the hostname. 
*/ memmove(percent, endp, strlen(endp)+1); - if(!data->state.this_is_a_follow) - /* Don't honour a scope given in a Location: header */ - conn->scope = (unsigned int)scope; + conn->scope = (unsigned int)scope; + } + else { + /* Zone identifier is not numeric */ +#ifdef HAVE_NET_IF_H + char ifname[IFNAMSIZ + 2]; + char *square_bracket; + unsigned int scopeidx = 0; + strncpy(ifname, percent + identifier_offset, IFNAMSIZ + 2); + /* Ensure nullbyte termination */ + ifname[IFNAMSIZ + 1] = '\0'; + square_bracket = strchr(ifname, ']'); + if(square_bracket) { + /* Remove ']' */ + *square_bracket = '\0'; + scopeidx = if_nametoindex(ifname); + if(scopeidx == 0) { + infof(data, "Invalid network interface: %s; %s\n", ifname, + strerror(errno)); + } + } + if(scopeidx > 0) { + /* Remove zone identifier from hostname */ + memmove(percent, + percent + identifier_offset + strlen(ifname), + identifier_offset + strlen(ifname)); + conn->scope = scopeidx; + } + else { +#endif /* HAVE_NET_IF_H */ + infof(data, "Invalid IPv6 address format\n"); +#ifdef HAVE_NET_IF_H + } +#endif /* HAVE_NET_IF_H */ } - else - infof(data, "Invalid IPv6 address format\n"); } } @@ -4350,12 +4386,21 @@ static CURLcode parse_proxy(struct SessionHandle *data, /* start scanning for port number at this point */ portptr = proxyptr; - /* detect and extract RFC2732-style IPv6-addresses */ + /* detect and extract RFC6874-style IPv6-addresses */ if(*proxyptr == '[') { char *ptr = ++proxyptr; /* advance beyond the initial bracket */ - while(*ptr && (ISXDIGIT(*ptr) || (*ptr == ':') || (*ptr == '%') || - (*ptr == '.'))) + while(*ptr && (ISXDIGIT(*ptr) || (*ptr == ':') || (*ptr == '.'))) + ptr++; + if(*ptr == '%') { + /* There might be a zone identifier */ + if(strncmp("%25", ptr, 3)) + infof(data, "Please URL encode %% as %%25, see RFC 6874.\n"); ptr++; + /* Allow unreserved characters as defined in RFC 3986 */ + while(*ptr && (ISALPHA(*ptr) || ISXDIGIT(*ptr) || (*ptr == '-') || + (*ptr == '.') || (*ptr == '_') || (*ptr 
== '~'))) + ptr++; + } if(*ptr == ']') /* yeps, it ended nicely with a bracket as well */ *ptr++ = 0; -- 1.7.1 From 5894ce84ce36fb460df0580754cab17142430f00 Mon Sep 17 00:00:00 2001 From: Dan Fandrich Date: Mon, 31 Mar 2014 09:02:55 +0200 Subject: [PATCH 3/4] docs: Removed mention of -g hack when using IPv6 literals This limitation was removed in commit 0bc4938e [upstream commit ed4972ffdb11fc62a8bae33ff4eafbd73973ad9f] Signed-off-by: Kamil Dudka --- docs/MANUAL | 2 +- docs/TODO | 8 -------- 2 files changed, 1 insertions(+), 9 deletions(-) diff --git a/docs/MANUAL b/docs/MANUAL index da8f602..11960e1 100644 --- a/docs/MANUAL +++ b/docs/MANUAL @@ -50,7 +50,7 @@ SIMPLE USAGE Get the main page from an IPv6 web server: - curl -g "http://[2001:1890:1112:1::20]/" + curl "http://[2001:1890:1112:1::20]/" DOWNLOAD TO A FILE diff --git a/docs/TODO b/docs/TODO index 2b7ac96..871261a 100644 --- a/docs/TODO +++ b/docs/TODO @@ -88,7 +88,6 @@ 15.4 simultaneous parallel transfers 15.5 provide formpost headers 15.6 warning when setting an option - 15.7 IPv6 addresses with globbing 16. Build 16.1 roffit @@ -489,13 +488,6 @@ to provide the data to send. This can be useful to tell when support for a particular feature hasn't been compiled into the library. -15.7 IPv6 addresses with globbing - - Currently the command line client needs to get url globbing disabled (with - -g) for it to support IPv6 numerical addresses. This is a rather silly flaw - that should be corrected. It probably involves a smarter detection of the - '[' and ']' letters. - 16. Build 16.1 roffit -- 1.7.1 From 3e33d0d436d0d6817480172db89836b3d5ba9db5 Mon Sep 17 00:00:00 2001 From: Daniel Stenberg Date: Mon, 31 Mar 2014 09:35:32 +0200 Subject: [PATCH 4/4] ipv6: strip off zone identifiers in redirects too Follow up to 9317eced984 makes test 1056 work again. 
[upstream commit 13682d1a24bba5386530805d8fbcf987b19c3552] Signed-off-by: Kamil Dudka --- lib/url.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/lib/url.c b/lib/url.c index 40751cc..ebd38cc 100644 --- a/lib/url.c +++ b/lib/url.c @@ -3951,7 +3951,7 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data, if(result != CURLE_OK) return result; - if(conn->host.name[0] == '[' && !data->state.this_is_a_follow) { + if(conn->host.name[0] == '[') { /* This looks like an IPv6 address literal. See if there is an address scope if there is no location header */ char *percent = strchr(conn->host.name, '%'); -- 1.7.1