Skip to content

Commit 51230d9

Browse files
committed
Make RFC 3986 URI parser strict
1 parent 1a6a529 commit 51230d9

File tree

5 files changed

+329
-33
lines changed

5 files changed

+329
-33
lines changed

httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/uris/Rfc3986Equivalence.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040
/**
4141
* RFC 3986 equivalence utilities (§6.2).
4242
*
43-
* @since 5.4
43+
* @since 5.5
4444
*/
4545
@Internal
4646
@Contract(threading = ThreadingBehavior.STATELESS)

httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/uris/Rfc3986Normalizer.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
/**
4040
* Normalization and canonicalization helpers.
4141
*
42-
* @since 5.4
42+
* @since 5.5
4343
*/
4444
@Internal
4545
@Contract(threading = ThreadingBehavior.STATELESS)

httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/uris/Rfc3986Parser.java

Lines changed: 284 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727

2828
package org.apache.hc.core5.net.uri.internal.uris;
2929

30+
import static org.apache.hc.core5.util.TextUtils.isHex;
31+
3032
import org.apache.hc.core5.annotation.Contract;
3133
import org.apache.hc.core5.annotation.Internal;
3234
import org.apache.hc.core5.annotation.ThreadingBehavior;
@@ -39,7 +41,7 @@
3941
/**
4042
* Parser for {@link Rfc3986Uri}.
4143
*
42-
* @since 5.4
44+
* @since 5.5
4345
*/
4446
@Internal
4547
@Contract(threading = ThreadingBehavior.STATELESS)
@@ -80,29 +82,40 @@ public static Rfc3986Uri parse(final String s) {
8082
} else {
8183
hostStart = authStart;
8284
}
83-
if (hostStart >= authEnd) {
84-
throw new IllegalArgumentException("Empty host in authority");
85-
}
8685

87-
if (buf[hostStart] == '[') {
88-
final int rb = indexOf(buf, ']', hostStart + 1, authEnd);
89-
if (rb < 0) {
90-
throw new IllegalArgumentException("Unclosed IPv6 literal");
91-
}
92-
host = s.substring(hostStart, rb + 1); // keep literal verbatim
93-
if (rb + 1 < authEnd && buf[rb + 1] == ':') {
94-
port = Ports.parsePort(buf, rb + 2, authEnd);
95-
}
86+
// RFC 3986 allows empty host in authority (reg-name can be empty), e.g. file:///path
87+
if (hostStart >= authEnd) {
88+
host = "";
89+
cur.updatePos(authEnd);
9690
} else {
97-
final int colon = lastIndexOf(buf, ':', hostStart, authEnd);
98-
if (colon >= 0) {
99-
host = Ascii.lowerAscii(s.substring(hostStart, colon));
100-
port = Ports.parsePort(buf, colon + 1, authEnd);
91+
if (buf[hostStart] == '[') {
92+
final int rb = indexOf(buf, ']', hostStart + 1, authEnd);
93+
if (rb < 0) {
94+
throw new IllegalArgumentException("Unclosed IPv6 literal");
95+
}
96+
host = Ascii.lowerAscii(s.substring(hostStart, rb + 1)); // normalize consistently
97+
if (rb + 1 < authEnd && buf[rb + 1] == ':') {
98+
if (rb + 2 == authEnd) {
99+
port = -1; // empty port is syntactically allowed
100+
} else {
101+
port = Ports.parsePort(buf, rb + 2, authEnd);
102+
}
103+
}
101104
} else {
102-
host = Ascii.lowerAscii(s.substring(hostStart, authEnd));
105+
final int colon = lastIndexOf(buf, ':', hostStart, authEnd);
106+
if (colon >= 0) {
107+
host = Ascii.lowerAscii(s.substring(hostStart, colon));
108+
if (colon + 1 == authEnd) {
109+
port = -1; // empty port is syntactically allowed
110+
} else {
111+
port = Ports.parsePort(buf, colon + 1, authEnd);
112+
}
113+
} else {
114+
host = Ascii.lowerAscii(s.substring(hostStart, authEnd));
115+
}
103116
}
117+
cur.updatePos(authEnd);
104118
}
105-
cur.updatePos(authEnd);
106119
}
107120

108121
// ---- path ----
@@ -125,9 +138,259 @@ public static Rfc3986Uri parse(final String s) {
125138
cur.updatePos(buf.length);
126139
}
127140

128-
return new Rfc3986Uri(s, scheme, userInfo, host, port, path, query, fragment);
141+
final Rfc3986Uri u = new Rfc3986Uri(s, scheme, userInfo, host, port, path, query, fragment);
142+
143+
// strict-by-default validation
144+
validateScheme(u.getScheme());
145+
validateUserInfo(u.getUserInfo());
146+
validateHost(u.getHost());
147+
validatePath(u.getPath());
148+
validateQuery(u.getQuery());
149+
validateFragment(u.getFragment());
150+
151+
return u;
152+
}
153+
154+
private static void validateScheme(final String scheme) {
155+
if (scheme == null) {
156+
return;
157+
}
158+
for (int i = 0; i < scheme.length(); i++) {
159+
final char c = scheme.charAt(i);
160+
if (c > 0x7F) {
161+
throw new IllegalArgumentException("Non-ASCII character in scheme");
162+
}
163+
}
164+
}
165+
166+
private static void validateUserInfo(final String userInfo) {
167+
if (userInfo == null) {
168+
return;
169+
}
170+
validatePctEncoding(userInfo, "userinfo");
171+
for (int i = 0; i < userInfo.length(); i++) {
172+
final char c = userInfo.charAt(i);
173+
if (c == '%') {
174+
i += 2;
175+
continue;
176+
}
177+
if (c > 0x7F) {
178+
throw new IllegalArgumentException("Non-ASCII character in userinfo");
179+
}
180+
if (!isUnreserved(c) && !isSubDelim(c) && c != ':') {
181+
throw new IllegalArgumentException("Illegal character in userinfo");
182+
}
183+
}
184+
}
185+
186+
private static void validateHost(final String host) {
187+
if (host == null) {
188+
return;
189+
}
190+
if (host.isEmpty()) {
191+
return; // allowed (reg-name is *)
192+
}
193+
if (host.charAt(0) == '[') {
194+
if (host.charAt(host.length() - 1) != ']') {
195+
throw new IllegalArgumentException("Unclosed IP-literal");
196+
}
197+
final String inside = host.substring(1, host.length() - 1);
198+
validateIpLiteral(inside);
199+
return;
200+
}
201+
validatePctEncoding(host, "host");
202+
for (int i = 0; i < host.length(); i++) {
203+
final char c = host.charAt(i);
204+
if (c == '%') {
205+
i += 2;
206+
continue;
207+
}
208+
if (c > 0x7F) {
209+
throw new IllegalArgumentException("Non-ASCII character in host");
210+
}
211+
if (!isUnreserved(c) && !isSubDelim(c)) {
212+
throw new IllegalArgumentException("Illegal character in host");
213+
}
214+
}
215+
}
216+
217+
private static void validatePath(final String path) {
218+
if (path == null) {
219+
return;
220+
}
221+
validatePctEncoding(path, "path");
222+
for (int i = 0; i < path.length(); i++) {
223+
final char c = path.charAt(i);
224+
if (c == '%') {
225+
i += 2;
226+
continue;
227+
}
228+
if (c > 0x7F) {
229+
throw new IllegalArgumentException("Non-ASCII character in path");
230+
}
231+
if (c == '/') {
232+
continue;
233+
}
234+
if (!isUnreserved(c) && !isSubDelim(c) && c != ':' && c != '@') {
235+
throw new IllegalArgumentException("Illegal character in path");
236+
}
237+
}
238+
}
239+
240+
private static void validateQuery(final String query) {
241+
if (query == null) {
242+
return;
243+
}
244+
validatePctEncoding(query, "query");
245+
for (int i = 0; i < query.length(); i++) {
246+
final char c = query.charAt(i);
247+
if (c == '%') {
248+
i += 2;
249+
continue;
250+
}
251+
if (c > 0x7F) {
252+
throw new IllegalArgumentException("Non-ASCII character in query");
253+
}
254+
if (c == '/' || c == '?') {
255+
continue;
256+
}
257+
if (!isUnreserved(c) && !isSubDelim(c) && c != ':' && c != '@') {
258+
throw new IllegalArgumentException("Illegal character in query");
259+
}
260+
}
261+
}
262+
263+
private static void validateFragment(final String fragment) {
264+
if (fragment == null) {
265+
return;
266+
}
267+
validatePctEncoding(fragment, "fragment");
268+
for (int i = 0; i < fragment.length(); i++) {
269+
final char c = fragment.charAt(i);
270+
if (c == '%') {
271+
i += 2;
272+
continue;
273+
}
274+
if (c > 0x7F) {
275+
throw new IllegalArgumentException("Non-ASCII character in fragment");
276+
}
277+
if (c == '/' || c == '?') {
278+
continue;
279+
}
280+
if (!isUnreserved(c) && !isSubDelim(c) && c != ':' && c != '@') {
281+
throw new IllegalArgumentException("Illegal character in fragment");
282+
}
283+
}
284+
}
285+
286+
private static void validatePctEncoding(final String s, final String component) {
287+
for (int i = 0; i < s.length(); i++) {
288+
final char c = s.charAt(i);
289+
if (c == '%') {
290+
if (i + 2 >= s.length()) {
291+
throw new IllegalArgumentException("Incomplete pct-encoding in " + component);
292+
}
293+
final char h1 = s.charAt(i + 1);
294+
final char h2 = s.charAt(i + 2);
295+
if (!isHex(h1) || !isHex(h2)) {
296+
throw new IllegalArgumentException("Invalid pct-encoding in " + component);
297+
}
298+
i += 2;
299+
} else if (c <= 0x1F || c == 0x7F) {
300+
throw new IllegalArgumentException("Control character in " + component);
301+
}
302+
}
303+
}
304+
305+
private static void validateIpLiteral(final String inside) {
306+
if (inside.isEmpty()) {
307+
throw new IllegalArgumentException("Empty IP-literal");
308+
}
309+
final char first = inside.charAt(0);
310+
if (first == 'v' || first == 'V') {
311+
validateIpvFuture(inside);
312+
} else {
313+
validateIpv6Address(inside);
314+
}
315+
}
316+
317+
private static void validateIpvFuture(final String s) {
318+
int i = 1;
319+
if (i >= s.length()) {
320+
throw new IllegalArgumentException("Invalid IPvFuture");
321+
}
322+
int hexdigs = 0;
323+
while (i < s.length() && isHex(s.charAt(i))) {
324+
hexdigs++;
325+
i++;
326+
}
327+
if (hexdigs == 0 || i >= s.length() || s.charAt(i) != '.') {
328+
throw new IllegalArgumentException("Invalid IPvFuture");
329+
}
330+
i++;
331+
if (i >= s.length()) {
332+
throw new IllegalArgumentException("Invalid IPvFuture");
333+
}
334+
int tail = 0;
335+
while (i < s.length()) {
336+
final char c = s.charAt(i);
337+
if (c > 0x7F) {
338+
throw new IllegalArgumentException("Non-ASCII character in IPvFuture");
339+
}
340+
if (!isUnreserved(c) && !isSubDelim(c) && c != ':') {
341+
throw new IllegalArgumentException("Illegal character in IPvFuture");
342+
}
343+
tail++;
344+
i++;
345+
}
346+
if (tail == 0) {
347+
throw new IllegalArgumentException("Invalid IPvFuture");
348+
}
349+
}
350+
351+
private static void validateIpv6Address(final String s) {
352+
if (s.indexOf(":::") >= 0) {
353+
throw new IllegalArgumentException("Invalid IPv6 literal");
354+
}
355+
final int dbl = s.indexOf("::");
356+
if (dbl >= 0 && s.indexOf("::", dbl + 2) >= 0) {
357+
throw new IllegalArgumentException("Invalid IPv6 literal");
358+
}
359+
for (int i = 0; i < s.length(); i++) {
360+
final char c = s.charAt(i);
361+
if (c > 0x7F) {
362+
throw new IllegalArgumentException("Non-ASCII character in IPv6 literal");
363+
}
364+
if (!(isHex(c) || c == ':' || c == '.')) {
365+
throw new IllegalArgumentException("Illegal character in IPv6 literal");
366+
}
367+
}
368+
// This is "strict enough" for RFC 3986 IP-literal validation without pulling in InetAddress.
369+
// It rejects obvious junk and enforces the '::' compression constraint.
370+
}
371+
372+
private static boolean isUnreserved(final char c) {
373+
return Ascii.isAlpha(c) || Ascii.isDigit(c) || c == '-' || c == '.' || c == '_' || c == '~';
129374
}
130375

376+
private static boolean isSubDelim(final char c) {
377+
switch (c) {
378+
case '!':
379+
case '$':
380+
case '&':
381+
case '\'':
382+
case '(':
383+
case ')':
384+
case '*':
385+
case '+':
386+
case ',':
387+
case ';':
388+
case '=':
389+
return true;
390+
default:
391+
return false;
392+
}
393+
}
131394

132395
private static int scanScheme(final char[] a, final int from, final int toExcl) {
133396
int finalFrom = from;
@@ -163,12 +426,7 @@ private static int scanUntil(final char[] a, final int from, final int toExcl, f
163426
}
164427

165428
private static int indexOf(final char[] a, final char ch, final int from, final int toExcl) {
166-
for (int i = from; i < toExcl; i++) {
167-
if (a[i] == ch) {
168-
return i;
169-
}
170-
}
171-
return -1;
429+
return Ascii.indexOf(a, ch, from, toExcl);
172430
}
173431

174432
private static int lastIndexOf(final char[] a, final char ch, final int from, final int toExcl) {

httpcore5/src/main/java/org/apache/hc/core5/net/uri/internal/uris/Rfc3986Renderer.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
/**
3535
* Pre-sized StringBuilder renderer of URI components.
3636
*
37-
* @since 5.4
37+
* @since 5.5
3838
*/
3939
@Internal
4040
@Contract(threading = ThreadingBehavior.STATELESS)

0 commit comments

Comments
 (0)