URIUtils.java revision 953:221a84ef44c0
1/*
2 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package jdk.nashorn.internal.runtime;
27
28import static jdk.nashorn.internal.runtime.ECMAErrors.uriError;
29
30/**
31 * URI handling global functions. ECMA 15.1.3 URI Handling Function Properties
32 *
33 */
34public final class URIUtils {
35
36    private URIUtils() {
37    }
38
39    static String encodeURI(final Object self, final String string) {
40        return encode(self, string, false);
41    }
42
43    static String encodeURIComponent(final Object self, final String string) {
44        return encode(self, string, true);
45    }
46
47    static String decodeURI(final Object self, final String string) {
48        return decode(self, string, false);
49    }
50
51    static String decodeURIComponent(final Object self, final String string) {
52        return decode(self, string, true);
53    }
54
55    // abstract encode function
56    private static String encode(final Object self, final String string, final boolean component) {
57        if (string.isEmpty()) {
58            return string;
59        }
60
61        final int len = string.length();
62        final StringBuilder sb = new StringBuilder();
63
64        for (int k = 0; k < len; k++) {
65            final char C = string.charAt(k);
66            if (isUnescaped(C, component)) {
67                sb.append(C);
68                continue;
69            }
70
71            if (C >= 0xDC00 && C <= 0xDFFF) {
72                return error(string, k);
73            }
74
75            int V;
76            if (C < 0xD800 || C > 0xDBFF) {
77                V = C;
78            } else {
79                k++;
80                if (k == len) {
81                    return error(string, k);
82                }
83
84                final char kChar = string.charAt(k);
85                if (kChar < 0xDC00 || kChar > 0xDFFF) {
86                    return error(string, k);
87                }
88                V = ((C - 0xD800) * 0x400 + (kChar - 0xDC00) + 0x10000);
89            }
90
91            try {
92                sb.append(toHexEscape(V));
93            } catch (final Exception e) {
94                throw uriError(e, "bad.uri", string, Integer.toString(k));
95            }
96        }
97
98        return sb.toString();
99    }
100
101    // abstract decode function
102    private static String decode(final Object self, final String string, final boolean component) {
103        if (string.isEmpty()) {
104            return string;
105        }
106
107        final int           len = string.length();
108        final StringBuilder sb  = new StringBuilder();
109
110        for (int k = 0; k < len; k++) {
111            final char ch = string.charAt(k);
112            if (ch != '%') {
113                sb.append(ch);
114                continue;
115            }
116            final int start = k;
117            if (k + 2 >= len) {
118                return error(string, k);
119            }
120
121            int B = toHexByte(string.charAt(k + 1), string.charAt(k + 2));
122            if (B < 0) {
123                return error(string, k + 1);
124            }
125
126            k += 2;
127            char C;
128            // Most significant bit is zero
129            if ((B & 0x80) == 0) {
130                C = (char) B;
131                if (!component && URI_RESERVED.indexOf(C) >= 0) {
132                    for (int j = start; j <= k; j++) {
133                        sb.append(string.charAt(j));
134                    }
135                } else {
136                    sb.append(C);
137                }
138            } else {
139                // n is utf8 length, V is codepoint and minV is lower bound
140                int n, V, minV;
141
142                if ((B & 0xC0) == 0x80) {
143                    // 10xxxxxx - illegal first byte
144                    return error(string, k);
145                } else if ((B & 0x20) == 0) {
146                    // 110xxxxx 10xxxxxx
147                    n = 2;
148                    V = B & 0x1F;
149                    minV = 0x80;
150                } else if ((B & 0x10) == 0) {
151                    // 1110xxxx 10xxxxxx 10xxxxxx
152                    n = 3;
153                    V = B & 0x0F;
154                    minV = 0x800;
155                } else if ((B & 0x08) == 0) {
156                    // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
157                    n = 4;
158                    V = B & 0x07;
159                    minV = 0x10000;
160                } else if ((B & 0x04) == 0) {
161                    // 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
162                    n = 5;
163                    V =  B & 0x03;
164                    minV = 0x200000;
165                } else if ((B & 0x02) == 0) {
166                    // 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
167                    n = 6;
168                    V = B & 0x01;
169                    minV = 0x4000000;
170                } else {
171                    return error(string, k);
172                }
173
174                // check bound for sufficient chars
175                if (k + (3*(n-1)) >= len) {
176                    return error(string, k);
177                }
178
179                for (int j = 1; j < n; j++) {
180                    k++;
181                    if (string.charAt(k) != '%') {
182                        return error(string, k);
183                    }
184
185                    B = toHexByte(string.charAt(k + 1), string.charAt(k + 2));
186                    if (B < 0 || (B & 0xC0) != 0x80) {
187                        return error(string, k + 1);
188                    }
189
190                    V = (V << 6) | (B & 0x3F);
191                    k += 2;
192                }
193
194                // Check for overlongs and invalid codepoints.
195                // The high and low surrogate halves used by UTF-16
196                // (U+D800 through U+DFFF) are not legal Unicode values.
197                if ((V < minV) || (V >= 0xD800 && V <= 0xDFFF)) {
198                    V = Integer.MAX_VALUE;
199                }
200
201                if (V < 0x10000) {
202                    C = (char) V;
203                    if (!component && URI_RESERVED.indexOf(C) >= 0) {
204                        for (int j = start; j != k; j++) {
205                            sb.append(string.charAt(j));
206                        }
207                    } else {
208                        sb.append(C);
209                    }
210                } else { // V >= 0x10000
211                    if (V > 0x10FFFF) {
212                        return error(string, k);
213                    }
214                    final int L = ((V - 0x10000) & 0x3FF) + 0xDC00;
215                    final int H = (((V - 0x10000) >> 10) & 0x3FF) + 0xD800;
216                    sb.append((char) H);
217                    sb.append((char) L);
218                }
219            }
220        }
221
222        return sb.toString();
223    }
224
225    private static int hexDigit(final char ch) {
226        final char chu = Character.toUpperCase(ch);
227        if (chu >= '0' && chu <= '9') {
228            return (chu - '0');
229        } else if (chu >= 'A' && chu <= 'F') {
230            return (chu - 'A' + 10);
231        } else {
232            return -1;
233        }
234    }
235
236    private static int toHexByte(final char ch1, final char ch2) {
237        final int i1 = hexDigit(ch1);
238        final int i2 = hexDigit(ch2);
239        if (i1 >= 0 && i2 >= 0) {
240            return (i1 << 4) | i2;
241        }
242        return -1;
243    }
244
245    private static String toHexEscape(final int u0) {
246        int u = u0;
247        int len;
248        final byte[] b = new byte[6];
249
250        if (u <= 0x7f) {
251            b[0] = (byte) u;
252            len = 1;
253        } else {
254            // > 0x7ff -> length 2
255            // > 0xffff -> length 3
256            // and so on. each new length is an additional 5 bits from the
257            // original 11
258            // the final mask is 8-len zeros in the low part.
259            len = 2;
260            for (int mask = u >>> 11; mask != 0; mask >>>= 5) {
261                len++;
262            }
263            for (int i = len - 1; i > 0; i--) {
264                b[i] = (byte) (0x80 | (u & 0x3f));
265                u >>>= 6; // 64 bits per octet.
266            }
267
268            b[0] = (byte) (~((1 << (8 - len)) - 1) | u);
269        }
270
271        final StringBuilder sb = new StringBuilder();
272        for (int i = 0; i < len; i++) {
273            sb.append('%');
274            if ((b[i] & 0xff) < 0x10) {
275                sb.append('0');
276            }
277            sb.append(Integer.toHexString(b[i] & 0xff).toUpperCase());
278        }
279
280        return sb.toString();
281    }
282
283    private static String error(final String string, final int index) {
284        throw uriError("bad.uri", string, Integer.toString(index));
285    }
286
287    // 'uriEscaped' except for alphanumeric chars
288    private static final String URI_UNESCAPED_NONALPHANUMERIC = "-_.!~*'()";
289    // 'uriReserved' + '#'
290    private static final String URI_RESERVED = ";/?:@&=+$,#";
291
292    private static boolean isUnescaped(final char ch, final boolean component) {
293        if (('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z')
294                || ('0' <= ch && ch <= '9')) {
295            return true;
296        }
297
298        if (URI_UNESCAPED_NONALPHANUMERIC.indexOf(ch) >= 0) {
299            return true;
300        }
301
302        if (!component) {
303            return URI_RESERVED.indexOf(ch) >= 0;
304        }
305
306        return false;
307    }
308}
309