// URIUtils.java revision 953:221a84ef44c0
1/* 2 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26package jdk.nashorn.internal.runtime; 27 28import static jdk.nashorn.internal.runtime.ECMAErrors.uriError; 29 30/** 31 * URI handling global functions. 
ECMA 15.1.3 URI Handling Function Properties 32 * 33 */ 34public final class URIUtils { 35 36 private URIUtils() { 37 } 38 39 static String encodeURI(final Object self, final String string) { 40 return encode(self, string, false); 41 } 42 43 static String encodeURIComponent(final Object self, final String string) { 44 return encode(self, string, true); 45 } 46 47 static String decodeURI(final Object self, final String string) { 48 return decode(self, string, false); 49 } 50 51 static String decodeURIComponent(final Object self, final String string) { 52 return decode(self, string, true); 53 } 54 55 // abstract encode function 56 private static String encode(final Object self, final String string, final boolean component) { 57 if (string.isEmpty()) { 58 return string; 59 } 60 61 final int len = string.length(); 62 final StringBuilder sb = new StringBuilder(); 63 64 for (int k = 0; k < len; k++) { 65 final char C = string.charAt(k); 66 if (isUnescaped(C, component)) { 67 sb.append(C); 68 continue; 69 } 70 71 if (C >= 0xDC00 && C <= 0xDFFF) { 72 return error(string, k); 73 } 74 75 int V; 76 if (C < 0xD800 || C > 0xDBFF) { 77 V = C; 78 } else { 79 k++; 80 if (k == len) { 81 return error(string, k); 82 } 83 84 final char kChar = string.charAt(k); 85 if (kChar < 0xDC00 || kChar > 0xDFFF) { 86 return error(string, k); 87 } 88 V = ((C - 0xD800) * 0x400 + (kChar - 0xDC00) + 0x10000); 89 } 90 91 try { 92 sb.append(toHexEscape(V)); 93 } catch (final Exception e) { 94 throw uriError(e, "bad.uri", string, Integer.toString(k)); 95 } 96 } 97 98 return sb.toString(); 99 } 100 101 // abstract decode function 102 private static String decode(final Object self, final String string, final boolean component) { 103 if (string.isEmpty()) { 104 return string; 105 } 106 107 final int len = string.length(); 108 final StringBuilder sb = new StringBuilder(); 109 110 for (int k = 0; k < len; k++) { 111 final char ch = string.charAt(k); 112 if (ch != '%') { 113 sb.append(ch); 114 continue; 115 
} 116 final int start = k; 117 if (k + 2 >= len) { 118 return error(string, k); 119 } 120 121 int B = toHexByte(string.charAt(k + 1), string.charAt(k + 2)); 122 if (B < 0) { 123 return error(string, k + 1); 124 } 125 126 k += 2; 127 char C; 128 // Most significant bit is zero 129 if ((B & 0x80) == 0) { 130 C = (char) B; 131 if (!component && URI_RESERVED.indexOf(C) >= 0) { 132 for (int j = start; j <= k; j++) { 133 sb.append(string.charAt(j)); 134 } 135 } else { 136 sb.append(C); 137 } 138 } else { 139 // n is utf8 length, V is codepoint and minV is lower bound 140 int n, V, minV; 141 142 if ((B & 0xC0) == 0x80) { 143 // 10xxxxxx - illegal first byte 144 return error(string, k); 145 } else if ((B & 0x20) == 0) { 146 // 110xxxxx 10xxxxxx 147 n = 2; 148 V = B & 0x1F; 149 minV = 0x80; 150 } else if ((B & 0x10) == 0) { 151 // 1110xxxx 10xxxxxx 10xxxxxx 152 n = 3; 153 V = B & 0x0F; 154 minV = 0x800; 155 } else if ((B & 0x08) == 0) { 156 // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx 157 n = 4; 158 V = B & 0x07; 159 minV = 0x10000; 160 } else if ((B & 0x04) == 0) { 161 // 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 162 n = 5; 163 V = B & 0x03; 164 minV = 0x200000; 165 } else if ((B & 0x02) == 0) { 166 // 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 167 n = 6; 168 V = B & 0x01; 169 minV = 0x4000000; 170 } else { 171 return error(string, k); 172 } 173 174 // check bound for sufficient chars 175 if (k + (3*(n-1)) >= len) { 176 return error(string, k); 177 } 178 179 for (int j = 1; j < n; j++) { 180 k++; 181 if (string.charAt(k) != '%') { 182 return error(string, k); 183 } 184 185 B = toHexByte(string.charAt(k + 1), string.charAt(k + 2)); 186 if (B < 0 || (B & 0xC0) != 0x80) { 187 return error(string, k + 1); 188 } 189 190 V = (V << 6) | (B & 0x3F); 191 k += 2; 192 } 193 194 // Check for overlongs and invalid codepoints. 195 // The high and low surrogate halves used by UTF-16 196 // (U+D800 through U+DFFF) are not legal Unicode values. 
197 if ((V < minV) || (V >= 0xD800 && V <= 0xDFFF)) { 198 V = Integer.MAX_VALUE; 199 } 200 201 if (V < 0x10000) { 202 C = (char) V; 203 if (!component && URI_RESERVED.indexOf(C) >= 0) { 204 for (int j = start; j != k; j++) { 205 sb.append(string.charAt(j)); 206 } 207 } else { 208 sb.append(C); 209 } 210 } else { // V >= 0x10000 211 if (V > 0x10FFFF) { 212 return error(string, k); 213 } 214 final int L = ((V - 0x10000) & 0x3FF) + 0xDC00; 215 final int H = (((V - 0x10000) >> 10) & 0x3FF) + 0xD800; 216 sb.append((char) H); 217 sb.append((char) L); 218 } 219 } 220 } 221 222 return sb.toString(); 223 } 224 225 private static int hexDigit(final char ch) { 226 final char chu = Character.toUpperCase(ch); 227 if (chu >= '0' && chu <= '9') { 228 return (chu - '0'); 229 } else if (chu >= 'A' && chu <= 'F') { 230 return (chu - 'A' + 10); 231 } else { 232 return -1; 233 } 234 } 235 236 private static int toHexByte(final char ch1, final char ch2) { 237 final int i1 = hexDigit(ch1); 238 final int i2 = hexDigit(ch2); 239 if (i1 >= 0 && i2 >= 0) { 240 return (i1 << 4) | i2; 241 } 242 return -1; 243 } 244 245 private static String toHexEscape(final int u0) { 246 int u = u0; 247 int len; 248 final byte[] b = new byte[6]; 249 250 if (u <= 0x7f) { 251 b[0] = (byte) u; 252 len = 1; 253 } else { 254 // > 0x7ff -> length 2 255 // > 0xffff -> length 3 256 // and so on. each new length is an additional 5 bits from the 257 // original 11 258 // the final mask is 8-len zeros in the low part. 259 len = 2; 260 for (int mask = u >>> 11; mask != 0; mask >>>= 5) { 261 len++; 262 } 263 for (int i = len - 1; i > 0; i--) { 264 b[i] = (byte) (0x80 | (u & 0x3f)); 265 u >>>= 6; // 64 bits per octet. 
266 } 267 268 b[0] = (byte) (~((1 << (8 - len)) - 1) | u); 269 } 270 271 final StringBuilder sb = new StringBuilder(); 272 for (int i = 0; i < len; i++) { 273 sb.append('%'); 274 if ((b[i] & 0xff) < 0x10) { 275 sb.append('0'); 276 } 277 sb.append(Integer.toHexString(b[i] & 0xff).toUpperCase()); 278 } 279 280 return sb.toString(); 281 } 282 283 private static String error(final String string, final int index) { 284 throw uriError("bad.uri", string, Integer.toString(index)); 285 } 286 287 // 'uriEscaped' except for alphanumeric chars 288 private static final String URI_UNESCAPED_NONALPHANUMERIC = "-_.!~*'()"; 289 // 'uriReserved' + '#' 290 private static final String URI_RESERVED = ";/?:@&=+$,#"; 291 292 private static boolean isUnescaped(final char ch, final boolean component) { 293 if (('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z') 294 || ('0' <= ch && ch <= '9')) { 295 return true; 296 } 297 298 if (URI_UNESCAPED_NONALPHANUMERIC.indexOf(ch) >= 0) { 299 return true; 300 } 301 302 if (!component) { 303 return URI_RESERVED.indexOf(ch) >= 0; 304 } 305 306 return false; 307 } 308} 309