QuotedStringTokenizer.java revision 953:221a84ef44c0
1/*
2 * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package jdk.nashorn.internal.runtime;
27
28import java.util.LinkedList;
29import java.util.Stack;
30import java.util.StringTokenizer;
31
/**
 * A string tokenizer that understands quoted sections. Delimiters that occur
 * inside a quoted section do not split the input; the whole quoted section
 * becomes part of a single token. By default both {@code "} and {@code '} are
 * accepted as quote characters.
 *
 * <p>The input is tokenized eagerly in the constructor. Each resulting token is
 * trimmed and, if it is wrapped in a matching pair of quote characters, the
 * outermost pair is removed and {@code \q} sequences for that quote character
 * are replaced by a plain {@code q}.
 *
 * <p>Quote matching is purely stack based and does not look at backslashes, so
 * a token is considered complete only when every quote character in it has been
 * closed by the same character.
 */
public final class QuotedStringTokenizer {
    /** Tokens that have not been handed out yet, in input order. */
    private final LinkedList<String> tokens;

    /** Characters that are treated as quotes. */
    private final char[] quotes;

    /**
     * Create a quoted string tokenizer that splits on a single space and
     * accepts {@code "} and {@code '} as quotes.
     *
     * @param str string to tokenize
     */
    public QuotedStringTokenizer(final String str) {
        this(str, " ");
    }

    /**
     * Create a quoted string tokenizer that accepts {@code "} and {@code '} as
     * quotes.
     *
     * @param str
     *            a string to tokenize
     * @param delim
     *            delimiters between tokens; every character of this string is
     *            a delimiter, as in {@link StringTokenizer}
     */
    public QuotedStringTokenizer(final String str, final String delim) {
        this(str, delim, new char[] { '"', '\'' });
    }

    /**
     * Create a quoted string tokenizer
     *
     * @param str
     *            a string to tokenize
     * @param delim
     *            delimiters between tokens; every character of this string is
     *            a delimiter, as in {@link StringTokenizer}
     * @param quotes
     *            all the characters that should be accepted as quotes, default
     *            is ' or "
     * @throws IndexOutOfBoundsException
     *             if the input ends while a quote is still open; the message
     *             is the incomplete token
     */
    private QuotedStringTokenizer(final String str, final String delim, final char[] quotes) {
        this.quotes = quotes;
        this.tokens = new LinkedList<>();

        final boolean delimIsWhitespace = isAllWhitespace(delim);

        // Ask for the delimiters as well, so that a delimiter swallowed by a
        // quoted section can be put back exactly as it appeared in the input.
        final StringTokenizer st = new StringTokenizer(str, delim, true);
        while (st.hasMoreTokens()) {
            final String first = st.nextToken();
            if (isDelimiter(first, delim)) {
                // plain separator between two tokens
                continue;
            }

            final StringBuilder token = new StringBuilder(first);
            while (unmatchedQuotesIn(token.toString())) {
                // The quote is still open: gather the delimiter run and the
                // next fragment, they all belong to the current token.
                final StringBuilder separators = new StringBuilder();
                String fragment = null;
                while (st.hasMoreTokens()) {
                    final String piece = st.nextToken();
                    if (!isDelimiter(piece, delim)) {
                        fragment = piece;
                        break;
                    }
                    separators.append(piece);
                }
                if (fragment == null) {
                    throw new IndexOutOfBoundsException(token.toString());
                }
                // Whitespace-only delimiter sets keep their historical
                // behavior of collapsing to one space; any other delimiter is
                // restored verbatim instead of inserting the whole delimiter
                // set.
                token.append(delimIsWhitespace ? " " : separators.toString()).append(fragment);
            }
            tokens.add(stripQuotes(token.toString()));
        }
    }

    /**
     * @return the number of tokens left in the tokenizer
     */
    public int countTokens() {
        return tokens.size();
    }

    /**
     * @return true if there are tokens left
     */
    public boolean hasMoreTokens() {
        return !tokens.isEmpty();
    }

    /**
     * Remove and return the next token.
     *
     * @return the next token in the tokenizer
     * @throws java.util.NoSuchElementException if there are no tokens left
     */
    public String nextToken() {
        return tokens.removeFirst();
    }

    /**
     * Trim the value and, if it is enclosed in a matching pair of quote
     * characters, remove that outermost pair and unescape {@code \q} to
     * {@code q} for the removed quote character. Only one layer is removed, so
     * quotes nested inside the value are preserved regardless of the order of
     * the quote characters.
     */
    private String stripQuotes(final String value0) {
        final String value = value0.trim();
        if (value.length() < 2) {
            return value;
        }
        final char first = value.charAt(0);
        if (!isQuote(first) || value.charAt(value.length() - 1) != first) {
            return value;
        }
        final String q = String.valueOf(first);
        // \q sequences inside the value are just plain q now
        return value.substring(1, value.length() - 1).replace("\\" + q, q);
    }

    /**
     * Check whether the string still has an open quote. A quote character
     * closes the most recently opened quote if it is the same character,
     * otherwise it opens a new nested quote.
     */
    private boolean unmatchedQuotesIn(final String str) {
        // used as a stack of currently open quote characters
        final StringBuilder open = new StringBuilder();
        for (int i = 0; i < str.length(); i++) {
            final char c = str.charAt(i);
            if (!isQuote(c)) {
                continue;
            }
            final int top = open.length() - 1;
            if (top >= 0 && open.charAt(top) == c) {
                open.setLength(top);
            } else {
                open.append(c);
            }
        }
        return open.length() > 0;
    }

    /** Check whether the character is one of the configured quote characters. */
    private boolean isQuote(final char c) {
        for (final char q : quotes) {
            if (q == c) {
                return true;
            }
        }
        return false;
    }

    /** Check whether every character of the string is whitespace. */
    private static boolean isAllWhitespace(final String s) {
        for (int i = 0; i < s.length(); i++) {
            if (!Character.isWhitespace(s.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Check whether a piece returned by the delimiter-returning tokenizer is a
     * delimiter: such a piece is exactly one code point taken from the
     * delimiter set, while any other piece contains no delimiter at all.
     */
    private static boolean isDelimiter(final String piece, final String delim) {
        return piece.codePointCount(0, piece.length()) == 1 && delim.indexOf(piece.codePointAt(0)) >= 0;
    }
}
157