001 package aima.basic; 002 003 import java.util.Enumeration; 004 import java.util.NoSuchElementException; 005 006 /** 007 * Replacement for StringTokenizer in java.util, beacuse of bug in the Sun's 008 * implementation. 009 * 010 * @author <A HREF="mailto:moravek@pobox.sk">Peter Moravek </A> 011 */ 012 public class Tokenizer implements Enumeration { 013 014 /** 015 * Constructs a string tokenizer for the specified string. All characters in 016 * the delim argument are the delimiters for separating tokens. If the 017 * returnTokens flag is true, then the delimiter characters are also 018 * returned as tokens. Each delimiter is returned as a string of length one. 019 * If the flag is false, the delimiter characters are skipped and only serve 020 * as separators between tokens. 021 * 022 * @param str 023 * a string to be parsed 024 * @param delim 025 * the delimiters 026 * @param returnTokens 027 * flag indicating whether to return the delimiters as tokens 028 */ 029 public Tokenizer(String str, String delim, boolean returnTokens) { 030 this.str = str; 031 this.delim = delim; 032 this.returnTokens = returnTokens; 033 034 max = str.length(); 035 } 036 037 /** 038 * Constructs a string tokenizer for the specified string. The characters in 039 * the delim argument are the delimiters for separating tokens. Delimiter 040 * characters themselves will not be treated as tokens. 041 * 042 * @param str 043 * a string to be parsed 044 * @param delim 045 * the delimiters 046 */ 047 public Tokenizer(String str, String delim) { 048 this(str, delim, false); 049 } 050 051 /** 052 * Constructs a string tokenizer for the specified string. The character in 053 * the delim argument is the delimiter for separating tokens. Delimiter 054 * character themselves will not be treated as token. 055 * 056 * @param str 057 * a string to be parsed 058 * @param delim 059 * the delimiter 060 */ 061 public Tokenizer(String str, char delim) { 062 this(str, String.valueOf(delim), false); 063 } 064 065 /** 066 * Constructs a string tokenizer for the specified string. The tokenizer 067 * uses the default delimiter set, which is " \t\n\r\f": the space 068 * character, the tab character, the newline character, the carriage-return 069 * character, and the form-feed character. Delimiter characters themselves 070 * will not be treated as tokens. 071 * 072 * @param str 073 * a string to be parsed 074 */ 075 public Tokenizer(String str) { 076 this(str, Tokenizer.DEFAULT_DELIMITERS, false); 077 } 078 079 /** 080 * Tests if there are more tokens available from this tokenizer's string. If 081 * this method returns true, then a subsequent call to nextToken with no 082 * argument will successfully return a token. 083 * 084 * @return true if and only if there is at least one token in the string 085 * after the current position; false otherwise. 086 */ 087 public boolean hasMoreTokens() { 088 return ((current < max) ? (true) 089 : (((current == max) && (max == 0 || (returnTokens && delim 090 .indexOf(str.charAt(previous)) >= 0))))); 091 } 092 093 /** 094 * Returns the next token from this string tokenizer. 095 * 096 * @return the next token from this string tokenizer 097 * 098 * @exception NoSuchElementException 099 * if there are no more tokens in this tokenizer's string 100 */ 101 public String nextToken() throws NoSuchElementException { 102 if (current == max 103 && (max == 0 || (returnTokens && delim.indexOf(str 104 .charAt(previous)) >= 0))) { 105 106 current++; 107 return new String(); 108 } 109 110 if (current >= max) 111 throw new NoSuchElementException(); 112 113 int start = current; 114 String result = null; 115 116 if (delim.indexOf(str.charAt(start)) >= 0) { 117 if (previous == -1 118 || (returnTokens && previous != current && delim 119 .indexOf(str.charAt(previous)) >= 0)) { 120 121 result = new String(); 122 } else if (returnTokens) 123 result = str.substring(start, ++current); 124 125 if (!returnTokens) 126 current++; 127 } 128 129 previous = start; 130 start = current; 131 132 if (result == null) 133 while (current < max && delim.indexOf(str.charAt(current)) < 0) 134 current++; 135 136 return result == null ? str.substring(start, current) : result; 137 } 138 139 /** 140 * Returns the next token in this string tokenizer's string. First, the set 141 * of characters considered to be delimiters by this Tokenizer object is 142 * changed to be the characters in the string delim. Then the next token in 143 * the string after the current position is returned. The current position 144 * is advanced beyond the recognized token. The new delimiter set remains 145 * the default after this call. 146 * 147 * @param delim 148 * the new delimiters 149 * 150 * @return the next token, after switching to the new delimiter set 151 * 152 * @exception NoSuchElementException 153 * if there are no more tokens in this tokenizer's string. 154 */ 155 public String nextToken(String delim) throws NoSuchElementException { 156 this.delim = delim; 157 return nextToken(); 158 } 159 160 /** 161 * Returns the same value as the hasMoreTokens method. It exists so that 162 * this class can implement the Enumeration interface. 163 * 164 * @return true if there are more tokens; false otherwise. 165 */ 166 public boolean hasMoreElements() { 167 return hasMoreTokens(); 168 } 169 170 /** 171 * Returns the same value as the nextToken method, except that its declared 172 * return value is Object rather than String. It exists so that this class 173 * can implement the Enumeration interface. 174 * 175 * @return the next token in the string 176 * 177 * @exception NoSuchElementException 178 * if there are no more tokens in this tokenizer's string 179 */ 180 public Object nextElement() { 181 return nextToken(); 182 } 183 184 /** 185 * Calculates the number of times that this tokenizer's nextToken method can 186 * be called before it generates an exception. The current position is not 187 * advanced. 188 * 189 * @return the number of tokens remaining in the string using the current 190 * delimiter set 191 */ 192 public int countTokens() { 193 int curr = current; 194 int count = 0; 195 196 for (int i = curr; i < max; i++) { 197 if (delim.indexOf(str.charAt(i)) >= 0) 198 count++; 199 200 curr++; 201 } 202 203 return count + (returnTokens ? count : 0) + 1; 204 } 205 206 /** 207 * Resets this tokenizer's state so the tokenizing starts from the begin. 208 */ 209 public void reset() { 210 previous = -1; 211 current = 0; 212 } 213 214 /** 215 * Constructs a string tokenizer for the specified string. All characters in 216 * the delim argument are the delimiters for separating tokens. If the 217 * returnTokens flag is true, then the delimiter characters are also 218 * returned as tokens. Each delimiter is returned as a string of length one. 219 * If the flag is false, the delimiter characters are skipped and only serve 220 * as separators between tokens. Then tokenizes the str and return an 221 * String[] array with tokens. 222 * 223 * @param str 224 * a string to be parsed 225 * @param delim 226 * the delimiters 227 * @param returnTokens 228 * flag indicating whether to return the delimiters as tokens 229 * 230 * @return array with tokens 231 */ 232 public static String[] tokenize(String str, String delim, 233 boolean returnTokens) { 234 235 Tokenizer tokenizer = new Tokenizer(str, delim, returnTokens); 236 String[] tokens = new String[tokenizer.countTokens()]; 237 238 for (int i = 0; i < tokens.length; i++) 239 tokens[i] = tokenizer.nextToken(); 240 241 return tokens; 242 } 243 244 /** 245 * Default delimiters "\t\n\r\f": the space character, the tab character, 246 * the newline character, the carriage-return character, and the form-feed 247 * character. 248 */ 249 public static final String DEFAULT_DELIMITERS = " \t\n\r\f"; 250 251 /** 252 * String to tokenize. 253 */ 254 private String str = null; 255 256 /** 257 * Delimiters. 258 */ 259 private String delim = null; 260 261 /** 262 * Flag indicating whether to return the delimiters as tokens. 263 */ 264 private boolean returnTokens = false; 265 266 /** 267 * Previous token start. 268 */ 269 private int previous = -1; 270 271 /** 272 * Current position in str string. 273 */ 274 private int current = 0; 275 276 /** 277 * Maximal position in str string. 278 */ 279 private int max = 0; 280 }