001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.lang3; 018 019/** 020 * <p>Operations on {@link CharSequence} that are 021 * {@code null} safe.</p> 022 * 023 * @see CharSequence 024 * @since 3.0 025 */ 026public class CharSequenceUtils { 027 028 private static final int NOT_FOUND = -1; 029 030 /** 031 * <p>{@code CharSequenceUtils} instances should NOT be constructed in 032 * standard programming. </p> 033 * 034 * <p>This constructor is public to permit tools that require a JavaBean 035 * instance to operate.</p> 036 */ 037 public CharSequenceUtils() { 038 } 039 040 //----------------------------------------------------------------------- 041 /** 042 * <p>Returns a new {@code CharSequence} that is a subsequence of this 043 * sequence starting with the {@code char} value at the specified index.</p> 044 * 045 * <p>This provides the {@code CharSequence} equivalent to {@link String#substring(int)}. 046 * The length (in {@code char}) of the returned sequence is {@code length() - start}, 047 * so if {@code start == end} then an empty sequence is returned.</p> 048 * 049 * @param cs the specified subsequence, null returns null 050 * @param start the start index, inclusive, valid 051 * @return a new subsequence, may be null 052 * @throws IndexOutOfBoundsException if {@code start} is negative or if 053 * {@code start} is greater than {@code length()} 054 */ 055 public static CharSequence subSequence(final CharSequence cs, final int start) { 056 return cs == null ? null : cs.subSequence(start, cs.length()); 057 } 058 059 //----------------------------------------------------------------------- 060 /** 061 * Returns the index within {@code cs} of the first occurrence of the 062 * specified character, starting the search at the specified index. 063 * <p> 064 * If a character with value {@code searchChar} occurs in the 065 * character sequence represented by the {@code cs} 066 * object at an index no smaller than {@code start}, then 067 * the index of the first such occurrence is returned. For values 068 * of {@code searchChar} in the range from 0 to 0xFFFF (inclusive), 069 * this is the smallest value <i>k</i> such that: 070 * <blockquote><pre> 071 * (this.charAt(<i>k</i>) == searchChar) && (<i>k</i> >= start) 072 * </pre></blockquote> 073 * is true. For other values of {@code searchChar}, it is the 074 * smallest value <i>k</i> such that: 075 * <blockquote><pre> 076 * (this.codePointAt(<i>k</i>) == searchChar) && (<i>k</i> >= start) 077 * </pre></blockquote> 078 * is true. In either case, if no such character occurs inm {@code cs} 079 * at or after position {@code start}, then 080 * {@code -1} is returned. 081 * 082 * <p> 083 * There is no restriction on the value of {@code start}. If it 084 * is negative, it has the same effect as if it were zero: the entire 085 * {@code CharSequence} may be searched. If it is greater than 086 * the length of {@code cs}, it has the same effect as if it were 087 * equal to the length of {@code cs}: {@code -1} is returned. 088 * 089 * <p>All indices are specified in {@code char} values 090 * (Unicode code units). 091 * 092 * @param cs the {@code CharSequence} to be processed, not null 093 * @param searchChar the char to be searched for 094 * @param start the start index, negative starts at the string start 095 * @return the index where the search char was found, -1 if not found 096 * @since 3.6 updated to behave more like {@code String} 097 */ 098 static int indexOf(final CharSequence cs, final int searchChar, int start) { 099 if (cs instanceof String) { 100 return ((String) cs).indexOf(searchChar, start); 101 } 102 final int sz = cs.length(); 103 if (start < 0) { 104 start = 0; 105 } 106 if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) { 107 for (int i = start; i < sz; i++) { 108 if (cs.charAt(i) == searchChar) { 109 return i; 110 } 111 } 112 return NOT_FOUND; 113 } 114 //supplementary characters (LANG1300) 115 if (searchChar <= Character.MAX_CODE_POINT) { 116 final char[] chars = Character.toChars(searchChar); 117 for (int i = start; i < sz - 1; i++) { 118 final char high = cs.charAt(i); 119 final char low = cs.charAt(i + 1); 120 if (high == chars[0] && low == chars[1]) { 121 return i; 122 } 123 } 124 } 125 return NOT_FOUND; 126 } 127 128 /** 129 * Used by the indexOf(CharSequence methods) as a green implementation of indexOf. 130 * 131 * @param cs the {@code CharSequence} to be processed 132 * @param searchChar the {@code CharSequence} to be searched for 133 * @param start the start index 134 * @return the index where the search sequence was found 135 */ 136 static int indexOf(final CharSequence cs, final CharSequence searchChar, final int start) { 137 if (cs instanceof String) { 138 return ((String) cs).indexOf(searchChar.toString(), start); 139 } else if (cs instanceof StringBuilder) { 140 return ((StringBuilder) cs).indexOf(searchChar.toString(), start); 141 } else if (cs instanceof StringBuffer) { 142 return ((StringBuffer) cs).indexOf(searchChar.toString(), start); 143 } 144 return cs.toString().indexOf(searchChar.toString(), start); 145// if (cs instanceof String && searchChar instanceof String) { 146// // TODO: Do we assume searchChar is usually relatively small; 147// // If so then calling toString() on it is better than reverting to 148// // the green implementation in the else block 149// return ((String) cs).indexOf((String) searchChar, start); 150// } else { 151// // TODO: Implement rather than convert to String 152// return cs.toString().indexOf(searchChar.toString(), start); 153// } 154 } 155 156 /** 157 * Returns the index within {@code cs} of the last occurrence of 158 * the specified character, searching backward starting at the 159 * specified index. For values of {@code searchChar} in the range 160 * from 0 to 0xFFFF (inclusive), the index returned is the largest 161 * value <i>k</i> such that: 162 * <blockquote><pre> 163 * (this.charAt(<i>k</i>) == searchChar) && (<i>k</i> <= start) 164 * </pre></blockquote> 165 * is true. For other values of {@code searchChar}, it is the 166 * largest value <i>k</i> such that: 167 * <blockquote><pre> 168 * (this.codePointAt(<i>k</i>) == searchChar) && (<i>k</i> <= start) 169 * </pre></blockquote> 170 * is true. In either case, if no such character occurs in {@code cs} 171 * at or before position {@code start}, then {@code -1} is returned. 172 * 173 * <p>All indices are specified in {@code char} values 174 * (Unicode code units). 175 * 176 * @param cs the {@code CharSequence} to be processed 177 * @param searchChar the char to be searched for 178 * @param start the start index, negative returns -1, beyond length starts at end 179 * @return the index where the search char was found, -1 if not found 180 * @since 3.6 updated to behave more like {@code String} 181 */ 182 static int lastIndexOf(final CharSequence cs, final int searchChar, int start) { 183 if (cs instanceof String) { 184 return ((String) cs).lastIndexOf(searchChar, start); 185 } 186 final int sz = cs.length(); 187 if (start < 0) { 188 return NOT_FOUND; 189 } 190 if (start >= sz) { 191 start = sz - 1; 192 } 193 if (searchChar < Character.MIN_SUPPLEMENTARY_CODE_POINT) { 194 for (int i = start; i >= 0; --i) { 195 if (cs.charAt(i) == searchChar) { 196 return i; 197 } 198 } 199 return NOT_FOUND; 200 } 201 //supplementary characters (LANG1300) 202 //NOTE - we must do a forward traversal for this to avoid duplicating code points 203 if (searchChar <= Character.MAX_CODE_POINT) { 204 final char[] chars = Character.toChars(searchChar); 205 //make sure it's not the last index 206 if (start == sz - 1) { 207 return NOT_FOUND; 208 } 209 for (int i = start; i >= 0; i--) { 210 final char high = cs.charAt(i); 211 final char low = cs.charAt(i + 1); 212 if (chars[0] == high && chars[1] == low) { 213 return i; 214 } 215 } 216 } 217 return NOT_FOUND; 218 } 219 220 static final int TO_STRING_LIMIT = 16; 221 222 /** 223 * Used by the lastIndexOf(CharSequence methods) as a green implementation of lastIndexOf 224 * 225 * @param cs the {@code CharSequence} to be processed 226 * @param searchChar the {@code CharSequence} to find 227 * @param start the start index 228 * @return the index where the search sequence was found 229 */ 230 static int lastIndexOf(final CharSequence cs, final CharSequence searchChar, int start) { 231 if (searchChar == null || cs == null) { 232 return NOT_FOUND; 233 } 234 if (searchChar instanceof String) { 235 if (cs instanceof String) { 236 return ((String) cs).lastIndexOf((String) searchChar, start); 237 } else if (cs instanceof StringBuilder) { 238 return ((StringBuilder) cs).lastIndexOf((String) searchChar, start); 239 } else if (cs instanceof StringBuffer) { 240 return ((StringBuffer) cs).lastIndexOf((String) searchChar, start); 241 } 242 } 243 244 final int len1 = cs.length(); 245 final int len2 = searchChar.length(); 246 247 if (start > len1) { 248 start = len1; 249 } 250 251 if (start < 0 || len2 < 0 || len2 > len1) { 252 return NOT_FOUND; 253 } 254 255 if (len2 == 0) { 256 return start; 257 } 258 259 if (len2 <= TO_STRING_LIMIT) { 260 if (cs instanceof String) { 261 return ((String) cs).lastIndexOf(searchChar.toString(), start); 262 } else if (cs instanceof StringBuilder) { 263 return ((StringBuilder) cs).lastIndexOf(searchChar.toString(), start); 264 } else if (cs instanceof StringBuffer) { 265 return ((StringBuffer) cs).lastIndexOf(searchChar.toString(), start); 266 } 267 } 268 269 if (start + len2 > len1) { 270 start = len1 - len2; 271 } 272 273 final char char0 = searchChar.charAt(0); 274 275 int i = start; 276 while (true) { 277 while (cs.charAt(i) != char0) { 278 i--; 279 if (i < 0) { 280 return NOT_FOUND; 281 } 282 } 283 if (checkLaterThan1(cs, searchChar, len2, i)) { 284 return i; 285 } 286 i--; 287 if (i < 0) { 288 return NOT_FOUND; 289 } 290 } 291 } 292 293 private static boolean checkLaterThan1(final CharSequence cs, final CharSequence searchChar, final int len2, final int start1) { 294 for (int i = 1, j = len2 - 1; i <= j; i++, j--) { 295 if (cs.charAt(start1 + i) != searchChar.charAt(i) 296 || 297 cs.charAt(start1 + j) != searchChar.charAt(j) 298 ) { 299 return false; 300 } 301 } 302 return true; 303 } 304 305 /** 306 * Converts the given CharSequence to a char[]. 307 * 308 * @param source the {@code CharSequence} to be processed. 309 * @return the resulting char array, never null. 310 * @since 3.11 311 */ 312 public static char[] toCharArray(final CharSequence source) { 313 final int len = StringUtils.length(source); 314 if (len == 0) { 315 return ArrayUtils.EMPTY_CHAR_ARRAY; 316 } 317 if (source instanceof String) { 318 return ((String) source).toCharArray(); 319 } 320 final char[] array = new char[len]; 321 for (int i = 0; i < len; i++) { 322 array[i] = source.charAt(i); 323 } 324 return array; 325 } 326 327 /** 328 * Green implementation of regionMatches. 329 * 330 * @param cs the {@code CharSequence} to be processed 331 * @param ignoreCase whether or not to be case insensitive 332 * @param thisStart the index to start on the {@code cs} CharSequence 333 * @param substring the {@code CharSequence} to be looked for 334 * @param start the index to start on the {@code substring} CharSequence 335 * @param length character length of the region 336 * @return whether the region matched 337 */ 338 static boolean regionMatches(final CharSequence cs, final boolean ignoreCase, final int thisStart, 339 final CharSequence substring, final int start, final int length) { 340 if (cs instanceof String && substring instanceof String) { 341 return ((String) cs).regionMatches(ignoreCase, thisStart, (String) substring, start, length); 342 } 343 int index1 = thisStart; 344 int index2 = start; 345 int tmpLen = length; 346 347 // Extract these first so we detect NPEs the same as the java.lang.String version 348 final int srcLen = cs.length() - thisStart; 349 final int otherLen = substring.length() - start; 350 351 // Check for invalid parameters 352 if (thisStart < 0 || start < 0 || length < 0) { 353 return false; 354 } 355 356 // Check that the regions are long enough 357 if (srcLen < length || otherLen < length) { 358 return false; 359 } 360 361 while (tmpLen-- > 0) { 362 final char c1 = cs.charAt(index1++); 363 final char c2 = substring.charAt(index2++); 364 365 if (c1 == c2) { 366 continue; 367 } 368 369 if (!ignoreCase) { 370 return false; 371 } 372 373 // The real same check as in String.regionMatches(): 374 final char u1 = Character.toUpperCase(c1); 375 final char u2 = Character.toUpperCase(c2); 376 if (u1 != u2 && Character.toLowerCase(u1) != Character.toLowerCase(u2)) { 377 return false; 378 } 379 } 380 381 return true; 382 } 383}