001/* 002 * Copyright 2001-2006 Geert Bevin <gbevin[remove] at uwyn dot com> 003 * Distributed under the terms of either: 004 * - the common development and distribution license (CDDL), v1.0; or 005 * - the GNU Lesser General Public License, v2.1 or later 006 * $Id: StringUtils.java 3106 2006-03-13 17:53:50Z gbevin $ 007 */ 008package com.uwyn.jhighlight.tools; 009 010import com.uwyn.jhighlight.pcj.map.CharKeyOpenHashMap; 011import java.util.ArrayList; 012import java.util.Iterator; 013import java.util.regex.Pattern; 014 015/** 016 * General purpose class containing common <code>String</code> manipulation 017 * methods. 018 * 019 * @author Geert Bevin (gbevin[remove] at uwyn dot com) 020 * @version $Revision: 3106 $ 021 * @since 1.0 022 */ 023public abstract class StringUtils 024{ 025 private static final CharKeyOpenHashMap mHtmlEncodeMap = new CharKeyOpenHashMap(); 026 027 static 028 { 029 // Html encoding mapping according to the HTML 4.0 spec 030 // http://www.w3.org/TR/REC-html40/sgml/entities.html 031 032 // Special characters for HTML 033 mHtmlEncodeMap.put('\u0026', "&"); 034 mHtmlEncodeMap.put('\u003C', "<"); 035 mHtmlEncodeMap.put('\u003E', ">"); 036 mHtmlEncodeMap.put('\u0022', """); 037 038 mHtmlEncodeMap.put('\u0152', "Œ"); 039 mHtmlEncodeMap.put('\u0153', "œ"); 040 mHtmlEncodeMap.put('\u0160', "Š"); 041 mHtmlEncodeMap.put('\u0161', "š"); 042 mHtmlEncodeMap.put('\u0178', "Ÿ"); 043 mHtmlEncodeMap.put('\u02C6', "ˆ"); 044 mHtmlEncodeMap.put('\u02DC', "˜"); 045 mHtmlEncodeMap.put('\u2002', " "); 046 mHtmlEncodeMap.put('\u2003', " "); 047 mHtmlEncodeMap.put('\u2009', " "); 048 mHtmlEncodeMap.put('\u200C', "‌"); 049 mHtmlEncodeMap.put('\u200D', "‍"); 050 mHtmlEncodeMap.put('\u200E', "‎"); 051 mHtmlEncodeMap.put('\u200F', "‏"); 052 mHtmlEncodeMap.put('\u2013', "–"); 053 mHtmlEncodeMap.put('\u2014', "—"); 054 mHtmlEncodeMap.put('\u2018', "‘"); 055 mHtmlEncodeMap.put('\u2019', "’"); 056 mHtmlEncodeMap.put('\u201A', "‚"); 057 mHtmlEncodeMap.put('\u201C', "“"); 058 mHtmlEncodeMap.put('\u201D', "”"); 059 mHtmlEncodeMap.put('\u201E', "„"); 060 mHtmlEncodeMap.put('\u2020', "†"); 061 mHtmlEncodeMap.put('\u2021', "‡"); 062 mHtmlEncodeMap.put('\u2030', "‰"); 063 mHtmlEncodeMap.put('\u2039', "‹"); 064 mHtmlEncodeMap.put('\u203A', "›"); 065 mHtmlEncodeMap.put('\u20AC', "€"); 066 067 // Character entity references for ISO 8859-1 characters 068 mHtmlEncodeMap.put('\u00A0', " "); 069 mHtmlEncodeMap.put('\u00A1', "¡"); 070 mHtmlEncodeMap.put('\u00A2', "¢"); 071 mHtmlEncodeMap.put('\u00A3', "£"); 072 mHtmlEncodeMap.put('\u00A4', "¤"); 073 mHtmlEncodeMap.put('\u00A5', "¥"); 074 mHtmlEncodeMap.put('\u00A6', "¦"); 075 mHtmlEncodeMap.put('\u00A7', "§"); 076 mHtmlEncodeMap.put('\u00A8', "¨"); 077 mHtmlEncodeMap.put('\u00A9', "©"); 078 mHtmlEncodeMap.put('\u00AA', "ª"); 079 mHtmlEncodeMap.put('\u00AB', "«"); 080 mHtmlEncodeMap.put('\u00AC', "¬"); 081 mHtmlEncodeMap.put('\u00AD', "­"); 082 mHtmlEncodeMap.put('\u00AE', "®"); 083 mHtmlEncodeMap.put('\u00AF', "¯"); 084 mHtmlEncodeMap.put('\u00B0', "°"); 085 mHtmlEncodeMap.put('\u00B1', "±"); 086 mHtmlEncodeMap.put('\u00B2', "²"); 087 mHtmlEncodeMap.put('\u00B3', "³"); 088 mHtmlEncodeMap.put('\u00B4', "´"); 089 mHtmlEncodeMap.put('\u00B5', "µ"); 090 mHtmlEncodeMap.put('\u00B6', "¶"); 091 mHtmlEncodeMap.put('\u00B7', "·"); 092 mHtmlEncodeMap.put('\u00B8', "¸"); 093 mHtmlEncodeMap.put('\u00B9', "¹"); 094 mHtmlEncodeMap.put('\u00BA', "º"); 095 mHtmlEncodeMap.put('\u00BB', "»"); 096 mHtmlEncodeMap.put('\u00BC', "¼"); 097 mHtmlEncodeMap.put('\u00BD', "½"); 098 mHtmlEncodeMap.put('\u00BE', "¾"); 099 mHtmlEncodeMap.put('\u00BF', "¿"); 100 mHtmlEncodeMap.put('\u00C0', "À"); 101 mHtmlEncodeMap.put('\u00C1', "Á"); 102 mHtmlEncodeMap.put('\u00C2', "Â"); 103 mHtmlEncodeMap.put('\u00C3', "Ã"); 104 mHtmlEncodeMap.put('\u00C4', "Ä"); 105 mHtmlEncodeMap.put('\u00C5', "Å"); 106 mHtmlEncodeMap.put('\u00C6', "Æ"); 107 mHtmlEncodeMap.put('\u00C7', "Ç"); 108 mHtmlEncodeMap.put('\u00C8', "È"); 109 mHtmlEncodeMap.put('\u00C9', "É"); 110 mHtmlEncodeMap.put('\u00CA', "Ê"); 111 mHtmlEncodeMap.put('\u00CB', "Ë"); 112 mHtmlEncodeMap.put('\u00CC', "Ì"); 113 mHtmlEncodeMap.put('\u00CD', "Í"); 114 mHtmlEncodeMap.put('\u00CE', "Î"); 115 mHtmlEncodeMap.put('\u00CF', "Ï"); 116 mHtmlEncodeMap.put('\u00D0', "Ð"); 117 mHtmlEncodeMap.put('\u00D1', "Ñ"); 118 mHtmlEncodeMap.put('\u00D2', "Ò"); 119 mHtmlEncodeMap.put('\u00D3', "Ó"); 120 mHtmlEncodeMap.put('\u00D4', "Ô"); 121 mHtmlEncodeMap.put('\u00D5', "Õ"); 122 mHtmlEncodeMap.put('\u00D6', "Ö"); 123 mHtmlEncodeMap.put('\u00D7', "×"); 124 mHtmlEncodeMap.put('\u00D8', "Ø"); 125 mHtmlEncodeMap.put('\u00D9', "Ù"); 126 mHtmlEncodeMap.put('\u00DA', "Ú"); 127 mHtmlEncodeMap.put('\u00DB', "Û"); 128 mHtmlEncodeMap.put('\u00DC', "Ü"); 129 mHtmlEncodeMap.put('\u00DD', "Ý"); 130 mHtmlEncodeMap.put('\u00DE', "Þ"); 131 mHtmlEncodeMap.put('\u00DF', "ß"); 132 mHtmlEncodeMap.put('\u00E0', "à"); 133 mHtmlEncodeMap.put('\u00E1', "á"); 134 mHtmlEncodeMap.put('\u00E2', "â"); 135 mHtmlEncodeMap.put('\u00E3', "ã"); 136 mHtmlEncodeMap.put('\u00E4', "ä"); 137 mHtmlEncodeMap.put('\u00E5', "å"); 138 mHtmlEncodeMap.put('\u00E6', "æ"); 139 mHtmlEncodeMap.put('\u00E7', "ç"); 140 mHtmlEncodeMap.put('\u00E8', "è"); 141 mHtmlEncodeMap.put('\u00E9', "é"); 142 mHtmlEncodeMap.put('\u00EA', "ê"); 143 mHtmlEncodeMap.put('\u00EB', "ë"); 144 mHtmlEncodeMap.put('\u00EC', "ì"); 145 mHtmlEncodeMap.put('\u00ED', "í"); 146 mHtmlEncodeMap.put('\u00EE', "î"); 147 mHtmlEncodeMap.put('\u00EF', "ï"); 148 mHtmlEncodeMap.put('\u00F0', "ð"); 149 mHtmlEncodeMap.put('\u00F1', "ñ"); 150 mHtmlEncodeMap.put('\u00F2', "ò"); 151 mHtmlEncodeMap.put('\u00F3', "ó"); 152 mHtmlEncodeMap.put('\u00F4', "ô"); 153 mHtmlEncodeMap.put('\u00F5', "õ"); 154 mHtmlEncodeMap.put('\u00F6', "ö"); 155 mHtmlEncodeMap.put('\u00F7', "÷"); 156 mHtmlEncodeMap.put('\u00F8', "ø"); 157 mHtmlEncodeMap.put('\u00F9', "ù"); 158 mHtmlEncodeMap.put('\u00FA', "ú"); 159 mHtmlEncodeMap.put('\u00FB', "û"); 160 mHtmlEncodeMap.put('\u00FC', "ü"); 161 mHtmlEncodeMap.put('\u00FD', "ý"); 162 mHtmlEncodeMap.put('\u00FE', "þ"); 163 mHtmlEncodeMap.put('\u00FF', "ÿ"); 164 165 // Mathematical, Greek and Symbolic characters for HTML 166 mHtmlEncodeMap.put('\u0192', "ƒ"); 167 mHtmlEncodeMap.put('\u0391', "Α"); 168 mHtmlEncodeMap.put('\u0392', "Β"); 169 mHtmlEncodeMap.put('\u0393', "Γ"); 170 mHtmlEncodeMap.put('\u0394', "Δ"); 171 mHtmlEncodeMap.put('\u0395', "Ε"); 172 mHtmlEncodeMap.put('\u0396', "Ζ"); 173 mHtmlEncodeMap.put('\u0397', "Η"); 174 mHtmlEncodeMap.put('\u0398', "Θ"); 175 mHtmlEncodeMap.put('\u0399', "Ι"); 176 mHtmlEncodeMap.put('\u039A', "Κ"); 177 mHtmlEncodeMap.put('\u039B', "Λ"); 178 mHtmlEncodeMap.put('\u039C', "Μ"); 179 mHtmlEncodeMap.put('\u039D', "Ν"); 180 mHtmlEncodeMap.put('\u039E', "Ξ"); 181 mHtmlEncodeMap.put('\u039F', "Ο"); 182 mHtmlEncodeMap.put('\u03A0', "Π"); 183 mHtmlEncodeMap.put('\u03A1', "Ρ"); 184 mHtmlEncodeMap.put('\u03A3', "Σ"); 185 mHtmlEncodeMap.put('\u03A4', "Τ"); 186 mHtmlEncodeMap.put('\u03A5', "Υ"); 187 mHtmlEncodeMap.put('\u03A6', "Φ"); 188 mHtmlEncodeMap.put('\u03A7', "Χ"); 189 mHtmlEncodeMap.put('\u03A8', "Ψ"); 190 mHtmlEncodeMap.put('\u03A9', "Ω"); 191 mHtmlEncodeMap.put('\u03B1', "α"); 192 mHtmlEncodeMap.put('\u03B2', "β"); 193 mHtmlEncodeMap.put('\u03B3', "γ"); 194 mHtmlEncodeMap.put('\u03B4', "δ"); 195 mHtmlEncodeMap.put('\u03B5', "ε"); 196 mHtmlEncodeMap.put('\u03B6', "ζ"); 197 mHtmlEncodeMap.put('\u03B7', "η"); 198 mHtmlEncodeMap.put('\u03B8', "θ"); 199 mHtmlEncodeMap.put('\u03B9', "ι"); 200 mHtmlEncodeMap.put('\u03BA', "κ"); 201 mHtmlEncodeMap.put('\u03BB', "λ"); 202 mHtmlEncodeMap.put('\u03BC', "μ"); 203 mHtmlEncodeMap.put('\u03BD', "ν"); 204 mHtmlEncodeMap.put('\u03BE', "ξ"); 205 mHtmlEncodeMap.put('\u03BF', "ο"); 206 mHtmlEncodeMap.put('\u03C0', "π"); 207 mHtmlEncodeMap.put('\u03C1', "ρ"); 208 mHtmlEncodeMap.put('\u03C2', "ς"); 209 mHtmlEncodeMap.put('\u03C3', "σ"); 210 mHtmlEncodeMap.put('\u03C4', "τ"); 211 mHtmlEncodeMap.put('\u03C5', "υ"); 212 mHtmlEncodeMap.put('\u03C6', "φ"); 213 mHtmlEncodeMap.put('\u03C7', "χ"); 214 mHtmlEncodeMap.put('\u03C8', "ψ"); 215 mHtmlEncodeMap.put('\u03C9', "ω"); 216 mHtmlEncodeMap.put('\u03D1', "ϑ"); 217 mHtmlEncodeMap.put('\u03D2', "ϒ"); 218 mHtmlEncodeMap.put('\u03D6', "ϖ"); 219 mHtmlEncodeMap.put('\u2022', "•"); 220 mHtmlEncodeMap.put('\u2026', "…"); 221 mHtmlEncodeMap.put('\u2032', "′"); 222 mHtmlEncodeMap.put('\u2033', "″"); 223 mHtmlEncodeMap.put('\u203E', "‾"); 224 mHtmlEncodeMap.put('\u2044', "⁄"); 225 mHtmlEncodeMap.put('\u2118', "℘"); 226 mHtmlEncodeMap.put('\u2111', "ℑ"); 227 mHtmlEncodeMap.put('\u211C', "ℜ"); 228 mHtmlEncodeMap.put('\u2122', "™"); 229 mHtmlEncodeMap.put('\u2135', "ℵ"); 230 mHtmlEncodeMap.put('\u2190', "←"); 231 mHtmlEncodeMap.put('\u2191', "↑"); 232 mHtmlEncodeMap.put('\u2192', "→"); 233 mHtmlEncodeMap.put('\u2193', "↓"); 234 mHtmlEncodeMap.put('\u2194', "↔"); 235 mHtmlEncodeMap.put('\u21B5', "↵"); 236 mHtmlEncodeMap.put('\u21D0', "⇐"); 237 mHtmlEncodeMap.put('\u21D1', "⇑"); 238 mHtmlEncodeMap.put('\u21D2', "⇒"); 239 mHtmlEncodeMap.put('\u21D3', "⇓"); 240 mHtmlEncodeMap.put('\u21D4', "⇔"); 241 mHtmlEncodeMap.put('\u2200', "∀"); 242 mHtmlEncodeMap.put('\u2202', "∂"); 243 mHtmlEncodeMap.put('\u2203', "∃"); 244 mHtmlEncodeMap.put('\u2205', "∅"); 245 mHtmlEncodeMap.put('\u2207', "∇"); 246 mHtmlEncodeMap.put('\u2208', "∈"); 247 mHtmlEncodeMap.put('\u2209', "∉"); 248 mHtmlEncodeMap.put('\u220B', "∋"); 249 mHtmlEncodeMap.put('\u220F', "∏"); 250 mHtmlEncodeMap.put('\u2211', "∑"); 251 mHtmlEncodeMap.put('\u2212', "−"); 252 mHtmlEncodeMap.put('\u2217', "∗"); 253 mHtmlEncodeMap.put('\u221A', "√"); 254 mHtmlEncodeMap.put('\u221D', "∝"); 255 mHtmlEncodeMap.put('\u221E', "∞"); 256 mHtmlEncodeMap.put('\u2220', "∠"); 257 mHtmlEncodeMap.put('\u2227', "∧"); 258 mHtmlEncodeMap.put('\u2228', "∨"); 259 mHtmlEncodeMap.put('\u2229', "∩"); 260 mHtmlEncodeMap.put('\u222A', "∪"); 261 mHtmlEncodeMap.put('\u222B', "∫"); 262 mHtmlEncodeMap.put('\u2234', "∴"); 263 mHtmlEncodeMap.put('\u223C', "∼"); 264 mHtmlEncodeMap.put('\u2245', "≅"); 265 mHtmlEncodeMap.put('\u2248', "≈"); 266 mHtmlEncodeMap.put('\u2260', "≠"); 267 mHtmlEncodeMap.put('\u2261', "≡"); 268 mHtmlEncodeMap.put('\u2264', "≤"); 269 mHtmlEncodeMap.put('\u2265', "≥"); 270 mHtmlEncodeMap.put('\u2282', "⊂"); 271 mHtmlEncodeMap.put('\u2283', "⊃"); 272 mHtmlEncodeMap.put('\u2284', "⊄"); 273 mHtmlEncodeMap.put('\u2286', "⊆"); 274 mHtmlEncodeMap.put('\u2287', "⊇"); 275 mHtmlEncodeMap.put('\u2295', "⊕"); 276 mHtmlEncodeMap.put('\u2297', "⊗"); 277 mHtmlEncodeMap.put('\u22A5', "⊥"); 278 mHtmlEncodeMap.put('\u22C5', "⋅"); 279 mHtmlEncodeMap.put('\u2308', "⌈"); 280 mHtmlEncodeMap.put('\u2309', "⌉"); 281 mHtmlEncodeMap.put('\u230A', "⌊"); 282 mHtmlEncodeMap.put('\u230B', "⌋"); 283 mHtmlEncodeMap.put('\u2329', "⟨"); 284 mHtmlEncodeMap.put('\u232A', "⟩"); 285 mHtmlEncodeMap.put('\u25CA', "◊"); 286 mHtmlEncodeMap.put('\u2660', "♠"); 287 mHtmlEncodeMap.put('\u2663', "♣"); 288 mHtmlEncodeMap.put('\u2665', "♥"); 289 mHtmlEncodeMap.put('\u2666', "♦"); 290 } 291 292 private StringUtils() 293 { 294 } 295 296 /** 297 * Transforms a provided <code>String</code> object into a new string, 298 * containing only valid Html characters. 299 * 300 * @param source The string that has to be transformed into a valid Html 301 * string. 302 * 303 * @return The encoded <code>String</code> object. 304 * 305 * @since 1.0 306 */ 307 public static String encodeHtml(String source) 308 { 309 return encode(source, mHtmlEncodeMap); 310 } 311 312 /** 313 * Transforms a provided <code>String</code> object into a new string, 314 * using the mapping that are provided through the supplied encoding table. 315 * 316 * @param source The string that has to be transformed into a valid string, 317 * using the mappings that are provided through the supplied encoding table. 318 * @param encodingTables A <code>Map</code> object containing the mappings to 319 * transform characters into valid entities. The keys of this map should be 320 * <code>Character</code> objects and the values <code>String</code> 321 * objects. 322 * 323 * @return The encoded <code>String</code> object. 324 * 325 * @since 1.0 326 */ 327 private static String encode(String source, CharKeyOpenHashMap encodingTable) 328 { 329 if (null == source) 330 { 331 return null; 332 } 333 334 if (null == encodingTable) 335 { 336 return source; 337 } 338 339 StringBuffer encoded_string = null; 340 char[] string_to_encode_array = source.toCharArray(); 341 int last_match = -1; 342 int difference = 0; 343 344 for (int i = 0; i < string_to_encode_array.length; i++) 345 { 346 char char_to_encode = string_to_encode_array[i]; 347 348 if (encodingTable.containsKey(char_to_encode)) 349 { 350 if (null == encoded_string) 351 { 352 encoded_string = new StringBuffer(source.length()); 353 } 354 difference = i - (last_match + 1); 355 if (difference > 0) 356 { 357 encoded_string.append(string_to_encode_array, last_match + 1, difference); 358 } 359 encoded_string.append(encodingTable.get(char_to_encode)); 360 last_match = i; 361 } 362 } 363 364 if (null == encoded_string) 365 { 366 return source; 367 } 368 else 369 { 370 difference = string_to_encode_array.length - (last_match + 1); 371 if (difference > 0) 372 { 373 encoded_string.append(string_to_encode_array, last_match + 1, difference); 374 } 375 return encoded_string.toString(); 376 } 377 } 378 379 /** 380 * Checks if the name filters through an including and an excluding 381 * regular expression. 382 * 383 * @param name The <code>String</code> that will be filtered. 384 * @param included The regular expressions that needs to succeed 385 * @param excluded The regular expressions that needs to fail 386 * 387 * @return <code>true</code> if the name filtered through correctly; or 388 * <p> 389 * <code>false</code> otherwise. 390 * 391 * @since 1.0 392 */ 393 public static boolean filter(String name, Pattern included, Pattern excluded) 394 { 395 Pattern[] included_array = null; 396 if (included != null) 397 { 398 included_array = new Pattern[] {included}; 399 } 400 401 Pattern[] excluded_array = null; 402 if (excluded != null) 403 { 404 excluded_array = new Pattern[] {excluded}; 405 } 406 407 return filter(name, included_array, excluded_array); 408 } 409 410 /** 411 * Checks if the name filters through a series of including and excluding 412 * regular expressions. 413 * 414 * @param name The <code>String</code> that will be filtered. 415 * @param included An array of regular expressions that need to succeed 416 * @param excluded An array of regular expressions that need to fail 417 * 418 * @return <code>true</code> if the name filtered through correctly; or 419 * <p> 420 * <code>false</code> otherwise. 421 * 422 * @since 1.0 423 */ 424 public static boolean filter(String name, Pattern[] included, Pattern[] excluded) 425 { 426 if (null == name) 427 { 428 return false; 429 } 430 431 boolean accepted = false; 432 433 // retain only the includes 434 if (null == included) 435 { 436 accepted = true; 437 } 438 else 439 { 440 Pattern pattern; 441 for (int i = 0; i < included.length; i++) 442 { 443 pattern = included[i]; 444 445 if (pattern != null && 446 pattern.matcher(name).matches()) 447 { 448 accepted = true; 449 break; 450 } 451 } 452 } 453 454 // remove the excludes 455 if (accepted && 456 excluded != null) 457 { 458 Pattern pattern; 459 for (int i = 0; i < excluded.length; i++) 460 { 461 pattern = excluded[i]; 462 463 if (pattern != null && 464 pattern.matcher(name).matches()) 465 { 466 accepted = false; 467 break; 468 } 469 } 470 } 471 472 return accepted; 473 } 474 475 /** 476 * Splits a string into different parts, using a seperator string to detect 477 * the seperation boundaries in a case-sensitive manner. The seperator will 478 * not be included in the list of parts. 479 * 480 * @param source The string that will be split into parts. 481 * @param seperator The seperator string that will be used to determine the 482 * parts. 483 * 484 * @return An <code>ArrayList</code> containing the parts as 485 * <code>String</code> objects. 486 * 487 * @since 1.0 488 */ 489 public static ArrayList split(String source, String seperator) 490 { 491 return split(source, seperator, true); 492 } 493 494 /** 495 * Splits a string into different parts, using a seperator string to detect 496 * the seperation boundaries. The seperator will not be included in the list 497 * of parts. 498 * 499 * @param source The string that will be split into parts. 500 * @param seperator The seperator string that will be used to determine the 501 * parts. 502 * @param matchCase A <code>boolean</code> indicating if the match is going 503 * to be performed in a case-sensitive manner or not. 504 * 505 * @return An <code>ArrayList</code> containing the parts as 506 * <code>String</code> objects. 507 * 508 * @since 1.0 509 */ 510 public static ArrayList split(String source, String seperator, boolean matchCase) 511 { 512 ArrayList substrings = new ArrayList(); 513 514 if (null == source) 515 { 516 return substrings; 517 } 518 519 if (null == seperator) 520 { 521 substrings.add(source); 522 return substrings; 523 } 524 525 int current_index = 0; 526 int delimiter_index = 0; 527 String element = null; 528 529 String source_lookup_reference = null; 530 if (!matchCase) 531 { 532 source_lookup_reference = source.toLowerCase(); 533 seperator = seperator.toLowerCase(); 534 } 535 else 536 { 537 source_lookup_reference = source; 538 } 539 540 while (current_index <= source_lookup_reference.length()) 541 { 542 delimiter_index = source_lookup_reference.indexOf(seperator, current_index); 543 544 if (-1 == delimiter_index) 545 { 546 element = new String(source.substring(current_index, source.length())); 547 substrings.add(element); 548 current_index = source.length() + 1; 549 } 550 else 551 { 552 element = new String(source.substring(current_index, delimiter_index)); 553 substrings.add(element); 554 current_index = delimiter_index + seperator.length(); 555 } 556 } 557 558 return substrings; 559 } 560 561 /** 562 * Searches for a string within a specified string in a case-sensitive 563 * manner and replaces every match with another string. 564 * 565 * @param source The string in which the matching parts will be replaced. 566 * @param stringToReplace The string that will be searched for. 567 * @param replacementString The string that will replace each matching part. 568 * 569 * @return A new <code>String</code> object containing the replacement 570 * result. 571 * 572 * @since 1.0 573 */ 574 public static String replace(String source, String stringToReplace, String replacementString) 575 { 576 return replace(source, stringToReplace, replacementString, true); 577 } 578 579 /** 580 * Searches for a string within a specified string and replaces every match 581 * with another string. 582 * 583 * @param source The string in which the matching parts will be replaced. 584 * @param stringToReplace The string that will be searched for. 585 * @param replacementString The string that will replace each matching part. 586 * @param matchCase A <code>boolean</code> indicating if the match is going 587 * to be performed in a case-sensitive manner or not. 588 * 589 * @return A new <code>String</code> object containing the replacement 590 * result. 591 * 592 * @since 1.0 593 */ 594 public static String replace(String source, String stringToReplace, String replacementString, boolean matchCase) 595 { 596 if (null == source) 597 { 598 return null; 599 } 600 601 if (null == stringToReplace) 602 { 603 return source; 604 } 605 606 if (null == replacementString) 607 { 608 return source; 609 } 610 611 Iterator string_parts = split(source, stringToReplace, matchCase).iterator(); 612 StringBuffer new_string = new StringBuffer(); 613 614 synchronized (new_string) // speed increase by thread lock pre-allocation 615 { 616 while (string_parts.hasNext()) 617 { 618 String string_part = (String)string_parts.next(); 619 new_string.append(string_part); 620 if (string_parts.hasNext()) 621 { 622 new_string.append(replacementString); 623 } 624 } 625 626 return new_string.toString(); 627 } 628 } 629 630 /** 631 * Creates a new string that contains the provided string a number of times. 632 * 633 * @param source The string that will be repeated. 634 * @param count The number of times that the string will be repeated. 635 * @return A new <code>String</code> object containing the repeated 636 * concatenation result. 637 * 638 * @since 1.0 639 */ 640 public static String repeat(String source, int count) 641 { 642 if (null == source) 643 { 644 return null; 645 } 646 647 StringBuffer new_string = new StringBuffer(); 648 synchronized (new_string) // speed increase by thread lock pre-allocation 649 { 650 while (count > 0) 651 { 652 new_string.append(source); 653 count --; 654 } 655 656 return new_string.toString(); 657 } 658 } 659 660 /** 661 * Converts all tabs on a line to spaces according to the provided tab 662 * width. 663 * 664 * @param line The line whose tabs have to be converted. 665 * @param tabWidth The tab width. 666 * @return A new <code>String</code> object containing the line with the 667 * replaced tabs. 668 * @since 1.0 669 */ 670 public static String convertTabsToSpaces(String line, int tabWidth) 671 { 672 StringBuffer result = new StringBuffer(); 673 674 synchronized (result) // speed increase by thread lock pre-allocation 675 { 676 int tab_index = -1; 677 int last_tab_index = 0; 678 int added_chars = 0; 679 int tab_size; 680 while ((tab_index = line.indexOf("\t", last_tab_index)) != -1) 681 { 682 tab_size = tabWidth - ((tab_index + added_chars) % tabWidth); 683 if (0 == tab_size) 684 { 685 tab_size = tabWidth; 686 } 687 added_chars += tab_size - 1; 688 result.append(line.substring(last_tab_index, tab_index)); 689 result.append(StringUtils.repeat(" ", tab_size)); 690 last_tab_index = tab_index + 1; 691 } 692 if (0 == last_tab_index) 693 { 694 return line; 695 } 696 else 697 { 698 result.append(line.substring(last_tab_index)); 699 } 700 } 701 702 return result.toString(); 703 } 704} 705 706