001 /* RuleBasedCollator.java -- Concrete Collator Class 002 Copyright (C) 1998, 1999, 2000, 2001, 2003, 2004, 2005 Free Software Foundation, Inc. 003 004 This file is part of GNU Classpath. 005 006 GNU Classpath is free software; you can redistribute it and/or modify 007 it under the terms of the GNU General Public License as published by 008 the Free Software Foundation; either version 2, or (at your option) 009 any later version. 010 011 GNU Classpath is distributed in the hope that it will be useful, but 012 WITHOUT ANY WARRANTY; without even the implied warranty of 013 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 014 General Public License for more details. 015 016 You should have received a copy of the GNU General Public License 017 along with GNU Classpath; see the file COPYING. If not, write to the 018 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 019 02110-1301 USA. 020 021 Linking this library statically or dynamically with other modules is 022 making a combined work based on this library. Thus, the terms and 023 conditions of the GNU General Public License cover the whole 024 combination. 025 026 As a special exception, the copyright holders of this library give you 027 permission to link this library with independent modules to produce an 028 executable, regardless of the license terms of these independent 029 modules, and to copy and distribute the resulting executable under 030 terms of your choice, provided that you also meet, for each linked 031 independent module, the terms and conditions of the license of that 032 module. An independent module is a module which is not derived from 033 or based on this library. If you modify this library, you may extend 034 this exception to your version of the library, but you are not 035 obligated to do so. If you do not wish to do so, delete this 036 exception statement from your version. */ 037 038 039 package java.text; 040 041 import gnu.classpath.NotImplementedException; 042 043 import java.util.ArrayList; 044 import java.util.HashMap; 045 046 /* Written using "Java Class Libraries", 2nd edition, plus online 047 * API docs for JDK 1.2 from http://www.javasoft.com. 048 * Status: Believed complete and correct 049 */ 050 051 /** 052 * This class is a concrete subclass of <code>Collator</code> suitable 053 * for string collation in a wide variety of languages. An instance of 054 * this class is normally returned by the <code>getInstance</code> method 055 * of <code>Collator</code> with rules predefined for the requested 056 * locale. However, an instance of this class can be created manually 057 * with any desired rules. 058 * <p> 059 * Rules take the form of a <code>String</code> with the following syntax 060 * <ul> 061 * <li> Modifier: '@'</li> 062 * <li> Relation: '<' | ';' | ',' | '=' : <text></li> 063 * <li> Reset: '&' : <text></li> 064 * </ul> 065 * The modifier character indicates that accents sort backward as is the 066 * case with French. The modifier applies to all rules <b>after</b> 067 * the modifier but before the next primary sequence. If placed at the end 068 * of the sequence if applies to all unknown accented character. 069 * The relational operators specify how the text 070 * argument relates to the previous term. The relation characters have 071 * the following meanings: 072 * <ul> 073 * <li>'<' - The text argument is greater than the prior term at the primary 074 * difference level.</li> 075 * <li>';' - The text argument is greater than the prior term at the secondary 076 * difference level.</li> 077 * <li>',' - The text argument is greater than the prior term at the tertiary 078 * difference level.</li> 079 * <li>'=' - The text argument is equal to the prior term</li> 080 * </ul> 081 * <p> 082 * As for the text argument itself, this is any sequence of Unicode 083 * characters not in the following ranges: 0x0009-0x000D, 0x0020-0x002F, 084 * 0x003A-0x0040, 0x005B-0x0060, and 0x007B-0x007E. If these characters are 085 * desired, they must be enclosed in single quotes. If any whitespace is 086 * encountered, it is ignored. (For example, "a b" is equal to "ab"). 087 * <p> 088 * The reset operation inserts the following rule at the point where the 089 * text argument to it exists in the previously declared rule string. This 090 * makes it easy to add new rules to an existing string by simply including 091 * them in a reset sequence at the end. Note that the text argument, or 092 * at least the first character of it, must be present somewhere in the 093 * previously declared rules in order to be inserted properly. If this 094 * is not satisfied, a <code>ParseException</code> will be thrown. 095 * <p> 096 * This system of configuring <code>RuleBasedCollator</code> is needlessly 097 * complex and the people at Taligent who developed it (along with the folks 098 * at Sun who accepted it into the Java standard library) deserve a slow 099 * and agonizing death. 100 * <p> 101 * Here are a couple of example of rule strings: 102 * <p> 103 * "< a < b < c" - This string says that a is greater than b which is 104 * greater than c, with all differences being primary differences. 105 * <p> 106 * "< a,A < b,B < c,C" - This string says that 'A' is greater than 'a' with 107 * a tertiary strength comparison. Both 'b' and 'B' are greater than 'a' and 108 * 'A' during a primary strength comparison. But 'B' is greater than 'b' 109 * under a tertiary strength comparison. 110 * <p> 111 * "< a < c & a < b " - This sequence is identical in function to the 112 * "< a < b < c" rule string above. The '&' reset symbol indicates that 113 * the rule "< b" is to be inserted after the text argument "a" in the 114 * previous rule string segment. 115 * <p> 116 * "< a < b & y < z" - This is an error. The character 'y' does not appear 117 * anywhere in the previous rule string segment so the rule following the 118 * reset rule cannot be inserted. 119 * <p> 120 * "< a & A @ < e & E < f& F" - This sequence is equivalent to the following 121 * "< a & A < E & e < f & F". 122 * <p> 123 * For a description of the various comparison strength types, see the 124 * documentation for the <code>Collator</code> class. 125 * <p> 126 * As an additional complication to this already overly complex rule scheme, 127 * if any characters precede the first rule, these characters are considered 128 * ignorable. They will be treated as if they did not exist during 129 * comparisons. For example, "- < a < b ..." would make '-' an ignorable 130 * character such that the strings "high-tech" and "hightech" would 131 * be considered identical. 132 * <p> 133 * A <code>ParseException</code> will be thrown for any of the following 134 * conditions: 135 * <ul> 136 * <li>Unquoted punctuation characters in a text argument.</li> 137 * <li>A relational or reset operator not followed by a text argument</li> 138 * <li>A reset operator where the text argument is not present in 139 * the previous rule string section.</li> 140 * </ul> 141 * 142 * @author Aaron M. Renn (arenn@urbanophile.com) 143 * @author Tom Tromey (tromey@cygnus.com) 144 * @author Guilhem Lavaux (guilhem@kaffe.org) 145 */ 146 public class RuleBasedCollator extends Collator 147 { 148 /** 149 * This class describes what rank has a character (or a sequence of characters) 150 * in the lexicographic order. Each element in a rule has a collation element. 151 */ 152 static final class CollationElement 153 { 154 String key; 155 int primary; 156 short secondary; 157 short tertiary; 158 short equality; 159 boolean ignore; 160 String expansion; 161 162 CollationElement(String key, int primary, short secondary, short tertiary, 163 short equality, String expansion, boolean ignore) 164 { 165 this.key = key; 166 this.primary = primary; 167 this.secondary = secondary; 168 this.tertiary = tertiary; 169 this.equality = equality; 170 this.ignore = ignore; 171 this.expansion = expansion; 172 } 173 174 int getValue() 175 { 176 return (primary << 16) + (secondary << 8) + tertiary; 177 } 178 } 179 180 /** 181 * Basic collation instruction (internal format) to build the series of 182 * collation elements. It contains an instruction which specifies the new 183 * state of the generator. The sequence of instruction should not contain 184 * RESET (it is used by 185 * {@link #mergeRules(int,java.lang.String,java.util.ArrayList,java.util.ArrayList)}) 186 * as a temporary state while merging two sets of instructions. 187 */ 188 static final class CollationSorter 189 { 190 static final int GREATERP = 0; 191 static final int GREATERS = 1; 192 static final int GREATERT = 2; 193 static final int EQUAL = 3; 194 static final int RESET = 4; 195 static final int INVERSE_SECONDARY = 5; 196 197 int comparisonType; 198 String textElement; 199 int hashText; 200 int offset; 201 boolean ignore; 202 203 String expansionOrdering; 204 } 205 206 /** 207 * This the the original rule string. 208 */ 209 private String rules; 210 211 /** 212 * This is the table of collation element values 213 */ 214 private Object[] ce_table; 215 216 /** 217 * Quick-prefix finder. 218 */ 219 HashMap prefix_tree; 220 221 /** 222 * This is the value of the last sequence entered into 223 * <code>ce_table</code>. It is used to compute the 224 * ordering value of unspecified character. 225 */ 226 private int last_primary_value; 227 228 /** 229 * This is the value of the last secondary sequence of the 230 * primary 0, entered into 231 * <code>ce_table</code>. It is used to compute the 232 * ordering value of an unspecified accented character. 233 */ 234 private int last_tertiary_value; 235 236 /** 237 * This variable is true if accents need to be sorted 238 * in the other direction. 239 */ 240 private boolean inverseAccentComparison; 241 242 /** 243 * This collation element is special to unknown sequence. 244 * The JDK uses it to mark and sort the characters which has 245 * no collation rules. 246 */ 247 static final CollationElement SPECIAL_UNKNOWN_SEQ = 248 new CollationElement("", (short) 32767, (short) 0, (short) 0, 249 (short) 0, null, false); 250 251 /** 252 * This method initializes a new instance of <code>RuleBasedCollator</code> 253 * with the specified collation rules. Note that an application normally 254 * obtains an instance of <code>RuleBasedCollator</code> by calling the 255 * <code>getInstance</code> method of <code>Collator</code>. That method 256 * automatically loads the proper set of rules for the desired locale. 257 * 258 * @param rules The collation rule string. 259 * 260 * @exception ParseException If the rule string contains syntax errors. 261 */ 262 public RuleBasedCollator(String rules) throws ParseException 263 { 264 if (rules.equals("")) 265 throw new ParseException("empty rule set", 0); 266 267 this.rules = rules; 268 269 buildCollationVector(parseString(rules)); 270 buildPrefixAccess(); 271 } 272 273 /** 274 * This method returns the number of common characters at the beginning 275 * of the string of the two parameters. 276 * 277 * @param prefix A string considered as a prefix to test against 278 * the other string. 279 * @param s A string to test the prefix against. 280 * @return The number of common characters. 281 */ 282 static int findPrefixLength(String prefix, String s) 283 { 284 int index; 285 int len = prefix.length(); 286 287 for (index = 0; index < len && index < s.length(); ++index) 288 { 289 if (prefix.charAt(index) != s.charAt(index)) 290 return index; 291 } 292 293 294 return index; 295 } 296 297 /** 298 * Here we are merging two sets of sorting instructions: 'patch' into 'main'. This methods 299 * checks whether it is possible to find an anchor point for the rules to be merged and 300 * then insert them at that precise point. 301 * 302 * @param offset Offset in the string containing rules of the beginning of the rules 303 * being merged in. 304 * @param starter Text of the rules being merged. 305 * @param main Repository of all already parsed rules. 306 * @param patch Rules to be merged into the repository. 307 * @throws ParseException if it is impossible to find an anchor point for the new rules. 308 */ 309 private void mergeRules(int offset, String starter, ArrayList main, ArrayList patch) 310 throws ParseException 311 { 312 int insertion_point = -1; 313 int max_length = 0; 314 315 /* We must check that no rules conflict with another already present. If it 316 * is the case delete the old rule. 317 */ 318 319 /* For the moment good old O(N^2) algorithm. 320 */ 321 for (int i = 0; i < patch.size(); i++) 322 { 323 int j = 0; 324 325 while (j < main.size()) 326 { 327 CollationSorter rule1 = (CollationSorter) patch.get(i); 328 CollationSorter rule2 = (CollationSorter) main.get(j); 329 330 if (rule1.textElement.equals(rule2.textElement)) 331 main.remove(j); 332 else 333 j++; 334 } 335 } 336 337 // Find the insertion point... O(N) 338 for (int i = 0; i < main.size(); i++) 339 { 340 CollationSorter sorter = (CollationSorter) main.get(i); 341 int length = findPrefixLength(starter, sorter.textElement); 342 343 if (length > max_length) 344 { 345 max_length = length; 346 insertion_point = i+1; 347 } 348 } 349 350 if (insertion_point < 0) 351 throw new ParseException("no insertion point found for " + starter, offset); 352 353 if (max_length < starter.length()) 354 { 355 /* 356 * We need to expand the first entry. It must be sorted 357 * like if it was the reference key itself (like the spec 358 * said. So the first entry is special: the element is 359 * replaced by the specified text element for the sorting. 360 * This text replace the old one for comparisons. However 361 * to preserve the behaviour we replace the first key (corresponding 362 * to the found prefix) by a new code rightly ordered in the 363 * sequence. The rest of the subsequence must be appended 364 * to the end of the sequence. 365 */ 366 CollationSorter sorter = (CollationSorter) patch.get(0); 367 CollationSorter expansionPrefix = 368 (CollationSorter) main.get(insertion_point-1); 369 370 sorter.expansionOrdering = starter.substring(max_length); // Skip the first good prefix element 371 372 main.add(insertion_point, sorter); 373 374 /* 375 * This is a new set of rules. Append to the list. 376 */ 377 patch.remove(0); 378 insertion_point++; 379 } 380 381 // Now insert all elements of patch at the insertion point. 382 for (int i = 0; i < patch.size(); i++) 383 main.add(i+insertion_point, patch.get(i)); 384 } 385 386 /** 387 * This method parses a string and build a set of sorting instructions. The parsing 388 * may only be partial on the case the rules are to be merged sometime later. 389 * 390 * @param stop_on_reset If this parameter is true then the parser stops when it 391 * encounters a reset instruction. In the other case, it tries to parse the subrules 392 * and merged it in the same repository. 393 * @param v Output vector for the set of instructions. 394 * @param base_offset Offset in the string to begin parsing. 395 * @param rules Rules to be parsed. 396 * @return -1 if the parser reached the end of the string, an integer representing the 397 * offset in the string at which it stopped parsing. 398 * @throws ParseException if something turned wrong during the parsing. To get details 399 * decode the message. 400 */ 401 private int subParseString(boolean stop_on_reset, ArrayList v, 402 int base_offset, String rules) 403 throws ParseException 404 { 405 boolean ignoreChars = (base_offset == 0); 406 int operator = -1; 407 StringBuffer sb = new StringBuffer(); 408 boolean doubleQuote = false; 409 boolean eatingChars = false; 410 boolean nextIsModifier = false; 411 boolean isModifier = false; 412 int i; 413 414 main_parse_loop: 415 for (i = 0; i < rules.length(); i++) 416 { 417 char c = rules.charAt(i); 418 int type = -1; 419 420 if (!eatingChars && 421 ((c >= 0x09 && c <= 0x0D) || (c == 0x20))) 422 continue; 423 424 isModifier = nextIsModifier; 425 nextIsModifier = false; 426 427 if (eatingChars && c != '\'') 428 { 429 doubleQuote = false; 430 sb.append(c); 431 continue; 432 } 433 if (doubleQuote && eatingChars) 434 { 435 sb.append(c); 436 doubleQuote = false; 437 continue; 438 } 439 440 switch (c) 441 { 442 case '!': 443 throw new ParseException 444 ("Modifier '!' is not yet supported by Classpath", i + base_offset); 445 case '<': 446 type = CollationSorter.GREATERP; 447 break; 448 case ';': 449 type = CollationSorter.GREATERS; 450 break; 451 case ',': 452 type = CollationSorter.GREATERT; 453 break; 454 case '=': 455 type = CollationSorter.EQUAL; 456 break; 457 case '\'': 458 eatingChars = !eatingChars; 459 doubleQuote = true; 460 break; 461 case '@': 462 if (ignoreChars) 463 throw new ParseException 464 ("comparison list has not yet been started. You may only use" 465 + "(<,;=&)", i + base_offset); 466 // Inverse the order of secondaries from now on. 467 nextIsModifier = true; 468 type = CollationSorter.INVERSE_SECONDARY; 469 break; 470 case '&': 471 type = CollationSorter.RESET; 472 if (stop_on_reset) 473 break main_parse_loop; 474 break; 475 default: 476 if (operator < 0) 477 throw new ParseException 478 ("operator missing at " + (i + base_offset), i + base_offset); 479 if (! eatingChars 480 && ((c >= 0x21 && c <= 0x2F) 481 || (c >= 0x3A && c <= 0x40) 482 || (c >= 0x5B && c <= 0x60) 483 || (c >= 0x7B && c <= 0x7E))) 484 throw new ParseException 485 ("unquoted punctuation character '" + c + "'", i + base_offset); 486 487 //type = ignoreChars ? CollationSorter.IGNORE : -1; 488 sb.append(c); 489 break; 490 } 491 492 if (type < 0) 493 continue; 494 495 if (operator < 0) 496 { 497 operator = type; 498 continue; 499 } 500 501 if (sb.length() == 0 && !isModifier) 502 throw new ParseException 503 ("text element empty at " + (i+base_offset), i+base_offset); 504 505 if (operator == CollationSorter.RESET) 506 { 507 /* Reposition in the sorting list at the position 508 * indicated by the text element. 509 */ 510 String subrules = rules.substring(i); 511 ArrayList sorted_rules = new ArrayList(); 512 int idx; 513 514 // Parse the subrules but do not iterate through all 515 // sublist. This is the privilege of the first call. 516 idx = subParseString(true, sorted_rules, base_offset+i, subrules); 517 518 // Merge new parsed rules into the list. 519 mergeRules(base_offset+i, sb.toString(), v, sorted_rules); 520 sb.setLength(0); 521 522 // Reset state to none. 523 operator = -1; 524 type = -1; 525 // We have found a new subrule at 'idx' but it has not been parsed. 526 if (idx >= 0) 527 { 528 i += idx-1; 529 continue main_parse_loop; 530 } 531 else 532 // No more rules. 533 break main_parse_loop; 534 } 535 536 CollationSorter sorter = new CollationSorter(); 537 538 if (operator == CollationSorter.GREATERP) 539 ignoreChars = false; 540 541 sorter.comparisonType = operator; 542 sorter.textElement = sb.toString(); 543 sorter.hashText = sorter.textElement.hashCode(); 544 sorter.offset = base_offset+rules.length(); 545 sorter.ignore = ignoreChars; 546 sb.setLength(0); 547 548 v.add(sorter); 549 operator = type; 550 } 551 552 if (operator >= 0) 553 { 554 CollationSorter sorter = new CollationSorter(); 555 int pos = rules.length() + base_offset; 556 557 if ((sb.length() != 0 && nextIsModifier) 558 || (sb.length() == 0 && !nextIsModifier && !eatingChars)) 559 throw new ParseException("text element empty at " + pos, pos); 560 561 if (operator == CollationSorter.GREATERP) 562 ignoreChars = false; 563 564 sorter.comparisonType = operator; 565 sorter.textElement = sb.toString(); 566 sorter.hashText = sorter.textElement.hashCode(); 567 sorter.offset = base_offset+pos; 568 sorter.ignore = ignoreChars; 569 v.add(sorter); 570 } 571 572 if (i == rules.length()) 573 return -1; 574 else 575 return i; 576 } 577 578 /** 579 * This method creates a copy of this object. 580 * 581 * @return A copy of this object. 582 */ 583 public Object clone() 584 { 585 return super.clone(); 586 } 587 588 /** 589 * This method completely parses a string 'rules' containing sorting rules. 590 * 591 * @param rules String containing the rules to be parsed. 592 * @return A set of sorting instructions stored in a Vector. 593 * @throws ParseException if something turned wrong during the parsing. To get details 594 * decode the message. 595 */ 596 private ArrayList parseString(String rules) 597 throws ParseException 598 { 599 ArrayList v = new ArrayList(); 600 601 // result of the first subParseString is not absolute (may be -1 or a 602 // positive integer). But we do not care. 603 subParseString(false, v, 0, rules); 604 605 return v; 606 } 607 608 /** 609 * This method uses the sorting instructions built by {@link #parseString} 610 * to build collation elements which can be directly used to sort strings. 611 * 612 * @param parsedElements Parsed instructions stored in a ArrayList. 613 * @throws ParseException if the order of the instructions are not valid. 614 */ 615 private void buildCollationVector(ArrayList parsedElements) 616 throws ParseException 617 { 618 int primary_seq = 0; 619 int last_tertiary_seq = 0; 620 short secondary_seq = 0; 621 short tertiary_seq = 0; 622 short equality_seq = 0; 623 boolean inverseComparisons = false; 624 final boolean DECREASING = false; 625 final boolean INCREASING = true; 626 boolean secondaryType = INCREASING; 627 ArrayList v = new ArrayList(); 628 629 // elts is completely sorted. 630 element_loop: 631 for (int i = 0; i < parsedElements.size(); i++) 632 { 633 CollationSorter elt = (CollationSorter) parsedElements.get(i); 634 boolean ignoreChar = false; 635 636 switch (elt.comparisonType) 637 { 638 case CollationSorter.GREATERP: 639 primary_seq++; 640 if (inverseComparisons) 641 { 642 secondary_seq = Short.MAX_VALUE; 643 secondaryType = DECREASING; 644 } 645 else 646 { 647 secondary_seq = 0; 648 secondaryType = INCREASING; 649 } 650 tertiary_seq = 0; 651 equality_seq = 0; 652 inverseComparisons = false; 653 break; 654 case CollationSorter.GREATERS: 655 if (secondaryType == DECREASING) 656 secondary_seq--; 657 else 658 secondary_seq++; 659 tertiary_seq = 0; 660 equality_seq = 0; 661 break; 662 case CollationSorter.INVERSE_SECONDARY: 663 inverseComparisons = true; 664 continue element_loop; 665 case CollationSorter.GREATERT: 666 tertiary_seq++; 667 if (primary_seq == 0) 668 last_tertiary_seq = tertiary_seq; 669 equality_seq = 0; 670 break; 671 case CollationSorter.EQUAL: 672 equality_seq++; 673 break; 674 case CollationSorter.RESET: 675 throw new ParseException 676 ("Invalid reached state 'RESET'. Internal error", elt.offset); 677 default: 678 throw new ParseException 679 ("Invalid unknown state '" + elt.comparisonType + "'", elt.offset); 680 } 681 682 v.add(new CollationElement(elt.textElement, primary_seq, 683 secondary_seq, tertiary_seq, 684 equality_seq, elt.expansionOrdering, elt.ignore)); 685 } 686 687 this.inverseAccentComparison = inverseComparisons; 688 689 ce_table = v.toArray(); 690 691 last_primary_value = primary_seq+1; 692 last_tertiary_value = last_tertiary_seq+1; 693 } 694 695 /** 696 * Build a tree where all keys are the texts of collation elements and data is 697 * the collation element itself. The tree is used when extracting all prefix 698 * for a given text. 699 */ 700 private void buildPrefixAccess() 701 { 702 prefix_tree = new HashMap(); 703 704 for (int i = 0; i < ce_table.length; i++) 705 { 706 CollationElement e = (CollationElement) ce_table[i]; 707 708 prefix_tree.put(e.key, e); 709 } 710 } 711 712 /** 713 * This method returns an integer which indicates whether the first 714 * specified <code>String</code> is less than, greater than, or equal to 715 * the second. The value depends not only on the collation rules in 716 * effect, but also the strength and decomposition settings of this object. 717 * 718 * @param source The first <code>String</code> to compare. 719 * @param target A second <code>String</code> to compare to the first. 720 * 721 * @return A negative integer if source < target, a positive integer 722 * if source > target, or 0 if source == target. 723 */ 724 public int compare(String source, String target) 725 { 726 CollationElementIterator cs, ct; 727 CollationElement ord1block = null; 728 CollationElement ord2block = null; 729 boolean advance_block_1 = true; 730 boolean advance_block_2 = true; 731 732 cs = getCollationElementIterator(source); 733 ct = getCollationElementIterator(target); 734 735 for(;;) 736 { 737 int ord1; 738 int ord2; 739 740 /* 741 * We have to check whether the characters are ignorable. 742 * If it is the case then forget them. 743 */ 744 if (advance_block_1) 745 { 746 ord1block = cs.nextBlock(); 747 if (ord1block != null && ord1block.ignore) 748 continue; 749 } 750 751 if (advance_block_2) 752 { 753 ord2block = ct.nextBlock(); 754 if (ord2block != null && ord2block.ignore) 755 { 756 advance_block_1 = false; 757 continue; 758 } 759 } 760 else 761 advance_block_2 = true; 762 763 if (!advance_block_1) 764 advance_block_1 = true; 765 766 if (ord1block != null) 767 ord1 = ord1block.getValue(); 768 else 769 { 770 if (ord2block == null) 771 return 0; 772 return -1; 773 } 774 775 if (ord2block == null) 776 return 1; 777 778 ord2 = ord2block.getValue(); 779 780 // We know chars are totally equal, so skip 781 if (ord1 == ord2) 782 { 783 if (getStrength() == IDENTICAL) 784 if (!ord1block.key.equals(ord2block.key)) 785 return ord1block.key.compareTo(ord2block.key); 786 continue; 787 } 788 789 // Check for primary strength differences 790 int prim1 = CollationElementIterator.primaryOrder(ord1); 791 int prim2 = CollationElementIterator.primaryOrder(ord2); 792 793 if (prim1 == 0 && getStrength() < TERTIARY) 794 { 795 advance_block_2 = false; 796 continue; 797 } 798 else if (prim2 == 0 && getStrength() < TERTIARY) 799 { 800 advance_block_1 = false; 801 continue; 802 } 803 804 if (prim1 < prim2) 805 return -1; 806 else if (prim1 > prim2) 807 return 1; 808 else if (getStrength() == PRIMARY) 809 continue; 810 811 // Check for secondary strength differences 812 int sec1 = CollationElementIterator.secondaryOrder(ord1); 813 int sec2 = CollationElementIterator.secondaryOrder(ord2); 814 815 if (sec1 < sec2) 816 return -1; 817 else if (sec1 > sec2) 818 return 1; 819 else if (getStrength() == SECONDARY) 820 continue; 821 822 // Check for tertiary differences 823 int tert1 = CollationElementIterator.tertiaryOrder(ord1); 824 int tert2 = CollationElementIterator.tertiaryOrder(ord2); 825 826 if (tert1 < tert2) 827 return -1; 828 else if (tert1 > tert2) 829 return 1; 830 else if (getStrength() == TERTIARY) 831 continue; 832 833 // Apparently JDK does this (at least for my test case). 834 return ord1block.key.compareTo(ord2block.key); 835 } 836 } 837 838 /** 839 * This method tests this object for equality against the specified 840 * object. This will be true if and only if the specified object is 841 * another reference to this object. 842 * 843 * @param obj The <code>Object</code> to compare against this object. 844 * 845 * @return <code>true</code> if the specified object is equal to this object, 846 * <code>false</code> otherwise. 847 */ 848 public boolean equals(Object obj) 849 { 850 if (obj == this) 851 return true; 852 else 853 return false; 854 } 855 856 /** 857 * This method builds a default collation element without invoking 858 * the database created from the rules passed to the constructor. 859 * 860 * @param c Character which needs a collation element. 861 * @return A valid brand new CollationElement instance. 862 */ 863 CollationElement getDefaultElement(char c) 864 { 865 int v; 866 867 // Preliminary support for generic accent sorting inversion (I don't know if all 868 // characters in the range should be sorted backward). This is the place 869 // to fix this if needed. 870 if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361)) 871 v = 0x0361 - ((int) c - 0x02B9); 872 else 873 v = (short) c; 874 return new CollationElement("" + c, last_primary_value + v, 875 (short) 0, (short) 0, (short) 0, null, false); 876 } 877 878 /** 879 * This method builds a default collation element for an accented character 880 * without invoking the database created from the rules passed to the constructor. 881 * 882 * @param c Character which needs a collation element. 883 * @return A valid brand new CollationElement instance. 884 */ 885 CollationElement getDefaultAccentedElement(char c) 886 { 887 int v; 888 889 // Preliminary support for generic accent sorting inversion (I don't know if all 890 // characters in the range should be sorted backward). This is the place 891 // to fix this if needed. 892 if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361)) 893 v = 0x0361 - ((int) c - 0x02B9); 894 else 895 v = (short) c; 896 return new CollationElement("" + c, (short) 0, 897 (short) 0, (short) (last_tertiary_value + v), (short) 0, null, false); 898 } 899 900 /** 901 * This method returns an instance for <code>CollationElementIterator</code> 902 * for the specified <code>String</code> under the collation rules for this 903 * object. 904 * 905 * @param source The <code>String</code> to return the 906 * <code>CollationElementIterator</code> instance for. 907 * 908 * @return A <code>CollationElementIterator</code> for the specified 909 * <code>String</code>. 910 */ 911 public CollationElementIterator getCollationElementIterator(String source) 912 { 913 return new CollationElementIterator(this, source); 914 } 915 916 /** 917 * This method returns an instance of <code>CollationElementIterator</code> 918 * for the <code>String</code> represented by the specified 919 * <code>CharacterIterator</code>. 920 * 921 * @param source The <code>CharacterIterator</code> with the desired <code>String</code>. 922 * 923 * @return A <code>CollationElementIterator</code> for the specified <code>String</code>. 924 */ 925 public CollationElementIterator getCollationElementIterator(CharacterIterator source) 926 throws NotImplementedException // Because decomposeCharacter does not work 927 { 928 StringBuffer expand = new StringBuffer(""); 929 930 // Right now we assume that we will read from the beginning of the string. 931 for (char c = source.first(); 932 c != CharacterIterator.DONE; 933 c = source.next()) 934 decomposeCharacter(c, expand); 935 936 return getCollationElementIterator(expand.toString()); 937 } 938 939 /** 940 * This method returns an instance of <code>CollationKey</code> for the 941 * specified <code>String</code>. The object returned will have a 942 * more efficient mechanism for its comparison function that could 943 * provide speed benefits if multiple comparisons are performed, such 944 * as during a sort. 945 * 946 * @param source The <code>String</code> to create a <code>CollationKey</code> for. 947 * 948 * @return A <code>CollationKey</code> for the specified <code>String</code>. 949 */ 950 public CollationKey getCollationKey(String source) 951 { 952 CollationElementIterator cei = getCollationElementIterator(source); 953 ArrayList vect = new ArrayList(); 954 955 int ord = cei.next(); 956 cei.reset(); //set to start of string 957 958 while (ord != CollationElementIterator.NULLORDER) 959 { 960 // If the primary order is null, it means this is an ignorable 961 // character. 962 if (CollationElementIterator.primaryOrder(ord) == 0) 963 { 964 ord = cei.next(); 965 continue; 966 } 967 switch (getStrength()) 968 { 969 case PRIMARY: 970 ord = CollationElementIterator.primaryOrder(ord); 971 break; 972 973 case SECONDARY: 974 ord = CollationElementIterator.primaryOrder(ord) << 8; 975 ord |= CollationElementIterator.secondaryOrder(ord); 976 977 default: 978 break; 979 } 980 981 vect.add(new Integer(ord)); 982 ord = cei.next(); //increment to next key 983 } 984 985 Object[] objarr = vect.toArray(); 986 byte[] key = new byte[objarr.length * 4]; 987 988 for (int i = 0; i < objarr.length; i++) 989 { 990 int j = ((Integer) objarr[i]).intValue(); 991 key [i * 4] = (byte) ((j & 0xFF000000) >> 24); 992 key [i * 4 + 1] = (byte) ((j & 0x00FF0000) >> 16); 993 key [i * 4 + 2] = (byte) ((j & 0x0000FF00) >> 8); 994 key [i * 4 + 3] = (byte) (j & 0x000000FF); 995 } 996 997 return new CollationKey(this, source, key); 998 } 999 1000 /** 1001 * This method returns a <code>String</code> containing the collation rules 1002 * for this object. 1003 * 1004 * @return The collation rules for this object. 1005 */ 1006 public String getRules() 1007 { 1008 return rules; 1009 } 1010 1011 /** 1012 * This method returns a hash value for this object. 1013 * 1014 * @return A hash value for this object. 1015 */ 1016 public int hashCode() 1017 { 1018 return System.identityHashCode(this); 1019 } 1020 }