001    /* RuleBasedCollator.java -- Concrete Collator Class
002       Copyright (C) 1998, 1999, 2000, 2001, 2003, 2004, 2005  Free Software Foundation, Inc.
003    
004    This file is part of GNU Classpath.
005    
006    GNU Classpath is free software; you can redistribute it and/or modify
007    it under the terms of the GNU General Public License as published by
008    the Free Software Foundation; either version 2, or (at your option)
009    any later version.
010     
011    GNU Classpath is distributed in the hope that it will be useful, but
012    WITHOUT ANY WARRANTY; without even the implied warranty of
013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014    General Public License for more details.
015    
016    You should have received a copy of the GNU General Public License
017    along with GNU Classpath; see the file COPYING.  If not, write to the
018    Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
019    02110-1301 USA.
020    
021    Linking this library statically or dynamically with other modules is
022    making a combined work based on this library.  Thus, the terms and
023    conditions of the GNU General Public License cover the whole
024    combination.
025    
026    As a special exception, the copyright holders of this library give you
027    permission to link this library with independent modules to produce an
028    executable, regardless of the license terms of these independent
029    modules, and to copy and distribute the resulting executable under
030    terms of your choice, provided that you also meet, for each linked
031    independent module, the terms and conditions of the license of that
032    module.  An independent module is a module which is not derived from
033    or based on this library.  If you modify this library, you may extend
034    this exception to your version of the library, but you are not
035    obligated to do so.  If you do not wish to do so, delete this
036    exception statement from your version. */
037    
038    
039    package java.text;
040    
041    import gnu.classpath.NotImplementedException;
042    
043    import java.util.ArrayList;
044    import java.util.HashMap;
045    
046    /* Written using "Java Class Libraries", 2nd edition, plus online
047     * API docs for JDK 1.2 from http://www.javasoft.com.
048     * Status: Believed complete and correct
049     */
050    
051    /**
052     * This class is a concrete subclass of <code>Collator</code> suitable
053     * for string collation in a wide variety of languages.  An instance of
054     * this class is normally returned by the <code>getInstance</code> method
055     * of <code>Collator</code> with rules predefined for the requested
056     * locale.  However, an instance of this class can be created manually
057     * with any desired rules.
058     * <p>
059     * Rules take the form of a <code>String</code> with the following syntax
060     * <ul>
061     * <li> Modifier: '@'</li>
062     * <li> Relation: '&lt;' | ';' | ',' | '=' : &lt;text&gt;</li>
063     * <li> Reset: '&amp;' : &lt;text&gt;</li>
064     * </ul>
065     * The modifier character indicates that accents sort backward as is the
066     * case with French.  The modifier applies to all rules <b>after</b>
067     * the modifier but before the next primary sequence. If placed at the end
068     * of the sequence if applies to all unknown accented character.
069     * The relational operators specify how the text 
070     * argument relates to the previous term.  The relation characters have
071     * the following meanings:
072     * <ul>
073     * <li>'&lt;' - The text argument is greater than the prior term at the primary
074     * difference level.</li>
075     * <li>';' - The text argument is greater than the prior term at the secondary
076     * difference level.</li>
077     * <li>',' - The text argument is greater than the prior term at the tertiary
078     * difference level.</li>
079     * <li>'=' - The text argument is equal to the prior term</li>
080     * </ul>
081     * <p>
082     * As for the text argument itself, this is any sequence of Unicode
083     * characters not in the following ranges: 0x0009-0x000D, 0x0020-0x002F,
084     * 0x003A-0x0040, 0x005B-0x0060, and 0x007B-0x007E. If these characters are 
085     * desired, they must be enclosed in single quotes.  If any whitespace is 
086     * encountered, it is ignored.  (For example, "a b" is equal to "ab").  
087     * <p>
088     * The reset operation inserts the following rule at the point where the
089     * text argument to it exists in the previously declared rule string.  This
090     * makes it easy to add new rules to an existing string by simply including
091     * them in a reset sequence at the end.  Note that the text argument, or
092     * at least the first character of it, must be present somewhere in the
093     * previously declared rules in order to be inserted properly.  If this
094     * is not satisfied, a <code>ParseException</code> will be thrown. 
095     * <p>
096     * This system of configuring <code>RuleBasedCollator</code> is needlessly
097     * complex and the people at Taligent who developed it (along with the folks
098     * at Sun who accepted it into the Java standard library) deserve a slow
099     * and agonizing death.
100     * <p>
101     * Here are a couple of example of rule strings:
102     * <p>
103     * "&lt; a &lt; b &lt; c" - This string says that a is greater than b which is 
104     * greater than c, with all differences being primary differences.
105     * <p>
106     * "&lt; a,A &lt; b,B &lt; c,C" - This string says that 'A' is greater than 'a' with
107     * a tertiary strength comparison.  Both 'b' and 'B' are greater than 'a' and
108     * 'A' during a primary strength comparison.  But 'B' is greater than 'b'
109     * under a tertiary strength comparison.
110     * <p>
111     * "&lt; a &lt; c &amp; a &lt; b " - This sequence is identical in function to the 
112     * "&lt; a &lt; b &lt; c" rule string above.  The '&amp;' reset symbol indicates that
113     * the rule "&lt; b" is to be inserted after the text argument "a" in the
114     * previous rule string segment.
115     * <p>
116     * "&lt; a &lt; b &amp; y &lt; z" - This is an error.  The character 'y' does not appear
117     * anywhere in the previous rule string segment so the rule following the
118     * reset rule cannot be inserted.
119     * <p>
120     * "&lt; a &amp; A @ &lt; e &amp; E &lt; f&amp; F" - This sequence is equivalent to the following
121     * "&lt; a &amp; A &lt; E &amp; e &lt; f &amp; F".
122     * <p>
123     * For a description of the various comparison strength types, see the
124     * documentation for the <code>Collator</code> class.
125     * <p>
126     * As an additional complication to this already overly complex rule scheme,
127     * if any characters precede the first rule, these characters are considered
128     * ignorable.  They will be treated as if they did not exist during 
129     * comparisons.  For example, "- &lt; a &lt; b ..." would make '-' an ignorable
130     * character such that the strings "high-tech" and "hightech" would
131     * be considered identical.
132     * <p>
133     * A <code>ParseException</code> will be thrown for any of the following
134     * conditions:
135     * <ul>
136     * <li>Unquoted punctuation characters in a text argument.</li>
137     * <li>A relational or reset operator not followed by a text argument</li>
138     * <li>A reset operator where the text argument is not present in
139     * the previous rule string section.</li>
140     * </ul>
141     *
142     * @author Aaron M. Renn (arenn@urbanophile.com)
143     * @author Tom Tromey (tromey@cygnus.com)
144     * @author Guilhem Lavaux (guilhem@kaffe.org)
145     */
146    public class RuleBasedCollator extends Collator
147    {
148      /**
149       * This class describes what rank has a character (or a sequence of characters) 
150       * in the lexicographic order. Each element in a rule has a collation element.
151       */
152      static final class CollationElement
153      {
154        String key;
155        int primary;
156        short secondary;
157        short tertiary;
158        short equality;
159        boolean ignore;
160        String expansion;
161    
162        CollationElement(String key, int primary, short secondary, short tertiary,
163                         short equality, String expansion, boolean ignore)
164        {
165          this.key = key;
166          this.primary = primary;
167          this.secondary = secondary;
168          this.tertiary = tertiary;
169          this.equality = equality;
170          this.ignore = ignore;
171          this.expansion = expansion;
172        }
173    
174        int getValue()
175        {
176          return (primary << 16) + (secondary << 8) + tertiary;
177        }
178      }
179    
180      /**
181       * Basic collation instruction (internal format) to build the series of
182       * collation elements. It contains an instruction which specifies the new
183       * state of the generator. The sequence of instruction should not contain
184       * RESET (it is used by
185       * {@link #mergeRules(int,java.lang.String,java.util.ArrayList,java.util.ArrayList)})
186       * as a temporary state while merging two sets of instructions.
187       */
188      static final class CollationSorter
189      {
190        static final int GREATERP = 0;
191        static final int GREATERS = 1;
192        static final int GREATERT = 2;
193        static final int EQUAL = 3;
194        static final int RESET = 4;
195        static final int INVERSE_SECONDARY = 5;
196        
197        int comparisonType;
198        String textElement;
199        int hashText;
200        int offset;
201        boolean ignore;
202    
203        String expansionOrdering;
204      }
205    
206      /**
207       * This the the original rule string.
208       */
209      private String rules;
210    
211      /**
212       * This is the table of collation element values
213       */
214      private Object[] ce_table;
215    
216      /**
217       * Quick-prefix finder.
218       */
219      HashMap prefix_tree;
220    
221      /**
222       * This is the value of the last sequence entered into
223       * <code>ce_table</code>. It is used to compute the
224       * ordering value of unspecified character.
225       */
226      private int last_primary_value;
227    
228      /**
229       * This is the value of the last secondary sequence of the
230       * primary 0, entered into
231       * <code>ce_table</code>. It is used to compute the
232       * ordering value of an unspecified accented character.
233       */
234      private int last_tertiary_value;
235    
236      /**
237       * This variable is true if accents need to be sorted
238       * in the other direction.
239       */
240      private boolean inverseAccentComparison;
241    
242      /**
243       * This collation element is special to unknown sequence.
244       * The JDK uses it to mark and sort the characters which has
245       * no collation rules.
246       */
247      static final CollationElement SPECIAL_UNKNOWN_SEQ = 
248        new CollationElement("", (short) 32767, (short) 0, (short) 0,
249                             (short) 0, null, false);
250      
251      /**
252       * This method initializes a new instance of <code>RuleBasedCollator</code>
253       * with the specified collation rules.  Note that an application normally
254       * obtains an instance of <code>RuleBasedCollator</code> by calling the
255       * <code>getInstance</code> method of <code>Collator</code>.  That method
256       * automatically loads the proper set of rules for the desired locale.
257       *
258       * @param rules The collation rule string.
259       *
260       * @exception ParseException If the rule string contains syntax errors.
261       */
262      public RuleBasedCollator(String rules) throws ParseException
263      {
264        if (rules.equals(""))
265          throw new ParseException("empty rule set", 0);
266        
267        this.rules = rules;
268    
269        buildCollationVector(parseString(rules));
270        buildPrefixAccess();
271      }
272    
273      /**
274       * This method returns the number of common characters at the beginning
275       * of the string of the two parameters.
276       *
277       * @param prefix A string considered as a prefix to test against
278       * the other string.
279       * @param s A string to test the prefix against.
280       * @return The number of common characters.
281       */
282      static int findPrefixLength(String prefix, String s)
283      {
284        int index;
285        int len = prefix.length();
286    
287        for (index = 0; index < len && index < s.length(); ++index)
288          {
289            if (prefix.charAt(index) != s.charAt(index))
290              return index;
291          }
292    
293    
294        return index;
295      }
296    
297      /**
298       * Here we are merging two sets of sorting instructions: 'patch' into 'main'. This methods
299       * checks whether it is possible to find an anchor point for the rules to be merged and
300       * then insert them at that precise point.
301       *
302       * @param offset Offset in the string containing rules of the beginning of the rules
303       * being merged in.
304       * @param starter Text of the rules being merged.
305       * @param main Repository of all already parsed rules.
306       * @param patch Rules to be merged into the repository.
307       * @throws ParseException if it is impossible to find an anchor point for the new rules.
308       */
309      private void mergeRules(int offset, String starter, ArrayList main, ArrayList patch)
310        throws ParseException 
311      {
312        int insertion_point = -1;
313        int max_length = 0;
314        
315        /* We must check that no rules conflict with another already present. If it
316         * is the case delete the old rule. 
317         */
318        
319        /* For the moment good old O(N^2) algorithm.
320         */
321        for (int i = 0; i < patch.size(); i++)
322          {
323            int j = 0;
324            
325            while (j < main.size())
326              {
327                CollationSorter rule1 = (CollationSorter) patch.get(i);
328                CollationSorter rule2 = (CollationSorter) main.get(j);
329                
330                if (rule1.textElement.equals(rule2.textElement))
331                  main.remove(j);
332                else
333                  j++;
334              }
335          }
336    
337        // Find the insertion point... O(N)
338        for (int i = 0; i < main.size(); i++)
339          {
340            CollationSorter sorter = (CollationSorter) main.get(i);
341            int length = findPrefixLength(starter, sorter.textElement);
342                    
343            if (length > max_length)
344              {
345                max_length = length;
346                insertion_point = i+1;
347              }
348          }
349    
350        if (insertion_point < 0)
351          throw new ParseException("no insertion point found for " + starter, offset);
352    
353        if (max_length < starter.length())
354          {
355            /*
356             * We need to expand the first entry. It must be sorted
357             * like if it was the reference key itself (like the spec
358             * said. So the first entry is special: the element is
359             * replaced by the specified text element for the sorting.
360             * This text replace the old one for comparisons. However
361             * to preserve the behaviour we replace the first key (corresponding
362             * to the found prefix) by a new code rightly ordered in the
363             * sequence. The rest of the subsequence must be appended
364             * to the end of the sequence.
365             */
366            CollationSorter sorter = (CollationSorter) patch.get(0);
367            CollationSorter expansionPrefix =
368              (CollationSorter) main.get(insertion_point-1);
369            
370            sorter.expansionOrdering = starter.substring(max_length); // Skip the first good prefix element
371                    
372            main.add(insertion_point, sorter);
373            
374            /*
375             * This is a new set of rules. Append to the list.
376             */
377            patch.remove(0);
378            insertion_point++;
379          }
380    
381        // Now insert all elements of patch at the insertion point.
382        for (int i = 0; i < patch.size(); i++)
383          main.add(i+insertion_point, patch.get(i));
384      }
385    
386      /**
387       * This method parses a string and build a set of sorting instructions. The parsing
388       * may only be partial on the case the rules are to be merged sometime later.
389       * 
390       * @param stop_on_reset If this parameter is true then the parser stops when it
391       * encounters a reset instruction. In the other case, it tries to parse the subrules
392       * and merged it in the same repository.
393       * @param v Output vector for the set of instructions.
394       * @param base_offset Offset in the string to begin parsing.
395       * @param rules Rules to be parsed.
396       * @return -1 if the parser reached the end of the string, an integer representing the
397       * offset in the string at which it stopped parsing. 
398       * @throws ParseException if something turned wrong during the parsing. To get details
399       * decode the message.
400       */
401      private int subParseString(boolean stop_on_reset, ArrayList v,
402                                 int base_offset, String rules)
403        throws ParseException
404      {
405        boolean ignoreChars = (base_offset == 0);
406        int operator = -1;
407        StringBuffer sb = new StringBuffer();
408        boolean doubleQuote = false;
409        boolean eatingChars = false;
410        boolean nextIsModifier = false;
411        boolean isModifier = false;
412        int i;
413        
414    main_parse_loop:
415        for (i = 0; i < rules.length(); i++)
416          {
417            char c = rules.charAt(i);
418            int type = -1;
419            
420            if (!eatingChars &&
421                ((c >= 0x09 && c <= 0x0D) || (c == 0x20)))
422                  continue;
423    
424            isModifier = nextIsModifier;
425            nextIsModifier = false;
426    
427            if (eatingChars && c != '\'')
428              {
429                doubleQuote = false;
430                sb.append(c);
431                continue;
432              }
433            if (doubleQuote && eatingChars)
434              {
435                sb.append(c);
436                doubleQuote = false;
437                continue;
438              }
439    
440            switch (c)
441              {
442              case '!':
443                throw new ParseException
444                  ("Modifier '!' is not yet supported by Classpath", i + base_offset);
445              case '<':
446                type = CollationSorter.GREATERP;
447                break;
448              case ';':
449                type = CollationSorter.GREATERS;
450                break;
451              case ',':
452                type = CollationSorter.GREATERT;
453                break;
454              case '=':
455                type = CollationSorter.EQUAL;
456                break;
457              case '\'':
458                eatingChars = !eatingChars;
459                doubleQuote = true;
460                break;
461              case '@':
462                if (ignoreChars)
463                  throw new ParseException
464                    ("comparison list has not yet been started. You may only use"
465                     + "(<,;=&)", i + base_offset);
466                // Inverse the order of secondaries from now on.
467                nextIsModifier = true;
468                type = CollationSorter.INVERSE_SECONDARY;
469                break;
470              case '&':
471                type = CollationSorter.RESET;
472                if (stop_on_reset)
473                  break main_parse_loop;
474                break;
475              default:
476                if (operator < 0)
477                  throw new ParseException
478                    ("operator missing at " + (i + base_offset), i + base_offset);
479                if (! eatingChars
480                    && ((c >= 0x21 && c <= 0x2F) 
481                        || (c >= 0x3A && c <= 0x40)
482                        || (c >= 0x5B && c <= 0x60)
483                        || (c >= 0x7B && c <= 0x7E)))
484                  throw new ParseException
485                    ("unquoted punctuation character '" + c + "'", i + base_offset);
486    
487                //type = ignoreChars ? CollationSorter.IGNORE : -1;
488                sb.append(c);
489                break;
490              }
491    
492            if (type  < 0)
493              continue;
494    
495            if (operator < 0)
496              {
497                operator = type;
498                continue;
499              }
500    
501            if (sb.length() == 0 && !isModifier)
502              throw new ParseException
503                ("text element empty at " + (i+base_offset), i+base_offset);
504    
505            if (operator == CollationSorter.RESET)
506              {
507                /* Reposition in the sorting list at the position
508                 * indicated by the text element.
509                 */
510                String subrules = rules.substring(i);
511                ArrayList sorted_rules = new ArrayList();
512                int idx;
513    
514                // Parse the subrules but do not iterate through all
515                // sublist. This is the privilege of the first call.
516                idx = subParseString(true, sorted_rules, base_offset+i, subrules);
517        
518                // Merge new parsed rules into the list.
519                mergeRules(base_offset+i, sb.toString(), v, sorted_rules);
520                sb.setLength(0);
521                
522                // Reset state to none.
523                operator = -1;
524                type = -1;
525                // We have found a new subrule at 'idx' but it has not been parsed.
526                if (idx >= 0)
527                  {
528                    i += idx-1;
529                    continue main_parse_loop;
530                  }
531                else
532                    // No more rules.
533                    break main_parse_loop;
534              }
535    
536            CollationSorter sorter = new CollationSorter();
537            
538            if (operator == CollationSorter.GREATERP)
539              ignoreChars = false;
540    
541            sorter.comparisonType = operator;
542            sorter.textElement = sb.toString();
543            sorter.hashText = sorter.textElement.hashCode();
544            sorter.offset = base_offset+rules.length();
545            sorter.ignore = ignoreChars;
546            sb.setLength(0);
547    
548            v.add(sorter);
549            operator = type;
550          }
551    
552        if (operator >= 0)
553          {
554            CollationSorter sorter = new CollationSorter();
555            int pos = rules.length() + base_offset;
556    
557            if ((sb.length() != 0 && nextIsModifier)
558                || (sb.length() == 0 && !nextIsModifier && !eatingChars))
559              throw new ParseException("text element empty at " + pos, pos);
560    
561            if (operator == CollationSorter.GREATERP)
562              ignoreChars = false;
563    
564            sorter.comparisonType = operator;
565            sorter.textElement = sb.toString();
566            sorter.hashText = sorter.textElement.hashCode();
567            sorter.offset = base_offset+pos;
568            sorter.ignore = ignoreChars;
569            v.add(sorter);
570          }
571    
572        if (i == rules.length())
573          return -1;
574        else
575          return i;
576      }
577    
578      /**
579       * This method creates a copy of this object.
580       *
581       * @return A copy of this object.
582       */
583      public Object clone()
584      {
585        return super.clone();
586      }
587    
588      /**
589       * This method completely parses a string 'rules' containing sorting rules.
590       *
591       * @param rules String containing the rules to be parsed. 
592       * @return A set of sorting instructions stored in a Vector.
593       * @throws ParseException if something turned wrong during the parsing. To get details
594       * decode the message.
595       */
596      private ArrayList parseString(String rules) 
597        throws ParseException
598      {
599        ArrayList v = new ArrayList();
600    
601        // result of the first subParseString is not absolute (may be -1 or a
602        // positive integer). But we do not care.
603        subParseString(false, v, 0, rules);
604        
605        return v;
606      }
607    
608      /**
609       * This method uses the sorting instructions built by {@link #parseString}
610       * to build collation elements which can be directly used to sort strings.
611       *
612       * @param parsedElements Parsed instructions stored in a ArrayList.
613       * @throws ParseException if the order of the instructions are not valid.
614       */
615      private void buildCollationVector(ArrayList parsedElements)
616        throws ParseException
617      {
618        int primary_seq = 0;
619        int last_tertiary_seq = 0;
620        short secondary_seq = 0;
621        short tertiary_seq = 0;
622        short equality_seq = 0;
623        boolean inverseComparisons = false;
624        final boolean DECREASING = false;
625        final boolean INCREASING = true;
626        boolean secondaryType = INCREASING;
627        ArrayList v = new ArrayList();
628    
629        // elts is completely sorted.
630    element_loop:
631        for (int i = 0; i < parsedElements.size(); i++)
632          {
633            CollationSorter elt = (CollationSorter) parsedElements.get(i);
634            boolean ignoreChar = false;
635    
636            switch (elt.comparisonType)
637              {
638              case CollationSorter.GREATERP:
639                primary_seq++;
640                if (inverseComparisons)
641                  {
642                    secondary_seq = Short.MAX_VALUE;
643                    secondaryType = DECREASING;
644                  }
645                else
646                  {
647                    secondary_seq = 0;
648                    secondaryType = INCREASING;
649                  }
650                tertiary_seq = 0;
651                equality_seq = 0;
652                inverseComparisons = false;
653                break;
654              case CollationSorter.GREATERS:
655                if (secondaryType == DECREASING)
656                  secondary_seq--;
657                else
658                  secondary_seq++;
659                tertiary_seq = 0;
660                equality_seq = 0;
661                break;
662              case CollationSorter.INVERSE_SECONDARY:
663                inverseComparisons = true;
664                continue element_loop;
665              case CollationSorter.GREATERT:
666                tertiary_seq++;
667                if (primary_seq == 0)
668                  last_tertiary_seq = tertiary_seq;
669                equality_seq = 0;
670                break;
671              case CollationSorter.EQUAL:
672                equality_seq++;
673                break;
674              case CollationSorter.RESET:
675                throw new ParseException
676                  ("Invalid reached state 'RESET'. Internal error", elt.offset);
677              default:
678                throw new ParseException
679                  ("Invalid unknown state '" + elt.comparisonType + "'", elt.offset);
680              }
681    
682            v.add(new CollationElement(elt.textElement, primary_seq,
683                                       secondary_seq, tertiary_seq,
684                                       equality_seq, elt.expansionOrdering, elt.ignore));
685          }
686    
687        this.inverseAccentComparison = inverseComparisons; 
688    
689        ce_table = v.toArray();
690    
691        last_primary_value = primary_seq+1;
692        last_tertiary_value = last_tertiary_seq+1;
693      }
694    
695      /**
696       * Build a tree where all keys are the texts of collation elements and data is
697       * the collation element itself. The tree is used when extracting all prefix
698       * for a given text.
699       */
700      private void buildPrefixAccess()
701      {
702        prefix_tree = new HashMap();
703    
704        for (int i = 0; i < ce_table.length; i++)
705          {
706            CollationElement e = (CollationElement) ce_table[i];
707    
708            prefix_tree.put(e.key, e);
709          }
710      }
711    
712      /**
713       * This method returns an integer which indicates whether the first
714       * specified <code>String</code> is less than, greater than, or equal to
715       * the second.  The value depends not only on the collation rules in
716       * effect, but also the strength and decomposition settings of this object.
717       *
718       * @param source The first <code>String</code> to compare.
719       * @param target A second <code>String</code> to compare to the first.
720       *
721       * @return A negative integer if source &lt; target, a positive integer
722       * if source &gt; target, or 0 if source == target.
723       */
724      public int compare(String source, String target)
725      {
726        CollationElementIterator cs, ct;
727        CollationElement ord1block = null;
728        CollationElement ord2block = null;
729        boolean advance_block_1 = true;
730        boolean advance_block_2 = true;
731    
732        cs = getCollationElementIterator(source);
733        ct = getCollationElementIterator(target);
734    
735        for(;;)
736          {
737            int ord1;
738            int ord2;
739    
740            /*
741             * We have to check whether the characters are ignorable.
742             * If it is the case then forget them. 
743             */
744            if (advance_block_1)
745              {
746                ord1block = cs.nextBlock();
747                if (ord1block != null && ord1block.ignore)
748                  continue;
749              }
750            
751            if (advance_block_2)
752              {
753                ord2block = ct.nextBlock();
754                if (ord2block != null && ord2block.ignore)
755                  {
756                    advance_block_1 = false;
757                    continue;
758                  }
759             }
760            else
761              advance_block_2 = true;
762    
763            if (!advance_block_1)
764              advance_block_1 = true;
765    
766            if (ord1block != null)
767              ord1 = ord1block.getValue();
768            else
769              {
770                if (ord2block == null)
771                  return 0;
772                return -1;
773              }
774    
775            if (ord2block == null)
776              return 1;
777            
778            ord2 = ord2block.getValue();
779            
780            // We know chars are totally equal, so skip
781            if (ord1 == ord2)
782              {
783                if (getStrength() == IDENTICAL)
784                  if (!ord1block.key.equals(ord2block.key))
785                    return ord1block.key.compareTo(ord2block.key);
786                continue;
787              }
788    
789            // Check for primary strength differences
790            int prim1 = CollationElementIterator.primaryOrder(ord1); 
791            int prim2 = CollationElementIterator.primaryOrder(ord2); 
792            
793            if (prim1 == 0 && getStrength() < TERTIARY)
794              {
795                advance_block_2 = false;
796                continue;
797              }
798            else if (prim2 == 0 && getStrength() < TERTIARY)
799              {
800                advance_block_1 = false;
801                continue;
802              }
803    
804            if (prim1 < prim2)
805              return -1;
806            else if (prim1 > prim2)
807              return 1;
808            else if (getStrength() == PRIMARY)
809              continue;
810    
811            // Check for secondary strength differences
812            int sec1 = CollationElementIterator.secondaryOrder(ord1);
813            int sec2 = CollationElementIterator.secondaryOrder(ord2);
814    
815            if (sec1 < sec2)
816              return -1;
817            else if (sec1 > sec2)
818              return 1;
819            else if (getStrength() == SECONDARY)
820              continue;
821    
822            // Check for tertiary differences
823            int tert1 = CollationElementIterator.tertiaryOrder(ord1);
824            int tert2 = CollationElementIterator.tertiaryOrder(ord2);
825    
826            if (tert1 < tert2)
827              return -1;
828            else if (tert1 > tert2)
829              return 1;
830            else if (getStrength() == TERTIARY)
831              continue;
832    
833            // Apparently JDK does this (at least for my test case).
834            return ord1block.key.compareTo(ord2block.key);    
835          }
836      }
837    
838      /**
839       * This method tests this object for equality against the specified 
840       * object.  This will be true if and only if the specified object is
841       * another reference to this object.
842       *
843       * @param obj The <code>Object</code> to compare against this object.
844       *
845       * @return <code>true</code> if the specified object is equal to this object,
846       * <code>false</code> otherwise.
847       */
848      public boolean equals(Object obj)
849      {
850        if (obj == this)
851          return true;
852        else
853          return false;
854      }
855    
856      /**
857       * This method builds a default collation element without invoking
858       * the database created from the rules passed to the constructor.
859       *
860       * @param c Character which needs a collation element.
861       * @return A valid brand new CollationElement instance.
862       */
863      CollationElement getDefaultElement(char c)
864      {
865        int v;
866    
867        // Preliminary support for generic accent sorting inversion (I don't know if all
868        // characters in the range should be sorted backward). This is the place
869        // to fix this if needed.
870        if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
871          v = 0x0361 - ((int) c - 0x02B9);
872        else
873          v = (short) c;
874        return new CollationElement("" + c, last_primary_value + v,
875                                    (short) 0, (short) 0, (short) 0, null, false);
876      }
877    
878      /**
879       * This method builds a default collation element for an accented character
880       * without invoking the database created from the rules passed to the constructor.
881       *
882       * @param c Character which needs a collation element.
883       * @return A valid brand new CollationElement instance.
884       */
885      CollationElement getDefaultAccentedElement(char c)
886      {
887        int v;
888    
889        // Preliminary support for generic accent sorting inversion (I don't know if all
890        // characters in the range should be sorted backward). This is the place
891        // to fix this if needed.
892        if (inverseAccentComparison && (c >= 0x02B9 && c <= 0x0361))
893          v = 0x0361 - ((int) c - 0x02B9);
894        else
895          v = (short) c;
896        return new CollationElement("" + c, (short) 0,
897                                    (short) 0, (short) (last_tertiary_value + v), (short) 0, null, false);
898      }
899    
900      /**
901       * This method returns an instance for <code>CollationElementIterator</code>
902       * for the specified <code>String</code> under the collation rules for this
903       * object.
904       *
905       * @param source The <code>String</code> to return the
906       * <code>CollationElementIterator</code> instance for.
907       *
908       * @return A <code>CollationElementIterator</code> for the specified
909       * <code>String</code>.
910       */
911      public CollationElementIterator getCollationElementIterator(String source)
912      {
913        return new CollationElementIterator(this, source);
914      }
915    
916      /**
917       * This method returns an instance of <code>CollationElementIterator</code>
918       * for the <code>String</code> represented by the specified
919       * <code>CharacterIterator</code>.
920       *
921       * @param source The <code>CharacterIterator</code> with the desired <code>String</code>.
922       *
923       * @return A <code>CollationElementIterator</code> for the specified <code>String</code>.
924       */
925      public CollationElementIterator getCollationElementIterator(CharacterIterator source)
926        throws NotImplementedException  // Because decomposeCharacter does not work
927      {
928        StringBuffer expand = new StringBuffer("");
929        
930        // Right now we assume that we will read from the beginning of the string.
931        for (char c = source.first();
932             c != CharacterIterator.DONE;
933             c = source.next())
934          decomposeCharacter(c, expand);
935    
936        return getCollationElementIterator(expand.toString());
937      }
938    
939      /**
940       * This method returns an instance of <code>CollationKey</code> for the
941       * specified <code>String</code>.  The object returned will have a
942       * more efficient mechanism for its comparison function that could
943       * provide speed benefits if multiple comparisons are performed, such
944       * as during a sort.
945       *
946       * @param source The <code>String</code> to create a <code>CollationKey</code> for.
947       *
948       * @return A <code>CollationKey</code> for the specified <code>String</code>.
949       */
950      public CollationKey getCollationKey(String source)
951      {
952        CollationElementIterator cei = getCollationElementIterator(source);
953        ArrayList vect = new ArrayList();
954    
955        int ord = cei.next();
956        cei.reset(); //set to start of string
957    
958        while (ord != CollationElementIterator.NULLORDER)
959          {
960            // If the primary order is null, it means this is an ignorable
961            // character.
962            if (CollationElementIterator.primaryOrder(ord) == 0)
963              {
964                ord = cei.next();
965                continue;
966              }
967            switch (getStrength())
968              {
969                case PRIMARY:
970                  ord = CollationElementIterator.primaryOrder(ord);
971                  break;
972                  
973                case SECONDARY:
974                  ord = CollationElementIterator.primaryOrder(ord) << 8;
975                  ord |= CollationElementIterator.secondaryOrder(ord);
976    
977                default:
978                   break;
979              }
980    
981            vect.add(new Integer(ord)); 
982            ord = cei.next(); //increment to next key
983          }
984    
985        Object[] objarr = vect.toArray();
986        byte[] key = new byte[objarr.length * 4];
987    
988        for (int i = 0; i < objarr.length; i++)
989          {
990            int j = ((Integer) objarr[i]).intValue();
991            key [i * 4] = (byte) ((j & 0xFF000000) >> 24);
992            key [i * 4 + 1] = (byte) ((j & 0x00FF0000) >> 16);
993            key [i * 4 + 2] = (byte) ((j & 0x0000FF00) >> 8);
994            key [i * 4 + 3] = (byte) (j & 0x000000FF);
995          }
996    
997        return new CollationKey(this, source, key);
998      }
999    
1000      /**
1001       * This method returns a <code>String</code> containing the collation rules
1002       * for this object.
1003       *
1004       * @return The collation rules for this object.
1005       */
1006      public String getRules()
1007      {
1008        return rules;
1009      }
1010    
1011      /**
1012       * This method returns a hash value for this object.
1013       *
1014       * @return A hash value for this object.
1015       */
1016      public int hashCode()
1017      {
1018        return System.identityHashCode(this);
1019      }
1020    }