001    /* Bidi.java -- Bidirectional Algorithm implementation
002       Copyright (C) 2005, 2006  Free Software Foundation, Inc.
003    
004    This file is part of GNU Classpath.
005    
006    GNU Classpath is free software; you can redistribute it and/or modify
007    it under the terms of the GNU General Public License as published by
008    the Free Software Foundation; either version 2, or (at your option)
009    any later version.
010     
011    GNU Classpath is distributed in the hope that it will be useful, but
012    WITHOUT ANY WARRANTY; without even the implied warranty of
013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014    General Public License for more details.
015    
016    You should have received a copy of the GNU General Public License
017    along with GNU Classpath; see the file COPYING.  If not, write to the
018    Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
019    02110-1301 USA.
020    
021    Linking this library statically or dynamically with other modules is
022    making a combined work based on this library.  Thus, the terms and
023    conditions of the GNU General Public License cover the whole
024    combination.
025    
026    As a special exception, the copyright holders of this library give you
027    permission to link this library with independent modules to produce an
028    executable, regardless of the license terms of these independent
029    modules, and to copy and distribute the resulting executable under
030    terms of your choice, provided that you also meet, for each linked
031    independent module, the terms and conditions of the license of that
032    module.  An independent module is a module which is not derived from
033    or based on this library.  If you modify this library, you may extend
034    this exception to your version of the library, but you are not
035    obligated to do so.  If you do not wish to do so, delete this
036    exception statement from your version. */
037    
038    
039    package java.text;
040    
041    import java.awt.font.NumericShaper;
042    import java.awt.font.TextAttribute;
043    import java.util.ArrayList;
044    
045    
046    /**
047     * Bidirectional Algorithm implementation.
048     *
049     * The full algorithm is
050     * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard
051     * Annex #9: The Bidirectional Algorithm</a>.
052     *
053     * @since 1.4
054     */
055    public final class Bidi
056    {
057      /**
058       * This indicates that a strongly directional character in the text should
059       * set the initial direction, but if no such character is found, then the
060       * initial direction will be left-to-right.
061       */
062      public static final int DIRECTION_DEFAULT_LEFT_TO_RIGHT = -2;
063    
064      /**
065       * This indicates that a strongly directional character in the text should
066       * set the initial direction, but if no such character is found, then the
067       * initial direction will be right-to-left.
068       */
069      public static final int DIRECTION_DEFAULT_RIGHT_TO_LEFT = -1;
070    
071      /**
072       * This indicates that the initial direction should be left-to-right.
073       */
074      public static final int DIRECTION_LEFT_TO_RIGHT = 0;
075    
076      /**
077       * This indicates that the initial direction should be right-to-left.
078       */
079      public static final int DIRECTION_RIGHT_TO_LEFT = 1;
080    
081      // Flags used when computing the result.
082      private static final int LTOR = 1 << DIRECTION_LEFT_TO_RIGHT;
083      private static final int RTOL = 1 << DIRECTION_RIGHT_TO_LEFT;
084    
085      // The text we are examining, and the starting offset.
086      // If we had a better way to handle createLineBidi, we wouldn't
087      // need this at all -- which for the String case would be an
088      // efficiency win.
089      private char[] text;
090      private int textOffset;
091      // The embeddings corresponding to the text, and the starting offset.
092      private byte[] embeddings;
093      private int embeddingOffset;
094      // The length of the text (and embeddings) to use.
095      private int length;
096      // The flags.
097      private int flags;
098    
099      // All instance fields following this point are initialized
100      // during analysis.  Fields before this must be set by the constructor.
101    
102      // The initial embedding level.
103      private int baseEmbedding;
104      // The type of each character in the text.
105      private byte[] types;
106      // The levels we compute.
107      private byte[] levels;
108    
109      // A list of indices where a formatting code was found.  These
110      // are indicies into the original text -- not into the text after
111      // the codes have been removed.
112      private ArrayList formatterIndices;
113    
114      // Indices of the starts of runs in the text. 
115      private int[] runs;
116    
117      // A convenience field where we keep track of what kinds of runs
118      // we've seen.
119      private int resultFlags;
120    
121      /**
122       * Create a new Bidi object given an attributed character iterator.
123       * This constructor will examine various attributes of the text:
124       * <ul>
125       * <li> {@link TextAttribute#RUN_DIRECTION} is used to determine the
126       * paragraph's base embedding level.  This constructor will recognize
127       * either {@link TextAttribute#RUN_DIRECTION_LTR} or
128       * {@link TextAttribute#RUN_DIRECTION_RTL}.  If neither is given,
129       * {@link #DIRECTION_DEFAULT_LEFT_TO_RIGHT} is assumed.
130       * </li>
131       * 
132       * <li> If {@link TextAttribute#NUMERIC_SHAPING} is seen, then numeric
133       * shaping will be done before the Bidi algorithm is run.
134       * </li>
135       * 
136       * <li> If {@link TextAttribute#BIDI_EMBEDDING} is seen on a given
137       * character, then the value of this attribute will be used as an
138       * embedding level override.
139       * </li>
140       * </ul>
141       * @param iter the attributed character iterator to use
142       */
143      public Bidi(AttributedCharacterIterator iter)
144      {
145        // If set, this attribute should be set on all characters.
146        // We don't check this (should we?) but we do assume that we
147        // can simply examine the first character.
148        Object val = iter.getAttribute(TextAttribute.RUN_DIRECTION);
149        if (val == TextAttribute.RUN_DIRECTION_LTR)
150          this.flags = DIRECTION_LEFT_TO_RIGHT;
151        else if (val == TextAttribute.RUN_DIRECTION_RTL)
152          this.flags = DIRECTION_RIGHT_TO_LEFT;
153        else
154          this.flags = DIRECTION_DEFAULT_LEFT_TO_RIGHT;
155    
156        // Likewise this attribute should be specified on the whole text.
157        // We read it here and then, if it is set, we apply the numeric shaper
158        // to the text before processing it.
159        NumericShaper shaper = null;
160        val = iter.getAttribute(TextAttribute.NUMERIC_SHAPING);
161        if (val instanceof NumericShaper)
162          shaper = (NumericShaper) val;
163    
164        char[] text = new char[iter.getEndIndex() - iter.getBeginIndex()];
165        this.embeddings = new byte[this.text.length];
166        this.embeddingOffset = 0;
167        this.length = text.length;
168        for (int i = 0; i < this.text.length; ++i)
169          {
170            this.text[i] = iter.current();
171    
172            val = iter.getAttribute(TextAttribute.BIDI_EMBEDDING);
173            if (val instanceof Integer)
174              {
175                int ival = ((Integer) val).intValue();
176                byte bval;
177                if (ival < -62 || ival > 62)
178                  bval = 0;
179                else
180                  bval = (byte) ival;
181                this.embeddings[i] = bval;
182              }
183          }
184    
185        // Invoke the numeric shaper, if specified.
186        if (shaper != null)
187          shaper.shape(this.text, 0, this.length);
188    
189        runBidi();
190      }
191    
192      /**
193       * Create a new Bidi object with the indicated text and, possibly, explicit
194       * embedding settings.
195       * 
196       * If the embeddings array is null, it is ignored.  Otherwise it is taken to
197       * be explicit embedding settings corresponding to the text.  Positive values
198       * from 1 to 61 are embedding levels, and negative values from -1 to -61 are
199       * embedding overrides.  (FIXME: not at all clear what this really means.)
200       * 
201       * @param text the text to use
202       * @param offset the offset of the first character of the text
203       * @param embeddings the explicit embeddings, or null if there are none
204       * @param embedOffset the offset of the first embedding value to use
205       * @param length the length of both the text and the embeddings
206       * @param flags a flag indicating the base embedding direction
207       */
208      public Bidi(char[] text, int offset, byte[] embeddings, int embedOffset,
209                  int length, int flags)
210      {
211        if (flags != DIRECTION_DEFAULT_LEFT_TO_RIGHT
212            && flags != DIRECTION_DEFAULT_RIGHT_TO_LEFT
213            && flags != DIRECTION_LEFT_TO_RIGHT
214            && flags != DIRECTION_RIGHT_TO_LEFT)
215          throw new IllegalArgumentException("unrecognized 'flags' argument: "
216                                             + flags);
217        this.text = text;
218        this.textOffset = offset;
219        this.embeddings = embeddings;
220        this.embeddingOffset = embedOffset;
221        this.length = length;
222        this.flags = flags;
223    
224        runBidi();
225      }
226    
227      /**
228       * Create a new Bidi object using the contents of the given String
229       * as the text.
230       * @param text the text to use
231       * @param flags a flag indicating the base embedding direction
232       */
233      public Bidi(String text, int flags)
234      {
235        if (flags != DIRECTION_DEFAULT_LEFT_TO_RIGHT
236            && flags != DIRECTION_DEFAULT_RIGHT_TO_LEFT
237            && flags != DIRECTION_LEFT_TO_RIGHT
238            && flags != DIRECTION_RIGHT_TO_LEFT)
239          throw new IllegalArgumentException("unrecognized 'flags' argument: "
240                                             + flags);
241    
242        // This is inefficient, but it isn't clear whether it matters.
243        // If it does we can change our implementation a bit to allow either
244        // a String or a char[].
245        this.text = text.toCharArray();
246        this.textOffset = 0;
247        this.embeddings = null;
248        this.embeddingOffset = 0;
249        this.length = text.length();
250        this.flags = flags;
251    
252        runBidi();
253      }
254    
255      /**
256       * Implementation function which computes the initial type of
257       * each character in the input.
258       */
259      private void computeTypes()
260      {
261        types = new byte[length];
262        for (int i = 0; i < length; ++i)
263          types[i] = Character.getDirectionality(text[textOffset + i]);
264      }
265    
266      /**
267       * An internal function which implements rules P2 and P3.
268       * This computes the base embedding level.
269       * @return the paragraph's base embedding level
270       */
271      private int computeParagraphEmbeddingLevel()
272      {
273        // First check to see if the user supplied a directionality override.
274        if (flags == DIRECTION_LEFT_TO_RIGHT
275            || flags == DIRECTION_RIGHT_TO_LEFT)
276          return flags;
277    
278        // This implements rules P2 and P3.
279        // (Note that we don't need P1, as the user supplies
280        // a paragraph.)
281        for (int i = 0; i < length; ++i)
282          {
283            int dir = types[i];
284            if (dir == Character.DIRECTIONALITY_LEFT_TO_RIGHT)
285              return DIRECTION_LEFT_TO_RIGHT;
286            if (dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT
287                || dir == Character.DIRECTIONALITY_RIGHT_TO_LEFT)
288              return DIRECTION_RIGHT_TO_LEFT;
289          }
290        return (flags == DIRECTION_DEFAULT_LEFT_TO_RIGHT
291                ? DIRECTION_LEFT_TO_RIGHT
292                : DIRECTION_RIGHT_TO_LEFT);
293      }
294    
295      /**
296       * An internal function which implements rules X1 through X9.
297       * This computes the initial levels for the text, handling
298       * explicit overrides and embeddings.
299       */
300      private void computeExplicitLevels()
301      {
302        levels = new byte[length];
303        byte currentEmbedding = (byte) baseEmbedding;
304        // The directional override is a Character directionality
305        // constant.  -1 means there is no override.
306        byte directionalOverride = -1;
307        // The stack of pushed embeddings, and the stack pointer.
308        // Note that because the direction is inherent in the depth,
309        // and because we have a bit left over in a byte, we can encode
310        // the override, if any, directly in this value on the stack.
311        final int MAX_DEPTH = 62;
312        byte[] embeddingStack = new byte[MAX_DEPTH];
313        int sp = 0;
314    
315        for (int i = 0; i < length; ++i)
316          {
317            // If we see an explicit embedding, we use that, even if
318            // the current character is itself a directional override.
319            if (embeddings != null && embeddings[embeddingOffset + i] != 0)
320              {
321                // It isn't at all clear what we're supposed to do here.
322                // What does a negative value really mean?
323                // Should we push on the embedding stack here?
324                currentEmbedding = embeddings[embeddingOffset + i]; 
325                if (currentEmbedding < 0)
326                  {
327                    currentEmbedding = (byte) -currentEmbedding;
328                    directionalOverride
329                      = (((currentEmbedding % 2) == 0)
330                          ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
331                          : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
332                  }
333                else
334                  directionalOverride = -1;
335                continue;
336              }
337            // No explicit embedding.
338            boolean isLtoR = false;
339            boolean isSpecial = true;
340            switch (types[i])
341              {
342                case Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING:
343                case Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE:
344                  isLtoR = true;
345                  // Fall through.
346                case Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING:
347                case Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE:
348                  {
349                    byte newEmbedding;
350                    if (isLtoR)
351                      {
352                        // Least greater even.
353                        newEmbedding = (byte) ((currentEmbedding & ~1) + 2);
354                      }
355                    else
356                      {
357                        // Least greater odd.
358                        newEmbedding = (byte) ((currentEmbedding + 1) | 1);
359                      }
360                    // FIXME: we don't properly handle invalid pushes.
361                    if (newEmbedding < MAX_DEPTH)
362                      {
363                        // The new level is valid.  Push the old value.
364                        // See above for a comment on the encoding here.
365                        if (directionalOverride != -1)
366                          currentEmbedding |= Byte.MIN_VALUE;
367                        embeddingStack[sp++] = currentEmbedding;
368                        currentEmbedding = newEmbedding;
369                        if (types[i] == Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE)
370                          directionalOverride = Character.DIRECTIONALITY_LEFT_TO_RIGHT;
371                        else if (types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE)
372                          directionalOverride = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
373                        else
374                          directionalOverride = -1;
375                      }
376                    }
377                  break;
378                case Character.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT:
379                  {
380                    // FIXME: we don't properly handle a pop with a corresponding
381                    // invalid push.
382                    if (sp == 0)
383                      {
384                        // We saw a pop without a push.  Just ignore it.
385                        break;
386                      }
387                    byte newEmbedding = embeddingStack[--sp];
388                    currentEmbedding = (byte) (newEmbedding & 0x7f);
389                    if (newEmbedding < 0)
390                      directionalOverride
391                      = (((newEmbedding & 1) == 0)
392                          ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
393                          : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
394                    else
395                      directionalOverride = -1;
396                  }
397                  break;
398                default:
399                  isSpecial = false;
400                  break;
401              }
402            levels[i] = currentEmbedding;
403            if (isSpecial)
404              {
405                // Mark this character for removal.
406                if (formatterIndices == null)
407                  formatterIndices = new ArrayList();
408                formatterIndices.add(Integer.valueOf(i));
409              }
410            else if (directionalOverride != -1)
411              types[i] = directionalOverride;
412          }
413    
414        // Remove the formatting codes and update both the arrays
415        // and 'length'.  It would be more efficient not to remove
416        // these codes, but it is also more complicated.  Also, the
417        // Unicode algorithm reference does not properly describe
418        // how this is to be done -- from what I can tell, their suggestions
419        // in this area will not yield the correct results.
420        if (formatterIndices == null)
421          return;
422        int output = 0, input = 0;
423        final int size = formatterIndices.size();
424        for (int i = 0; i <= size; ++i)
425          {
426            int nextFmt;
427            if (i == size)
428              nextFmt = length;
429            else
430              nextFmt = ((Integer) formatterIndices.get(i)).intValue();
431            // Non-formatter codes are from 'input' to 'nextFmt'.
432            int len = nextFmt - input;
433            System.arraycopy(levels, input, levels, output, len);
434            System.arraycopy(types, input, types, output, len);
435            output += len;
436            input = nextFmt + 1;
437          }
438        length -= formatterIndices.size();
439      }
440    
441      /**
442       * An internal function to compute the boundaries of runs
443       * in the text.  It isn't strictly necessary to do this, but
444       * it lets us write some following passes in a less complicated
445       * way.  Also it lets us efficiently implement some of the public
446       * methods.  A run is simply a sequence of characters at the
447       * same level.
448       */
449      private void computeRuns()
450      {
451        int runCount = 0;
452        int currentEmbedding = baseEmbedding;
453        for (int i = 0; i < length; ++i)
454          {
455            if (levels[i] != currentEmbedding)
456              {
457                currentEmbedding = levels[i];
458                ++runCount;
459              }
460          }
461    
462        // This may be called multiple times.  If so, and if
463        // the number of runs has not changed, then don't bother
464        // allocating a new array.
465        if (runs == null || runs.length != runCount + 1)
466          runs = new int[runCount + 1];
467        int where = 0;
468        int lastRunStart = 0;
469        currentEmbedding = baseEmbedding;
470        for (int i = 0; i < length; ++i)
471          {
472            if (levels[i] != currentEmbedding)
473              {
474                runs[where++] = lastRunStart;
475                lastRunStart = i;
476                currentEmbedding = levels[i];
477              }
478          }
479        runs[where++] = lastRunStart;
480      }
481    
482      /**
483       * An internal method to resolve weak types.  This implements
484       * rules W1 through W7.
485       */
486      private void resolveWeakTypes()
487      {
488        final int runCount = getRunCount();
489        
490        int previousLevel = baseEmbedding;
491        for (int run = 0; run < runCount; ++run)
492          {
493            int start = getRunStart(run);
494            int end = getRunLimit(run);
495            int level = getRunLevel(run);
496    
497            // These are the names used in the Bidi algorithm.
498            byte sor = (((Math.max(previousLevel, level) % 2) == 0)
499                          ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
500                          : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
501            int nextLevel;
502            if (run == runCount - 1)
503              nextLevel = baseEmbedding;
504            else
505              nextLevel = getRunLevel(run + 1);
506            byte eor = (((Math.max(level, nextLevel) % 2) == 0)
507                          ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
508                          : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
509    
510            byte prevType = sor;
511            byte prevStrongType = sor;
512            for (int i = start; i < end; ++i)
513              {
514                final byte nextType = (i == end - 1) ? eor : types[i + 1];
515    
516                // Rule W1: change NSM to the prevailing direction.
517                if (types[i] == Character.DIRECTIONALITY_NONSPACING_MARK)
518                  types[i] = prevType;
519                else
520                  prevType = types[i];
521    
522                // Rule W2: change EN to AN in some cases.
523                if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
524                  {
525                    if (prevStrongType == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC)
526                      types[i] = Character.DIRECTIONALITY_ARABIC_NUMBER;
527                  }
528                else if (types[i] == Character.DIRECTIONALITY_LEFT_TO_RIGHT
529                         || types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT
530                         || types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC)
531                  prevStrongType = types[i];
532    
533                // Rule W3: change AL to R.
534                if (types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC)
535                  types[i] = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
536    
537                // Rule W4: handle separators between two numbers.
538                if (prevType == Character.DIRECTIONALITY_EUROPEAN_NUMBER
539                    && nextType == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
540                  {
541                    if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
542                        || types[i] == Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR)
543                      types[i] = nextType;
544                  }
545                else if (prevType == Character.DIRECTIONALITY_ARABIC_NUMBER
546                         && nextType == Character.DIRECTIONALITY_ARABIC_NUMBER
547                         && types[i] == Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR)
548                  types[i] = nextType;
549    
550                // Rule W5: change a sequence of european terminators to
551                // european numbers, if they are adjacent to european numbers.
552                // We also include BN characters in this.
553                if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
554                    || types[i] == Character.DIRECTIONALITY_BOUNDARY_NEUTRAL)
555                  {
556                    if (prevType == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
557                      types[i] = prevType;
558                    else
559                      {
560                        // Look ahead to see if there is an EN terminating this
561                        // sequence of ETs.
562                        int j = i + 1;
563                        while (j < end
564                               && (types[j] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
565                                   || types[j] == Character.DIRECTIONALITY_BOUNDARY_NEUTRAL))
566                          ++j;
567                        if (j < end
568                            && types[j] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
569                          {
570                            // Change them all to EN now.
571                            for (int k = i; k < j; ++k)
572                              types[k] = Character.DIRECTIONALITY_EUROPEAN_NUMBER;
573                          }
574                      }
575                  }
576    
577                // Rule W6: separators and terminators change to ON.
578                // Again we include BN.
579                if (types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
580                    || types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
581                    || types[i] == Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
582                    || types[i] == Character.DIRECTIONALITY_BOUNDARY_NEUTRAL)
583                  types[i] = Character.DIRECTIONALITY_OTHER_NEUTRALS;
584    
585                // Rule W7: change european number types.
586                if (prevStrongType == Character.DIRECTIONALITY_LEFT_TO_RIGHT
587                    && types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
588                  types[i] = prevStrongType;
589              }
590    
591            previousLevel = level;
592          }
593      }
594    
595      /**
596       * An internal method to resolve neutral types.  This implements
597       * rules N1 and N2.
598       */
599      private void resolveNeutralTypes()
600      {
601        // This implements rules N1 and N2.
602        final int runCount = getRunCount();
603        
604        int previousLevel = baseEmbedding;
605        for (int run = 0; run < runCount; ++run)
606          {
607            int start = getRunStart(run);
608            int end = getRunLimit(run);
609            int level = getRunLevel(run);
610    
611            byte embeddingDirection
612              = (((level % 2) == 0) ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
613                                    : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
614            // These are the names used in the Bidi algorithm.
615            byte sor = (((Math.max(previousLevel, level) % 2) == 0)
616                          ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
617                          : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
618            int nextLevel;
619            if (run == runCount - 1)
620              nextLevel = baseEmbedding;
621            else
622              nextLevel = getRunLevel(run + 1);
623            byte eor = (((Math.max(level, nextLevel) % 2) == 0)
624                          ? Character.DIRECTIONALITY_LEFT_TO_RIGHT
625                          : Character.DIRECTIONALITY_RIGHT_TO_LEFT);
626    
627            byte prevStrong = sor;
628            int neutralStart = -1;
629            for (int i = start; i <= end; ++i)
630              {
631                byte newStrong = -1;
632                byte thisType = i == end ? eor : types[i];
633                switch (thisType)
634                  {
635                  case Character.DIRECTIONALITY_LEFT_TO_RIGHT:
636                    newStrong = Character.DIRECTIONALITY_LEFT_TO_RIGHT;
637                    break;
638                  case Character.DIRECTIONALITY_RIGHT_TO_LEFT:
639                  case Character.DIRECTIONALITY_ARABIC_NUMBER:
640                  case Character.DIRECTIONALITY_EUROPEAN_NUMBER:
641                    newStrong = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
642                    break;
643                  case Character.DIRECTIONALITY_BOUNDARY_NEUTRAL:
644                  case Character.DIRECTIONALITY_OTHER_NEUTRALS:
645                  case Character.DIRECTIONALITY_SEGMENT_SEPARATOR:
646                  case Character.DIRECTIONALITY_PARAGRAPH_SEPARATOR:
647                  case Character.DIRECTIONALITY_WHITESPACE:
648                    if (neutralStart == -1)
649                      neutralStart = i;
650                    break;
651                  }
652                // If we see a strong character, update all the neutrals.
653                if (newStrong != -1)
654                  {
655                    if (neutralStart != -1)
656                      {
657                        byte override = (prevStrong == newStrong
658                                         ? prevStrong
659                                         : embeddingDirection);
660                        for (int j = neutralStart; j < i; ++j)
661                          types[j] = override;
662                      }
663                    prevStrong = newStrong;
664                    neutralStart = -1;
665                  }
666              }
667    
668            previousLevel = level;
669          }
670      }
671    
672      /**
673       * An internal method to resolve implicit levels.
674       * This implements rules I1 and I2.
675       */
676      private void resolveImplicitLevels()
677      {
678        // This implements rules I1 and I2.
679        for (int i = 0; i < length; ++i)
680          {
681            if ((levels[i] & 1) == 0)
682              {
683                if (types[i] == Character.DIRECTIONALITY_RIGHT_TO_LEFT)
684                  ++levels[i];
685                else if (types[i] == Character.DIRECTIONALITY_ARABIC_NUMBER
686                         || types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
687                  levels[i] += 2;
688              }
689            else
690              {
691                if (types[i] == Character.DIRECTIONALITY_LEFT_TO_RIGHT
692                    || types[i] == Character.DIRECTIONALITY_ARABIC_NUMBER
693                    || types[i] == Character.DIRECTIONALITY_EUROPEAN_NUMBER)
694                  ++levels[i];
695              }
696    
697            // Update the result flags.
698            resultFlags |= 1 << (levels[i] & 1);
699          }
700        // One final update of the result flags, using the base level.
701        resultFlags |= 1 << baseEmbedding;
702      }
703    
704      /**
705       * This reinserts the formatting codes that we removed early on.
706       * Actually it does not insert formatting codes per se, but rather
707       * simply inserts new levels at the appropriate locations in the
708       * 'levels' array.
709       */
710      private void reinsertFormattingCodes()
711      {
712        if (formatterIndices == null)
713          return;
714        int input = length;
715        int output = levels.length;
716        // Process from the end as we are copying the array over itself here.
717        for (int index = formatterIndices.size() - 1; index >= 0; --index)
718          {
719            int nextFmt = ((Integer) formatterIndices.get(index)).intValue();
720    
721            // nextFmt points to a location in the original array.  So,
722            // nextFmt+1 is the target of our copying.  output is the location
723            // to which we last copied, thus we can derive the length of the
724            // copy from it.
725            int len = output - nextFmt - 1;
726            output = nextFmt;
727            input -= len;
728            // Note that we no longer need 'types' at this point, so we
729            // only edit 'levels'.
730            if (nextFmt + 1 < levels.length)
731              System.arraycopy(levels, input, levels, nextFmt + 1, len);
732    
733            // Now set the level at the reinsertion point.
734            int rightLevel;
735            if (output == levels.length - 1)
736              rightLevel = baseEmbedding;
737            else
738              rightLevel = levels[output + 1];
739            int leftLevel;
740            if (input == 0)
741              leftLevel = baseEmbedding;
742            else
743              leftLevel = levels[input];
744            levels[output] = (byte) Math.max(leftLevel, rightLevel);
745          }
746        length = levels.length;
747      }
748    
749      /**
750       * This is the main internal entry point.  After a constructor
751       * has initialized the appropriate local state, it will call
752       * this method to do all the work.
753       */
754      private void runBidi()
755      {
756        computeTypes();
757        baseEmbedding = computeParagraphEmbeddingLevel();
758        computeExplicitLevels();
759        computeRuns();
760        resolveWeakTypes();
761        resolveNeutralTypes();
762        resolveImplicitLevels();
763        // We're done with the types.  Let the GC clean up.
764        types = null;
765        reinsertFormattingCodes();
766        // After resolving the implicit levels, the number
767        // of runs may have changed.
768        computeRuns();
769      }
770    
771      /**
772       * Return true if the paragraph base embedding is left-to-right,
773       * false otherwise.
774       */
775      public boolean baseIsLeftToRight()
776      {
777        return baseEmbedding == DIRECTION_LEFT_TO_RIGHT;
778      }
779    
780      /**
781       * Create a new Bidi object for a single line of text, taken
782       * from the text used when creating the current Bidi object.
783       * @param start the index of the first character of the line
784       * @param end the index of the final character of the line
785       * @return a new Bidi object for the indicated line of text
786       */
787      public Bidi createLineBidi(int start, int end)
788      {
789        // This isn't the most efficient implementation possible.
790        // This probably does not matter, so we choose simplicity instead.
791        int level = getLevelAt(start);
792        int flag = (((level % 2) == 0)
793                    ? DIRECTION_LEFT_TO_RIGHT
794                    : DIRECTION_RIGHT_TO_LEFT);
795        return new Bidi(text, textOffset + start,
796                        embeddings, embeddingOffset + start,
797                        end - start, flag);
798      }
799    
800      /**
801       * Return the base embedding level of the paragraph.
802       */
803      public int getBaseLevel()
804      {
805        return baseEmbedding;
806      }
807    
808      /**
809       * Return the length of the paragraph, in characters.
810       */
811      public int getLength()
812      {
813        return length;
814      }
815    
816      /**
817       * Return the level at the indicated character.  If the
818       * supplied index is less than zero or greater than the length
819       * of the text, then the paragraph's base embedding level will
820       * be returned.
821       * @param offset the character to examine
822       * @return the level of that character
823       */
824      public int getLevelAt(int offset)
825      {
826        if (offset < 0 || offset >= length)
827          return getBaseLevel();
828        return levels[offset];
829      }
830    
831      /**
832       * Return the number of runs in the result.  A run is
833       * a sequence of characters at the same embedding level.
834       */
835      public int getRunCount()
836      {
837        return runs.length;
838      }
839    
840      /**
841       * Return the level of the indicated run.
842       * @param which the run to examine
843       * @return the level of that run
844       */
845      public int getRunLevel(int which)
846      {
847        return levels[runs[which]];
848      }
849    
850      /**
851       * Return the index of the character just following the end
852       * of the indicated run.
853       * @param which the run to examine
854       * @return the index of the character after the final character
855       * of the run
856       */
857      public int getRunLimit(int which)
858      {
859        if (which == runs.length - 1)
860          return length;
861        return runs[which + 1];
862      }
863    
864      /**
865       * Return the index of the first character in the indicated run.
866       * @param which the run to examine
867       * @return the index of the first character of the run
868       */
869      public int getRunStart(int which)
870      {
871        return runs[which];
872      }
873    
874      /**
875       * Return true if the text is entirely left-to-right, and the
876       * base embedding is also left-to-right.
877       */
878      public boolean isLeftToRight()
879      {
880        return resultFlags == LTOR;
881      }
882    
883      /**
884       * Return true if the text consists of mixed left-to-right and
885       * right-to-left runs, or if the text consists of one kind of run
886       * which differs from the base embedding direction.
887       */
888      public boolean isMixed()
889      {
890        return resultFlags == (LTOR | RTOL);
891      }
892    
893      /**
894       * Return true if the text is entirely right-to-left, and the
895       * base embedding is also right-to-left.
896       */
897      public boolean isRightToLeft()
898      {
899        return resultFlags == RTOL;
900      }
901    
902      /**
903       * Return a String describing the internal state of this object.
904       * This is only useful for debugging.
905       */
906      public String toString()
907      {
908        return "Bidi Bidi Bidi I like you, Buck!";
909      }
910    
911      /**
912       * Reorder objects according to the levels passed in.  This implements
913       * reordering as defined by the Unicode bidirectional layout specification.
914       * The levels are integers from 0 to 62; even numbers represent left-to-right
915       * runs, and odd numbers represent right-to-left runs.
916       *
917       * @param levels the levels associated with each object
918       * @param levelOffset the index of the first level to use
919       * @param objs the objects to reorder according to the levels
920       * @param objOffset the index of the first object to use
921       * @param count the number of objects (and levels) to manipulate
922       */
923      public static void reorderVisually(byte[] levels, int levelOffset,
924                                         Object[] objs, int objOffset, int count)
925      {
926        // We need a copy of the 'levels' array, as we are going to modify it.
927        // This is unfortunate but difficult to avoid.
928        byte[] levelCopy = new byte[count];
929        // Do this explicitly so we can also find the maximum depth at the
930        // same time.
931        int max = 0;
932        int lowestOdd = 63;
933        for (int i = 0; i < count; ++i)
934          {
935            levelCopy[i] = levels[levelOffset + i];
936            max = Math.max(levelCopy[i], max);
937            if (levelCopy[i] % 2 != 0)
938              lowestOdd = Math.min(lowestOdd, levelCopy[i]);
939          }
940    
941        // Reverse the runs starting with the deepest.
942        for (int depth = max; depth >= lowestOdd; --depth)
943          {
944            int start = 0;
945            while (start < count)
946              {
947                // Find the start of a run >= DEPTH.
948                while (start < count && levelCopy[start] < depth)
949                  ++start;
950                if (start == count)
951                  break;
952                // Find the end of the run.
953                int end = start + 1;
954                while (end < count && levelCopy[end] >= depth)
955                  ++end;
956    
957                // Reverse this run.
958                for (int i = 0; i < (end - start) / 2; ++i)
959                  {
960                    byte tmpb = levelCopy[end - i - 1];
961                    levelCopy[end - i - 1] = levelCopy[start + i];
962                    levelCopy[start + i] = tmpb;
963                    Object tmpo = objs[objOffset + end - i - 1];
964                    objs[objOffset + end - i - 1] = objs[objOffset + start + i];
965                    objs[objOffset + start + i] = tmpo;
966                  }
967    
968                // Handle the next run.
969                start = end + 1;
970              }
971          }
972      }
973    
974      /**
975       * Returns false if all characters in the text between start and end
976       * are all left-to-right text. This implementation is just calls
977       * <code>Character.getDirectionality(char)</code> on all characters
978       * and makes sure all characters are either explicitly left-to-right
979       * or neutral in directionality (character types L, EN, ES, ET, AN,
980       * CS, S and WS).
981       */
982      public static boolean requiresBidi(char[] text, int start, int end)
983      {
984        for (int i = start; i < end; i++)
985          {
986            byte dir = Character.getDirectionality(text[i]);
987            if (dir != Character.DIRECTIONALITY_LEFT_TO_RIGHT
988                && dir != Character.DIRECTIONALITY_EUROPEAN_NUMBER
989                && dir != Character.DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR
990                && dir != Character.DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR
991                && dir != Character.DIRECTIONALITY_ARABIC_NUMBER
992                && dir != Character.DIRECTIONALITY_COMMON_NUMBER_SEPARATOR
993                && dir != Character.DIRECTIONALITY_SEGMENT_SEPARATOR
994                && dir != Character.DIRECTIONALITY_WHITESPACE
995                && dir != Character.DIRECTIONALITY_PARAGRAPH_SEPARATOR)
996              return true;
997          }
998    
999        return false;
1000      }
1001    }