001    /* BreakIterator.java -- Breaks text into elements
002       Copyright (C) 1998, 1999, 2001, 2004, 2005, 2007
003       Free Software Foundation, Inc.
004    
005    This file is part of GNU Classpath.
006    
007    GNU Classpath is free software; you can redistribute it and/or modify
008    it under the terms of the GNU General Public License as published by
009    the Free Software Foundation; either version 2, or (at your option)
010    any later version.
011    
012    GNU Classpath is distributed in the hope that it will be useful, but
013    WITHOUT ANY WARRANTY; without even the implied warranty of
014    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
015    General Public License for more details.
016    
017    You should have received a copy of the GNU General Public License
018    along with GNU Classpath; see the file COPYING.  If not, write to the
019    Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
020    02110-1301 USA.
021    
022    Linking this library statically or dynamically with other modules is
023    making a combined work based on this library.  Thus, the terms and
024    conditions of the GNU General Public License cover the whole
025    combination.
026    
027    As a special exception, the copyright holders of this library give you
028    permission to link this library with independent modules to produce an
029    executable, regardless of the license terms of these independent
030    modules, and to copy and distribute the resulting executable under
031    terms of your choice, provided that you also meet, for each linked
032    independent module, the terms and conditions of the license of that
033    module.  An independent module is a module which is not derived from
034    or based on this library.  If you modify this library, you may extend
035    this exception to your version of the library, but you are not
036    obligated to do so.  If you do not wish to do so, delete this
037    exception statement from your version. */
038    
039    
040    package java.text;
041    
042    import gnu.java.locale.LocaleHelper;
043    
044    import gnu.java.text.CharacterBreakIterator;
045    import gnu.java.text.LineBreakIterator;
046    import gnu.java.text.SentenceBreakIterator;
047    import gnu.java.text.WordBreakIterator;
048    
049    import java.text.spi.BreakIteratorProvider;
050    
051    import java.util.Locale;
052    import java.util.MissingResourceException;
053    import java.util.ResourceBundle;
054    import java.util.ServiceLoader;
055    
056    /**
057     * This class iterates over text elements such as words, lines, sentences,
058     * and characters.  It can only iterate over one of these text elements at
059     * a time.  An instance of this class configured for the desired iteration
060     * type is created by calling one of the static factory methods, not
061     * by directly calling a constructor.
062     *
063     * The standard iterators created by the factory methods in this
064     * class will be valid upon creation.  That is, their methods will
065     * not cause exceptions if called before you call setText().
066     *
067     * @author Tom Tromey (tromey@cygnus.com)
068     * @author Aaron M. Renn (arenn@urbanophile.com)
069     * @date March 19, 1999
070     */
071    /* Written using "Java Class Libraries", 2nd edition, plus online
072     * API docs for JDK 1.2 beta from http://www.javasoft.com.
073     * Status:  Believed complete and correct to 1.1.
074     */
075    public abstract class BreakIterator implements Cloneable
076    {
077      /**
078       * This value is returned by the <code>next()</code> and
079       * <code>previous</code> in order to indicate that the end of the
080       * text has been reached.
081       */
082      // The value was discovered by writing a test program.
083      public static final int DONE = -1;
084    
085      /**
086       * This method initializes a new instance of <code>BreakIterator</code>.
087       * This protected constructor is available to subclasses as a default
088       * no-arg superclass constructor.
089       */
090      protected BreakIterator ()
091      {
092      }
093    
094      /**
095       * Create a clone of this object.
096       */
097      public Object clone ()
098      {
099        try
100          {
101            return super.clone();
102          }
103        catch (CloneNotSupportedException e)
104          {
105            return null;
106          }
107      }
108    
109      /**
110       * This method returns the index of the current text element boundary.
111       *
112       * @return The current text boundary.
113       */
114      public abstract int current ();
115    
116      /**
117       * This method returns the first text element boundary in the text being
118       * iterated over.
119       *
120       * @return The first text boundary.
121       */
122      public abstract int first ();
123    
124      /**
125       * This methdod returns the offset of the text element boundary following
126       * the specified offset.
127       *
128       * @param pos The text index from which to find the next text boundary.
129       *
130       * @return The next text boundary following the specified index.
131       */
132      public abstract int following (int pos);
133    
134      /**
135       * This method returns a list of locales for which instances of
136       * <code>BreakIterator</code> are available.
137       *
138       * @return A list of available locales
139       */
140      public static synchronized Locale[] getAvailableLocales ()
141      {
142        Locale[] l = new Locale[1];
143        l[0] = Locale.US;
144        return l;
145      }
146    
147      private static BreakIterator getInstance (String type, Locale loc)
148      {
149        String className;
150        try
151          {
152            ResourceBundle res
153              = ResourceBundle.getBundle("gnu.java.locale.LocaleInformation",
154                                         loc, ClassLoader.getSystemClassLoader());
155            className = res.getString(type);
156          }
157        catch (MissingResourceException x)
158          {
159            return null;
160          }
161        try
162          {
163            Class k = Class.forName(className);
164            return (BreakIterator) k.newInstance();
165          }
166        catch (ClassNotFoundException x1)
167          {
168            return null;
169          }
170        catch (InstantiationException x2)
171          {
172            return null;
173          }
174        catch (IllegalAccessException x3)
175          {
176            return null;
177          }
178      }
179    
180      /**
181       * This method returns an instance of <code>BreakIterator</code> that will
182       * iterate over characters as defined in the default locale.
183       *
184       * @return A <code>BreakIterator</code> instance for the default locale.
185       */
186      public static BreakIterator getCharacterInstance ()
187      {
188        return getCharacterInstance (Locale.getDefault());
189      }
190    
191      /**
192       * This method returns an instance of <code>BreakIterator</code> that will
193       * iterate over characters as defined in the specified locale.
194       *
195       * @param locale The desired locale.
196       *
197       * @return A <code>BreakIterator</code> instance for the specified locale.
198       */
199      public static BreakIterator getCharacterInstance (Locale locale)
200      {
201        BreakIterator r = getInstance("CharacterIterator", locale);
202        if (r != null)
203          return r;
204        for (BreakIteratorProvider p :
205               ServiceLoader.load(BreakIteratorProvider.class))
206          {
207            for (Locale loc : p.getAvailableLocales())
208              {
209                if (loc.equals(locale))
210                  {
211                    BreakIterator bi = p.getCharacterInstance(locale);
212                    if (bi != null)
213                      return bi;
214                    break;
215                  }
216              }
217          }
218        if (locale.equals(Locale.ROOT))
219          return new CharacterBreakIterator();
220        return getCharacterInstance(LocaleHelper.getFallbackLocale(locale));
221      }
222    
223      /**
224       * This method returns an instance of <code>BreakIterator</code> that will
225       * iterate over line breaks as defined in the default locale.
226       *
227       * @return A <code>BreakIterator</code> instance for the default locale.
228       */
229      public static BreakIterator getLineInstance ()
230      {
231        return getLineInstance (Locale.getDefault());
232      }
233    
234      /**
235       * This method returns an instance of <code>BreakIterator</code> that will
236       * iterate over line breaks as defined in the specified locale.
237       *
238       * @param locale The desired locale.
239       *
240       * @return A <code>BreakIterator</code> instance for the default locale.
241       */
242      public static BreakIterator getLineInstance (Locale locale)
243      {
244        BreakIterator r = getInstance ("LineIterator", locale);
245        if (r != null)
246          return r;
247        for (BreakIteratorProvider p :
248               ServiceLoader.load(BreakIteratorProvider.class))
249          {
250            for (Locale loc : p.getAvailableLocales())
251              {
252                if (loc.equals(locale))
253                  {
254                    BreakIterator bi = p.getLineInstance(locale);
255                    if (bi != null)
256                      return bi;
257                    break;
258                  }
259              }
260          }
261        if (locale.equals(Locale.ROOT))
262          return new LineBreakIterator();
263        return getLineInstance(LocaleHelper.getFallbackLocale(locale));
264      }
265    
266      /**
267       * This method returns an instance of <code>BreakIterator</code> that will
268       * iterate over sentences as defined in the default locale.
269       *
270       * @return A <code>BreakIterator</code> instance for the default locale.
271       */
272      public static BreakIterator getSentenceInstance ()
273      {
274        return getSentenceInstance (Locale.getDefault());
275      }
276    
277      /**
278       * This method returns an instance of <code>BreakIterator</code> that will
279       * iterate over sentences as defined in the specified locale.
280       *
281       * @param locale The desired locale.
282       *
283       * @return A <code>BreakIterator</code> instance for the default locale.
284       */
285      public static BreakIterator getSentenceInstance (Locale locale)
286      {
287        BreakIterator r = getInstance ("SentenceIterator", locale);
288        if (r != null)
289          return r;
290        for (BreakIteratorProvider p :
291               ServiceLoader.load(BreakIteratorProvider.class))
292          {
293            for (Locale loc : p.getAvailableLocales())
294              {
295                if (loc.equals(locale))
296                  {
297                    BreakIterator bi = p.getSentenceInstance(locale);
298                    if (bi != null)
299                      return bi;
300                    break;
301                  }
302              }
303          }
304        if (locale.equals(Locale.ROOT))
305          return new SentenceBreakIterator();
306        return getSentenceInstance(LocaleHelper.getFallbackLocale(locale));
307      }
308    
309      /**
310       * This method returns the text this object is iterating over as a
311       * <code>CharacterIterator</code>.
312       *
313       * @return The text being iterated over.
314       */
315      public abstract CharacterIterator getText ();
316    
317      /**
318       * This method returns an instance of <code>BreakIterator</code> that will
319       * iterate over words as defined in the default locale.
320       *
321       * @return A <code>BreakIterator</code> instance for the default locale.
322       */
323      public static BreakIterator getWordInstance ()
324      {
325        return getWordInstance (Locale.getDefault());
326      }
327    
328      /**
329       * This method returns an instance of <code>BreakIterator</code> that will
330       * iterate over words as defined in the specified locale.
331       *
332       * @param locale The desired locale.
333       *
334       * @return A <code>BreakIterator</code> instance for the default locale.
335       */
336      public static BreakIterator getWordInstance (Locale locale)
337      {
338        BreakIterator r = getInstance ("WordIterator", locale);
339        if (r != null)
340          return r;
341        for (BreakIteratorProvider p :
342               ServiceLoader.load(BreakIteratorProvider.class))
343          {
344            for (Locale loc : p.getAvailableLocales())
345              {
346                if (loc.equals(locale))
347                  {
348                    BreakIterator bi = p.getWordInstance(locale);
349                    if (bi != null)
350                      return bi;
351                    break;
352                  }
353              }
354          }
355        if (locale.equals(Locale.ROOT))
356          return new WordBreakIterator();
357        return getWordInstance(LocaleHelper.getFallbackLocale(locale));
358      }
359    
360      /**
361       * This method tests whether or not the specified position is a text
362       * element boundary.
363       *
364       * @param pos The text position to test.
365       *
366       * @return <code>true</code> if the position is a boundary,
367       * <code>false</code> otherwise.
368       */
369      public boolean isBoundary (int pos)
370      {
371        if (pos == 0)
372          return true;
373        return following (pos - 1) == pos;
374      }
375    
376      /**
377       * This method returns the last text element boundary in the text being
378       * iterated over.
379       *
380       * @return The last text boundary.
381       */
382      public abstract int last ();
383    
384      /**
385       * This method returns the text element boundary following the current
386       * text position.
387       *
388       * @return The next text boundary.
389       */
390      public abstract int next ();
391    
392      /**
393       * This method returns the n'th text element boundary following the current
394       * text position.
395       *
396       * @param n The number of text element boundaries to skip.
397       *
398       * @return The next text boundary.
399       */
400      public abstract int next (int n);
401    
402      /**
403       * This methdod returns the offset of the text element boundary preceding
404       * the specified offset.
405       *
406       * @param pos The text index from which to find the preceding text boundary.
407       *
408       * @returns The next text boundary preceding the specified index.
409       */
410      public int preceding (int pos)
411      {
412        if (following (pos) == DONE)
413          last ();
414        while (previous () >= pos)
415          ;
416        return current ();
417      }
418    
419      /**
420       * This method returns the text element boundary preceding the current
421       * text position.
422       *
423       * @return The previous text boundary.
424       */
425      public abstract int previous ();
426    
427      /**
428       * This method sets the text string to iterate over.
429       *
430       * @param newText The <code>String</code> to iterate over.
431       */
432      public void setText (String newText)
433      {
434        setText (new StringCharacterIterator (newText));
435      }
436    
437      /**
438       * This method sets the text to iterate over from the specified
439       * <code>CharacterIterator</code>.
440       *
441       * @param newText The desired <code>CharacterIterator</code>.
442       */
443      public abstract void setText (CharacterIterator newText);
444    }