001    /* Pattern.java -- Compiled regular expression ready to be applied.
002       Copyright (C) 2002, 2004, 2005, 2007 Free Software Foundation, Inc.
003    
004    This file is part of GNU Classpath.
005    
006    GNU Classpath is free software; you can redistribute it and/or modify
007    it under the terms of the GNU General Public License as published by
008    the Free Software Foundation; either version 2, or (at your option)
009    any later version.
010    
011    GNU Classpath is distributed in the hope that it will be useful, but
012    WITHOUT ANY WARRANTY; without even the implied warranty of
013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014    General Public License for more details.
015    
016    You should have received a copy of the GNU General Public License
017    along with GNU Classpath; see the file COPYING.  If not, write to the
018    Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
019    02110-1301 USA.
020    
021    Linking this library statically or dynamically with other modules is
022    making a combined work based on this library.  Thus, the terms and
023    conditions of the GNU General Public License cover the whole
024    combination.
025    
026    As a special exception, the copyright holders of this library give you
027    permission to link this library with independent modules to produce an
028    executable, regardless of the license terms of these independent
029    modules, and to copy and distribute the resulting executable under
030    terms of your choice, provided that you also meet, for each linked
031    independent module, the terms and conditions of the license of that
032    module.  An independent module is a module which is not derived from
033    or based on this library.  If you modify this library, you may extend
034    this exception to your version of the library, but you are not
035    obligated to do so.  If you do not wish to do so, delete this
036    exception statement from your version. */
037    
038    package java.util.regex;
039    
040    import gnu.java.util.regex.RE;
041    import gnu.java.util.regex.REException;
042    import gnu.java.util.regex.RESyntax;
043    
044    import java.io.Serializable;
045    import java.util.ArrayList;
046    
047    
048    /**
049     * Compiled regular expression ready to be applied.
050     *
051     * @since 1.4
052     */
053    public final class Pattern implements Serializable
054    {
055      private static final long serialVersionUID = 5073258162644648461L;
056    
057      public static final int CANON_EQ = 128;
058      public static final int CASE_INSENSITIVE = 2;
059      public static final int COMMENTS = 4;
060      public static final int DOTALL = 32;
061      public static final int MULTILINE = 8;
062      public static final int UNICODE_CASE = 64;
063      public static final int UNIX_LINES = 1;
064    
065      private final String regex;
066      private final int flags;
067    
068      private final RE re;
069    
070      private Pattern (String regex, int flags)
071        throws PatternSyntaxException
072      {
073        this.regex = regex;
074        this.flags = flags;
075    
076        RESyntax syntax = RESyntax.RE_SYNTAX_JAVA_1_4;
077        int gnuFlags = 0;
078        gnuFlags |= RE.REG_ICASE_USASCII;
079        if ((flags & CASE_INSENSITIVE) != 0)
080          gnuFlags |= RE.REG_ICASE;
081        if ((flags & MULTILINE) != 0)
082          {
083            gnuFlags |= RE.REG_MULTILINE;
084            syntax = new RESyntax(syntax);
085            syntax.setLineSeparator(null);
086          }
087        if ((flags & DOTALL) != 0)
088          gnuFlags |= RE.REG_DOT_NEWLINE;
089        if ((flags & UNICODE_CASE) != 0)
090          gnuFlags &= ~RE.REG_ICASE_USASCII;
091        // not yet supported:
092        // if ((flags & CANON_EQ) != 0) gnuFlags =
093    
094        if ((flags & UNIX_LINES) != 0)
095          {
096            // Use a syntax set with \n for linefeeds?
097            syntax = new RESyntax(syntax);
098            syntax.setLineSeparator("\n");
099          }
100    
101        if ((flags & COMMENTS) != 0)
102          {
103            gnuFlags |= RE.REG_X_COMMENTS;
104          }
105    
106        try
107          {
108            this.re = new RE(regex, gnuFlags, syntax);
109          }
110        catch (REException e)
111          {
112            PatternSyntaxException pse;
113            pse = new PatternSyntaxException(e.getMessage(),
114                                             regex, e.getPosition());
115            pse.initCause(e);
116            throw pse;
117          }
118      }
119    
120      // package private accessor method
121      RE getRE()
122      {
123        return re;
124      }
125    
126      /**
127       * @param regex The regular expression
128       *
129       * @exception PatternSyntaxException If the expression's syntax is invalid
130       */
131      public static Pattern compile (String regex)
132        throws PatternSyntaxException
133      {
134        return compile(regex, 0);
135      }
136    
137      /**
138       * @param regex The regular expression
139       * @param flags The match flags, a bit mask
140       *
141       * @exception PatternSyntaxException If the expression's syntax is invalid
142       * @exception IllegalArgumentException If bit values other than those
143       * corresponding to the defined match flags are set in flags
144       */
145      public static Pattern compile (String regex, int flags)
146        throws PatternSyntaxException
147      {
148        // FIXME: check which flags are really accepted
149        if ((flags & ~0xEF) != 0)
150          throw new IllegalArgumentException ();
151    
152        return new Pattern (regex, flags);
153      }
154    
155      public int flags ()
156      {
157        return this.flags;
158      }
159    
160      /**
161       * @param regex The regular expression
162       * @param input The character sequence to be matched
163       *
164       * @exception PatternSyntaxException If the expression's syntax is invalid
165       */
166      public static boolean matches (String regex, CharSequence input)
167      {
168        return compile(regex).matcher(input).matches();
169      }
170    
171      /**
172       * @param input The character sequence to be matched
173       */
174      public Matcher matcher (CharSequence input)
175      {
176        return new Matcher(this, input);
177      }
178    
179      /**
180       * @param input The character sequence to be matched
181       */
182      public String[] split (CharSequence input)
183      {
184        return split(input, 0);
185      }
186    
187      /**
188       * @param input The character sequence to be matched
189       * @param limit The result threshold
190       */
191      public String[] split (CharSequence input, int limit)
192      {
193        Matcher matcher = new Matcher(this, input);
194        ArrayList<String> list = new ArrayList<String>();
195        int empties = 0;
196        int count = 0;
197        int start = 0;
198        int end;
199        boolean matched = matcher.find();
200    
201        while (matched && (limit <= 0 || count < limit - 1))
202          {
203            ++count;
204            end = matcher.start();
205            if (start == end)
206              empties++;
207            else
208              {
209                while (empties > 0)
210                  {
211                    list.add("");
212                    empties--;
213                  }
214    
215                String text = input.subSequence(start, end).toString();
216                list.add(text);
217              }
218            start = matcher.end();
219            matched = matcher.find();
220          }
221    
222        // We matched nothing.
223        if (!matched && count == 0)
224          return new String[] { input.toString() };
225    
226        // Is the last token empty?
227        boolean emptyLast = (start == input.length());
228    
229        // Can/Must we add empties or an extra last token at the end?
230        if (list.size() < limit || limit < 0 || (limit == 0 && !emptyLast))
231          {
232            if (limit > list.size())
233              {
234                int max = limit - list.size();
235                empties = (empties > max) ? max : empties;
236              }
237            while (empties > 0)
238              {
239                list.add("");
240                empties--;
241              }
242          }
243    
244        // last token at end
245        if (limit != 0 || (limit == 0 && !emptyLast))
246          {
247            String t = input.subSequence(start, input.length()).toString();
248            if ("".equals(t) && limit == 0)
249              { /* Don't add. */ }
250            else
251              list.add(t);
252          }
253    
254        return list.toArray(new String[list.size()]);
255      }
256    
257      public String pattern ()
258      {
259        return regex;
260      }
261    
262      /**
263       * Return the regular expression used to construct this object.
264       * @specnote Prior to JDK 1.5 this method had a different behavior
265       * @since 1.5
266       */
267      public String toString()
268      {
269        return regex;
270      }
271    }