001/* Pattern.java -- Compiled regular expression ready to be applied.
002   Copyright (C) 2002, 2004, 2005, 2007, 2010
003   Free Software Foundation, Inc.
004
005This file is part of GNU Classpath.
006
007GNU Classpath is free software; you can redistribute it and/or modify
008it under the terms of the GNU General Public License as published by
009the Free Software Foundation; either version 2, or (at your option)
010any later version.
011
012GNU Classpath is distributed in the hope that it will be useful, but
013WITHOUT ANY WARRANTY; without even the implied warranty of
014MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
015General Public License for more details.
016
017You should have received a copy of the GNU General Public License
018along with GNU Classpath; see the file COPYING.  If not, write to the
019Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02002110-1301 USA.
021
022Linking this library statically or dynamically with other modules is
023making a combined work based on this library.  Thus, the terms and
024conditions of the GNU General Public License cover the whole
025combination.
026
027As a special exception, the copyright holders of this library give you
028permission to link this library with independent modules to produce an
029executable, regardless of the license terms of these independent
030modules, and to copy and distribute the resulting executable under
031terms of your choice, provided that you also meet, for each linked
032independent module, the terms and conditions of the license of that
033module.  An independent module is a module which is not derived from
034or based on this library.  If you modify this library, you may extend
035this exception to your version of the library, but you are not
036obligated to do so.  If you do not wish to do so, delete this
037exception statement from your version. */
038
039package java.util.regex;
040
041import gnu.java.lang.CPStringBuilder;
042
043import gnu.java.util.regex.RE;
044import gnu.java.util.regex.REException;
045import gnu.java.util.regex.RESyntax;
046
047import java.io.Serializable;
048import java.util.ArrayList;
049
050
051/**
052 * Compiled regular expression ready to be applied.
053 *
054 * @since 1.4
055 */
056public final class Pattern implements Serializable
057{
058  private static final long serialVersionUID = 5073258162644648461L;
059
060  public static final int CANON_EQ = 128;
061  public static final int CASE_INSENSITIVE = 2;
062  public static final int COMMENTS = 4;
063  public static final int DOTALL = 32;
064  public static final int MULTILINE = 8;
065  public static final int UNICODE_CASE = 64;
066  public static final int UNIX_LINES = 1;
067
068  private final String regex;
069  private final int flags;
070
071  private final RE re;
072
073  private Pattern (String regex, int flags)
074    throws PatternSyntaxException
075  {
076    this.regex = regex;
077    this.flags = flags;
078
079    RESyntax syntax = RESyntax.RE_SYNTAX_JAVA_1_4;
080    int gnuFlags = 0;
081    gnuFlags |= RE.REG_ICASE_USASCII;
082    if ((flags & CASE_INSENSITIVE) != 0)
083      gnuFlags |= RE.REG_ICASE;
084    if ((flags & MULTILINE) != 0)
085      {
086        gnuFlags |= RE.REG_MULTILINE;
087        syntax = new RESyntax(syntax);
088        syntax.setLineSeparator(null);
089      }
090    if ((flags & DOTALL) != 0)
091      gnuFlags |= RE.REG_DOT_NEWLINE;
092    if ((flags & UNICODE_CASE) != 0)
093      gnuFlags &= ~RE.REG_ICASE_USASCII;
094    // not yet supported:
095    // if ((flags & CANON_EQ) != 0) gnuFlags =
096
097    if ((flags & UNIX_LINES) != 0)
098      {
099        // Use a syntax set with \n for linefeeds?
100        syntax = new RESyntax(syntax);
101        syntax.setLineSeparator("\n");
102      }
103
104    if ((flags & COMMENTS) != 0)
105      {
106        gnuFlags |= RE.REG_X_COMMENTS;
107      }
108
109    try
110      {
111        this.re = new RE(regex, gnuFlags, syntax);
112      }
113    catch (REException e)
114      {
115        PatternSyntaxException pse;
116        pse = new PatternSyntaxException(e.getMessage(),
117                                         regex, e.getPosition());
118        pse.initCause(e);
119        throw pse;
120      }
121  }
122
123  // package private accessor method
124  RE getRE()
125  {
126    return re;
127  }
128
129  /**
130   * @param regex The regular expression
131   *
132   * @exception PatternSyntaxException If the expression's syntax is invalid
133   */
134  public static Pattern compile (String regex)
135    throws PatternSyntaxException
136  {
137    return compile(regex, 0);
138  }
139
140  /**
141   * @param regex The regular expression
142   * @param flags The match flags, a bit mask
143   *
144   * @exception PatternSyntaxException If the expression's syntax is invalid
145   * @exception IllegalArgumentException If bit values other than those
146   * corresponding to the defined match flags are set in flags
147   */
148  public static Pattern compile (String regex, int flags)
149    throws PatternSyntaxException
150  {
151    // FIXME: check which flags are really accepted
152    if ((flags & ~0xEF) != 0)
153      throw new IllegalArgumentException ();
154
155    return new Pattern (regex, flags);
156  }
157
158  public int flags ()
159  {
160    return this.flags;
161  }
162
163  /**
164   * @param regex The regular expression
165   * @param input The character sequence to be matched
166   *
167   * @exception PatternSyntaxException If the expression's syntax is invalid
168   */
169  public static boolean matches (String regex, CharSequence input)
170  {
171    return compile(regex).matcher(input).matches();
172  }
173
174  /**
175   * @param input The character sequence to be matched
176   */
177  public Matcher matcher (CharSequence input)
178  {
179    return new Matcher(this, input);
180  }
181
182  /**
183   * @param input The character sequence to be matched
184   */
185  public String[] split (CharSequence input)
186  {
187    return split(input, 0);
188  }
189
190  /**
191   * @param input The character sequence to be matched
192   * @param limit The result threshold
193   */
194  public String[] split (CharSequence input, int limit)
195  {
196    Matcher matcher = new Matcher(this, input);
197    ArrayList<String> list = new ArrayList<String>();
198    int empties = 0;
199    int count = 0;
200    int start = 0;
201    int end;
202    boolean matched = matcher.find();
203
204    while (matched && (limit <= 0 || count < limit - 1))
205      {
206        ++count;
207        end = matcher.start();
208        if (start == end)
209          empties++;
210        else
211          {
212            while (empties > 0)
213              {
214                list.add("");
215                empties--;
216              }
217
218            String text = input.subSequence(start, end).toString();
219            list.add(text);
220          }
221        start = matcher.end();
222        matched = matcher.find();
223      }
224
225    // We matched nothing.
226    if (!matched && count == 0)
227      return new String[] { input.toString() };
228
229    // Is the last token empty?
230    boolean emptyLast = (start == input.length());
231
232    // Can/Must we add empties or an extra last token at the end?
233    if (list.size() < limit || limit < 0 || (limit == 0 && !emptyLast))
234      {
235        if (limit > list.size())
236          {
237            int max = limit - list.size();
238            empties = (empties > max) ? max : empties;
239          }
240        while (empties > 0)
241          {
242            list.add("");
243            empties--;
244          }
245      }
246
247    // last token at end
248    if (limit != 0 || (limit == 0 && !emptyLast))
249      {
250        String t = input.subSequence(start, input.length()).toString();
251        if ("".equals(t) && limit == 0)
252          { /* Don't add. */ }
253        else
254          list.add(t);
255      }
256
257    return list.toArray(new String[list.size()]);
258  }
259
260  public String pattern ()
261  {
262    return regex;
263  }
264
265  /**
266   * Returns a literal pattern for the specified String.
267   *
268   * @param String to return a literal pattern for.
269   * @return a literal pattern for the specified String.
270   * @exception NullPointerException if str is null.
271   * @since 1.5
272   */
273  public static String quote(String str)
274  {
275    int eInd = str.indexOf("\\E");
276    if (eInd < 0)
277      {
278        // No need to handle backslashes.
279        return "\\Q" + str + "\\E";
280      }
281
282    CPStringBuilder sb = new CPStringBuilder(str.length() + 16);
283    sb.append("\\Q"); // start quote
284
285    int pos = 0;
286    do
287      {
288        // A backslash is quoted by another backslash;
289        // 'E' is not needed to be quoted.
290        sb.append(str.substring(pos, eInd))
291          .append("\\E" + "\\\\" + "E" + "\\Q");
292        pos = eInd + 2;
293      } while ((eInd = str.indexOf("\\E", pos)) >= 0);
294
295    sb.append(str.substring(pos, str.length()))
296      .append("\\E"); // end quote
297    return sb.toString();
298  }
299
300  /**
301   * Return the regular expression used to construct this object.
302   * @specnote Prior to JDK 1.5 this method had a different behavior
303   * @since 1.5
304   */
305  public String toString()
306  {
307    return regex;
308  }
309}