001/* DTD.java --
002   Copyright (C) 2005 Free Software Foundation, Inc.
003
004This file is part of GNU Classpath.
005
006GNU Classpath is free software; you can redistribute it and/or modify
007it under the terms of the GNU General Public License as published by
008the Free Software Foundation; either version 2, or (at your option)
009any later version.
010
011GNU Classpath is distributed in the hope that it will be useful, but
012WITHOUT ANY WARRANTY; without even the implied warranty of
013MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014General Public License for more details.
015
016You should have received a copy of the GNU General Public License
017along with GNU Classpath; see the file COPYING.  If not, write to the
018Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
01902110-1301 USA.
020
021Linking this library statically or dynamically with other modules is
022making a combined work based on this library.  Thus, the terms and
023conditions of the GNU General Public License cover the whole
024combination.
025
026As a special exception, the copyright holders of this library give you
027permission to link this library with independent modules to produce an
028executable, regardless of the license terms of these independent
029modules, and to copy and distribute the resulting executable under
030terms of your choice, provided that you also meet, for each linked
031independent module, the terms and conditions of the license of that
032module.  An independent module is a module which is not derived from
033or based on this library.  If you modify this library, you may extend
034this exception to your version of the library, but you are not
035obligated to do so.  If you do not wish to do so, delete this
036exception statement from your version. */
037
038
039package javax.swing.text.html.parser;
040
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.util.BitSet;
import java.util.Hashtable;
import java.util.Locale;
import java.util.StringTokenizer;
import java.util.Vector;
051
052/**
053 * <p>Representation or the SGML DTD document.
054 * Provides basis for describing a syntax of the
055 * HTML documents. The fields of this class are NOT initialized in
056 * constructor. You need to do this separately before passing this data
057 * structure to the HTML parser. The subclasses with the fields, pre-
058 * initialized, for example, for HTML 4.01, can be available only between
059 * the implementation specific classes
060 * ( for example, {@link gnu.javax.swing.text.html.parser.HTML_401F }
061 * in this implementation).</p>
062 * <p>
063 * If you need more information about SGML DTD documents,
064 * the author suggests to read SGML tutorial on
065 * <a href="http://www.w3.org/TR/WD-html40-970708/intro/sgmltut.html"
066 * >http://www.w3.org/TR/WD-html40-970708/intro/sgmltut.html</a>.
067 * We also recommend Goldfarb C.F (1991) <i>The SGML Handbook</i>,
068 * Oxford University Press, 688 p, ISBN: 0198537379.
069 * </p>
070 * <p>
071 * Warning: the html, head and other tag fields will only be automatically
072 * assigned if the VM has the correctly implemented reflection mechanism.
073 * As these fields are not used anywhere in the implementation, not
074 * exception will be thrown in the opposite case.
075 * </p>
076 *
077 * @author Audrius Meskauskas, Lithuania (AudriusA@Bioinformatics.org)
078 */
079public class DTD
080  implements DTDConstants
081{
082  /**
083   * The version of the persistent data format.
084   * @specnote This was made <code>final</code> in 1.5.
085   */
086  public static final int FILE_VERSION = 1;
087
088  /**
089   * The table of existing available DTDs.
090   */
091  static Hashtable<String,DTD> dtdHash = new Hashtable<String,DTD>();
092
093  /**
094   * The applet element for this DTD.
095   */
096  public Element applet;
097
098  /**
099   * The base element for this DTD.
100   */
101  public Element base;
102
103  /**
104   * The body element for this DTD.
105   */
106  public Element body;
107
108  /**
109   * The head element for this DTD.
110   */
111  public Element head;
112
113  /**
114   * The html element for this DTD.
115   */
116  public Element html;
117
118  /**
119   * The isindex element of for this DTD.
120   */
121  public Element isindex;
122
123  /**
124   * The meta element for this DTD.
125   */
126  public Element meta;
127
128  /**
129   * The p element for this DTD.
130   */
131  public Element p;
132
133  /**
134   * The param element for this DTD.
135   */
136  public Element param;
137
138  /**
139   * The pcdata for this DTD.
140   */
141  public Element pcdata;
142
143  /**
144   * The title element for this DTD.
145   */
146  public Element title;
147
148  /**
149   * The element for accessing all DTD elements by name.
150   */
151  public Hashtable<String,Element> elementHash =
152    new Hashtable<String,Element>();
153
154  /**
155   * The entity table for accessing all DTD entities by name.
156   */
157  public Hashtable<Object, Entity> entityHash = new Hashtable<Object, Entity>();
158
159  /**
160   *  The name of this DTD.
161   */
162  public String name;
163
164  /**
165   * Contains all elements in this DTD. The
166   * javax.swing.text.html.parser.Element#index field of all elements
167   * in this vector is set to the element position in this vector.
168   */
169  public Vector<Element> elements = new Vector<Element>();
170
171  /** Create a new DTD with the specified name. */
172  protected DTD(String a_name)
173  {
174    name = a_name;
175  }
176
177  /** Get this DTD by name. The current implementation
178   * only looks in the internal table of DTD documents. If no corresponding
179   * entry is found, the new entry is created, placed into
180   * the table and returned. */
181  public static DTD getDTD(String name)
182                    throws IOException
183  {
184    DTD d = dtdHash.get(name);
185
186    if (d == null)
187      {
188        d = new DTD(name);
189        dtdHash.put(d.name, d);
190      }
191
192    return d;
193  }
194
195  /**
196   * Get the element by the element name. If the element is not yet
197   * defined, it is newly created and placed into the element table.
198   * If the element name matches (ingoring case) a public non static
199   * element field in this class, this field is assigned to the value
200   * of the newly created element.
201   */
202  public Element getElement(String element_name)
203  {
204    return newElement(element_name);
205  }
206
207  /**
208   * Get the element by the value of its
209   * {@link javax.swing.text.html.parser.Element#index} field.
210   */
211  public Element getElement(int index)
212  {
213    return elements.get(index);
214  }
215
216  /**
217   * Get the entity with the given identifier.
218   * @param id that can be returned by
219   * {@link javax.swing.text.html.parser.Entity#name2type(String an_entity)}
220   * @return The entity from this DTD or null if there is no entity with
221   * such id or such entity is not present in the table of this instance.
222   */
223  public Entity getEntity(int id)
224  {
225    String name = Entity.mapper.get(id);
226
227    if (name != null)
228      return entityHash.get(name);
229    else
230      return null;
231  }
232
233  /**
234   * Get the named entity by its name.
235   */
236  public Entity getEntity(String entity_name)
237  {
238    return entityHash.get(entity_name);
239  }
240
241  /**
242   * Get the name of this instance of DTD
243   */
244  public String getName()
245  {
246    return name;
247  }
248
249  /**
250   * Creates, adds into the entity table and returns the
251   * character entity like <code>&amp;lt;</code>
252   *  (means '<code>&lt;</code>' );
253   * @param name The entity name (without heading &amp; and closing ;)
254   * @param type The entity type
255   * @param character The entity value (single character)
256   * @return The created entity
257   */
258  public Entity defEntity(String name, int type, int character)
259  {
260    Entity e = newEntity(name, type);
261    e.data = new char[] { (char) character };
262    return e;
263  }
264
265  /**
266   * Define the attributes for the element with the given name.
267   * If the element is not exist, it is created.
268   * @param forElement
269   * @param attributes
270   */
271  public void defineAttributes(String forElement, AttributeList attributes)
272  {
273    Element e = elementHash.get(forElement.toLowerCase());
274
275    if (e == null)
276      e = newElement(forElement);
277
278    e.atts = attributes;
279  }
280
281  /**
282   * Defines the element and adds it to the element table. Sets the
283   * <code>Element.index</code> field to the value, unique for this
284   * instance of DTD. If the element with the given name already exists,
285   * replaces all other its settings by the method argument values.
286   * @param name the name of the element
287   * @param type the type of the element
288   * @param headless true if the element needs no starting tag
289   * (should not occur in HTML).
290   * @param tailless true if the element needs no ending tag (like
291   * <code>&lt;hr&gt;</code>
292   * @param content the element content
293   * @param exclusions the set of elements that must not occur inside
294   * this element. The <code>Element.index</code> value defines which
295   * bit in this bitset corresponds to that element.
296   * @param inclusions the set of elements that can occur inside this
297   * element. the <code>Element.index</code> value defines which
298   * bit in this bitset corresponds to that element.
299   * @param attributes the element attributes.
300   * @return the newly defined element.
301   */
302  public Element defineElement(String name, int type, boolean headless,
303                               boolean tailless, ContentModel content,
304                               BitSet exclusions, BitSet inclusions,
305                               AttributeList attributes
306                              )
307  {
308    Element e = newElement(name);
309    e.type = type;
310    e.oStart = headless;
311    e.oEnd = tailless;
312    e.content = content;
313    e.exclusions = exclusions;
314    e.inclusions = inclusions;
315    e.atts = attributes;
316
317    return e;
318  }
319
320  /**
321   * Creates, intializes and adds to the entity table the new
322   * entity.
323   * @param name the name of the entity
324   * @param type the type of the entity
325   * @param data the data section of the entity
326   * @return the created entity
327   */
328  public Entity defineEntity(String name, int type, char[] data)
329  {
330    Entity e = newEntity(name, type);
331    e.data = data;
332
333    return e;
334  }
335
336  /** Place this DTD into the DTD table. */
337  public static void putDTDHash(String name, DTD dtd)
338  {
339    dtdHash.put(name, dtd);
340  }
341
342  /**
343   * <p>Reads DTD from an archived format. This format is not standardized
344   * and differs between implementations.</p><p> This implementation
345   * reads and defines all entities and elements using
346   * ObjectInputStream. The elements and entities can be written into the
347   * stream in any order. The objects other than elements and entities
348   * are ignored.</p>
349   * @param stream A data stream to read from.
350   * @throws java.io.IOException If one is thrown by the input stream
351   */
352  public void read(DataInputStream stream)
353            throws java.io.IOException
354  {
355    ObjectInputStream oi = new ObjectInputStream(stream);
356    Object def;
357    try
358      {
359        while (true)
360          {
361            def = oi.readObject();
362            if (def instanceof Element)
363              {
364                Element e = (Element) def;
365                elementHash.put(e.name.toLowerCase(), e);
366                assignField(e);
367              }
368            else if (def instanceof Entity)
369              {
370                Entity e = (Entity) def;
371                entityHash.put(e.name, e);
372              }
373          }
374      }
375    catch (ClassNotFoundException ex)
376      {
377        throw new IOException(ex.getMessage());
378      }
379    catch (EOFException ex)
380      {
381        // ok EOF
382      }
383  }
384
385  /**
386   * Returns the name of this instance of DTD.
387   */
388  public String toString()
389  {
390    return name;
391  }
392
393  /**
394   * Creates and returns new attribute (not an attribute list).
395   * @param name the name of this attribute
396   * @param type the type of this attribute (FIXED, IMPLIED or
397   * REQUIRED from <code>DTDConstants</code>).
398   * @param modifier the modifier of this attribute
399   * @param default_value the default value of this attribute
400   * @param allowed_values the allowed values of this attribute. The multiple
401   * possible values in this parameter are supposed to be separated by
402   * '|', same as in SGML DTD <code>&lt;!ATTLIST </code>tag. This parameter
403   * can be null if no list of allowed values is specified.
404   * @param atts the previous attribute of this element. This is
405   * placed to the field
406   * {@link javax.swing.text.html.parser.AttributeList#next },
407   * creating a linked list.
408   * @return The attributes.
409   */
410  protected AttributeList defAttributeList(String name, int type, int modifier,
411                                           String default_value,
412                                           String allowed_values,
413                                           AttributeList atts
414                                          )
415  {
416    AttributeList al = new AttributeList(name);
417    al.modifier = modifier;
418    al.value = default_value;
419    al.next = atts;
420
421    if (allowed_values != null)
422      {
423        StringTokenizer st = new StringTokenizer(allowed_values, " \t|");
424        Vector<String> v = new Vector<String>(st.countTokens());
425
426        while (st.hasMoreTokens())
427          v.add(st.nextToken());
428
429        al.values = v;
430      }
431
432    return al;
433  }
434
435  /**
436   * Creates a new content model.
437   * @param type specifies the BNF operation for this content model.
438   * The valid operations are documented in the
439   * {@link javax.swing.text.html.parser.ContentModel#type }.
440   * @param content the content of this content model
441   * @param next if the content model is specified by BNF-like
442   * expression, contains the rest of this expression.
443   * @return The newly created content model.
444   */
445  protected ContentModel defContentModel(int type, Object content,
446                                         ContentModel next
447                                        )
448  {
449    ContentModel model = new ContentModel();
450    model.type = type;
451    model.next = next;
452    model.content = content;
453
454    return model;
455  }
456
457  /**
458   * Defines a new element and adds it to the element table.
459   * If the element alredy exists,
460   * overrides it settings with the specified values.
461   * @param name the name of the new element
462   * @param type the type of the element
463   * @param headless true if the element needs no starting tag
464   * @param tailless true if the element needs no closing tag
465   * @param content the element content.
466   * @param exclusions the elements that must be excluded from the
467   * content of this element, in all levels of the hierarchy.
468   * @param inclusions the elements that can be included as the
469   * content of this element.
470   * @param attributes the element attributes.
471   * @return the created or updated element.
472   */
473  protected Element defElement(String name, int type, boolean headless,
474                               boolean tailless, ContentModel content,
475                               String[] exclusions, String[] inclusions,
476                               AttributeList attributes
477                              )
478  {
479    // compute the bit sets
480    BitSet exclude = bitSet(exclusions);
481    BitSet include = bitSet(inclusions);
482
483    Element e =
484      defineElement(name, type, headless, tailless, content, exclude, include,
485                    attributes
486                   );
487
488    return e;
489  }
490
491  /**
492   * Creates, intializes and adds to the entity table the new
493   * entity.
494   * @param name the name of the entity
495   * @param type the type of the entity
496   * @param data the data section of the entity
497   * @return the created entity
498   */
499  protected Entity defEntity(String name, int type, String data)
500  {
501    Entity e = newEntity(name, type);
502    e.data = data.toCharArray();
503
504    return e;
505  }
506
507  private void assignField(Element e)
508  {
509    String element_name = e.name;
510    try
511      {
512        // Assign the field via reflection.
513        Field f = getClass().getField(element_name.toLowerCase());
514        if ((f.getModifiers() & Modifier.PUBLIC) != 0)
515          if ((f.getModifiers() & Modifier.STATIC) == 0)
516            if (f.getType().isAssignableFrom(e.getClass()))
517              f.set(this, e);
518      }
519    catch (IllegalAccessException ex)
520      {
521        unexpected(ex);
522      }
523    catch (NoSuchFieldException ex)
524      {
525        // This is ok.
526      }
527
528    // Some virtual machines may still lack the proper
529    // implementation of reflection. As the tag fields
530    // are not used anywhere in this implementation,
531    // (and this class is also rarely used by the end user),
532    // it may be better not to crash everything by throwing an error
533    // for each case when the HTML parsing is required.
534    catch (Throwable t)
535      {
536        // This VM has no reflection mechanism implemented!
537        if (t instanceof OutOfMemoryError)
538          throw (Error) t;
539      }
540  }
541
542  /**
543   * Create the bit set for this array of elements.
544   * The unknown elements are automatically defined and added
545   * to the element table.
546   * @param elements
547   * @return The bit set.
548   */
549  private BitSet bitSet(String[] elements)
550  {
551    BitSet b = new BitSet();
552
553    for (int i = 0; i < elements.length; i++)
554      {
555        Element e = getElement(elements [ i ]);
556
557        if (e == null)
558          e = newElement(elements [ i ]);
559
560        b.set(e.index);
561      }
562
563    return b;
564  }
565
566  /**
567   * Find the element with the given name in the element table.
568   * If not find, create a new element with this name and add to the
569   * table.
570   * @param name the name of the element
571   * @return the found or created element.
572   */
573  private Element newElement(String name)
574  {
575    Element e = elementHash.get(name.toLowerCase());
576
577    if (e == null)
578      {
579        e = new Element();
580        e.name = name;
581        e.index = elements.size();
582        elements.add(e);
583        elementHash.put(e.name.toLowerCase(), e);
584        assignField(e);
585      }
586    return e;
587  }
588
589  /**
590   * Creates and adds to the element table the entity with an
591   * unitialized data section. Used internally.
592   * @param name the name of the entity
593   * @param type the type of the entity, a bitwise combination
594   * of GENERAL, PARAMETER, SYSTEM and PUBLIC.
595   *
596   * @return the created entity
597   */
598  private Entity newEntity(String name, int type)
599  {
600    Entity e = new Entity(name, type, null);
601    entityHash.put(e.name, e);
602    return e;
603  }
604
605  private void unexpected(Exception ex)
606  {
607    throw new Error("This should never happen, report a bug", ex);
608  }
609}