class MaRuKu::NokogiriHTMLFragment

Nokogiri backend for HTML handling

Public Class Methods

new(raw_html) click to toggle source
# File lib/maruku/html.rb, line 49
    def initialize(raw_html)
      # The HTML is placed inside a throwaway <html> document whose only
      # job is to carry a doctype (needed just for the entity references).
      opening = '<!DOCTYPE html PUBLIC
  "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN"
  "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd">
<html>'
      closing = '</html>'
      document_source = opening + raw_html.strip + closing

      # Parse with the nonet option set, then keep only the root (<html>)
      # node; its children are the fragment's contents.
      parsed = Nokogiri::XML::Document.parse(document_source) do |options|
        options.nonet
      end
      @fragment = parsed.root
    end

Public Instance Methods

add_class(class_name) click to toggle source

Add a class to the children of this fragment

# File lib/maruku/html.rb, line 68
# Append +class_name+ to the 'class' attribute of every direct child of
# the fragment, keeping any class names that are already present.
#
# @param class_name [String] the class name to append
def add_class(class_name)
  @fragment.children.each do |child|
    class_names = (child['class'] || '').split(' ')
    class_names << class_name
    child['class'] = class_names.join(' ')
  end
end
first_node_name() click to toggle source

@return [String, nil] The name of the first child node of the fragment, or nil if the fragment has no children.

# File lib/maruku/html.rb, line 62
# Name of the fragment's first child node.
#
# @return the first child's name, or nil when the fragment has no children
def first_node_name
  head = @fragment.children.first
  return nil unless head

  head.name
end
process_markdown_inside_elements(doc) click to toggle source

Process markdown within the contents of some elements and replace their contents with the processed version.

@param doc [MaRuKu::MDDocument] A document to process.

# File lib/maruku/html.rb, line 78
# Run the Markdown parser over the text found directly inside selected
# elements of this fragment and splice the rendered output back in place
# of the original text nodes.
#
# Selected elements are those carrying a 'markdown' attribute, plus, when
# the fragment's first child is an inline element, that child and its
# inline descendants (as collected by span_descendents).
#
# @param doc [MaRuKu::MDDocument] A document to process.
def process_markdown_inside_elements(doc)
  targets = @fragment.css("[markdown]")

  head = @fragment.children.first
  if head && HTML_INLINE_ELEMS.include?(head.name)
    # The css query above already picked it up if it has the attribute.
    targets << head unless head.attribute('markdown')
    targets += span_descendents(head)
  end

  targets.each do |element|
    # Read the attribute value before stripping it from the output.
    mode = element['markdown']
    element.remove_attribute('markdown')

    # markdown="0" means the user asked for no markdown parsing inside
    next if mode == "0"

    as_blocks = (mode == 'block') || BLOCK_TAGS.include?(element.name)

    # Only text nodes that are direct children of the element are handled.
    element.xpath("./text()").each do |text_node|
      escaped = MaRuKu::Out::HTML.escapeHTML(text_node.text)
      next if escaped.strip.empty?

      parsed = if as_blocks
                 doc.parse_text_as_markdown(escaped)
               else
                 doc.parse_span(escaped)
               end

      # Put the original leading and trailing whitespace back around the
      # parsed content (only when the pattern matches).
      edges = /\A(\s*).*?(\s*)\z/.match(escaped)
      parsed = [edges[1]] + parsed + [edges[2]] if edges

      container = doc.md_el(:dummy, parsed)

      # Nokogiri collapses consecutive Text nodes, so a temporary <guard>
      # element takes the text node's place and each rendered piece is
      # inserted in front of it before it is removed again.
      placeholder = Nokogiri::XML::Element.new('guard', @fragment)
      text_node.replace(placeholder)
      container.children_to_html.each { |piece| placeholder.before(piece.to_s) }
      placeholder.remove
    end
  end
end
to_html() click to toggle source

Convert this fragment to an HTML or XHTML string. @return [String]

# File lib/maruku/html.rb, line 121
# Serialize this fragment to a string by serializing each child node in
# order (UTF-8, XHTML save options without pretty-printing) and
# concatenating the results.
#
# @return [String] the serialized children; "" when there are none
def to_html
  save_options = Nokogiri::XML::Node::SaveOptions
  # Clear the FORMAT bit with AND-NOT. XOR would merely toggle it, which
  # turns formatting ON whenever DEFAULT_XHTML does not already include
  # FORMAT; AND-NOT yields the same value as XOR when it does.
  output_options = save_options::DEFAULT_XHTML & ~save_options::FORMAT
  @fragment.children.inject("") do |out, child|
    out << child.serialize(:save_with => output_options, :encoding => 'UTF-8')
  end
end

Private Instance Methods

span_descendents(e) click to toggle source

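Get all span-level (inline) descendants of the given element as a flat NodeSet. The search recurses only through inline elements; non-inline children and everything below them are skipped.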

@param e [Nokogiri::XML::Node] An element. @return [Nokogiri::XML::NodeSet]

# File lib/maruku/html.rb, line 136
# Collect the inline element children of +e+, each followed by its own
# inline descendants, into one flat node set. Children whose name is not
# in HTML_INLINE_ELEMS are skipped and not descended into.
#
# @param e [Nokogiri::XML::Node] An element.
# @return [Nokogiri::XML::NodeSet]
def span_descendents(e)
  collected = Nokogiri::XML::NodeSet.new(Nokogiri::XML::Document.new)
  e.element_children.each do |child|
    next unless HTML_INLINE_ELEMS.include?(child.name)

    collected << child
    collected += span_descendents(child)
  end
  collected
end