module Loofah::HTML5::Scrub
Constants
- CONTROL_CHARACTERS
- CRASS_SEMICOLON
- CSS_IMPORTANT
- CSS_KEYWORDISH
- CSS_PROPERTY_STRING_WITHOUT_EMBEDDED_QUOTES
- DATA_ATTRIBUTE_NAME
Public Class Methods
allowed_element?(element_name)
click to toggle source
# File lib/loofah/html5/scrub.rb, line 16 def allowed_element?(element_name) ::Loofah::HTML5::SafeList::ALLOWED_ELEMENTS_WITH_LIBXML2.include?(element_name) end
force_correct_attribute_escaping!(node)
click to toggle source
libxml2 >= 2.9.2 fails to escape comments within some attributes.
see comments about CVE-2018-8048 within the tests for more information
# File lib/loofah/html5/scrub.rb, line 135 def force_correct_attribute_escaping!(node) return unless Nokogiri::VersionInfo.instance.libxml2? node.attribute_nodes.each do |attr_node| next unless LibxmlWorkarounds::BROKEN_ESCAPING_ATTRIBUTES.include?(attr_node.name) tag_name = LibxmlWorkarounds::BROKEN_ESCAPING_ATTRIBUTES_QUALIFYING_TAG[attr_node.name] next unless tag_name.nil? || tag_name == node.name # # this block is just like CGI.escape in Ruby 2.4, but # only encodes space and double-quote, to mimic # pre-2.9.2 behavior # encoding = attr_node.value.encoding attr_node.value = attr_node.value.gsub(/[ "]/) do |m| "%" + m.unpack("H2" * m.bytesize).join("%").upcase end.force_encoding(encoding) end end
scrub_attributes(node)
click to toggle source
alternative implementation of the html5lib attribute scrubbing algorithm
# File lib/loofah/html5/scrub.rb, line 21 def scrub_attributes(node) node.attribute_nodes.each do |attr_node| attr_name = if attr_node.namespace "#{attr_node.namespace.prefix}:#{attr_node.node_name}" else attr_node.node_name end if attr_name =~ DATA_ATTRIBUTE_NAME next end unless SafeList::ALLOWED_ATTRIBUTES.include?(attr_name) attr_node.remove next end if SafeList::ATTR_VAL_IS_URI.include?(attr_name) # this block lifted nearly verbatim from HTML5 sanitization val_unescaped = CGI.unescapeHTML(attr_node.value).gsub(CONTROL_CHARACTERS, "").downcase if val_unescaped =~ /^[a-z0-9][-+.a-z0-9]*:/ && !SafeList::ALLOWED_PROTOCOLS.include?(val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0]) attr_node.remove next elsif val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[0] == "data" # permit only allowed data mediatypes mediatype = val_unescaped.split(SafeList::PROTOCOL_SEPARATOR)[1] mediatype, _ = mediatype.split(";")[0..1] if mediatype if mediatype && !SafeList::ALLOWED_URI_DATA_MEDIATYPES.include?(mediatype) attr_node.remove next end end end if SafeList::SVG_ATTR_VAL_ALLOWS_REF.include?(attr_name) attr_node.value = attr_node.value.gsub(/url\s*\(\s*[^#\s][^)]+?\)/m, " ") if attr_node.value end if SafeList::SVG_ALLOW_LOCAL_HREF.include?(node.name) && attr_name == "xlink:href" && attr_node.value =~ /^\s*[^#\s].*/m attr_node.remove next end end scrub_css_attribute(node) node.attribute_nodes.each do |attr_node| if attr_node.value !~ /[^[:space:]]/ && attr_node.name !~ DATA_ATTRIBUTE_NAME node.remove_attribute(attr_node.name) end end force_correct_attribute_escaping!(node) end
scrub_css(style)
click to toggle source
# File lib/loofah/html5/scrub.rb, line 79 def scrub_css(style) style_tree = Crass.parse_properties(style) sanitized_tree = [] style_tree.each do |node| next unless node[:node] == :property next if node[:children].any? do |child| [:url, :bad_url].include?(child[:node]) end name = node[:name].downcase next unless SafeList::ALLOWED_CSS_PROPERTIES.include?(name) || SafeList::ALLOWED_SVG_PROPERTIES.include?(name) || SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first) value = node[:children].map do |child| case child[:node] when :whitespace nil when :string if child[:raw] =~ CSS_PROPERTY_STRING_WITHOUT_EMBEDDED_QUOTES Crass::Parser.stringify(child) else nil end when :function if SafeList::ALLOWED_CSS_FUNCTIONS.include?(child[:name].downcase) Crass::Parser.stringify(child) end when :ident keyword = child[:value] if !SafeList::SHORTHAND_CSS_PROPERTIES.include?(name.split("-").first) || SafeList::ALLOWED_CSS_KEYWORDS.include?(keyword) || (keyword =~ CSS_KEYWORDISH) keyword end else child[:raw] end end.compact next if value.empty? value << CSS_IMPORTANT if node[:important] propstring = format("%s:%s", name, value.join(" ")) sanitized_node = Crass.parse_properties(propstring).first sanitized_tree << sanitized_node << CRASS_SEMICOLON end Crass::Parser.stringify(sanitized_tree) end
scrub_css_attribute(node)
click to toggle source
# File lib/loofah/html5/scrub.rb, line 74 def scrub_css_attribute(node) style = node.attributes["style"] style.value = scrub_css(style.value) if style end