module Unicode::DisplayWidth::IndexBuilder

Constants

EAST_ASIAN_WIDTH_DATA_FILENAME
EAST_ASIAN_WIDTH_DATA_URL
IGNORE_CATEGORIES
SPECIAL_WIDTHS
ZERO_WIDTH_CATEGORIES
ZERO_WIDTH_CODEPOINTS

Public Class Methods

build!() click to toggle source
# File lib/unicode/display_width/index_builder.rb, line 34
# Builds the codepoint => width index from the East Asian Width data file and
# writes it, marshalled, to INDEX_FILENAME.
#
# Every line of EAST_ASIAN_WIDTH_DATA_FILENAME shaped like
# "<codepoint or first..last>;<width>  # <category> ..." contributes one
# entry per codepoint; other lines (comments, blanks) are skipped, as are
# lines whose category is listed in IGNORE_CATEGORIES. A codepoint for which
# is_zero_width? is truthy is stored with width 0, everything else with the
# width field as a Symbol. SPECIAL_WIDTHS is merged in last, so it overrides
# anything derived from the data file.
#
# DATA_DIRECTORY is created if it does not exist yet.
#
# Raises Errno::ENOENT when the data file is missing.
def self.build!
  index = {}

  # Block form guarantees the data file handle is closed, even on error.
  File.open(EAST_ASIAN_WIDTH_DATA_FILENAME) do |data|
    data.each_line do |line|
      match = /^(\S+?);(\S+)\s+#\s(\S+).*$/.match(line)
      next unless match

      cps, width, category = match.captures
      next if IGNORE_CATEGORIES.include?(category)

      # "XXXX..YYYY" denotes an inclusive range; a lone "XXXX" a single codepoint.
      first, last = cps.split('..').map { |cp| cp.to_i(16) }
      (first..(last || first)).each do |cp|
        index[cp] = is_zero_width?(category, cp) ? 0 : width.to_sym
      end
    end
  end

  index.merge!(SPECIAL_WIDTHS)

  # Dir.exist? rather than the alias Dir.exists?, which was removed in Ruby 3.2.
  Dir.mkdir(DATA_DIRECTORY) unless Dir.exist?(DATA_DIRECTORY)
  File.open(INDEX_FILENAME, 'wb') { |f| Marshal.dump(index, f) }
end
fetch!() click to toggle source
# File lib/unicode/display_width/index_builder.rb, line 27
# Downloads the data at EAST_ASIAN_WIDTH_DATA_URL and stores the response
# body in EAST_ASIAN_WIDTH_DATA_FILENAME, replacing any existing file.
#
# Returns the value of File.write (the number of bytes written).
def self.fetch!
  # Loaded lazily: open-uri is only needed when refreshing the data file.
  require 'open-uri'

  # URI.open instead of Kernel#open: since Ruby 3.0 open-uri no longer makes
  # Kernel#open handle URLs, so a bare `open(url)` looks for a local file.
  URI.open(EAST_ASIAN_WIDTH_DATA_URL) do |remote|
    File.write(EAST_ASIAN_WIDTH_DATA_FILENAME, remote.read)
  end
end
is_zero_width?(category, cp) click to toggle source
# File lib/unicode/display_width/index_builder.rb, line 61
# Tells whether the codepoint +cp+ with general category +category+ should
# be stored with a display width of zero.
#
# True when the category is listed in ZERO_WIDTH_CATEGORIES, unless the
# character is a format character (Cf) belonging to the Arabic script.
# Also true for any codepoint listed in ZERO_WIDTH_CODEPOINTS, whatever
# its category.
def self.is_zero_width?(category, cp)
  if ZERO_WIDTH_CATEGORIES.include?(category)
    char = [cp].pack('U')
    # The lookbehind re-checks the character just consumed by \p{Cf}, so the
    # pattern only matches a character that is both Cf and Arabic.
    return true unless char =~ /\p{Cf}(?<=\p{Arabic})/
  end

  ZERO_WIDTH_CODEPOINTS.include?(cp)
end