class String
Public Instance Methods
ascii()
click to toggle source
# File lib/sup/util.rb, line 434
# Builds a new string from this string's bytes: every byte with the high bit
# set (>= 128) is written out as a literal backslash-x followed by the byte's
# hexadecimal value, and every other byte is copied through as a character.
# The assembled string is run through fix_encoding! and that result is
# returned.
def ascii
  escaped = each_byte.each_with_object("") do |byte, buffer|
    buffer << (byte >= 128 ? "\\x#{byte.to_s 16}" : byte.chr)
  end
  # this should now be an utf-8 string of ascii compat chars.
  escaped.fix_encoding!
end
camel_to_hyphy()
click to toggle source
# File lib/sup/util.rb, line 260
# Returns a lower-cased copy of the string in which a hyphen has been inserted
# between a lowercase ASCII letter and an immediately following uppercase ASCII
# letter or digit, e.g. "fooBar" => "foo-bar".
def camel_to_hyphy
  hyphenated = gsub(/([a-z])([A-Z0-9])/, '\1-\2')
  hyphenated.downcase
end
check()
click to toggle source
# File lib/sup/util.rb, line 425
# Sanity-checks the string's encoding.
#
# Raises CheckError when the string's encoding is neither UTF-8 nor ASCII, or
# when the string's bytes are not valid for its encoding. Each test is only
# performed if the string responds to the corresponding method. Returns nil
# when nothing is wrong.
def check
  if respond_to?(:encoding)
    acceptable = [Encoding::UTF_8, Encoding::ASCII].include?(encoding)
    fail "unexpected encoding #{encoding}" unless acceptable
  end

  if respond_to?(:valid_encoding?) && !valid_encoding?
    fail "invalid encoding"
  end
rescue => e
  # Re-wrap any failure above so callers only have to deal with CheckError.
  raise CheckError.new(e.message)
end
display_length()
click to toggle source
# File lib/sup/util.rb, line 241
# Returns the display width of the string as reported by Unicode.width, called
# on the result of fix_encoding! (which modifies the receiver in place).
#
# The value is cached in @display_length on first use.
# NOTE(review): no cache invalidation is visible here, so a string mutated
# after the first call would keep reporting the old width -- confirm callers
# never rely on that.
def display_length
  @display_length ||= Unicode.width(fix_encoding!, false)

  # if Unicode.width fails and returns -1, fall back to
  # regular String#length, see pull-request: #256.
  @display_length = length if @display_length < 0

  @display_length
end
each(&b)
click to toggle source
# File lib/sup/util.rb, line 412
# Forwards to each_line, handing it the given block (if any) unchanged.
def each(&b)
  each_line(&b)
end
find_all_positions(x)
click to toggle source
# File lib/sup/util.rb, line 264
# Returns an array with the index of every occurrence of x in the string, in
# ascending order. x is handed to String#index. The search resumes one
# position after each hit, so overlapping occurrences are all reported.
# Returns an empty array when there is no occurrence.
def find_all_positions(x)
  positions = []
  cursor = 0

  # Stop once the cursor runs off the end or index finds nothing further.
  while cursor < length && (hit = index(x, cursor))
    positions << hit
    cursor = hit + 1
  end

  positions
end
fix_encoding!()
click to toggle source
Fix the damn string! make sure it is valid utf-8, then convert to user encoding.
# File lib/sup/util.rb, line 358
# Repairs the string in place and returns self.
#
# The string is first converted to UTF-8, then pushed through a UTF-16 round
# trip so that invalid sequences get replaced, and finally converted to the
# encoding named by the global $encoding. Invalid and undefined characters are
# replaced at every step. Raises a RuntimeError if the string is still not
# valid after the UTF-8 stage or after the final conversion.
def fix_encoding!
  # first try to encode to utf-8 from whatever current encoding
  encode!('UTF-8', :invalid => :replace, :undef => :replace)

  # do this anyway in case string is set to be UTF-8, encoding to
  # something else (UTF-16 which can fully represent UTF-8) and back
  # ensures invalid chars are replaced.
  [['UTF-16', 'UTF-8'], ['UTF-8', 'UTF-16']].each do |target, source|
    encode!(target, source, :invalid => :replace, :undef => :replace)
  end

  unless valid_encoding?
    fail "Could not create valid UTF-8 string out of: '#{self.to_s}'."
  end

  # now convert to $encoding
  encode!($encoding, :invalid => :replace, :undef => :replace)

  unless valid_encoding?
    fail "Could not create valid #{$encoding.inspect} string out of: '#{self.to_s}'."
  end

  self
end
normalize_whitespace()
click to toggle source
# File lib/sup/util.rb, line 400
# Calls fix_encoding! on the receiver (modifying it in place), then returns a
# new string in which every tab has been replaced and every carriage return
# removed.
# NOTE(review): this listing may have collapsed runs of whitespace, so the tab
# replacement literal below might be wider in the real file -- confirm against
# lib/sup/util.rb.
def normalize_whitespace
  fix_encoding!
  without_tabs = gsub(/\t/, " ")
  without_tabs.gsub(/\r/, "")
end
ord()
click to toggle source
# File lib/sup/util.rb, line 406
# Returns whatever indexing the string at position 0 yields.
# NOTE(review): on Ruby >= 1.9 that is a one-character String (or nil for an
# empty string), not an Integer, and this definition would shadow the built-in
# String#ord -- presumably the real file only defines it when String#ord is
# missing; confirm.
def ord
  slice(0)
end
slice_by_display_length(len)
click to toggle source
# File lib/sup/util.rb, line 253
# Returns the leading characters of the string whose display_length values add
# up to at most len. Characters are consumed in order; once the budget has
# been exceeded nothing further is appended.
def slice_by_display_length(len)
  buffer = ""
  each_char do |c|
    # Charge every character against the budget, and keep it only while the
    # budget has not gone negative.
    len -= c.display_length
    buffer << c if len >= 0
  end
  buffer
end
split_on_commas()
click to toggle source
A very complicated regex found on the internet to split on commas, unless they occur within double quotes.
# File lib/sup/util.rb, line 278
# Runs normalize_whitespace and splits the result on commas (together with any
# whitespace that follows them), skipping commas for which the lookahead below
# finds an odd number of double quotes in the rest of the string, i.e. commas
# that sit inside a double-quoted section. Returns the array of pieces.
def split_on_commas
  separator = /,\s*(?=(?:[^"]*"[^"]*")*(?![^"]*"))/
  normalize_whitespace.split(separator)
end
split_on_commas_with_remainder()
click to toggle source
ok, here we do it the hard way. got to have a remainder for purposes of tab-completing full email addresses
# File lib/sup/util.rb, line 284
# Splits the string on commas that are outside double-quoted sections and
# returns a two-element array [pieces, remainder].
#
# pieces    - array of the comma-separated regions, each with leading and
#             trailing whitespace removed.
# remainder - nil when the string ends outside a quoted section; otherwise the
#             text from the start of the unfinished region to the end of the
#             string (leading whitespace removed). This is the part after the
#             last split point that contains an unterminated double quote.
#
# A small state machine tracks whether the scan is inside a quoted section and
# whether the previous character was a backslash. A backslash escapes exactly
# the one character that follows it.
def split_on_commas_with_remainder
  ret = []
  state = :outstring
  pos = 0
  region_start = 0

  while pos <= length
    # Right after a backslash the very next character must be inspected;
    # otherwise jump straight to the next character that can change state.
    newpos = case state
      when :escaped_instring, :escaped_outstring then pos
      else index(/[,"\\]/, pos)
    end

    if newpos
      char = self[newpos]
    else
      # No further special character: treat the end of the string as a
      # terminator so the last region gets flushed.
      char = nil
      newpos = length
    end

    case char
    when ?"
      state = case state
        when :outstring then :instring
        when :instring then :outstring
        when :escaped_instring then :instring
        when :escaped_outstring then :outstring
      end
    when ?,, nil
      state = case state
        when :outstring, :escaped_outstring then
          ret << self[region_start ... newpos].gsub(/^\s+|\s+$/, "")
          region_start = newpos + 1
          :outstring
        when :instring then :instring
        when :escaped_instring then :instring
      end
    when ?\\
      state = case state
        when :instring then :escaped_instring
        when :outstring then :escaped_outstring
        when :escaped_instring then :instring
        when :escaped_outstring then :outstring
      end
    else
      # Any other character can only be reached directly after a backslash.
      # The escape covers just this one character, so drop back to the
      # unescaped state; otherwise a later quote would wrongly count as
      # escaped.
      state = case state
        when :escaped_instring then :instring
        when :escaped_outstring then :outstring
        else state
      end
    end

    pos = newpos + 1
  end

  remainder = case state
    when :instring
      self[region_start .. -1].gsub(/^\s+/, "")
    else
      nil
    end

  [ret, remainder]
end
to_set_of_symbols(split_on=nil)
click to toggle source
Takes a list of words and returns a set of symbols. Typically used in Sup for translating Xapian's representation of a list of labels (a string) to a set of label symbols.
split_on will be passed to String#split, so you can leave this nil for space.
# File lib/sup/util.rb, line 422
# Splits the string with String#split (split_on is passed straight through;
# nil means split on whitespace), strips each piece, converts it to a symbol
# and returns the symbols collected in a Set.
def to_set_of_symbols(split_on = nil)
  symbols = split(split_on).map { |word| word.strip.intern }
  Set.new(symbols)
end
transcode(to_encoding, from_encoding)
click to toggle source
Transcode the string if the original encoding is known; fix it if it is broken.
# File lib/sup/util.rb, line 380
# Converts the string in place from from_encoding to to_encoding, replacing
# invalid and undefined characters, and returns self.
#
# If the result is not valid, the conversion is retried via UTF-16. If Ruby
# has no converter for the requested pair, a debug message is emitted and
# fix_encoding! is used instead. Raises a RuntimeError when the string is
# still invalid afterwards.
def transcode(to_encoding, from_encoding)
  begin
    encode!(to_encoding, from_encoding, :invalid => :replace, :undef => :replace)

    if !valid_encoding?
      # Direct conversion left invalid bytes behind: go through UTF-16 as an
      # intermediate step instead.
      [['UTF-16', from_encoding], [to_encoding, 'UTF-16']].each do |target, source|
        encode!(target, source, :invalid => :replace, :undef => :replace)
      end
    end
  rescue Encoding::ConverterNotFoundError
    debug "Encoding converter not found for #{from_encoding.inspect} or #{to_encoding.inspect}, fixing string: '#{self.to_s}', but expect weird characters."
    fix_encoding!
  end

  return self if valid_encoding?

  fail "Could not create valid #{to_encoding.inspect} string out of: '#{self.to_s}'."
end
wrap(len)
click to toggle source
# File lib/sup/util.rb, line 340
# Breaks the string into an array of pieces whose display_length does not
# exceed len where that is possible.
#
# While the remaining text is wider than len, the longest prefix that fits is
# taken (via slice_by_display_length). If that prefix contains whitespace, the
# line is cut at the last whitespace character, which is dropped; otherwise
# the prefix itself becomes a line. Whatever is left over at the end is
# appended as the final element.
#
# If nothing fits at all (for example len is zero or negative, or a single
# character is wider than len), one character is emitted per line so that the
# loop always makes progress instead of spinning forever.
def wrap len
  ret = []
  s = self
  while s.display_length > len
    head = s.slice_by_display_length(len)
    cut = head.rindex(/\s/)
    if cut
      ret << s[0 ... cut]
      s = s[(cut + 1) .. -1]
    else
      # Guarantee forward progress: an empty slice would otherwise leave s
      # untouched and loop endlessly.
      head = s[0, 1] if head.empty?
      break if head.empty?
      ret << head
      s = s[head.length .. -1]
    end
  end
  ret << s
end