Class CodeRay::Scanners::Ruby
In: lib/coderay/scanners/ruby.rb
Parent: Scanner

This scanner is really complex, since Ruby is a complex language!

It tries to highlight 100% of all common code, and 90% of strange code.

It is optimized for HTML highlighting, and is not very useful for parsing or pretty printing.

For now, I think it's better than the scanners in VIM or Syntax, or any highlighter I was able to find, except Caleb's RubyLexer.

I hope it's also better than the rdoc/irb lexer.

Methods

Included Modules

Streamable

Private Instance methods

[Source]

     # File lib/coderay/scanners/ruby.rb, line 26
 # Scans the rest of the input and appends the resulting tokens to +tokens+.
 #
 # Text tokens are appended as [text, kind] pairs. String-like regions
 # (strings, regexps, symbols, shell commands, heredocs) and inline code
 # blocks inside interpreted strings are bracketed by [:open, kind] and
 # [:close, kind] pairs. Regions still open at the end of the input are
 # closed before returning.
 #
 # tokens::  collection that receives the tokens via <<.
 # options:: accepted for interface compatibility; not read by this method.
 #
 # Returns +tokens+.
 def scan_tokens tokens, options
   last_token_dot = false   # true while the previous token was "." or "::"
   value_expected = true    # decides whether "/", "%", "<<", "?" start a literal
   heredocs = nil           # heredoc states opened on the current line, or nil
   last_state = nil         # string state to resume after one "#$x" / "#@x" token
   state = :initial
   depth = nil              # brace nesting depth inside the current inline block
   inline_block_stack = []  # saved [state, depth, heredocs] of enclosing strings

   patterns = Patterns  # avoid constant lookup

   until eos?
     match = nil
     kind = nil

     if state.instance_of? patterns::StringState
       # {{{ inside a string-like literal
       # Consume plain content; fall back to the rest of the input when the
       # state's pattern does not match any more.
       match = scan_until(state.pattern) || scan_until(/\z/)
       tokens << [match, :content] unless match.empty?
       break if eos?

       if state.heredoc and self[1]  # end of heredoc
         match = getch.to_s
         match << scan_until(/$/) unless eos?
         tokens << [match, :delimiter]
         tokens << [:close, state.type]
         state = state.next_state
         next
       end

       case match = getch

       when state.delim
         if state.paren
           # A closing paren only ends the literal once all nested pairs
           # have been closed.
           state.paren_depth -= 1
           if state.paren_depth > 0
             tokens << [match, :nesting_delimiter]
             next
           end
         end
         tokens << [match, :delimiter]
         if state.type == :regexp and not eos?
           modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox)
           tokens << [modifiers, :modifier] unless modifiers.empty?
         end
         tokens << [:close, state.type]
         value_expected = false
         state = state.next_state

       when '\\'
         if state.interpreted
           if esc = scan(/ #{patterns::ESCAPE} /ox)
             tokens << [match + esc, :char]
           else
             tokens << [match, :error]
           end
         else
           # Non-interpreted literals only escape the delimiter and the
           # backslash itself; anything else is ordinary content.
           case m = getch
           when state.delim, '\\'
             tokens << [match + m, :char]
           when nil
             tokens << [match, :error]
           else
             tokens << [match + m, :content]
           end
         end

       when '#'
         case peek(1)
         when '{'
           # Start of an inline code block: remember where we are and scan
           # the block contents as normal code.
           inline_block_stack << [state, depth, heredocs]
           value_expected = true
           state = :initial
           depth = 1
           tokens << [:open, :inline]
           tokens << [match + getch, :inline_delimiter]
         when '$', '@'
           tokens << [match, :escape]
           last_state = state  # scan one token as normal code, then return here
           state = :initial
         else
           raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens
         end

       when state.paren
         state.paren_depth += 1
         tokens << [match, :nesting_delimiter]

       when /#{patterns::REGEXP_SYMBOLS}/ox
         tokens << [match, :function]

       else
         raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens

       end
       next
       # }}}
     else
       # {{{ normal code
       if match = scan(/[ \t\f]+/)
         kind = :space
         # Do not swallow the newline while heredocs are pending: their
         # bodies start on the next line.
         match << scan(/\s*/) unless eos? || heredocs
         value_expected = true if match.index(?\n) # FIXME not quite true
         tokens << [match, kind]
         next

       elsif match = scan(/\\?\n/)
         kind = :space
         if match == "\n"
           value_expected = true  # FIXME not quite true
           state = :initial if state == :undef_comma_expected
         end
         if heredocs
           unscan  # heredoc scanning needs \n at start
           state = heredocs.shift
           tokens << [:open, state.type]
           heredocs = nil if heredocs.empty?
           next
         else
           match << scan(/\s*/) unless eos?
         end
         tokens << [match, kind]
         next

       elsif bol? && match = scan(/\#!.*/)
         tokens << [match, :doctype]
         next

       elsif match = scan(/\#.*/) or
         ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
           kind = :comment
           value_expected = true
           tokens << [match, kind]
           next

       elsif state == :initial

         # IDENTS #
         if match = scan(/#{patterns::METHOD_NAME}/o)
           if last_token_dot
             kind = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end
           else
             kind = patterns::IDENT_KIND[match]
             if kind == :ident and match[/^[A-Z]/] and not match[/[!?]$/] and not match?(/\(/)
               kind = :constant
             elsif kind == :reserved
               state = patterns::DEF_NEW_STATE[match]
             end
           end
           ## experimental!
           value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)

         elsif last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}/o)
           kind = :ident
           value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)

         # OPERATORS #
         # TODO: match (), [], {} as one single operator
         elsif not last_token_dot and match = scan(/ \.\.\.? | (?:\.|::)() | [,\(\)\[\]\{\}] | ==?=? /x)
           if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/
             value_expected = :set
           end
           # Group 1 is the empty capture after "." / "::", so it is only
           # set for those two operators.
           last_token_dot = :set if self[1]
           kind = :operator
           unless inline_block_stack.empty?
             case match
             when '{'
               depth += 1
             when '}'
               depth -= 1
               if depth == 0  # closing brace of inline block reached
                 state, depth, heredocs = inline_block_stack.pop
                 heredocs = nil if heredocs && heredocs.empty?
                 tokens << [match, :inline_delimiter]
                 kind = :inline
                 match = :close
               end
             end
           end

         elsif match = scan(/ ['"] /mx)
           tokens << [:open, :string]
           kind = :delimiter
           state = patterns::StringState.new :string, match == '"', match  # important for streaming

         elsif match = scan(/#{patterns::INSTANCE_VARIABLE}/o)
           kind = :instance_variable

         elsif value_expected and match = scan(/\//)
           tokens << [:open, :regexp]
           kind = :delimiter
           interpreted = true
           state = patterns::StringState.new :regexp, interpreted, match

         # elsif match = scan(/[-+]?#{patterns::NUMERIC}/o)
         elsif match = value_expected ? scan(/[-+]?#{patterns::NUMERIC}/o) : scan(/#{patterns::NUMERIC}/o)
           kind = self[1] ? :float : :integer

         elsif match = scan(/#{patterns::SYMBOL}/o)
           case delim = match[1]
           when ?', ?"
             tokens << [:open, :symbol]
             tokens << [':', :symbol]
             match = delim.chr
             kind = :delimiter
             state = patterns::StringState.new :symbol, delim == ?", match
           else
             kind = :symbol
           end

         elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x)
           value_expected = :set
           kind = :operator

         elsif value_expected and match = scan(/#{patterns::HEREDOC_OPEN}/o)
           indented = self[1] == '-'
           quote = self[3]
           delim = self[quote ? 4 : 2]
           kind = patterns::QUOTE_TO_TYPE[quote]
           tokens << [:open, kind]
           tokens << [match, :delimiter]
           match = :close
           # The heredoc body is scanned later, once the current line ends.
           heredoc = patterns::StringState.new kind, quote != '\'', delim, (indented ? :indented : :linestart )
           heredocs ||= []  # create heredocs if empty
           heredocs << heredoc

         elsif value_expected and match = scan(/#{patterns::FANCY_START_CORRECT}/o)
           # Hash#fetch yields the missing key to the block, so the error
           # message can name the unknown literal type.
           kind, interpreted = *patterns::FancyStringType.fetch(self[1]) do |k|
             raise_inspect 'Unknown fancy string: %%%p' % k, tokens
           end
           tokens << [:open, kind]
           state = patterns::StringState.new kind, interpreted, self[2]
           kind = :delimiter

         elsif value_expected and match = scan(/#{patterns::CHARACTER}/o)
           kind = :integer

         elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x)
           value_expected = :set
           kind = :operator

         elsif match = scan(/`/)
           if last_token_dot
             kind = :operator
           else
             tokens << [:open, :shell]
             kind = :delimiter
             state = patterns::StringState.new :shell, true, match
           end

         elsif match = scan(/#{patterns::GLOBAL_VARIABLE}/o)
           kind = :global_variable

         elsif match = scan(/#{patterns::CLASS_VARIABLE}/o)
           kind = :class_variable

         else
           kind = :error
           match = getch

         end

       elsif state == :def_expected
         state = :initial
         if match = scan(/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
           kind = :method
         else
           next
         end

       elsif state == :module_expected
         if match = scan(/<</)
           kind = :operator
         else
           state = :initial
           if match = scan(/ (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
             kind = :class
           else
             next
           end
         end

       elsif state == :undef_expected
         state = :undef_comma_expected
         if match = scan(/#{patterns::METHOD_NAME_EX}/o)
           kind = :method
         elsif match = scan(/#{patterns::SYMBOL}/o)
           case delim = match[1]
           when ?', ?"
             tokens << [:open, :symbol]
             tokens << [':', :symbol]
             match = delim.chr
             kind = :delimiter
             state = patterns::StringState.new :symbol, delim == ?", match
             state.next_state = :undef_comma_expected
           else
             kind = :symbol
           end
         else
           state = :initial
           next
         end

       elsif state == :alias_expected
         if match = scan(/(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)
           tokens << [self[1], (self[1][0] == ?: ? :symbol : :method)]
           tokens << [self[2], :space]
           tokens << [self[3], (self[3][0] == ?: ? :symbol : :method)]
         end
         state = :initial
         next

       elsif state == :undef_comma_expected
         if match = scan(/,/)
           kind = :operator
           state = :undef_expected
         else
           state = :initial
           next
         end

       end
       # }}}

       # A flag marked :set by this token stays true for the next token;
       # any other value is reset to false. Error tokens leave both flags
       # untouched.
       unless kind == :error
         value_expected = value_expected == :set
         last_token_dot = last_token_dot == :set
       end

       if $DEBUG and not kind
         raise_inspect 'Error token %p in line %d' %
           [[match, kind], line], tokens, state
       end
       raise_inspect 'Empty token', tokens unless match

       tokens << [match, kind]

       # Return to the enclosing string after the single token scanned for
       # a "#$x" / "#@x" escape.
       if last_state
         state = last_state
         last_state = nil
       end
     end
   end

   # Close everything that is still open at the end of the input.
   inline_block_stack << [state] if state.is_a? patterns::StringState
   until inline_block_stack.empty?
     this_block = inline_block_stack.pop
     tokens << [:close, :inline] if this_block.size > 1
     state = this_block.first
     tokens << [:close, state.type]
   end

   tokens
 end

[Validate]