# Tokenizes the scanner's input as YAML and appends the result to +tokens+.
#
# Each appended entry is a two-element array: either [text, kind] for a
# plain token, or [:open, kind] / [:close, kind] markers bracketing a
# group (used here for :head, :string and :key).
#
# tokens::  collection that receives the entries via <<; it is also the
#           return value.
# options:: accepted for interface compatibility; not read by this method.
#
# Relies on scanner primitives (eos?, bol?, scan, check, matched, getch,
# pos, self[n]) and on the helpers +column+, +line+ and +raise_inspect+,
# none of which are defined in this method.
# NOTE(review): presumably inherited from a StringScanner-based scanner
# base class -- confirm against the enclosing class.
#
# A small state machine drives value detection:
#   :initial -> nothing significant seen on the current line yet
#   :colon   -> a mapping key was just scanned
#   :value   -> a ':' after a key, or a leading '-', was seen; a scalar
#               value may follow
def scan_tokens tokens, options

  state = :initial
  key_indent = indent = 0

  until eos?

    kind = nil
    match = nil

    if bol?
      # A new line invalidates the indentation of the previous key; it is
      # set again only when a key is scanned on this line.
      key_indent = nil
      if $DEBUG
        indent = check(/ +/) ? matched.size : 0
        tokens << [indent.to_s, :debug]
      end
    end

    if match = scan(/ +[\t ]*/)
      kind = :space

    elsif match = scan(/\n+/)
      kind = :space
      state = :initial if match.index(?\n)

    elsif match = scan(/#.*/)
      kind = :comment

    # Document markers and directives are only recognized at line start.
    # The subject-less case yields nil when no branch matches, so control
    # falls through to the next elsif.
    elsif bol? and case
      when match = scan(/---|\.\.\./)
        tokens << [:open, :head]
        tokens << [match, :head]
        tokens << [:close, :head]
        next
      when match = scan(/%.*/)
        tokens << [match, :doctype]
        next
      end

    elsif state == :value and case
      # Double-quoted scalar, unless it is actually a quoted key ("..." : ).
      when !check(/(?:"[^"]*")(?=: |:$)/) && scan(/"/)
        tokens << [:open, :string]
        tokens << [matched, :delimiter]
        tokens << [matched, :content] if scan(/ [^"\\]* (?: \\. [^"\\]* )* /mx)
        tokens << [matched, :delimiter] if scan(/"/)
        tokens << [:close, :string]
        next
      # Block scalar introduced by | or > (optionally followed by - or +).
      when match = scan(/[|>][-+]?/)
        tokens << [:open, :string]
        tokens << [match, :delimiter]
        # key_indent is nil when no key precedes the indicator on this line
        # (e.g. "- |"); fall back to the column just before the indicator,
        # the same way the plain-scalar branch below does, instead of
        # raising NoMethodError on nil + 1.
        string_indent = key_indent || column(pos - match.size - 1)
        tokens << [matched, :content] if scan(/(?:\n+ {#{string_indent + 1}}.*)+/)
        tokens << [:close, :string]
        next
      # Plain scalar, up to end of line or a trailing comment; continuation
      # lines must be indented deeper than the key (or the value start).
      when match = scan(/(?![!"*&]).+?(?=$|\s+#)/)
        tokens << [match, :string]
        string_indent = key_indent || column(pos - match.size - 1)
        tokens << [matched, :string] if scan(/(?:\n+ {#{string_indent + 1}}.*)+/)
        next
      end

    # Every branch below either ends in a truthy assignment or in `next`,
    # so the else clause is reached only when none of the patterns match.
    elsif case
      when match = scan(/[-:](?= |$)/)
        state = :value if state == :colon && (match == ':' || match == '-')
        state = :value if state == :initial && match == '-'
        kind = :operator
      when match = scan(/[,{}\[\]]/)
        kind = :operator
      when state == :initial && match = scan(/[\w.() ]*\S(?=: |:$)/)
        kind = :key
        key_indent = column(pos - match.size - 1)

        state = :colon
      when match = scan(/(?:"[^"\n]*"|'[^'\n]*')(?=: |:$)/)
        tokens << [:open, :key]
        tokens << [match[0,1], :delimiter]
        tokens << [match[1..-2], :content]
        tokens << [match[-1,1], :delimiter]
        tokens << [:close, :key]
        key_indent = column(pos - match.size - 1)

        state = :colon
        next
      when scan(/(![\w\/]+)(:([\w:]+))?/)
        tokens << [self[1], :type]
        if self[2]
          tokens << [':', :operator]
          tokens << [self[3], :class]
        end
        next
      when scan(/&\S+/)
        kind = :variable
      when scan(/\*\w+/)
        kind = :global_variable
      when scan(/<</)
        kind = :class_variable
      when scan(/\d\d:\d\d:\d\d/)
        kind = :oct
      when scan(/\d\d\d\d-\d\d-\d\d\s\d\d:\d\d:\d\d(\.\d+)? [-+]\d\d:\d\d/)
        kind = :oct
      when scan(/:\w+/)
        kind = :symbol
      when scan(/[^:\s]+(:(?! |$)[^:\s]*)* .*/)
        kind = :error
      when scan(/[^:\s]+(:(?! |$)[^:\s]*)*/)
        kind = :error
      end

    else
      # Nothing matched: consume one character so the loop always advances.
      getch
      kind = :error

    end

    # Branches that called scan without assigning leave the text in matched.
    match ||= matched

    raise_inspect 'Error token %p in line %d' % [[match, kind], line], tokens if $DEBUG && !kind
    raise_inspect 'Empty token', tokens unless match

    tokens << [match, kind]

  end

  tokens
end