# Tokenizes the remaining scanner input and appends the result to +tokens+.
#
# Each emitted entry is a two-element array: either <tt>[text, kind]</tt> for
# a piece of source text, or <tt>[:open, kind]</tt> / <tt>[:close, kind]</tt>
# to bracket a string, regexp or key literal. Every <tt>:open</tt> pushed
# here is paired with a <tt>:close</tt> of the same kind, including when the
# literal is malformed or the input ends inside it.
#
# The grammar handled here looks like JavaScript (// and /* */ comments,
# regexp literals with g/i/m modifiers, object keys) -- NOTE(review): confirm
# against the enclosing scanner class, which is not visible from this method.
#
# tokens::  collection responding to <tt><<</tt>; it is mutated and returned.
# options:: accepted for interface compatibility; not read by this method.
#
# Relies on scanner primitives (+eos?+, +scan+, +check+, +getch+, +matched+,
# +peek+, +line+, +raise_inspect+) and lookup tables (+IDENT_KIND+,
# +KEYWORDS_EXPECTING_VALUE+, +KEY_CHECK_PATTERN+, +STRING_CONTENT_PATTERN+,
# +ESCAPE+, +UNICODE_ESCAPE+, +REGEXP_ESCAPE+) defined outside this method.
def scan_tokens(tokens, options)
  state = :initial
  string_delimiter = nil
  # True where a "/" would start a regexp literal rather than a division.
  value_expected = true
  # True right after "{" or ",", where an identifier/string may be a key.
  key_expected = false

  until eos?
    kind = nil
    match = nil

    case state

    when :initial

      if match = scan(/ \s+ | \\\n /x)
        # A line break puts us back into a position where a value may start.
        value_expected = true if !value_expected && match.index(?\n)
        tokens << [match, :space]
        next

      elsif scan(%r! // [^\n\\]* (?: \\. [^\n\\]* )* | /\* (?: .*? \*/ | .* ) !mx)
        value_expected = true
        kind = :comment

      elsif check(/\d/)
        key_expected = value_expected = false
        # Order matters: the more specific literal forms are tried first.
        if scan(/0[xX][0-9A-Fa-f]+/)
          kind = :hex
        elsif scan(/(?>0[0-7]+)(?![89.eEfF])/)
          kind = :oct
        elsif scan(/\d+[fF]|\d*\.\d+(?:[eE][+-]?\d+)?[fF]?|\d+[eE][+-]?\d+[fF]?/)
          kind = :float
        elsif scan(/\d+/)
          kind = :integer
        end

      elsif match = scan(/ [-+*=<>?:;,!&^|(\[{~%]+ | \.(?!\d) /x)
        value_expected = true
        last_operator = match[-1]
        key_expected = (last_operator == ?{) || (last_operator == ?,)
        kind = :operator

      elsif scan(/ [)\]}]+ /x)
        key_expected = value_expected = false
        kind = :operator

      elsif match = scan(/ [$a-zA-Z_][A-Za-z_0-9$]* /x)
        kind = IDENT_KIND[match]
        value_expected = (kind == :keyword) && KEYWORDS_EXPECTING_VALUE[match]
        if kind == :ident
          if match.index(?$)
            kind = :predefined
          elsif key_expected && check(/\s*:/)
            kind = :key
          end
        end
        key_expected = false

      elsif match = scan(/["']/)
        # A quoted literal in key position that is followed by ":" is a key.
        if key_expected && check(KEY_CHECK_PATTERN[match])
          state = :key
        else
          state = :string
        end
        tokens << [:open, state]
        string_delimiter = match
        kind = :delimiter

      elsif value_expected && (match = scan(/\/(?=\S)/))
        tokens << [:open, :regexp]
        state = :regexp
        string_delimiter = '/'
        kind = :delimiter

      elsif scan(/ \/ /x)
        # A slash where no value is expected is the division operator.
        value_expected = true
        key_expected = false
        kind = :operator

      else
        getch
        kind = :error

      end

    when :string, :regexp, :key
      if scan(STRING_CONTENT_PATTERN[string_delimiter])
        kind = :content
      elsif match = scan(/["'\/]/)
        tokens << [match, :delimiter]
        if state == :regexp
          modifiers = scan(/[gim]+/)
          tokens << [modifiers, :modifier] if modifiers
        end
        tokens << [:close, state]
        string_delimiter = nil
        key_expected = value_expected = false
        state = :initial
        next
      elsif state != :regexp && (match = scan(/ \\ (?: #{ESCAPE} | #{UNICODE_ESCAPE} ) /mox))
        # Inside single quotes only \\ and \' are highlighted as escapes.
        if string_delimiter == "'" && !(match == "\\\\" || match == "\\'")
          kind = :content
        else
          kind = :char
        end
      elsif state == :regexp && scan(/ \\ (?: #{ESCAPE} | #{REGEXP_ESCAPE} | #{UNICODE_ESCAPE} ) /mox)
        kind = :char
      elsif scan(/\\./m)
        kind = :content
      elsif match = scan(/ \\ | $ /x)
        # Broken literal: close the group that was actually opened (not a
        # :delimiter group), and do not emit a zero-width error token.
        tokens << [:close, state]
        tokens << [match, :error] unless match.empty?
        string_delimiter = nil
        key_expected = value_expected = false
        state = :initial
        next
      else
        raise_inspect "else case \" reached; %p not handled." % peek(1), tokens, state
      end

    else
      raise_inspect 'Unknown state', tokens, state

    end

    # Branches that did not capture their text fall back to the last match.
    match ||= matched
    if $DEBUG and not kind
      raise_inspect 'Error token %p in line %d' %
        [[match, kind], line], tokens, state
    end
    raise_inspect 'Empty token', tokens, state unless match

    tokens << [match, kind]
  end

  # Input ended inside a literal: balance the pending :open, keys included.
  if [:string, :regexp, :key].include? state
    tokens << [:close, state]
  end

  tokens
end