| Class | CodeRay::Scanners::Ruby |
| In: |
lib/coderay/scanners/ruby.rb
|
| Parent: | Scanner |
This scanner is really complex, since Ruby is a complex language!
It tries to highlight 100% of all common code, and 90% of strange code.
It is optimized for HTML highlighting, and is not very useful for parsing or pretty printing.
For now, I think it's better than the scanners in VIM or Syntax, or any highlighter I was able to find, except Caleb's RubyLexer.
I hope it's also better than the rdoc/irb lexer.
# File lib/coderay/scanners/ruby.rb, line 26
# Scans the remaining input and appends highlighting tokens to +tokens+.
#
# Each appended token is a two-element array: either <tt>[text, kind]</tt>
# for a piece of source text, or <tt>[:open, kind]</tt> / <tt>[:close, kind]</tt>
# to bracket a nested region (strings, regexps, symbols, shell commands,
# heredocs and <tt>#{...}</tt> inline blocks).
#
# The scanner is a state machine. +state+ is either one of the symbols
# <tt>:initial</tt>, <tt>:def_expected</tt>, <tt>:module_expected</tt>,
# <tt>:undef_expected</tt>, <tt>:undef_comma_expected</tt>,
# <tt>:alias_expected</tt>, or a <tt>Patterns::StringState</tt> instance
# while inside a string-like literal.
#
# tokens::  collection that receives the tokens via <tt><<</tt>.
# options:: not read anywhere in this method body.
#
# Returns +tokens+.
def scan_tokens tokens, options
  # last_token_dot / value_expected use a three-valued protocol: a branch
  # assigns :set to request "true for the next token"; after the token is
  # emitted the flag is normalized to (flag == :set), so anything that was
  # not freshly set falls back to false.
  last_token_dot = false
  value_expected = true
  heredocs = nil              # pending heredoc states, opened at the next newline
  last_state = nil            # string state to return to after a #$var / #@var
  state = :initial
  depth = nil                 # brace depth inside the current #{...} inline block
  inline_block_stack = []     # saved [state, depth, heredocs] of enclosing strings

  patterns = Patterns  # avoid constant lookup

  until eos?
    match = nil
    kind = nil

    if state.instance_of? patterns::StringState
# {{{
      # Consume plain content up to the next interesting character of the
      # string state, or to the end of input if there is none.
      match = scan_until(state.pattern) || scan_until(/\z/)
      tokens << [match, :content] unless match.empty?
      break if eos?

      if state.heredoc and self[1]  # end of heredoc
        match = getch.to_s
        match << scan_until(/$/) unless eos?
        tokens << [match, :delimiter]
        tokens << [:close, state.type]
        state = state.next_state
        next
      end

      case match = getch

      when state.delim
        if state.paren
          # A closing delimiter only ends the literal once all nested
          # opening delimiters have been balanced.
          state.paren_depth -= 1
          if state.paren_depth > 0
            tokens << [match, :nesting_delimiter]
            next
          end
        end
        tokens << [match, :delimiter]
        if state.type == :regexp and not eos?
          modifiers = scan(/#{patterns::REGEXP_MODIFIERS}/ox)
          # scan returns nil when nothing matches; only emit real modifiers.
          tokens << [modifiers, :modifier] if modifiers && !modifiers.empty?
        end
        tokens << [:close, state.type]
        value_expected = false
        state = state.next_state

      when '\\'
        if state.interpreted
          if esc = scan(/ #{patterns::ESCAPE} /ox)
            tokens << [match + esc, :char]
          else
            tokens << [match, :error]
          end
        else
          # Non-interpreted literal: only the delimiter and the backslash
          # itself are treated as escaped characters.
          case m = getch
          when state.delim, '\\'
            tokens << [match + m, :char]
          when nil
            tokens << [match, :error]
          else
            tokens << [match + m, :content]
          end
        end

      when '#'
        case peek(1)
        when '{'
          # Start of an inline block: remember the string state and scan
          # the block contents as normal code.
          inline_block_stack << [state, depth, heredocs]
          value_expected = true
          state = :initial
          depth = 1
          tokens << [:open, :inline]
          tokens << [match + getch, :inline_delimiter]
        when '$', '@'
          tokens << [match, :escape]
          last_state = state  # scan one token as normal code, then return here
          state = :initial
        else
          raise_inspect 'else-case # reached; #%p not handled' % peek(1), tokens
        end

      when state.paren
        state.paren_depth += 1
        tokens << [match, :nesting_delimiter]

      when /#{patterns::REGEXP_SYMBOLS}/ox
        tokens << [match, :function]

      else
        raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], tokens

      end
      next
# }}}
    else
# {{{
      if match = scan(/[ \t\f]+/)
        kind = :space
        # While heredocs are pending, stop before the newline so the
        # newline branch below can switch into the heredoc state.
        match << scan(/\s*/) unless eos? || heredocs
        value_expected = true if match.index(?\n) # FIXME not quite true
        tokens << [match, kind]
        next

      elsif match = scan(/\\?\n/)
        kind = :space
        if match == "\n"
          value_expected = true  # FIXME not quite true
          state = :initial if state == :undef_comma_expected
        end
        if heredocs
          unscan  # heredoc scanning needs \n at start
          state = heredocs.shift
          tokens << [:open, state.type]
          heredocs = nil if heredocs.empty?
          next
        else
          match << scan(/\s*/) unless eos?
        end
        tokens << [match, kind]
        next

      elsif bol? && match = scan(/\#!.*/)
        tokens << [match, :doctype]
        next

      elsif match = scan(/\#.*/) or
        ( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
        kind = :comment
        value_expected = true
        tokens << [match, kind]
        next

      elsif state == :initial

        # IDENTS #
        if match = scan(/#{patterns::METHOD_NAME}/o)
          if last_token_dot
            # After a dot or ::, a name is never looked up as a keyword.
            kind = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end
          else
            kind = patterns::IDENT_KIND[match]
            if kind == :ident and match[/^[A-Z]/] and not match[/[!?]$/] and not match?(/\(/)
              kind = :constant
            elsif kind == :reserved
              # Reserved words select the follow-up state via DEF_NEW_STATE.
              state = patterns::DEF_NEW_STATE[match]
            end
          end
          ## experimental!
          value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)

        elsif last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}/o)
          kind = :ident
          value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)

        # OPERATORS #
        # TODO: match (), [], {} as one single operator
        elsif not last_token_dot and match = scan(/ \.\.\.? | (?:\.|::)() | [,\(\)\[\]\{\}] | ==?=? /x)
          if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/
            value_expected = :set
          end
          # Group 1 is the empty capture after "." / "::"; it is non-nil
          # only when one of those two alternatives matched.
          last_token_dot = :set if self[1]
          kind = :operator
          unless inline_block_stack.empty?
            case match
            when '{'
              depth += 1
            when '}'
              depth -= 1
              if depth == 0  # closing brace of inline block reached
                state, depth, heredocs = inline_block_stack.pop
                heredocs = nil if heredocs && heredocs.empty?
                tokens << [match, :inline_delimiter]
                # The common tail below then emits [:close, :inline].
                kind = :inline
                match = :close
              end
            end
          end

        elsif match = scan(/ ['"] /mx)
          tokens << [:open, :string]
          kind = :delimiter
          state = patterns::StringState.new :string, match == '"', match  # important for streaming

        elsif match = scan(/#{patterns::INSTANCE_VARIABLE}/o)
          kind = :instance_variable

        elsif value_expected and match = scan(/\//)
          tokens << [:open, :regexp]
          kind = :delimiter
          interpreted = true
          state = patterns::StringState.new :regexp, interpreted, match

        # elsif match = scan(/[-+]?#{patterns::NUMERIC}/o)
        # A leading sign is only part of the number where a value is expected.
        elsif match = value_expected ? scan(/[-+]?#{patterns::NUMERIC}/o) : scan(/#{patterns::NUMERIC}/o)
          kind = self[1] ? :float : :integer

        elsif match = scan(/#{patterns::SYMBOL}/o)
          case delim = match[1]
          when ?', ?"
            # Quoted symbol: emit the colon, then scan the rest as a string.
            tokens << [:open, :symbol]
            tokens << [':', :symbol]
            match = delim.chr
            kind = :delimiter
            state = patterns::StringState.new :symbol, delim == ?", match
          else
            kind = :symbol
          end

        elsif match = scan(/ [-+!~^]=? | [*|&]{1,2}=? | >>? /x)
          value_expected = :set
          kind = :operator

        elsif value_expected and match = scan(/#{patterns::HEREDOC_OPEN}/o)
          indented = self[1] == '-'
          quote = self[3]
          delim = self[quote ? 4 : 2]
          kind = patterns::QUOTE_TO_TYPE[quote]
          tokens << [:open, kind]
          tokens << [match, :delimiter]
          # The opener is closed right away; the heredoc body is queued and
          # scanned once the current line ends.
          match = :close
          heredoc = patterns::StringState.new kind, quote != '\'', delim, (indented ? :indented : :linestart )
          heredocs ||= []  # create heredocs if empty
          heredocs << heredoc

        elsif value_expected and match = scan(/#{patterns::FANCY_START_CORRECT}/o)
          kind, interpreted = *patterns::FancyStringType.fetch(self[1]) do
            # self[1] is the type character that was used as the fetch key.
            raise_inspect 'Unknown fancy string: %%%p' % self[1], tokens
          end
          tokens << [:open, kind]
          state = patterns::StringState.new kind, interpreted, self[2]
          kind = :delimiter

        elsif value_expected and match = scan(/#{patterns::CHARACTER}/o)
          kind = :integer

        elsif match = scan(/ [\/%]=? | <(?:<|=>?)? | [?:;] /x)
          value_expected = :set
          kind = :operator

        elsif match = scan(/`/)
          if last_token_dot
            kind = :operator
          else
            tokens << [:open, :shell]
            kind = :delimiter
            state = patterns::StringState.new :shell, true, match
          end

        elsif match = scan(/#{patterns::GLOBAL_VARIABLE}/o)
          kind = :global_variable

        elsif match = scan(/#{patterns::CLASS_VARIABLE}/o)
          kind = :class_variable

        else
          # Nothing matched: consume one character as an error token.
          kind = :error
          match = getch

        end

      elsif state == :def_expected
        state = :initial
        if match = scan(/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
          kind = :method
        else
          next
        end

      elsif state == :module_expected
        if match = scan(/<</)
          kind = :operator
        else
          state = :initial
          if match = scan(/ (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
            kind = :class
          else
            next
          end
        end

      elsif state == :undef_expected
        state = :undef_comma_expected
        if match = scan(/#{patterns::METHOD_NAME_EX}/o)
          kind = :method
        elsif match = scan(/#{patterns::SYMBOL}/o)
          case delim = match[1]
          when ?', ?"
            tokens << [:open, :symbol]
            tokens << [':', :symbol]
            match = delim.chr
            kind = :delimiter
            state = patterns::StringState.new :symbol, delim == ?", match
            # Resume the undef argument list once the quoted symbol ends.
            state.next_state = :undef_comma_expected
          else
            kind = :symbol
          end
        else
          state = :initial
          next
        end

      elsif state == :alias_expected
        if match = scan(/(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)
          tokens << [self[1], (self[1][0] == ?: ? :symbol : :method)]
          tokens << [self[2], :space]
          tokens << [self[3], (self[3][0] == ?: ? :symbol : :method)]
        end
        state = :initial
        next

      elsif state == :undef_comma_expected
        if match = scan(/,/)
          kind = :operator
          state = :undef_expected
        else
          state = :initial
          next
        end

      end
# }}}

      # Normalize the one-token flags; error tokens leave them untouched.
      unless kind == :error
        value_expected = value_expected == :set
        last_token_dot = last_token_dot == :set
      end

      if $DEBUG and not kind
        raise_inspect 'Error token %p in line %d' %
          [[match, kind], line], tokens, state
      end
      raise_inspect 'Empty token', tokens unless match

      tokens << [match, kind]

      # Return to the string that contained the #$var / #@var shortcut.
      if last_state
        state = last_state
        last_state = nil
      end
    end
  end

  # Input ended: close every region that is still open, innermost first.
  inline_block_stack << [state] if state.is_a? patterns::StringState
  until inline_block_stack.empty?
    this_block = inline_block_stack.pop
    # Entries saved for inline blocks have three elements; the one-element
    # entry pushed just above is a bare string state without an inline block.
    tokens << [:close, :inline] if this_block.size > 1
    state = this_block.first
    tokens << [:close, state.type]
  end

  tokens
end