| Class | CodeRay::Scanners::Ruby |
| In: | lib/coderay/scanners/ruby/string_state.rb, lib/coderay/scanners/ruby.rb |
| Parent: | Object |
# File lib/coderay/scanners/ruby.rb, line 19
19: def interpreted_string_state
20: StringState.new :string, true, '"'
21: end
# File lib/coderay/scanners/ruby.rb, line 29
# Scans the input until eos? and reports every token to +encoder+.
#
# The loop below is a state machine. +state+ is either
#   * a Symbol (:initial, :def_expected, :dot_expected, :module_expected,
#     :undef_expected, :undef_comma_expected, :alias_expected) while scanning
#     ordinary code, or
#   * a StringState object while scanning inside a string-like literal
#     (string, symbol, regexp, shell command, heredoc, ...).
#
# encoder - object receiving text_token / begin_group / end_group calls.
# options - Hash. Keys read here:
#           :state      - start state, or a [state, heredocs] pair; when
#                         absent, @state is used instead.
#           :keep_state - when truthy, the final [state, heredocs] pair is
#                         stored in @state before returning.
#
# Returns +encoder+.
29: def scan_tokens encoder, options
30: state, heredocs = options[:state] || @state
# Work on a copy so that the shift calls below do not mutate the caller's array.
31: heredocs = heredocs.dup if heredocs.is_a?(Array)
32:
# Resuming inside a string-like literal: reopen its token group.
33: if state && state.instance_of?(StringState)
34: encoder.begin_group state.type
35: end
36:
# State to restore after the next token scanned in :initial (see the
# "if last_state" block below); set by :def_expected and by "#$"/"#@" escapes.
37: last_state = nil
38:
# Truthy (the matched "." or "::") directly after a dot / scope operator.
39: method_call_expected = false
# Truthy when the next token may start a value; gates the regexp, heredoc,
# fancy-string, character-literal and signed-number branches below.
40: value_expected = true
41:
# Stack of [state, curly depth, heredocs] saved for each open "#{" interpolation.
42: inline_block_stack = nil
43: inline_block_curly_depth = 0
44:
# Pending heredocs handed in from a previous run: enter the first one right away.
45: if heredocs
46: state = heredocs.shift
47: encoder.begin_group state.type
48: heredocs = nil if heredocs.empty?
49: end
50:
51: # def_object_stack = nil
52: # def_object_paren_depth = 0
53:
54: patterns = Patterns # avoid constant lookup
55:
# Selects the /u variants of the patterns below.
56: unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
57:
58: until eos?
59:
# --- Symbol state: ordinary code ---------------------------------------
60: if state.instance_of? ::Symbol
61:
62: if match = scan(/[ \t\f\v]+/)
63: encoder.text_token match, :space
64:
65: elsif match = scan(/\n/)
# With heredocs pending, the newline is pushed back and the first pending
# heredoc becomes the current state.
66: if heredocs
67: unscan # heredoc scanning needs \n at start
68: state = heredocs.shift
69: encoder.begin_group state.type
70: heredocs = nil if heredocs.empty?
71: else
# A newline ends an "undef a, b" list.
72: state = :initial if state == :undef_comma_expected
73: encoder.text_token match, :space
74: value_expected = true
75: end
76:
# At the beginning of a line the comment pattern also accepts "#!" (group 1
# set, reported as :doctype) and whatever RUBYDOC_OR_DATA matches; elsewhere
# only a plain "#..." comment is accepted.
77: elsif match = scan(bol? ? / \#(!)?.* | #{patterns::RUBYDOC_OR_DATA} /ox : /\#.*/)
78: encoder.text_token match, self[1] ? :doctype : :comment
79:
# Backslash followed by newline.
80: elsif match = scan(/\\\n/)
81: if heredocs
# Push the match back, emit only the backslash, and leave the newline for the
# heredoc state entered here.
82: unscan # heredoc scanning needs \n at start
83: encoder.text_token scan(/\\/), :space
84: state = heredocs.shift
85: encoder.begin_group state.type
86: heredocs = nil if heredocs.empty?
87: else
88: encoder.text_token match, :space
89: end
90:
91: elsif state == :initial
92:
93: # IDENTS #
94: if !method_call_expected &&
95: match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
96: /#{patterns::METHOD_NAME}/o)
97: value_expected = false
98: kind = patterns::IDENT_KIND[match]
99: if kind == :ident
# A capitalized identifier is reported as a constant unless it ends in "!" or
# "?" or is directly followed by "(".
100: if match[/\A[A-Z]/] && !(match[/[!?]$/] || match?(/\(/))
101: kind = :constant
102: end
103: elsif kind == :keyword
# Keywords select the next state via the KEYWORD_NEW_STATE lookup.
104: state = patterns::KEYWORD_NEW_STATE[match]
105: value_expected = true if patterns::KEYWORDS_EXPECTING_VALUE[match]
106: end
107: value_expected = true if !value_expected && check(/#{patterns::VALUE_FOLLOWS}/o)
108: encoder.text_token match, kind
109:
# Name directly after "." or "::".
110: elsif method_call_expected &&
111: match = scan(unicode ? /#{patterns::METHOD_AFTER_DOT}/uo :
112: /#{patterns::METHOD_AFTER_DOT}/o)
# After "::", a capitalized name not followed by "(" is a constant.
113: if method_call_expected == '::' && match[/\A[A-Z]/] && !match?(/\(/)
114: encoder.text_token match, :constant
115: else
116: encoder.text_token match, :ident
117: end
118: method_call_expected = false
119: value_expected = check(/#{patterns::VALUE_FOLLOWS}/o)
120:
121: # OPERATORS #
# Group 1 captures "." or "::" (a method name is expected next). Group 2 is an
# empty capture that is only set for "..", "...", "=", "==", "===", ",", "(",
# "[" and "{", i.e. operators after which a value is expected. Closing
# brackets set neither group.
122: elsif !method_call_expected && match = scan(/ (\.(?!\.)|::) | (?: \.\.\.? | ==?=? | [,\(\[\{] )() | [\)\]\}] /x)
123: method_call_expected = self[1]
124: value_expected = !method_call_expected && self[2]
# Inside a "#{...}" interpolation: count curly braces to find its end.
125: if inline_block_stack
126: case match
127: when '{'
128: inline_block_curly_depth += 1
129: when '}'
130: inline_block_curly_depth -= 1
131: if inline_block_curly_depth == 0 # closing brace of inline block reached
# Restore what was saved when the interpolation was opened; `next` skips the
# generic :operator token emitted below.
132: state, inline_block_curly_depth, heredocs = inline_block_stack.pop
133: inline_block_stack = nil if inline_block_stack.empty?
134: heredocs = nil if heredocs && heredocs.empty?
135: encoder.text_token match, :inline_delimiter
136: encoder.end_group :inline
137: next
138: end
139: end
140: end
141: encoder.text_token match, :operator
142:
143: elsif match = scan(unicode ? /#{patterns::SYMBOL}/uo :
144: /#{patterns::SYMBOL}/o)
# match[1] is the character following the leading ":". A quote starts a quoted
# symbol, which is scanned as a StringState (interpreted only for '"').
145: case delim = match[1]
146: when ?', ?"
147: encoder.begin_group :symbol
148: encoder.text_token ':', :symbol
149: match = delim.chr
150: encoder.text_token match, :delimiter
151: state = self.class::StringState.new :symbol, delim == ?", match
152: else
153: encoder.text_token match, :symbol
154: value_expected = false
155: end
156:
# Quoted strings. The pattern either matches a complete string without
# backslashes (and, for double quotes, without "#"), or just the opening quote.
157: elsif match = scan(/ ' (?:(?>[^'\\]*) ')? | " (?:(?>[^"\\\#]*) ")? /mx)
158: encoder.begin_group :string
159: if match.size == 1
# Only the opening quote matched: continue in a StringState.
160: encoder.text_token match, :delimiter
161: state = self.class::StringState.new :string, match == '"', match # important for streaming
162: else
# Whole string matched at once: emit delimiter, content (if any), delimiter.
163: encoder.text_token match[0,1], :delimiter
164: encoder.text_token match[1..-2], :content if match.size > 2
165: encoder.text_token match[-1,1], :delimiter
166: encoder.end_group :string
167: value_expected = false
168: end
169:
170: elsif match = scan(unicode ? /#{patterns::INSTANCE_VARIABLE}/uo :
171: /#{patterns::INSTANCE_VARIABLE}/o)
172: value_expected = false
173: encoder.text_token match, :instance_variable
174:
# "/" starts a regexp only where a value is expected; otherwise it falls
# through to the operator branch further down.
175: elsif value_expected && match = scan(/\//)
176: encoder.begin_group :regexp
177: encoder.text_token match, :delimiter
178: state = self.class::StringState.new :regexp, true, '/'
179:
# A leading sign is part of the number only where a value is expected.
180: elsif match = scan(value_expected ? /[-+]?#{patterns::NUMERIC}/o : /#{patterns::NUMERIC}/o)
# A number directly after "." or "::" is reported as an error.
181: if method_call_expected
182: encoder.text_token match, :error
183: method_call_expected = false
184: else
185: encoder.text_token match, self[1] ? :float : :integer # TODO: send :hex/:octal/:binary
186: end
187: value_expected = false
188:
189: elsif match = scan(/ [-+!~^\/]=? | [:;] | [*|&]{1,2}=? | >>? /x)
190: value_expected = true
191: encoder.text_token match, :operator
192:
# Heredoc opener. The opener is emitted as its own complete group; the body
# is queued in +heredocs+ and entered when the next newline is reached.
193: elsif value_expected && match = scan(/#{patterns::HEREDOC_OPEN}/o)
194: quote = self[3]
195: delim = self[quote ? 4 : 2]
196: kind = patterns::QUOTE_TO_TYPE[quote]
197: encoder.begin_group kind
198: encoder.text_token match, :delimiter
199: encoder.end_group kind
200: heredocs ||= [] # create heredocs if empty
# Interpreted unless single-quoted; group 1 == "-" selects the :indented form.
201: heredocs << self.class::StringState.new(kind, quote != "'", delim,
202: self[1] == '-' ? :indented : :linestart)
203: value_expected = false
204:
# Literal matched by FANCY_STRING_START: group 1 is used as the lookup key for
# kind and interpretation, group 2 is passed on as the delimiter.
205: elsif value_expected && match = scan(/#{patterns::FANCY_STRING_START}/o)
206: kind = patterns::FANCY_STRING_KIND[self[1]]
207: encoder.begin_group kind
208: state = self.class::StringState.new kind, patterns::FANCY_STRING_INTERPRETED[self[1]], self[2]
209: encoder.text_token match, :delimiter
210:
# Character literal; reported with the :integer kind.
211: elsif value_expected && match = scan(/#{patterns::CHARACTER}/o)
212: value_expected = false
213: encoder.text_token match, :integer
214:
# "%", "<", "<<", "<=", "<=>" and "?" as operators (reached only when the
# value-expecting branches above did not match).
215: elsif match = scan(/ %=? | <(?:<|=>?)? | \? /x)
216: value_expected = true
217: encoder.text_token match, :operator
218:
219: elsif match = scan(/`/)
220: encoder.begin_group :shell
221: encoder.text_token match, :delimiter
222: state = self.class::StringState.new :shell, true, match
223:
224: elsif match = scan(unicode ? /#{patterns::GLOBAL_VARIABLE}/uo :
225: /#{patterns::GLOBAL_VARIABLE}/o)
226: encoder.text_token match, :global_variable
227: value_expected = false
228:
229: elsif match = scan(unicode ? /#{patterns::CLASS_VARIABLE}/uo :
230: /#{patterns::CLASS_VARIABLE}/o)
231: encoder.text_token match, :class_variable
232: value_expected = false
233:
# A backslash as the very last character of the input.
234: elsif match = scan(/\\\z/)
235: encoder.text_token match, :space
236:
# Nothing matched.
237: else
# The expected method name did not follow the dot: clear the flag and retry
# the same position with the other branches enabled.
238: if method_call_expected
239: method_call_expected = false
240: next
241: end
# Not in unicode mode yet: probe the next character with a /u regexp and, if
# the match is longer than 1, switch to unicode mode and retry. $DEBUG is
# switched off for the duration of the probe and restored in the ensure.
# NOTE(review): the old $DEBUG value is parked in the global $DEBUG_BEFORE;
# a local variable would avoid touching global state.
# NOTE(review): String#size presumably counts bytes here (Ruby 1.8
# semantics) - confirm the intent on Rubies where it counts characters.
242: unless unicode
243: # check for unicode
244: $DEBUG_BEFORE, $DEBUG = $DEBUG, false
245: begin
246: if check(/./mu).size > 1
247: # seems like we should try again with unicode
248: unicode = true
249: end
250: rescue
251: # bad unicode char; use getch
252: ensure
253: $DEBUG = $DEBUG_BEFORE
254: end
255: next if unicode
256: end
257:
# Give up on this character: emit it as an error token and move on.
258: encoder.text_token getch, :error
259:
260: end
261:
# One token has been handled in :initial; return to the state that was
# parked in last_state, if any.
262: if last_state
263: state = last_state
264: last_state = nil
265: end
266:
# A method name that is not followed by "." or "::" is reported as :method.
267: elsif state == :def_expected
268: if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
269: /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
270: encoder.text_token match, :method
271: state = :initial
272: else
# Otherwise scan one token in :initial and then expect the "." or "::".
273: last_state = :dot_expected
274: state = :initial
275: end
276:
277: elsif state == :dot_expected
278: if match = scan(/\.|::/)
279: # invalid definition
280: state = :def_expected
281: encoder.text_token match, :operator
282: else
283: state = :initial
284: end
285:
# Either a "<<" operator (the state is kept) or a possibly "::"-qualified
# name, reported as :class.
286: elsif state == :module_expected
287: if match = scan(/<</)
288: encoder.text_token match, :operator
289: else
290: state = :initial
291: if match = scan(unicode ? / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /oux :
292: / (?:#{patterns::IDENT}::)* #{patterns::IDENT} /ox)
293: encoder.text_token match, :class
294: end
295: end
296:
# One entry of an undef list: a method name or a symbol. The state is set to
# :undef_comma_expected up front and overridden below where necessary.
297: elsif state == :undef_expected
298: state = :undef_comma_expected
299: if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
300: /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
301: encoder.text_token match, :method
302: elsif match = scan(/#{patterns::SYMBOL}/o)
303: case delim = match[1]
304: when ?', ?"
# Quoted symbol: scan it as a StringState that hands control back to
# :undef_comma_expected when it ends.
305: encoder.begin_group :symbol
306: encoder.text_token ':', :symbol
307: match = delim.chr
308: encoder.text_token match, :delimiter
309: state = self.class::StringState.new :symbol, delim == ?", match
310: state.next_state = :undef_comma_expected
311: else
312: encoder.text_token match, :symbol
313: end
314: else
315: state = :initial
316: end
317:
# A comma continues the undef list; anything else ends it.
318: elsif state == :undef_comma_expected
319: if match = scan(/,/)
320: encoder.text_token match, :operator
321: state = :undef_expected
322: else
323: state = :initial
324: end
325:
# Two names (each a method name or symbol) separated by spaces/tabs are
# consumed in one match; each is reported as :symbol when it starts with ":"
# and as :method otherwise.
326: elsif state == :alias_expected
327: match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
328: /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)
329:
330: if match
331: encoder.text_token self[1], (self[1][0] == ?: ? :symbol : :method)
332: encoder.text_token self[2], :space
333: encoder.text_token self[3], (self[3][0] == ?: ? :symbol : :method)
334: end
335: state = :initial
336:
337: else
338: #:nocov:
339: raise_inspect 'Unknown state: %p' % [state], encoder
340: #:nocov:
341: end
342:
# --- StringState: inside a string-like literal ---------------------------
343: else # StringState
344:
# Consume plain content up to the position found by state.pattern, or the
# rest of the input if the pattern does not match.
345: match = scan_until(state.pattern) || scan_rest
346: unless match.empty?
347: encoder.text_token match, :content
348: break if eos?
349: end
350:
# Presumably group 1 of a heredoc's state.pattern is set when the terminator
# line has been reached - verify against StringState. The terminator (up to
# the end of the line) is emitted as the closing delimiter.
351: if state.heredoc && self[1] # end of heredoc
352: match = getch
353: match << scan_until(/$/) unless eos?
354: encoder.text_token match, :delimiter unless match.empty?
355: encoder.end_group state.type
356: state = state.next_state
357: next
358: end
359:
# The next character decides what happens.
360: case match = getch
361:
362: when state.delim
# When paren_depth is set, a closing delimiter only ends the literal once the
# depth has dropped to 0; otherwise it is content.
363: if state.paren_depth
364: state.paren_depth -= 1
365: if state.paren_depth > 0
366: encoder.text_token match, :content
367: next
368: end
369: end
370: encoder.text_token match, :delimiter
# Modifier characters directly after the closing regexp delimiter.
371: if state.type == :regexp && !eos?
372: match = scan(/#{patterns::REGEXP_MODIFIERS}/o)
373: encoder.text_token match, :modifier unless match.empty?
374: end
375: encoder.end_group state.type
376: value_expected = false
377: state = state.next_state
378:
379: when '\\'
# Interpreted literals: backslash plus whatever ESCAPE matches is a :char;
# a backslash that ESCAPE does not accept is an :error.
380: if state.interpreted
381: if esc = scan(/#{patterns::ESCAPE}/o)
382: encoder.text_token match + esc, :char
383: else
384: encoder.text_token match, :error
385: end
386: else
# Non-interpreted literals: only an escaped delimiter or backslash is a
# :char; any other pair is content, as is a lone backslash at end of input.
387: case esc = getch
388: when nil
389: encoder.text_token match, :content
390: when state.delim, '\\'
391: encoder.text_token match + esc, :char
392: else
393: encoder.text_token match + esc, :content
394: end
395: end
396:
397: when '#'
398: case peek(1)
399: when '{'
# "#{": save the string state, curly depth and heredocs, then scan the
# embedded code in :initial until the matching "}" (handled in the
# OPERATORS branch above).
400: inline_block_stack ||= []
401: inline_block_stack << [state, inline_block_curly_depth, heredocs]
402: value_expected = true
403: state = :initial
404: inline_block_curly_depth = 1
405: encoder.begin_group :inline
406: encoder.text_token match + getch, :inline_delimiter
407: when '$', '@'
# "#$..." / "#@...": emit "#" as :escape, scan one token in :initial, then
# return to this string state via last_state.
408: encoder.text_token match, :escape
409: last_state = state
410: state = :initial
411: else
412: #:nocov:
413: raise_inspect 'else-case # reached; #%p not handled' % [peek(1)], encoder
414: #:nocov:
415: end
416:
# A nested opening delimiter: count it and treat it as content.
417: when state.opening_paren
418: state.paren_depth += 1
419: encoder.text_token match, :content
420:
421: else
422: #:nocov:
423: raise_inspect 'else-case " reached; %p not handled, state = %p' % [match, state], encoder
424: #:nocov:
425:
426: end
427:
428: end
429:
430: end
431:
432: # cleaning up
# Input ended inside a string-like literal: close its group.
433: if state.is_a? StringState
434: encoder.end_group state.type
435: end
436:
# Remember where scanning stopped. An unfinished heredoc is put back at the
# front of the heredoc list, and :initial is stored as the state.
437: if options[:keep_state]
438: if state.is_a?(StringState) && state.heredoc
439: (heredocs ||= []).unshift state
440: state = :initial
441: elsif heredocs && heredocs.empty?
442: heredocs = nil
443: end
444: @state = state, heredocs
445: end
446:
# Close every interpolation that is still open, together with the group of
# the string state saved for it.
447: if inline_block_stack
448: until inline_block_stack.empty?
449: state, = *inline_block_stack.pop
450: encoder.end_group :inline
451: encoder.end_group state.type
452: end
453: end
454:
455: encoder
456: end