| Class | CodeRay::Scanners::Python |
| In: | lib/coderay/scanners/python.rb |
| Parent: | Scanner |
Scanner for Python. Supports Python 3.
Based on pygments’ PythonLexer, see dev.pocoo.org/projects/pygments/browser/pygments/lexers/agile.py.
# File lib/coderay/scanners/python.rb, line 103
# Scans the Python source held by this scanner and feeds tokens to +encoder+.
#
# The loop is a state machine over +state+:
# * :initial          - ordinary code (operators, names, numbers, ...)
# * :string           - inside a string or docstring group
# * :def_expected     - the next name is emitted as :method
# * :class_expected   - the next name is emitted as :class
# * :include_expected - inside an import statement
# The *_expected states are entered through DEF_NEW_STATE, which is defined
# outside this method.
#
# encoder:: receives text_token / begin_group / end_group calls.
# options:: accepted for interface compatibility; not read by this method.
#
# Returns +encoder+.
def scan_tokens encoder, options

  state = :initial
  string_delimiter = nil
  string_raw = false
  string_type = nil
  docstring_coming = match?(/#{DOCSTRING_COMING}/o)
  last_token_dot = false
  # String#encoding is probed with respond_to? so that Rubies without
  # encoding support still work.
  unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
  # Stack of symbols (:from, :import, :as) describing the import statement
  # that is currently being scanned.
  from_import_state = []

  until eos?

    if state == :string
      if match = scan(STRING_DELIMITER_REGEXP[string_delimiter])
        encoder.text_token match, :delimiter
        encoder.end_group string_type
        string_type = nil
        state = :initial
        next
      elsif string_delimiter.size == 3 && match = scan(/\n/)
        # Only triple-quoted strings may contain a bare newline.
        encoder.text_token match, :content
      elsif match = scan(STRING_CONTENT_REGEXP[string_delimiter])
        encoder.text_token match, :content
      elsif !string_raw && match = scan(/ \\ #{ESCAPE} /ox)
        encoder.text_token match, :char
      elsif match = scan(/ \\ #{UNICODE_ESCAPE} /ox)
        encoder.text_token match, :char
      elsif match = scan(/ \\ . /x)
        encoder.text_token match, :content
      elsif match = scan(/ \\ | $ /x)
        # Lone backslash or end of line: close the group and flag an error.
        encoder.end_group string_type
        string_type = nil
        encoder.text_token match, :error
        state = :initial
      else
        raise_inspect "else case \" reached; %p not handled." % peek(1), encoder, state
      end

    elsif match = scan(/ [ \t]+ | \\?\n /x)
      encoder.text_token match, :space
      if match == "\n"
        if state == :include_expected
          # A plain newline ends the import statement. Reset the bookkeeping
          # as well, so stale entries neither accumulate nor leak into the
          # next import statement.
          from_import_state = []
          state = :initial
        end
        docstring_coming = true if match?(/#{DOCSTRING_COMING}/o)
      end
      next

    elsif match = scan(/ \# [^\n]* /mx)
      encoder.text_token match, :comment
      next

    elsif state == :initial

      if match = scan(/#{OPERATOR}/o)
        encoder.text_token match, :operator

      elsif match = scan(/(u?r?|b)?("""|"|'''|')/i)
        string_delimiter = self[2]
        string_type = docstring_coming ? :docstring : :string
        docstring_coming = false
        encoder.begin_group string_type
        string_raw = false
        modifiers = self[1]
        unless modifiers.empty?
          # The prefix is matched case-insensitively, so the raw flag must be
          # detected case-insensitively too (R"...", UR'...').
          string_raw = modifiers.downcase.include?('r')
          encoder.text_token modifiers, :modifier
          match = string_delimiter
        end
        state = :string
        encoder.text_token match, :delimiter

      # TODO: backticks

      elsif match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
        kind = IDENT_KIND[match]
        # TODO: keyword arguments
        kind = :ident if last_token_dot
        if kind == :old_keyword
          # Followed by "(" it is treated as a plain identifier.
          kind = check(/\(/) ? :ident : :keyword
        elsif kind == :predefined && check(/ *=/)
          # A predefined name that is followed by "=" is emitted as :ident.
          kind = :ident
        elsif kind == :keyword
          state = DEF_NEW_STATE[match]
          from_import_state << match.to_sym if state == :include_expected
        end
        encoder.text_token match, kind

      elsif match = scan(/@[a-zA-Z0-9_.]+[lL]?/)
        encoder.text_token match, :decorator

      elsif match = scan(/0[xX][0-9A-Fa-f]+[lL]?/)
        encoder.text_token match, :hex

      elsif match = scan(/0[bB][01]+[lL]?/)
        encoder.text_token match, :binary

      elsif match = scan(/(?:\d*\.\d+|\d+\.\d*)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
        if scan(/[jJ]/)
          match << matched
          encoder.text_token match, :imaginary
        else
          encoder.text_token match, :float
        end

      elsif match = scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/)
        encoder.text_token match, :octal

      elsif match = scan(/\d+([lL])?/)
        # An l/L suffix and a j/J suffix are mutually exclusive here.
        if self[1] == nil && scan(/[jJ]/)
          match << matched
          encoder.text_token match, :imaginary
        else
          encoder.text_token match, :integer
        end

      else
        encoder.text_token getch, :error

      end

    elsif state == :def_expected
      state = :initial
      if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
        encoder.text_token match, :method
      else
        next
      end

    elsif state == :class_expected
      state = :initial
      if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
        encoder.text_token match, :class
      else
        next
      end

    elsif state == :include_expected
      if match = scan(unicode ? /#{DESCRIPTOR}/uo : /#{DESCRIPTOR}/o)
        if match == 'as'
          encoder.text_token match, :keyword
          from_import_state << :as
        elsif from_import_state.first == :from && match == 'import'
          encoder.text_token match, :keyword
          from_import_state << :import
        elsif from_import_state.last == :as
          # The name after "as" is an alias; emit it as a plain identifier.
          encoder.text_token match, :ident
          from_import_state.pop
        elsif IDENT_KIND[match] == :keyword
          # Any other keyword ends the import statement: put it back so the
          # :initial branch scans it, and reset the import bookkeeping.
          unscan
          match = nil
          from_import_state = []
          state = :initial
          next
        else
          encoder.text_token match, :include
        end
      elsif match = scan(/,/)
        from_import_state.pop if from_import_state.last == :as
        encoder.text_token match, :operator
      else
        from_import_state = []
        state = :initial
        next
      end

    else
      raise_inspect 'Unknown state', encoder, state

    end

    last_token_dot = match == '.'

  end

  # Close a string group that was still open at the end of input.
  if state == :string
    encoder.end_group string_type
  end

  encoder
end