| Class | CodeRay::Scanners::HTML |
| In: |
lib/coderay/scanners/html.rb
|
| Parent: | Scanner |
| ATTR_NAME | = | /[\w.:-]+/ |
| ATTR_VALUE_UNQUOTED | = | ATTR_NAME |
| TAG_END | = | /\/?>/ |
| HEX | = | /[0-9a-fA-F]/ |
| ENTITY | = | / & (?: \w+ | \# (?: \d+ | x#{HEX}+ ) ) ; /ox |
| PLAIN_STRING_CONTENT | = | { "'" => /[^&'>\n]+/, '"' => /[^&">\n]+/, } |
# File lib/coderay/scanners/html.rb, line 45
# Scans the rest of the input as HTML/XML and appends the resulting tokens
# to +tokens+, which is returned.
#
# Ordinary tokens are pushed as <tt>[text, kind]</tt>. A quoted attribute
# value is wrapped in the marker pair <tt>[:open, :string]</tt> /
# <tt>[:close, :string]</tt>, with <tt>:delimiter</tt> tokens for the quotes.
#
# The scanner is a state machine; +state+ takes these values:
#
# :initial::                outside a tag (comments, doctype, tags, text)
# :attribute::              inside a tag, expecting an attribute name or tag end
# :attribute_equal::        after an attribute name, expecting <tt>=</tt>
# :attribute_value::        after <tt>=</tt>, expecting a value
# :attribute_value_string:: inside a quoted attribute value
#
# tokens::  receives tokens via <tt><<</tt>.
# options:: if <tt>options[:keep_state]</tt> is truthy, the final state is
#           stored in <tt>@state</tt> / <tt>@plain_string_content</tt> so a
#           later call can continue where this one stopped.
#
# Calls +raise_inspect+ on an unknown state, on an unreachable branch of the
# <tt>:initial</tt> state, on an empty token, and (only when <tt>$DEBUG</tt>
# is set) on a token without a kind.
def scan_tokens tokens, options

  state = @state
  plain_string_content = @plain_string_content

  until eos?

    kind = nil
    match = nil

    # Whitespace is consumed before the state dispatch, so it becomes a
    # :space token in every state (including inside quoted values).
    if scan(/\s+/m)
      kind = :space

    else

      case state

      when :initial
        if scan(/<!--.*?-->/m)
          kind = :comment
        elsif scan(/<!DOCTYPE.*?>/m)
          kind = :doctype
        elsif scan(/<\?xml.*?\?>/m)
          kind = :preprocessor
        elsif scan(/<\?.*?\?>|<%.*?%>/m)
          kind = :comment
        elsif scan(/<\/[-\w_.:]*>/m)
          kind = :tag
        elsif match = scan(/<[-\w_.:]+>?/m)
          kind = :tag
          # An opening tag not yet terminated by ">" may carry attributes.
          state = :attribute unless match[-1] == ?>
        elsif scan(/[^<>&]+/)
          kind = :plain
        elsif scan(ENTITY)
          kind = :entity
        elsif scan(/[<>&]/)
          kind = :error
        else
          raise_inspect '[BUG] else-case reached with state %p' % [state], tokens
        end

      when :attribute
        if scan(TAG_END)
          kind = :tag
          state = :initial
        elsif scan(ATTR_NAME)
          kind = :attribute_name
          state = :attribute_equal
        else
          # Consume one character so the loop always makes progress.
          kind = :error
          getch
        end

      when :attribute_equal
        if scan(/=/)
          kind = :operator
          state = :attribute_value
        elsif scan(ATTR_NAME)
          # Previous attribute had no value; this is the next attribute name.
          kind = :attribute_name
        elsif scan(TAG_END)
          kind = :tag
          state = :initial
        elsif scan(/./)
          kind = :error
          state = :attribute
        end

      when :attribute_value
        if scan(ATTR_VALUE_UNQUOTED)
          kind = :attribute_value
          state = :attribute
        elsif match = scan(/["']/)
          tokens << [:open, :string]
          state = :attribute_value_string
          # Content pattern depends on which quote character opened the value.
          plain_string_content = PLAIN_STRING_CONTENT[match]
          kind = :delimiter
        elsif scan(TAG_END)
          kind = :tag
          state = :initial
        else
          kind = :error
          getch
        end

      when :attribute_value_string
        if scan(plain_string_content)
          kind = :content
        elsif scan(/['"]/)
          # Closing quote: emit delimiter and group end here, then skip the
          # generic token push at the bottom of the loop.
          tokens << [matched, :delimiter]
          tokens << [:close, :string]
          state = :attribute
          next
        elsif scan(ENTITY)
          kind = :entity
        elsif scan(/&/)
          kind = :content
        elsif scan(/[\n>]/)
          # Unterminated value: close the group and flag the character.
          tokens << [:close, :string]
          kind = :error
          state = :initial
        end

      else
        raise_inspect 'Unknown state: %p' % [state], tokens

      end

    end

    match ||= matched
    if $DEBUG and not kind
      raise_inspect 'Error token %p in line %d' %
        [[match, kind], line], tokens, state
    end
    raise_inspect 'Empty token', tokens unless match

    tokens << [match, kind]
  end

  if options[:keep_state]
    @state = state
    @plain_string_content = plain_string_content
  elsif state == :attribute_value_string
    # Input ended inside a quoted value and the state is being discarded:
    # close the string group so :open/:close markers stay balanced.
    tokens << [:close, :string]
  end

  tokens
end