need help with the ply.lex library

closed account (Dy7SLyTq)
im building a web browser, and im getting stuck with html comments

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
from ply.lex import *

tokens = (
     "LeftAngle",        # <
     "LeftAngleSlash",   # </
     "RightAngle",       # >
     "Equals",           # =
     "String",           # Ex. "World!"
     "Word"              # Ex. Hello
)

state = (
     ("htmlcomment", "exclusive")
)

t_ignore = " " # Shortcut for Whitespace

def t_htmlcomment(Token):
     r'<!--'
     Token.lexer.begin("htmlcomment")

def t_htmlcomment_end(Token):
     r'-->'
     Token.lexer.lineno += Token.value.count("\n")
     Token.lexer.begin("INITIAL")

def t_htmlcomment_error(Token):
     Token.lexer.skip(1)

def t_LeftAngleSlash(Token):
     r'</'
     return Token

def t_LeftAngle(Token):
     r'<'
     return Token

def t_RightAngle(Token):
     r'>'
     return Token

def t_Equals(Token):
     r'='
     return Token

def t_String(Token):
     r'"[^"]*"'
     Token.value = Token[1 : -1]
     return Token

def t_Word(Token):
     r'[^ <>\n]+'
     return Token

def t_NewLine(Token):
     r'\n'
     Token.lexer.lineno += 1
     pass

PageSource = """hello <!-- comment --> all"""

HTMLLexer = lex()
HTMLLexer.input(PageSource)

while True:
     Token = HTMLLexer.token()

     if not Token:
          break

     print Token


WARNING: No t_error rule is defined
LexToken(Word,'hello',1,0)
Traceback (most recent call last):
File "main.py", line 67, in <module>
Token = HTMLLexer.token()
File "/usr/lib/python2.7/dist-packages/ply/lex.py", line 355, in token
newtok = func(tok)
File "main.py", line 20, in t_htmlcomment
Token.lexer.begin("htmlcomment")
File "/usr/lib/python2.7/dist-packages/ply/lex.py", line 272, in begin
raise ValueError("Undefined state")
ValueError: Undefined state
Topic archived. No new replies allowed.