im building a web browser, and im getting stuck with html comments
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
|
from ply.lex import *
tokens = (
"LeftAngle", # <
"LeftAngleSlash", # </
"RightAngle", # >
"Equals", # =
"String", # Ex. "World!"
"Word" # Ex. Hello
)
state = (
("htmlcomment", "exclusive")
)
t_ignore = " " # Shortcut for Whitespace
def t_htmlcomment(Token):
r'<!--'
Token.lexer.begin("htmlcomment")
def t_htmlcomment_end(Token):
r'-->'
Token.lexer.lineno += Token.value.count("\n")
Token.lexer.begin("INITIAL")
def t_htmlcomment_error(Token):
Token.lexer.skip(1)
def t_LeftAngleSlash(Token):
r'</'
return Token
def t_LeftAngle(Token):
r'<'
return Token
def t_RightAngle(Token):
r'>'
return Token
def t_Equals(Token):
r'='
return Token
def t_String(Token):
r'"[^"]*"'
Token.value = Token[1 : -1]
return Token
def t_Word(Token):
r'[^ <>\n]+'
return Token
def t_NewLine(Token):
r'\n'
Token.lexer.lineno += 1
pass
PageSource = """hello <!-- comment --> all"""
HTMLLexer = lex()
HTMLLexer.input(PageSource)
while True:
Token = HTMLLexer.token()
if not Token:
break
print Token
|
WARNING: No t_error rule is defined
LexToken(Word,'hello',1,0)
Traceback (most recent call last):
File "main.py", line 67, in <module>
Token = HTMLLexer.token()
File "/usr/lib/python2.7/dist-packages/ply/lex.py", line 355, in token
newtok = func(tok)
File "main.py", line 20, in t_htmlcomment
Token.lexer.begin("htmlcomment")
File "/usr/lib/python2.7/dist-packages/ply/lex.py", line 272, in begin
raise ValueError("Undefined state")
ValueError: Undefined state