public class LagartoParser
extends java.lang.Object
Content parser that uses a TagVisitor for callbacks.
Tokenization follows the HTML5 specification, as described
by WHATWG.
Differences from the specs:
| Modifier and Type | Class and Description |
|---|---|
protected class |
LagartoParser.ScriptEscape
Since escaping states inside the SCRIPT tag are rare, we want to use them
lazily, only when really needed.
|
protected class |
LagartoParser.XmlDeclaration |
| Constructor and Description |
|---|
LagartoParser(char[] input)
Creates parser on char array.
|
LagartoParser(java.lang.CharSequence input)
Creates parser on a char sequence.
|
LagartoParser(LagartoParserConfig parserConfig,
char[] input)
Creates parser on char array.
|
LagartoParser(LagartoParserConfig parserConfig,
java.lang.CharSequence input)
Creates parser on a char sequence.
|
| Modifier and Type | Method and Description |
|---|---|
protected void |
_error(java.lang.String message)
Prepares error message and reports it to the visitor.
|
LagartoParser |
configure(java.util.function.Consumer<LagartoParserConfig> configConsumer)
Configures the parser.
|
protected void |
consumeCharacterReference() |
protected void |
consumeCharacterReference(char allowedChar) |
protected void |
emitCData(java.lang.CharSequence charSequence) |
protected void |
emitComment(int from,
int to)
Emits a comment.
|
protected void |
emitDoctype() |
protected void |
emitScript(int from,
int to) |
protected void |
emitTag() |
protected void |
emitText()
Emits text if there is some content.
|
protected void |
emitXml() |
protected void |
errorCharReference() |
protected void |
errorEOF() |
protected void |
errorInvalidToken() |
LagartoParserConfig |
getConfig()
Returns the
configuration of the parser. |
protected void |
initialize()
Initializes parser.
|
void |
parse(TagVisitor visitor)
Parses content and emits events to the provided
TagVisitor. |
protected void |
textEmitChar(char c)
Emits characters into the local text buffer.
|
protected void |
textEmitChars(char[] buffer) |
protected void |
textEmitChars(int from,
int to) |
protected void |
textStart()
Resets text buffer.
|
protected java.lang.CharSequence |
textWrap() |
protected TagVisitor visitor
protected jodd.lagarto.ParsedTag tag
protected jodd.lagarto.ParsedDoctype doctype
protected final jodd.lagarto.CharsInput in
protected final LagartoParserConfig config
protected boolean parsing
protected State DATA_STATE
protected State TAG_OPEN
protected State END_TAG_OPEN
protected State TAG_NAME
protected State BEFORE_ATTRIBUTE_NAME
protected State ATTRIBUTE_NAME
protected State AFTER_ATTRIBUTE_NAME
protected State BEFORE_ATTRIBUTE_VALUE
protected State ATTR_VALUE_UNQUOTED
protected State ATTR_VALUE_SINGLE_QUOTED
protected State ATTR_VALUE_DOUBLE_QUOTED
protected State AFTER_ATTRIBUTE_VALUE_QUOTED
protected State SELF_CLOSING_START_TAG
protected State BOGUS_COMMENT
protected State MARKUP_DECLARATION_OPEN
protected int rawTextStart
protected int rawTextEnd
protected char[] rawTagName
protected State RAWTEXT
protected State RAWTEXT_LESS_THAN_SIGN
protected State RAWTEXT_END_TAG_OPEN
protected State RAWTEXT_END_TAG_NAME
protected int rcdataTagStart
protected char[] rcdataTagName
protected State RCDATA
protected State RCDATA_LESS_THAN_SIGN
protected State RCDATA_END_TAG_OPEN
protected State RCDATA_END_TAG_NAME
protected int commentStart
protected State COMMENT_START
protected State COMMENT_START_DASH
protected State COMMENT
protected State COMMENT_END_DASH
protected State COMMENT_END
protected State COMMENT_END_BANG
protected State DOCTYPE
protected State BEFORE_DOCTYPE_NAME
protected State DOCTYPE_NAME
protected State AFTER_DOCUMENT_NAME
protected int doctypeIdNameStart
protected State AFTER_DOCTYPE_PUBLIC_KEYWORD
protected State BEFORE_DOCTYPE_PUBLIC_IDENTIFIER
protected State DOCTYPE_PUBLIC_IDENTIFIER_DOUBLE_QUOTED
protected State DOCTYPE_PUBLIC_IDENTIFIER_SINGLE_QUOTED
protected State AFTER_DOCTYPE_PUBLIC_IDENTIFIER
protected State BETWEEN_DOCTYPE_PUBLIC_AND_SYSTEM_IDENTIFIERS
protected State BOGUS_DOCTYPE
protected State AFTER_DOCTYPE_SYSTEM_KEYWORD
protected State BEFORE_DOCTYPE_SYSTEM_IDENTIFIER
protected State DOCTYPE_SYSTEM_IDENTIFIER_DOUBLE_QUOTED
protected State DOCTYPE_SYSTEM_IDENTIFIER_SINGLE_QUOTED
protected State AFTER_DOCTYPE_SYSTEM_IDENTIFIER
protected int scriptStartNdx
protected int scriptEndNdx
protected int scriptEndTagName
protected State SCRIPT_DATA
protected State SCRIPT_DATA_LESS_THAN_SIGN
protected State SCRIPT_DATA_END_TAG_OPEN
protected State SCRIPT_DATA_END_TAG_NAME
protected LagartoParser.ScriptEscape scriptEscape
protected LagartoParser.XmlDeclaration xmlDeclaration
protected char[] text
protected int textLen
protected int attrStartNdx
protected int attrEndNdx
protected State state
public LagartoParser(LagartoParserConfig parserConfig, char[] input)
public LagartoParser(char[] input)
public LagartoParser(LagartoParserConfig parserConfig, java.lang.CharSequence input)
public LagartoParser(java.lang.CharSequence input)
protected void initialize()
public LagartoParserConfig getConfig()
Returns the configuration of the parser.
public LagartoParser configure(java.util.function.Consumer<LagartoParserConfig> configConsumer)
public void parse(TagVisitor visitor)
Parses content and emits events to the provided TagVisitor.
protected void consumeCharacterReference(char allowedChar)
protected void consumeCharacterReference()
protected void textEmitChar(char c)
protected void textStart()
protected void textEmitChars(int from,
int to)
protected void textEmitChars(char[] buffer)
protected java.lang.CharSequence textWrap()
protected void emitTag()
protected void emitComment(int from,
int to)
protected void emitText()
protected void emitScript(int from,
int to)
protected void emitDoctype()
protected void emitXml()
protected void emitCData(java.lang.CharSequence charSequence)
protected void errorEOF()
protected void errorInvalidToken()
protected void errorCharReference()
protected void _error(java.lang.String message)