org.eclipse.mylar.internal.tasks.core
Class HtmlStreamTokenizer

java.lang.Object
  extended by org.eclipse.mylar.internal.tasks.core.HtmlStreamTokenizer

public class HtmlStreamTokenizer
extends java.lang.Object


Nested Class Summary
private static class HtmlStreamTokenizer.State
          Enum class for parser state.
static class HtmlStreamTokenizer.Token
          Class for current token.
 
Field Summary
private  java.net.URL base
          base URL for resolving relative URLs
private static java.util.HashMap<java.lang.String,java.lang.Character> entities
          names and values of HTML entity references
private  boolean escapeTagValues
          Allows clients of this class to choose whether tag attributes are escaped
private  java.io.BufferedReader in
          reader from which to parse the text
private  int pushbackChar
          holds a character that was read and then determined not to be part of the current token
private  HtmlStreamTokenizer.Token pushbackToken
          holds a token that was read and then put back in the queue, to be returned again by the next call to nextToken
private  int quoteChar
          current quote delimiter (single or double)
private  HtmlStreamTokenizer.State state
          parser state
private  java.lang.StringBuffer textBuffer
          buffer holding the text of the current token
private  java.lang.StringBuffer whitespaceBuffer
          buffer holding whitespace preceding the current token
 
Constructor Summary
HtmlStreamTokenizer(java.io.Reader in, java.net.URL base)
          Constructor.
 
Method Summary
 void escapeTagAttributes(boolean value)
           
 HtmlStreamTokenizer.Token nextToken()
          Returns the next token from the stream.
private static void parseAttributes(HtmlTag tag, java.lang.String s, int i, boolean escapeValues)
          Parses HTML tag attributes from a buffer and sets them in an HtmlTag.
private static java.lang.Character parseReference(java.lang.String s)
          Parses HTML character and entity references and returns the corresponding character.
private static void parseTag(java.lang.String s, HtmlTag tag, boolean escapeValues)
          Parses an HTML tag out of a string of characters.
 void pushback(HtmlStreamTokenizer.Token token)
          Pushes the token back into the queue, to be returned by the next call to nextToken.
static java.lang.String unescape(java.lang.String s)
          Returns a string with HTML escapes changed into their corresponding characters.
static java.lang.StringBuffer unescape(java.lang.StringBuffer sb)
          Replaces (in-place) HTML escapes in a StringBuffer with their corresponding characters.
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

state

private HtmlStreamTokenizer.State state
parser state


in

private java.io.BufferedReader in
reader from which to parse the text


base

private java.net.URL base
base URL for resolving relative URLs


textBuffer

private java.lang.StringBuffer textBuffer
buffer holding the text of the current token


whitespaceBuffer

private java.lang.StringBuffer whitespaceBuffer
buffer holding whitespace preceding the current token


pushbackToken

private HtmlStreamTokenizer.Token pushbackToken
holds a token that was read and then put back in the queue, to be returned again by the next call to nextToken


pushbackChar

private int pushbackChar
holds a character that was read and then determined not to be part of the current token


quoteChar

private int quoteChar
current quote delimiter (single or double)


escapeTagValues

private boolean escapeTagValues
Allows clients of this class to choose whether tag attributes are escaped


entities

private static java.util.HashMap<java.lang.String,java.lang.Character> entities
names and values of HTML entity references

Constructor Detail

HtmlStreamTokenizer

public HtmlStreamTokenizer(java.io.Reader in,
                           java.net.URL base)
Constructor.

Parameters:
in - reader for the HTML document to tokenize
base - URL for resolving relative URLs
Method Detail

escapeTagAttributes

public void escapeTagAttributes(boolean value)

nextToken

public HtmlStreamTokenizer.Token nextToken()
                                    throws java.io.IOException,
                                           java.text.ParseException
Returns the next token from the stream.

Throws:
java.io.IOException
java.text.ParseException

pushback

public void pushback(HtmlStreamTokenizer.Token token)
Pushes the token back into the queue, to be returned by the next call to nextToken.


parseTag

private static void parseTag(java.lang.String s,
                             HtmlTag tag,
                             boolean escapeValues)
                      throws java.text.ParseException
Parses an HTML tag out of a string of characters.

Throws:
java.text.ParseException

parseAttributes

private static void parseAttributes(HtmlTag tag,
                                    java.lang.String s,
                                    int i,
                                    boolean escapeValues)
                             throws java.text.ParseException
Parses HTML tag attributes from a buffer and sets them in an HtmlTag.

Throws:
java.text.ParseException

unescape

public static java.lang.String unescape(java.lang.String s)
Returns a string with HTML escapes changed into their corresponding characters.


unescape

public static java.lang.StringBuffer unescape(java.lang.StringBuffer sb)
Replaces (in-place) HTML escapes in a StringBuffer with their corresponding characters.


parseReference

private static java.lang.Character parseReference(java.lang.String s)
Parses HTML character and entity references and returns the corresponding character.