Class HtmlTreeBuilder

java.lang.Object
org.jsoup.parser.TreeBuilder
org.jsoup.parser.HtmlTreeBuilder

public class HtmlTreeBuilder extends TreeBuilder
HTML Tree Builder; creates a DOM from Tokens.
  • Field Details

    • TagsSearchInScope

      static final String[] TagsSearchInScope
    • TagSearchList

      static final String[] TagSearchList
    • TagSearchButton

      static final String[] TagSearchButton
    • TagSearchTableScope

      static final String[] TagSearchTableScope
    • TagSearchSelectScope

      static final String[] TagSearchSelectScope
    • TagSearchEndTags

      static final String[] TagSearchEndTags
    • TagThoroughSearchEndTags

      static final String[] TagThoroughSearchEndTags
    • TagSearchSpecial

      static final String[] TagSearchSpecial
    • MaxScopeSearchDepth

      public static final int MaxScopeSearchDepth
      See Also:
      Constant Field Values
    • state

      private HtmlTreeBuilderState state
    • originalState

      private HtmlTreeBuilderState originalState
    • baseUriSetFromDoc

      private boolean baseUriSetFromDoc
    • headElement

      @Nullable private Element headElement
    • formElement

      @Nullable private FormElement formElement
    • contextElement

      @Nullable private Element contextElement
    • formattingElements

      private ArrayList<Element> formattingElements
    • tmplInsertMode

      private ArrayList<HtmlTreeBuilderState> tmplInsertMode
    • pendingTableCharacters

      private List<Token.Character> pendingTableCharacters
    • emptyEnd

      private Token.EndTag emptyEnd
    • framesetOk

      private boolean framesetOk
    • fosterInserts

      private boolean fosterInserts
    • fragmentParsing

      private boolean fragmentParsing
    • maxQueueDepth

      private static final int maxQueueDepth
      See Also:
      Constant Field Values
    • specificScopeTarget

      private String[] specificScopeTarget
    • maxUsedFormattingElements

      private static final int maxUsedFormattingElements
      See Also:
      Constant Field Values
  • Constructor Details

    • HtmlTreeBuilder

      public HtmlTreeBuilder()
  • Method Details

    • defaultSettings

      ParseSettings defaultSettings()
      Specified by:
      defaultSettings in class TreeBuilder
    • newInstance

      HtmlTreeBuilder newInstance()
      Description copied from class: TreeBuilder
      Create a new copy of this TreeBuilder
      Specified by:
      newInstance in class TreeBuilder
      Returns:
      copy, ready for a new parse
    • initialiseParse

      @ParametersAreNonnullByDefault protected void initialiseParse(Reader input, String baseUri, Parser parser)
      Overrides:
      initialiseParse in class TreeBuilder
    • parseFragment

      List<Node> parseFragment(String inputFragment, @Nullable Element context, String baseUri, Parser parser)
      Specified by:
      parseFragment in class TreeBuilder
    • process

      protected boolean process(Token token)
      Specified by:
      process in class TreeBuilder
    • process

      boolean process(Token token, HtmlTreeBuilderState state)
    • transition

      void transition(HtmlTreeBuilderState state)
    • state

      HtmlTreeBuilderState state()
    • markInsertionMode

      void markInsertionMode()
    • originalState

      HtmlTreeBuilderState originalState()
    • framesetOk

      void framesetOk(boolean framesetOk)
    • framesetOk

      boolean framesetOk()
    • getDocument

      Document getDocument()
    • getBaseUri

      String getBaseUri()
    • maybeSetBaseUri

      void maybeSetBaseUri(Element base)
    • isFragmentParsing

      boolean isFragmentParsing()
    • error

      void error(HtmlTreeBuilderState state)
    • insert

      Element insert(Token.StartTag startTag)
    • insertStartTag

      Element insertStartTag(String startTagName)
    • insert

      void insert(Element el)
    • insert

      private void insert(Element el, @Nullable Token token)
    • insertEmpty

      Element insertEmpty(Token.StartTag startTag)
    • insertForm

      FormElement insertForm(Token.StartTag startTag, boolean onStack, boolean checkTemplateStack)
    • insert

      void insert(Token.Comment commentToken)
    • insert

      void insert(Token.Character characterToken)
      Inserts the provided character token into the current element.
    • insert

      void insert(Token.Character characterToken, Element el)
    • insertNode

      private void insertNode(Node node, @Nullable Token token)
      Inserts the provided character token into the provided element. Use when not going onto stack element.
    • pop

      Element pop()
    • push

      void push(Element element)
    • getStack

      ArrayList<Element> getStack()
    • onStack

      boolean onStack(Element el)
    • onStack

      boolean onStack(String elName)
    • onStack

      private static boolean onStack(ArrayList<Element> queue, Element element)
    • getFromStack

      @Nullable Element getFromStack(String elName)
    • removeFromStack

      boolean removeFromStack(Element el)
    • popStackToClose

      @Nullable Element popStackToClose(String elName)
    • popStackToClose

      void popStackToClose(String... elNames)
    • popStackToBefore

      void popStackToBefore(String elName)
    • clearStackToTableContext

      void clearStackToTableContext()
    • clearStackToTableBodyContext

      void clearStackToTableBodyContext()
    • clearStackToTableRowContext

      void clearStackToTableRowContext()
    • clearStackToContext

      private void clearStackToContext(String... nodeNames)
    • aboveOnStack

      @Nullable Element aboveOnStack(Element el)
    • insertOnStackAfter

      void insertOnStackAfter(Element after, Element in)
    • replaceOnStack

      void replaceOnStack(Element out, Element in)
    • replaceInQueue

      private void replaceInQueue(ArrayList<Element> queue, Element out, Element in)
    • resetInsertionMode

      boolean resetInsertionMode()
      Reset the insertion mode, by searching up the stack for an appropriate insertion mode. The stack search depth is limited to maxQueueDepth.
      Returns:
      true if the insertion mode was actually changed.
    • resetBody

      void resetBody()
      Places the body back onto the stack and moves to InBody, for cases in AfterBody / AfterAfterBody when more content comes.
    • inSpecificScope

      private boolean inSpecificScope(String targetName, String[] baseTypes, String[] extraTypes)
    • inSpecificScope

      private boolean inSpecificScope(String[] targetNames, String[] baseTypes, @Nullable String[] extraTypes)
    • inScope

      boolean inScope(String[] targetNames)
    • inScope

      boolean inScope(String targetName)
    • inScope

      boolean inScope(String targetName, String[] extras)
    • inListItemScope

      boolean inListItemScope(String targetName)
    • inButtonScope

      boolean inButtonScope(String targetName)
    • inTableScope

      boolean inTableScope(String targetName)
    • inSelectScope

      boolean inSelectScope(String targetName)
    • onStackNot

      boolean onStackNot(String[] allowedTags)
      Tests if there is some element on the stack that is not in the provided set.
    • setHeadElement

      void setHeadElement(Element headElement)
    • getHeadElement

      Element getHeadElement()
    • isFosterInserts

      boolean isFosterInserts()
    • setFosterInserts

      void setFosterInserts(boolean fosterInserts)
    • getFormElement

      @Nullable FormElement getFormElement()
    • setFormElement

      void setFormElement(FormElement formElement)
    • resetPendingTableCharacters

      void resetPendingTableCharacters()
    • getPendingTableCharacters

      List<Token.Character> getPendingTableCharacters()
    • addPendingTableCharacters

      void addPendingTableCharacters(Token.Character c)
    • generateImpliedEndTags

      void generateImpliedEndTags(String excludeTag)
      From the HTML specification, section 13.2.6.3, "Closing elements that have implied end tags": When the steps below require the UA to generate implied end tags, then, while the current node is a dd element, a dt element, an li element, an optgroup element, an option element, a p element, an rb element, an rp element, an rt element, or an rtc element, the UA must pop the current node off the stack of open elements. If a step requires the UA to generate implied end tags but lists an element to exclude from the process, then the UA must perform the above steps as if that element was not in the above list. When the steps below require the UA to generate all implied end tags thoroughly, then, while the current node is a caption element, a colgroup element, a dd element, a dt element, an li element, an optgroup element, an option element, a p element, an rb element, an rp element, an rt element, an rtc element, a tbody element, a td element, a tfoot element, a th element, a thead element, or a tr element, the UA must pop the current node off the stack of open elements.
      Parameters:
      excludeTag - If a step requires the UA to generate implied end tags but lists an element to exclude from the process, then the UA must perform the above steps as if that element was not in the above list.
    • generateImpliedEndTags

      void generateImpliedEndTags()
    • generateImpliedEndTags

      void generateImpliedEndTags(boolean thorough)
      Pops elements off the stack according to the implied end tag rules.
      Parameters:
      thorough - if we are thorough (includes table elements etc) or not
    • closeElement

      void closeElement(String name)
    • isSpecial

      boolean isSpecial(Element el)
    • lastFormattingElement

      Element lastFormattingElement()
    • positionOfElement

      int positionOfElement(Element el)
    • removeLastFormattingElement

      Element removeLastFormattingElement()
    • pushActiveFormattingElements

      void pushActiveFormattingElements(Element in)
    • pushWithBookmark

      void pushWithBookmark(Element in, int bookmark)
    • checkActiveFormattingElements

      void checkActiveFormattingElements(Element in)
    • isSameFormattingElement

      private boolean isSameFormattingElement(Element a, Element b)
    • reconstructFormattingElements

      void reconstructFormattingElements()
    • clearFormattingElementsToLastMarker

      void clearFormattingElementsToLastMarker()
    • removeFromActiveFormattingElements

      void removeFromActiveFormattingElements(Element el)
    • isInActiveFormattingElements

      boolean isInActiveFormattingElements(Element el)
    • getActiveFormattingElement

      @Nullable Element getActiveFormattingElement(String nodeName)
    • replaceActiveFormattingElement

      void replaceActiveFormattingElement(Element out, Element in)
    • insertMarkerToFormattingElements

      void insertMarkerToFormattingElements()
    • insertInFosterParent

      void insertInFosterParent(Node in)
    • pushTemplateMode

      void pushTemplateMode(HtmlTreeBuilderState state)
    • popTemplateMode

      @Nullable HtmlTreeBuilderState popTemplateMode()
    • templateModeSize

      int templateModeSize()
    • currentTemplateMode

      @Nullable HtmlTreeBuilderState currentTemplateMode()
    • toString

      public String toString()
      Overrides:
      toString in class Object
    • isContentForTagData

      protected boolean isContentForTagData(String normalName)
      Description copied from class: TreeBuilder
      (An internal method, visible for Element. For HTML parse, signals that script and style text should be treated as Data Nodes).
      Overrides:
      isContentForTagData in class TreeBuilder