/*
    HTMLParser.h

    This file is part of the KDE libraries

    Copyright (C) 1997 Martin Jones
              (C) 1997 Torben Weis
              (C) 1998 Waldo Bastian
              (C) 1999 Lars Knoll
    Copyright (C) 2004, 2005, 2006 Apple Computer, Inc.

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public License
    along with this library; see the file COPYING.LIB.  If not, write to
    the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
    Boston, MA 02111-1307, USA.
*/
// KDE HTML Widget -- HTML Parser


#ifndef HTMLPARSER_H
#define HTMLPARSER_H

#include "HTMLDocument.h"

namespace WebCore {

class DocumentFragment;
class FrameView;
class HTMLElement;
class HTMLFormElement;
class HTMLHeadElement;
class HTMLMapElement;
class HTMLStackElem;
class Token;

/**
 * The parser for html. It receives a stream of tokens from the HTMLTokenizer, and
 * builds up the Document structure from it.
 *
 * NOTE(review): the access specifiers and any constructor declarations of this
 * class are not present in the text this header was restored from. Everything
 * is declared public below so that no existing caller loses access; confirm the
 * intended public/protected/private split and the constructors against
 * HTMLParser.cpp before relying on this declaration.
 */
class HTMLParser
{
public:
    virtual ~HTMLParser();

    /**
     * parses one token delivered by the tokenizer
     */
    PassRefPtr<Node> parseToken(Token*);

    /**
     * tokenizer says it's not going to be sending us any more tokens
     */
    void finished();

    /**
     * resets the parser
     */
    void reset();

    /** True while discard_until is set (non-null). */
    bool skipMode() const { return !discard_until.isNull(); }
    /** True whenever inBody is false. */
    bool noSpaces() const { return !inBody; }
    /** Returns the inSelect flag. */
    bool selectMode() const { return inSelect; }

    /** Returns document downcast to HTMLDocument (unchecked static_cast). */
    HTMLDocument *doc() const { return static_cast<HTMLDocument *>(document); }

    void setCurrent(Node* newCurrent);
    /** Records the local name of qName in discard_until, which turns skipMode() on. */
    void setSkipMode(const QualifiedName& qName) { discard_until = qName.localName(); }

    Document* document;

    PassRefPtr<Node> getNode(Token*);

    // Per-tag hooks, one per element kind named in the function.
    // NOTE(review): presumably called while creating the node for the matching
    // tag, with the bool result deciding how parsing proceeds -- confirm the
    // exact contract in HTMLParser.cpp.
    bool bodyCreateErrorCheck(Token*, RefPtr<Node>&);
    bool canvasCreateErrorCheck(Token*, RefPtr<Node>&);
    bool commentCreateErrorCheck(Token*, RefPtr<Node>&);
    bool ddCreateErrorCheck(Token*, RefPtr<Node>&);
    bool dtCreateErrorCheck(Token*, RefPtr<Node>&);
    bool formCreateErrorCheck(Token*, RefPtr<Node>&);
    bool framesetCreateErrorCheck(Token*, RefPtr<Node>&);
    bool headCreateErrorCheck(Token*, RefPtr<Node>&);
    bool iframeCreateErrorCheck(Token*, RefPtr<Node>&);
    bool isindexCreateErrorCheck(Token*, RefPtr<Node>&);
    bool mapCreateErrorCheck(Token*, RefPtr<Node>&);
    bool nestedCreateErrorCheck(Token*, RefPtr<Node>&);
    bool nestedStyleCreateErrorCheck(Token*, RefPtr<Node>&);
    bool noembedCreateErrorCheck(Token*, RefPtr<Node>&);
    bool noframesCreateErrorCheck(Token*, RefPtr<Node>&);
    bool nolayerCreateErrorCheck(Token*, RefPtr<Node>&);
    bool noscriptCreateErrorCheck(Token*, RefPtr<Node>&);
    bool selectCreateErrorCheck(Token*, RefPtr<Node>&);
    bool tableCellCreateErrorCheck(Token*, RefPtr<Node>&);
    bool tableSectionCreateErrorCheck(Token*, RefPtr<Node>&);
    bool textCreateErrorCheck(Token*, RefPtr<Node>&);

    void processCloseTag(Token *);

    bool insertNode(Node *n, bool flat = false);
    bool handleError(Node* n, bool flat, const AtomicString& localName, int tagPriority);

    // The currently active element (the one new elements will be added to).  Can be a DocumentFragment, a Document or an Element.
    Node* current;

    bool currentIsReferenced;

    HTMLStackElem *blockStack;

    void pushBlock(const AtomicString& tagName, int _level);
    void popBlock(const AtomicString& tagName);
    void popBlock(const QualifiedName& qName) { return popBlock(qName.localName()); } // Convenience function for readability.
    void popOneBlock(bool delBlock = true);
    void popInlineBlocks();

    void freeBlock();

    void createHead();

    bool isResidualStyleTag(const AtomicString& tagName);
    bool isAffectedByResidualStyle(const AtomicString& tagName);
    void handleResidualStyleCloseTagAcrossBlocks(HTMLStackElem* elem);
    void reopenResidualStyleTags(HTMLStackElem* elem, Node* malformedTableParent);

    bool allowNestedRedundantTag(const AtomicString& tagName);
    static bool isHeaderTag(const AtomicString& tagName);
    void popNestedHeaderTag();

    bool isInline(Node* node) const;
    Node *parseTelephoneNumbers(Node *inputNode);
    Text *parseNextPhoneNumber(Text *inputText);

    /**
     * currently active form
     */
    HTMLFormElement *form;

    /**
     * current map
     */
    HTMLMapElement *map;

    /**
     * the head element. Needed for crappy html which defines <base> after </head>
     */
    HTMLHeadElement *head;

    /**
     * a possible <isindex> element in the head. Compatibility hack for
     * html from the stone age
     */
    RefPtr<Node> isindex;
    Node* handleIsindex(Token*);

    /**
     * inserts the stupid isIndex element.
     */
    void startBody();

    bool inBody;
    bool haveContent;
    bool haveFrameSet;
    bool end;
    bool inSelect;

    /**
     * tells the parser to discard all tags, until it reaches the one specified
     */
    AtomicString discard_until;

    bool headLoaded;
    bool m_fragment;
    int inStrayTableContent;
};

} // namespace WebCore

#endif // HTMLPARSER_H