/**
 * This file is part of the DOM implementation for KDE.
 *
 * Copyright (C) 2000 Peter Kelly (pmk@post.com)
 * Copyright (C) 2003 Apple Computer, Inc.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public License
 * along with this library; see the file COPYING.LIB.  If not, write to
 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include "xml_tokenizer.h"
#include "xml/dom_docimpl.h"
#include "xml/dom_textimpl.h"
#include "xml/dom_xmlimpl.h"
#include "html/html_headimpl.h"
#include "rendering/render_object.h"
#include "misc/htmltags.h"
#include "misc/htmlattrs.h"
#include "misc/loader.h"

#include "khtmlview.h"
#include "khtml_part.h"
// NOTE(review): the header names of the next three includes were missing from
// the text under review (only bare "#include" tokens were left). The names
// below are a reconstruction - confirm them against the upstream file.
#include <qvariant.h>
#include <kdebug.h>
#include <klocale.h>

using namespace DOM;
using namespace khtml;

// Builds a SAX handler that populates the document held by _doc.
// Takes a reference on _doc (released in the destructor) and starts with the
// document node as the current insertion point.
XMLHandler::XMLHandler(DocumentPtr *_doc, KHTMLView *_view)
    : errorLine(0)
{
    // errorCol is only assigned in fatalError(); give it a defined value too.
    errorCol = 0;
    m_doc = _doc;
    if ( m_doc )
        m_doc->ref();
    m_view = _view;
    // Honour the null check above instead of dereferencing _doc unconditionally.
    m_currentNode = m_doc ? m_doc->document() : 0;
}

// Releases the reference taken on the document pointer in the constructor.
XMLHandler::~XMLHandler()
{
    if ( m_doc )
        m_doc->deref();
}

// Returns the error text accumulated by fatalError().
QString XMLHandler::errorProtocol()
{
    return errorProt;
}

// Resets the error text and the handler state before parsing starts.
bool XMLHandler::startDocument()
{
    // at the beginning of parsing: do some initialization
    errorProt = "";
    state = StateInit;

    return true;
}

// Creates an element for the start tag, copies its attributes, and appends it
// below the current node, which then becomes the new current node.
// Returns false if the element cannot be created, an attribute cannot be set,
// or the current node refuses the child.
bool XMLHandler::startElement( const QString& namespaceURI, const QString& /*localName*/, const QString& qName, const QXmlAttributes& atts )
{
    if (m_currentNode->nodeType() == Node::TEXT_NODE)
        exitText();

    int exceptioncode = 0;
    ElementImpl *newElement = m_doc->document()->createElementNS(namespaceURI,qName,exceptioncode);
    if (!newElement)
        return false;

    int i;
    for (i = 0; i < atts.length(); i++) {
        DOMString uri(atts.uri(i));
        DOMString ln(atts.localName(i));
        DOMString val(atts.value(i));
        NodeImpl::Id id = m_doc->document()->attrId(uri.implementation(),
                                                    ln.implementation(),
                                                    false /* allocate */);
        newElement->setAttribute(id, val.implementation(), exceptioncode);
        if (exceptioncode) { // exception setting attributes
            // The element has not been inserted anywhere yet, so it must be
            // released here (same cleanup as the addChild failure path below).
            delete newElement;
            return false;
        }
    }

    // FIXME: This hack ensures implicit table bodies get constructed in XHTML and XML files.
    // We want to consolidate this with the HTML parser and HTML DOM code at some point.
    // For now, it's too risky to rip that code up.
    if (m_currentNode->id() == ID_TABLE &&
        newElement->id() == ID_TR &&
        m_currentNode->isHTMLElement() && newElement->isHTMLElement()) {
        NodeImpl* implicitTBody =
            new HTMLTableSectionElementImpl( m_doc, ID_TBODY, true /* implicit */ );
        m_currentNode->addChild(implicitTBody);
        if (m_view && !implicitTBody->attached())
            implicitTBody->attach();
        m_currentNode = implicitTBody;
    }

    if (m_currentNode->addChild(newElement)) {
        if (m_view && !newElement->attached())
            newElement->attach();
        m_currentNode = newElement;
        return true;
    }
    else {
        delete newElement;
        return false;
    }

    // ### DOM spec states: "if there is no markup inside an element's content, the text is contained in a
    // single object implementing the Text interface that is the only child of the element."... do we
    // need to ensure that empty elements always have an empty text child?
}

// Closes the current element: leaves any open text node, closes the renderer,
// and moves the insertion point up past the parent and any implicit ancestors.
bool XMLHandler::endElement( const QString& /*namespaceURI*/, const QString& /*localName*/, const QString& /*qName*/ )
{
    if (m_currentNode->nodeType() == Node::TEXT_NODE)
        exitText();
    if (m_currentNode->parentNode() != 0) {
        m_currentNode->closeRenderer();
        do {
            m_currentNode = m_currentNode->parentNode();
        } while (m_currentNode && m_currentNode->implicitNode());
    }
// ### else error

    return true;
}

// Opens a CDATA section node below the current node and makes it current.
// Returns false (and frees the node) if the current node refuses the child.
bool XMLHandler::startCDATA()
{
    if (m_currentNode->nodeType() == Node::TEXT_NODE)
        exitText();

    NodeImpl *newNode = m_doc->document()->createCDATASection("");
    if (m_currentNode->addChild(newNode)) {
        if (m_view && !newNode->attached())
            newNode->attach();
        m_currentNode = newNode;
        return true;
    }
    else {
        delete newNode;
        return false;
    }
}

// Leaves the CDATA section by moving the insertion point to its parent, if any.
bool XMLHandler::endCDATA()
{
    if (m_currentNode->parentNode() != 0)
        m_currentNode = m_currentNode->parentNode();
    return true;
}

// Appends character data to the current text or CDATA node; when the current
// node is neither, a new text node is opened first via enterText().
// Returns false if no text node could be opened or appendData set an
// exception code.
bool XMLHandler::characters( const QString& ch )
{
    if (m_currentNode->nodeType() == Node::TEXT_NODE ||
        m_currentNode->nodeType() == Node::CDATA_SECTION_NODE ||
        enterText()) {

        int exceptioncode = 0;
        // NOTE(review): the cast's target type was missing from the text under
        // review. TextImpl* is a reconstruction (every branch of the condition
        // above leaves a text or CDATA node current) - confirm against upstream.
        static_cast<TextImpl*>(m_currentNode)->appendData(ch,exceptioncode);
        if (exceptioncode)
            return false;
        return true;
    }
    else
        return false;
}

// Adds a comment node below the current node. Always returns true, so a
// comment that cannot be inserted does not abort the parse.
bool XMLHandler::comment(const QString & ch)
{
    if (m_currentNode->nodeType() == Node::TEXT_NODE)
        exitText();
    // ### handle exceptions
    NodeImpl *newNode = m_doc->document()->createComment(ch);
    // Free the node if the parent refuses it, as startCDATA()/enterText() do,
    // rather than leaking it.
    if (!m_currentNode->addChild(newNode))
        delete newNode;
    return true;
}

// Adds a processing instruction below the current node and, when the document
// belongs to a part, lets it check for a style sheet. Always returns true.
bool XMLHandler::processingInstruction(const QString &target, const QString &data)
{
    if (m_currentNode->nodeType() == Node::TEXT_NODE)
        exitText();
    // ### handle exceptions
    ProcessingInstructionImpl *pi = m_doc->document()->createProcessingInstruction(target,data);
    if (!m_currentNode->addChild(pi)) {
        // Not inserted: release it instead of leaking, and skip the style
        // sheet check for a node that is not part of the document.
        delete pi;
        return true;
    }
    // don't load stylesheets for standalone documents
    if (m_doc->document()->part()) {
        pi->checkStyleSheet();
    }
    return true;
}

// Returns the generic error string reported to the XML reader.
QString XMLHandler::errorString()
{
#if APPLE_CHANGES
    // FIXME: Does the user ever see this?
    return "error";
#else
    return i18n("the document is not in the correct file format");
#endif
}

// Records a fatal parse error: appends a formatted message to the error text
// and stores the line and column. Returns false.
bool XMLHandler::fatalError( const QXmlParseException& exception )
{
#if APPLE_CHANGES
    // FIXME: Does the user ever see this?
    errorProt += QString("fatal parsing error: %1 in line %2, column %3")
#else
    errorProt += i18n( "fatal parsing error: %1 in line %2, column %3" )
#endif
        .arg( exception.message() )
        .arg( exception.lineNumber() )
        .arg( exception.columnNumber() );

    errorLine = exception.lineNumber();
    errorCol = exception.columnNumber();

    return false;
}

// Opens an empty text node below the current node and makes it current.
// Returns false (and frees the node) if the current node refuses the child.
bool XMLHandler::enterText()
{
    NodeImpl *newNode = m_doc->document()->createTextNode("");
    if (m_currentNode->addChild(newNode)) {
        m_currentNode = newNode;
        return true;
    }
    else {
        delete newNode;
        return false;
    }
}

// Leaves the current text node: attaches it if a view exists and it is not
// attached yet, then moves the insertion point to its parent, if any.
void XMLHandler::exitText()
{
    // The original tested m_currentNode for null in the attach condition but
    // then dereferenced it unconditionally; guard both uses consistently.
    if (!m_currentNode)
        return;

    if (m_view && !m_currentNode->attached())
        m_currentNode->attach();

    NodeImpl* par = m_currentNode->parentNode();
    if (par != 0)
        m_currentNode = par;
}

// Accepts attribute declarations without storing them.
bool XMLHandler::attributeDecl(const QString &/*eName*/, const QString &/*aName*/, const QString &/*type*/,
                               const QString &/*valueDefault*/, const QString &/*value*/)
{
    // qt's xml parser (as of 2.2.3) does not currently give us values for type, valueDefault and
    // value. When it does, we can store these somewhere and have default attributes on elements
    return true;
}

// Accepts external entity declarations without storing them.
bool XMLHandler::externalEntityDecl(const QString &/*name*/, const QString &/*publicId*/, const QString &/*systemId*/)
{
    // ### insert these too - is there anything special we have to do here?
    return true;
}

// Accepts internal entity declarations without storing them.
bool XMLHandler::internalEntityDecl(const QString &/*name*/, const QString &/*value*/)
{
    // ### FIXME
    // Registering the entity with the doctype is disabled. The entity node and
    // its text child used to be allocated here anyway and were then dropped
    // without being stored or freed, leaking on every declaration. Build them
    // only once there is somewhere to put them:
    //
    // EntityImpl *e = new EntityImpl(m_doc,name);
    // // ### further parse entities inside the value and add them as separate nodes (or entityreferences)?
    // e->addChild(m_doc->document()->createTextNode(value));
    // if (m_doc->document()->doctype())
    //     static_cast(m_doc->document()->doctype()->entities())->addNode(e);
    return true;
}

// Accepts notation declarations without storing them.
bool XMLHandler::notationDecl(const QString &name, const QString &publicId, const QString &systemId)
{
// ### FIXME
//    if (m_doc->document()->doctype()) {
//        NotationImpl *n = new NotationImpl(m_doc,name,publicId,systemId);
//        static_cast(m_doc->document()->doctype()->notations())->addNode(n);
//    }
    return true;
}

// Accepts unparsed entity declarations without storing them.
bool XMLHandler::unparsedEntityDecl(const QString &/*name*/, const QString &/*publicId*/,
                                    const QString &/*systemId*/, const QString &/*notationName*/)
{
    // ###
    return true;
}


//------------------------------------------------------------------------------

// Creates a tokenizer for the document held by _doc. Takes a reference on
// _doc (released in the destructor) and starts with an empty source buffer.
XMLTokenizer::XMLTokenizer(DOM::DocumentPtr *_doc, KHTMLView *_view)
{
    m_doc = _doc;
    if ( m_doc )
        m_doc->ref();
    m_view = _view;
    m_xmlCode = "";
    m_scriptsIt = 0;
    m_cachedScript = 0;
}

// Releases the document reference, the script iterator, and the reference on
// any cached script.
XMLTokenizer::~XMLTokenizer()
{
    if ( m_doc )
        m_doc->deref();
    delete m_scriptsIt; // deleting a null pointer is a no-op
    if (m_cachedScript)
        m_cachedScript->deref(this);
}

// Nothing to prepare: the source is only parsed in finish().
void XMLTokenizer::begin()
{
}

// Buffers incoming source text; parsing happens in finish().
void XMLTokenizer::write( const QString &str, bool /*appendData*/ )
{
    m_xmlCode += str;
}

// Signals that parsing has finished.
void XMLTokenizer::end()
{
    emit finishedParsing();
}

void XMLTokenizer::finish()
{
    // parse xml file
    XMLHandler handler(m_doc,m_view);
    QXmlInputSource source;
    source.setData(m_xmlCode);
    QXmlSimpleReader reader;
    reader.setContentHandler( &handler );
    reader.setLexicalHandler( &handler );
    reader.setErrorHandler( &handler );
    reader.setDeclHandler( &handler );
    reader.setDTDHandler( &handler );
    bool ok = reader.parse( source );
    if (!ok) {
        // An error occurred during parsing of the code. Display an error page to the user (the DOM
        // tree is created manually and includes an excerpt from the code where the error is located)

        // ### for multiple error messages, display the code for each (can this happen?)
// Clear the document int exceptioncode = 0; while (m_doc->document()->hasChildNodes()) static_cast(m_doc->document())->removeChild(m_doc->document()->firstChild(),exceptioncode); QString line; QString errorLocPtr; if (handler.errorLine) { QTextIStream stream(&m_xmlCode); for (unsigned long lineno = 0; lineno < handler.errorLine-1; lineno++) stream.readLine(); line = stream.readLine(); for (unsigned long colno = 0; colno < handler.errorCol-1; colno++) errorLocPtr += " "; errorLocPtr += "^"; } // Create elements for display DocumentImpl *doc = m_doc->document(); NodeImpl *html = doc->createElementNS(XHTML_NAMESPACE,"html",exceptioncode); NodeImpl *body = doc->createElementNS(XHTML_NAMESPACE,"body",exceptioncode); NodeImpl *h1 = doc->createElementNS(XHTML_NAMESPACE,"h1",exceptioncode); #if APPLE_CHANGES // FIXME: Is there some alternative to having this text hardcoded here? NodeImpl *headingText = doc->createTextNode("XML parsing error"); #else NodeImpl *headingText = doc->createTextNode(i18n("XML parsing error")); #endif NodeImpl *errorText = doc->createTextNode(handler.errorProtocol()); NodeImpl *hr = 0; NodeImpl *pre = 0; NodeImpl *lineText = 0; NodeImpl *errorLocText = 0; if (!line.isNull()) { hr = doc->createElementNS(XHTML_NAMESPACE,"hr",exceptioncode); pre = doc->createElementNS(XHTML_NAMESPACE,"pre",exceptioncode); lineText = doc->createTextNode(line+"\n"); errorLocText = doc->createTextNode(errorLocPtr); } // Construct DOM tree. 
We ignore exceptions as we assume they will not be thrown here (due to the // fact we are using a known tag set) doc->appendChild(html,exceptioncode); html->appendChild(body,exceptioncode); body->appendChild(h1,exceptioncode); h1->appendChild(headingText,exceptioncode); body->appendChild(errorText,exceptioncode); if (hr) body->appendChild(hr,exceptioncode); if (pre) { body->appendChild(pre,exceptioncode); pre->appendChild(lineText,exceptioncode); pre->appendChild(errorLocText,exceptioncode); } // Close the renderers so that they update their display correctly // ### this should not be necessary, but requires changes in the rendering code... h1->closeRenderer(); if (pre) pre->closeRenderer(); body->closeRenderer(); m_doc->document()->recalcStyle( NodeImpl::Inherit ); m_doc->document()->updateRendering(); end(); } else { // Parsing was successful. Now locate all html