KHTML
step.cpp
Go to the documentation of this file.
00001 /* 00002 * step.cc - Copyright 2005 Frerich Raabe <raabe@kde.org> 00003 * Copyright 2010 Maksim Orlovich <maksim@kde.org> 00004 * 00005 * Redistribution and use in source and binary forms, with or without 00006 * modification, are permitted provided that the following conditions 00007 * are met: 00008 * 00009 * 1. Redistributions of source code must retain the above copyright 00010 * notice, this list of conditions and the following disclaimer. 00011 * 2. Redistributions in binary form must reproduce the above copyright 00012 * notice, this list of conditions and the following disclaimer in the 00013 * documentation and/or other materials provided with the distribution. 00014 * 00015 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 00016 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00017 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 00018 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 00019 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 00020 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00021 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00022 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00023 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 00024 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00025 */ 00026 #include "step.h" 00027 00028 #include "dom/dom3_xpath.h" 00029 #include "xml/dom_docimpl.h" 00030 #include "xml/dom_nodeimpl.h" 00031 #include "xml/dom_textimpl.h" 00032 #include "xml/dom3_xpathimpl.h" 00033 00034 #include "step.h" 00035 00036 #include <QtDebug> 00037 00038 using namespace DOM; 00039 using namespace DOM::XPath; 00040 using namespace khtml; 00041 using namespace khtml::XPath; 00042 00043 00044 static bool areAdjacentTextNodes( NodeImpl *n, NodeImpl *m ) 00045 { 00046 if ( !n || !m ) { 00047 return false; 00048 } 00049 00050 if ( n->nodeType() != Node::TEXT_NODE && n->nodeType() != Node::CDATA_SECTION_NODE ) { 00051 return false; 00052 } 00053 if ( m->nodeType() != Node::TEXT_NODE && m->nodeType() != Node::CDATA_SECTION_NODE ) { 00054 return false; 00055 } 00056 00057 // ### 00058 #ifdef __GNUC__ 00059 #warning "Might need more generic adjacency -- check" 00060 #endif 00061 00062 return ( n->nextSibling() && ( n->nextSibling() == m ) ) || 00063 ( m->nextSibling() && ( m->nextSibling() == n ) ); 00064 } 00065 00066 static DomNodeList compressTextNodes( const DomNodeList &nodes ) 00067 { 00068 DomNodeList outNodes = new StaticNodeListImpl; 00069 00070 for ( unsigned long n = 0; n < nodes->length(); ++n) { 00071 NodeImpl* node = nodes->item( n ); 00072 NodeImpl* next = n+1 < nodes->length() ? nodes->item( n+1 ) : 0; 00073 00074 if ( !next || !areAdjacentTextNodes( node, next ) ) { 00075 outNodes->append( node ); 00076 } else if ( areAdjacentTextNodes( node, next ) ) { 00077 QString s = node->nodeValue().string(); 00078 00079 // n2 is a potential successor, and is always in-range 00080 unsigned long n2 = n+1; 00081 while (n2 < nodes->length() && areAdjacentTextNodes( nodes->item( n2 ), nodes->item( n2-1) ) ) { 00082 s += nodes->item( n2 )->nodeValue().string(); 00083 ++n2; 00084 } 00085 outNodes->append( node->document()->createTextNode( new DOMStringImpl( s.data(), s.length() ) ) ); 00086 } 00087 } 00088 return outNodes; 00089 } 00090 00091 QString Step::axisAsString( AxisType axis ) 00092 { 00093 switch ( axis ) { 00094 case AncestorAxis: return "ancestor"; 00095 case AncestorOrSelfAxis: return "ancestor-or-self"; 00096 case AttributeAxis: return "attribute"; 00097 case ChildAxis: return "child"; 00098 case DescendantAxis: return "descendant"; 00099 case DescendantOrSelfAxis: return "descendant-or-self"; 00100 case FollowingAxis: return "following"; 00101 case FollowingSiblingAxis: return "following-sibling"; 00102 case NamespaceAxis: return "namespace"; 00103 case ParentAxis: return "parent"; 00104 case PrecedingAxis: return "preceding"; 00105 case PrecedingSiblingAxis: return "preceding-sibling"; 00106 case SelfAxis: return "self"; 00107 } 00108 return QString(); 00109 } 00110 00111 Step::Step(): m_compileState( NotCompiled ) 00112 { 00113 } 00114 00115 Step::Step( AxisType axis, const DOMString &nodeTest, 00116 const QList<Predicate *> &predicates ) 00117 : m_axis( axis ), 00118 m_nodeTest( nodeTest ), 00119 m_compileState( NotCompiled ), 00120 m_predicates( predicates ) 00121 { 00122 } 00123 00124 Step::~Step() 00125 { 00126 qDeleteAll( m_predicates ); 00127 } 00128 00129 DomNodeList Step::evaluate( NodeImpl *context ) const 00130 { 00131 assert( context ); 00132 #ifdef XPATH_VERBOSE 00133 kDebug(6011) << "Evaluating step, axis='" << axisAsString( m_axis ) <<"'" 00134 << ", nodetest='" << m_nodeTest << "'" 00135 << ", " << m_predicates.count() << " predicates"; 00136 kDebug(6011) << DOM::getPrintableName( context->id() ); 00137 #endif 00138 00139 DomNodeList inNodes = nodesInAxis( context ), outNodes; 00140 00141 // ### optimization opportunity: can say DocumentOrder for most 00142 StaticNodeListImpl::NormalizationKind known = StaticNodeListImpl::AxisOrder; 00143 inNodes->setKnownNormalization(known); 00144 00145 #ifdef XPATH_VERBOSE 00146 kDebug(6011) << "Axis " << axisAsString( m_axis ) << " matches " << inNodes->length() << " nodes."; 00147 #endif 00148 00149 inNodes = nodeTestMatches( context, inNodes ); 00150 inNodes->setKnownNormalization(known); // nodeTest doesn't change order 00151 00152 #ifdef XPATH_VERBOSE 00153 kDebug(6011) << "\tNodetest " << m_nodeTest << " trims this number to " << inNodes->length(); 00154 #endif 00155 00156 if ( m_predicates.isEmpty() ) 00157 return inNodes; 00158 00159 foreach( Predicate *predicate, m_predicates ) { 00160 outNodes = new StaticNodeListImpl; 00161 Expression::evaluationContext().size = int(inNodes->length()); 00162 Expression::evaluationContext().position = 1; 00163 00164 for ( unsigned long n = 0; n < inNodes->length(); ++n ) { 00165 NodeImpl* node = inNodes->item( n ); 00166 Expression::evaluationContext().node = node; 00167 EvaluationContext backupCtx = Expression::evaluationContext(); 00168 #ifdef XPATH_VERBOSE 00169 kDebug() << Expression::evaluationContext().position << "/" << node; 00170 #endif 00171 if ( predicate->evaluate() ) { 00172 outNodes->append( node ); 00173 } 00174 Expression::evaluationContext() = backupCtx; 00175 ++Expression::evaluationContext().position; 00176 } 00177 #ifdef XPATH_VERBOSE 00178 kDebug(6011) << "\tPredicate trims this number to " << outNodes->length(); 00179 #endif 00180 inNodes = outNodes; 00181 inNodes->setKnownNormalization(known); // predicates don't change order 00182 } 00183 00184 return outNodes; 00185 } 00186 00187 DomNodeList Step::nodesInAxis( NodeImpl *context ) const 00188 { 00189 //assert(context); 00190 DomNodeList nodes = new StaticNodeListImpl; 00191 switch ( m_axis ) { 00192 case ChildAxis: { 00193 NodeImpl *n = xpathFirstChild( context ); 00194 while ( n ) { 00195 nodes->append( n ); 00196 n = n->nextSibling(); 00197 } 00198 return nodes; 00199 } 00200 case DescendantAxis: { 00201 collectChildrenRecursively( nodes, context ); 00202 return nodes; 00203 } 00204 case ParentAxis: { 00205 NodeImpl* p = xpathParentNode( context ); 00206 00207 if ( p ) 00208 nodes->append( p ); 00209 return nodes; 00210 } 00211 case AncestorAxis: { 00212 NodeImpl *n = xpathParentNode( context ); 00213 while ( n ) { 00214 nodes->append( n ); 00215 n = xpathParentNode( n ); 00216 } 00217 return nodes; 00218 } 00219 case FollowingSiblingAxis: { 00220 if ( context->nodeType() == Node::ATTRIBUTE_NODE || 00221 context->nodeType() == Node::XPATH_NAMESPACE_NODE ) { 00222 return nodes; // empty 00223 } 00224 00225 NodeImpl *n = context->nextSibling(); 00226 while ( n ) { 00227 nodes->append( n ); 00228 n = n->nextSibling(); 00229 } 00230 return nodes; 00231 } 00232 case PrecedingSiblingAxis: { 00233 if ( context->nodeType() == Node::ATTRIBUTE_NODE || 00234 context->nodeType() == Node::XPATH_NAMESPACE_NODE ) { 00235 return nodes; // empty 00236 } 00237 00238 NodeImpl *n = context->previousSibling(); 00239 while ( n ) { 00240 nodes->append( n ); 00241 n = n->previousSibling(); 00242 } 00243 return nodes; 00244 } 00245 case FollowingAxis: { 00246 NodeImpl *p = context; 00247 while ( !isRootDomNode( p ) ) { 00248 NodeImpl *n = nextSiblingForFollowing( p ); 00249 while ( n ) { 00250 nodes->append( n ); 00251 collectChildrenRecursively( nodes, n ); 00252 n = n->nextSibling(); 00253 } 00254 p = xpathParentNode( p ); 00255 } 00256 return nodes; 00257 } 00258 case PrecedingAxis: { 00259 NodeImpl *p = context; 00260 while ( !isRootDomNode( p ) ) { 00261 NodeImpl *n = p->previousSibling(); 00262 while ( n ) { 00263 collectChildrenReverse( nodes, n ); 00264 nodes->append( n ); 00265 n = n->previousSibling(); 00266 } 00267 p = xpathParentNode( p ); 00268 } 00269 return nodes; 00270 } 00271 case AttributeAxis: { 00272 if ( context->nodeType() != Node::ELEMENT_NODE ) { 00273 return nodes; // empty 00274 } 00275 00276 NamedAttrMapImpl *attrs = static_cast<ElementImpl*>(context)->attributes( true /*read-only*/ ); 00277 if ( !attrs ) { 00278 return nodes; 00279 } 00280 00281 for ( unsigned long i = 0; i < attrs->length(); ++i ) { 00282 nodes->append( attrs->item( i ) ); 00283 } 00284 return nodes; 00285 } 00286 case NamespaceAxis: { 00287 // ### TODO: Need to implement this, but not a priority, since 00288 // other impls don't. 00289 #if 0 00290 if ( context->nodeType() != Node::ELEMENT_NODE ) { 00291 return nodes; 00292 } 00293 00294 bool foundXmlNsNode = false; 00295 NodeImpl *node = context; 00296 while ( node ) { 00297 NamedAttrMapImpl *attrs = 00298 node->isElementNode() ? static_cast<ElementImpl*>(node)->attributes() : 0; 00299 if ( !attrs ) { 00300 node = xpathParentNode( node ); 00301 continue; 00302 } 00303 00304 for ( unsigned long i = 0; i < attrs->length(); ++i ) { 00305 NodeImpl *n = attrs->item( i ); 00306 if ( n->nodeName().string().startsWith( "xmlns:" ) ) { 00307 nodes->append( n ); 00308 } else if ( n->nodeName() == "xmlns" && 00309 !foundXmlNsNode 00310 ) { 00311 foundXmlNsNode = true; 00312 if ( !n->nodeValue().string().isEmpty() ) { 00313 nodes->append( n ); 00314 } 00315 } 00316 } 00317 node = xpathParentNode( node ); 00318 } 00319 #endif 00320 return nodes; 00321 } 00322 case SelfAxis: 00323 nodes->append( context ); 00324 return nodes; 00325 case DescendantOrSelfAxis: 00326 nodes->append( context ); 00327 collectChildrenRecursively( nodes, context ); 00328 return nodes; 00329 case AncestorOrSelfAxis: { 00330 nodes->append( context ); 00331 NodeImpl *n = xpathParentNode( context ); 00332 while ( n ) { 00333 nodes->append( n ); 00334 n = xpathParentNode( n ); 00335 } 00336 return nodes; 00337 } 00338 default: 00339 kWarning(6011) << "Unknown axis " << axisAsString( m_axis ) << " passed to Step::nodesInAxis"; 00340 } 00341 00342 return nodes; // empty 00343 } 00344 00345 void Step::compileNodeTest(bool htmlCompat) const 00346 { 00347 m_compileState = htmlCompat ? CompiledForHTML : CompiledForXML; 00348 00349 if ( m_nodeTest == "*" ) { 00350 m_nodeTestType = NT_Star; 00351 } else if ( m_nodeTest == "text()" ) { 00352 m_nodeTestType = NT_Text; 00353 } else if ( m_nodeTest == "comment()" ) { 00354 m_nodeTestType = NT_Comment; 00355 } else if ( m_nodeTest.startsWith( "processing-instruction" ) ) { 00356 DOMString param; 00357 00358 // ### is this right? (parens?) 00359 const int space = m_nodeTest.find( ' ' ); 00360 if ( space > -1 ) { 00361 param = m_nodeTest.substring( space + 1 ); 00362 } 00363 00364 if ( param.isEmpty() ) { 00365 m_nodeTestType = NT_PI; 00366 } else { 00367 m_nodeTestType = NT_PI_Lit; 00368 m_piInfo = param; 00369 } 00370 } else if ( m_nodeTest == "node()" ) { 00371 m_nodeTestType = NT_AnyNode; 00372 } else { 00373 // Some sort of name combo. 00374 PrefixName prefix; 00375 LocalName localName; 00376 00377 splitPrefixLocalName( m_nodeTest, prefix, localName, htmlCompat ); 00378 00379 if ( prefix.id() == DOM::emptyPrefix ) { 00380 // localname only 00381 m_nodeTestType = NT_LocalName; 00382 m_localName = localName; 00383 } else if ( localName.toString() == "*" ) { 00384 // namespace only 00385 m_nodeTestType = NT_Namespace; 00386 m_namespace = NamespaceName::fromString(namespaceFromNodetest( m_nodeTest ) ); 00387 } else { 00388 // Both parts. 00389 m_nodeTestType = NT_QName; 00390 m_localName = localName; 00391 m_namespace = NamespaceName::fromString(namespaceFromNodetest( m_nodeTest ) ); 00392 } 00393 } 00394 } 00395 00396 DomNodeList Step::nodeTestMatches( NodeImpl* ctx, const DomNodeList &nodes ) const 00397 { 00398 CompileState desired = ctx->htmlCompat() ? CompiledForHTML : CompiledForXML; 00399 if ( m_compileState != desired ) 00400 compileNodeTest( ctx->htmlCompat() ); 00401 00402 if ( m_nodeTestType == NT_AnyNode) // no need for a new set 00403 return nodes; 00404 00405 DomNodeList matches = new StaticNodeListImpl; 00406 00407 // A lot of things can be handled by just checking the type, 00408 // and maybe a hair more special handling 00409 int matchType; 00410 switch ( m_nodeTestType ) { 00411 case NT_Star: 00412 matchType = primaryNodeType( m_axis ); 00413 break; 00414 case NT_Comment: 00415 matchType = Node::COMMENT_NODE; 00416 break; 00417 case NT_Text: 00418 matchType = Node::TEXT_NODE; 00419 break; 00420 case NT_PI: 00421 case NT_PI_Lit: 00422 matchType = Node::PROCESSING_INSTRUCTION_NODE; 00423 break; 00424 default: 00425 matchType = -1; 00426 } 00427 00428 if ( matchType != -1 ) { 00429 for ( unsigned long n = 0; n < nodes->length(); ++n ) { 00430 NodeImpl *node = nodes->item( n ); 00431 int nodeType = node->nodeType(); 00432 if ( nodeType == matchType ) { 00433 if ( m_nodeTestType == NT_PI_Lit && node->nodeName() != m_piInfo ) 00434 continue; 00435 00436 matches->append( node ); 00437 } 00438 00439 if ( matchType == NT_Text && nodeType == Node::CDATA_SECTION_NODE ) 00440 matches->append( node ); 00441 } 00442 return matches; 00443 } 00444 00445 // Now we are checking by name. We always want to filter out 00446 // the primary axis here 00447 // ### CHECK: this used to have special handling for some axes. 00448 matchType = primaryNodeType( m_axis ); 00449 for ( unsigned long n = 0; n < nodes->length(); ++n ) { 00450 NodeImpl *node = nodes->item( n ); 00451 if ( node->nodeType() != matchType ) 00452 continue; 00453 00454 if ( m_nodeTestType == NT_LocalName ) { 00455 if ( localNamePart( node->id() ) == m_localName.id() ) 00456 matches->append( node ); 00457 } else if ( m_nodeTestType == NT_Namespace ) { 00458 if ( namespacePart( node->id() ) == m_namespace.id() ) 00459 matches->append( node ); 00460 } else { 00461 // NT_QName 00462 if ( node->id() == makeId( m_namespace.id(), m_localName.id() ) ) 00463 matches->append( node ); 00464 } 00465 } 00466 return matches; 00467 } 00468 00469 void Step::optimize() 00470 { 00471 foreach( Predicate *predicate, m_predicates ) { 00472 predicate->optimize(); 00473 } 00474 } 00475 00476 QString Step::dump() const 00477 { 00478 QString s = QString( "<step axis=\"%1\" nodetest=\"%2\">" ).arg( axisAsString( m_axis ) ).arg( m_nodeTest.string() ); 00479 foreach( Predicate *predicate, m_predicates ) { 00480 s += predicate->dump(); 00481 } 00482 s += "</step>"; 00483 return s; 00484 } 00485 00486 DOMString Step::namespaceFromNodetest( const DOMString &nodeTest ) const 00487 { 00488 int i = nodeTest.find( ':' ); 00489 if ( i == -1 ) { 00490 return DOMString(); 00491 } 00492 00493 DOMString prefix( nodeTest.substring( 0, i ) ); 00494 XPathNSResolverImpl *resolver = Expression::evaluationContext().resolver; 00495 00496 DOM::DOMString ns; 00497 if (resolver) 00498 ns = resolver->lookupNamespaceURI( prefix ); 00499 00500 if ( ns.isNull() ) 00501 Expression::reportNamespaceErr(); 00502 00503 return ns; 00504 } 00505 00506 unsigned int Step::primaryNodeType( AxisType axis ) const 00507 { 00508 switch ( axis ) { 00509 case AttributeAxis: 00510 return Node::ATTRIBUTE_NODE; 00511 case NamespaceAxis: 00512 return Node::XPATH_NAMESPACE_NODE; 00513 default: 00514 return Node::ELEMENT_NODE; 00515 } 00516 } 00517 00518 // kate: indent-width 4; replace-tabs off; tab-width 4; space-indent off;
KDE 4.6 API Reference