code.vuplus.com Git - vuplus_webkit/blob - Source/WebCore/xml/XPathParser.cpp

   1 /*
   2  * Copyright 2005 Maksim Orlovich <maksim@kde.org>
   3  * Copyright (C) 2006 Apple Computer, Inc.
   4  * Copyright (C) 2007 Alexey Proskuryakov <ap@webkit.org>
   5  *
   6  * Redistribution and use in source and binary forms, with or without
   7  * modification, are permitted provided that the following conditions
   8  * are met:
   9  *
  10  * 1. Redistributions of source code must retain the above copyright
  11  *    notice, this list of conditions and the following disclaimer.
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice, this list of conditions and the following disclaimer in the
  14  *    documentation and/or other materials provided with the distribution.
  15  *
  16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26  */
  27
  28 #include "config.h"
  29 #include "XPathParser.h"
  30
  31 #if ENABLE(XPATH)
  32
  33 #include "ExceptionCode.h"
  34 #include "XPathEvaluator.h"
  35 #include "XPathException.h"
  36 #include "XPathNSResolver.h"
  37 #include "XPathStep.h"
  38 #include <wtf/StdLibExtras.h>
  39 #include <wtf/text/StringHash.h>
  40
  41 int xpathyyparse(void*);
  42
  43 using namespace WTF;
  44 using namespace Unicode;
  45
  46 namespace WebCore {
  47 namespace XPath {
  48
  49 class LocationPath;
  50
  51 #include "XPathGrammar.h"
  52
  53 Parser* Parser::currentParser = 0;
  54
  55 enum XMLCat { NameStart, NameCont, NotPartOfName };
  56
  57 typedef HashMap<String, Step::Axis> AxisNamesMap;
  58
  59 static XMLCat charCat(UChar aChar)
  60 {
  61     //### might need to add some special cases from the XML spec.
  62
  63     if (aChar == '_')
  64         return NameStart;
  65
  66     if (aChar == '.' || aChar == '-')
  67         return NameCont;
  68     CharCategory category = Unicode::category(aChar);
  69     if (category & (Letter_Uppercase | Letter_Lowercase | Letter_Other | Letter_Titlecase | Number_Letter))
  70         return NameStart;
  71     if (category & (Mark_NonSpacing | Mark_SpacingCombining | Mark_Enclosing | Letter_Modifier | Number_DecimalDigit))
  72         return NameCont;
  73     return NotPartOfName;
  74 }
  75
  76 static void setUpAxisNamesMap(AxisNamesMap& axisNames)
  77 {
  78     struct AxisName {
  79         const char* name;
  80         Step::Axis axis;
  81     };
  82     const AxisName axisNameList[] = {
  83         { "ancestor", Step::AncestorAxis },
  84         { "ancestor-or-self", Step::AncestorOrSelfAxis },
  85         { "attribute", Step::AttributeAxis },
  86         { "child", Step::ChildAxis },
  87         { "descendant", Step::DescendantAxis },
  88         { "descendant-or-self", Step::DescendantOrSelfAxis },
  89         { "following", Step::FollowingAxis },
  90         { "following-sibling", Step::FollowingSiblingAxis },
  91         { "namespace", Step::NamespaceAxis },
  92         { "parent", Step::ParentAxis },
  93         { "preceding", Step::PrecedingAxis },
  94         { "preceding-sibling", Step::PrecedingSiblingAxis },
  95         { "self", Step::SelfAxis }
  96     };
  97     for (unsigned i = 0; i < sizeof(axisNameList) / sizeof(axisNameList[0]); ++i)
  98         axisNames.set(axisNameList[i].name, axisNameList[i].axis);
  99 }
 100
 101 static bool isAxisName(const String& name, Step::Axis& type)
 102 {
 103     DEFINE_STATIC_LOCAL(AxisNamesMap, axisNames, ());
 104
 105     if (axisNames.isEmpty())
 106         setUpAxisNamesMap(axisNames);
 107
 108     AxisNamesMap::iterator it = axisNames.find(name);
 109     if (it == axisNames.end())
 110         return false;
 111     type = it->second;
 112     return true;
 113 }
 114
 115 static bool isNodeTypeName(const String& name)
 116 {
 117     DEFINE_STATIC_LOCAL(HashSet<String>, nodeTypeNames, ());
 118     if (nodeTypeNames.isEmpty()) {
 119         nodeTypeNames.add("comment");
 120         nodeTypeNames.add("text");
 121         nodeTypeNames.add("processing-instruction");
 122         nodeTypeNames.add("node");
 123     }
 124     return nodeTypeNames.contains(name);
 125 }
 126
 127 // Returns whether the current token can possibly be a binary operator, given
 128 // the previous token. Necessary to disambiguate some of the operators
 129 // (* (multiply), div, and, or, mod) in the [32] Operator rule
 130 // (check http://www.w3.org/TR/xpath#exprlex).
 131 bool Parser::isBinaryOperatorContext() const
 132 {
 133     switch (m_lastTokenType) {
 134     case 0:
 135     case '@': case AXISNAME: case '(': case '[': case ',':
 136     case AND: case OR: case MULOP:
 137     case '/': case SLASHSLASH: case '|': case PLUS: case MINUS:
 138     case EQOP: case RELOP:
 139         return false;
 140     default:
 141         return true;
 142     }
 143 }
 144
 145 void Parser::skipWS()
 146 {
 147     while (m_nextPos < m_data.length() && isSpaceOrNewline(m_data[m_nextPos]))
 148         ++m_nextPos;
 149 }
 150
 151 Token Parser::makeTokenAndAdvance(int code, int advance)
 152 {
 153     m_nextPos += advance;
 154     return Token(code);
 155 }
 156
 157 Token Parser::makeTokenAndAdvance(int code, NumericOp::Opcode val, int advance)
 158 {
 159     m_nextPos += advance;
 160     return Token(code, val);
 161 }
 162
 163 Token Parser::makeTokenAndAdvance(int code, EqTestOp::Opcode val, int advance)
 164 {
 165     m_nextPos += advance;
 166     return Token(code, val);
 167 }
 168
 169 // Returns next char if it's there and interesting, 0 otherwise
 170 char Parser::peekAheadHelper()
 171 {
 172     if (m_nextPos + 1 >= m_data.length())
 173         return 0;
 174     UChar next = m_data[m_nextPos + 1];
 175     if (next >= 0xff)
 176         return 0;
 177     return next;
 178 }
 179
 180 char Parser::peekCurHelper()
 181 {
 182     if (m_nextPos >= m_data.length())
 183         return 0;
 184     UChar next = m_data[m_nextPos];
 185     if (next >= 0xff)
 186         return 0;
 187     return next;
 188 }
 189
 190 Token Parser::lexString()
 191 {
 192     UChar delimiter = m_data[m_nextPos];
 193     int startPos = m_nextPos + 1;
 194
 195     for (m_nextPos = startPos; m_nextPos < m_data.length(); ++m_nextPos) {
 196         if (m_data[m_nextPos] == delimiter) {
 197             String value = m_data.substring(startPos, m_nextPos - startPos);
 198             if (value.isNull())
 199                 value = "";
 200             ++m_nextPos; // Consume the char.
 201             return Token(LITERAL, value);
 202         }
 203     }
 204
 205     // Ouch, went off the end -- report error.
 206     return Token(XPATH_ERROR);
 207 }
 208
 209 Token Parser::lexNumber()
 210 {
 211     int startPos = m_nextPos;
 212     bool seenDot = false;
 213
 214     // Go until end or a non-digits character.
 215     for (; m_nextPos < m_data.length(); ++m_nextPos) {
 216         UChar aChar = m_data[m_nextPos];
 217         if (aChar >= 0xff) break;
 218
 219         if (aChar < '0' || aChar > '9') {
 220             if (aChar == '.' && !seenDot)
 221                 seenDot = true;
 222             else
 223                 break;
 224         }
 225     }
 226
 227     return Token(NUMBER, m_data.substring(startPos, m_nextPos - startPos));
 228 }
 229
 230 bool Parser::lexNCName(String& name)
 231 {
 232     int startPos = m_nextPos;
 233     if (m_nextPos >= m_data.length())
 234         return false;
 235
 236     if (charCat(m_data[m_nextPos]) != NameStart)
 237         return false;
 238
 239     // Keep going until we get a character that's not good for names.
 240     for (; m_nextPos < m_data.length(); ++m_nextPos)
 241         if (charCat(m_data[m_nextPos]) == NotPartOfName)
 242             break;
 243
 244     name = m_data.substring(startPos, m_nextPos - startPos);
 245     return true;
 246 }
 247
 248 bool Parser::lexQName(String& name)
 249 {
 250     String n1;
 251     if (!lexNCName(n1))
 252         return false;
 253
 254     skipWS();
 255
 256     // If the next character is :, what we just got it the prefix, if not,
 257     // it's the whole thing.
 258     if (peekAheadHelper() != ':') {
 259         name = n1;
 260         return true;
 261     }
 262
 263     String n2;
 264     if (!lexNCName(n2))
 265         return false;
 266
 267     name = n1 + ":" + n2;
 268     return true;
 269 }
 270
 271 Token Parser::nextTokenInternal()
 272 {
 273     skipWS();
 274
 275     if (m_nextPos >= m_data.length())
 276         return Token(0);
 277
 278     char code = peekCurHelper();
 279     switch (code) {
 280     case '(': case ')': case '[': case ']':
 281     case '@': case ',': case '|':
 282         return makeTokenAndAdvance(code);
 283     case '\'':
 284     case '\"':
 285         return lexString();
 286     case '0': case '1': case '2': case '3': case '4':
 287     case '5': case '6': case '7': case '8': case '9':
 288         return lexNumber();
 289     case '.': {
 290         char next = peekAheadHelper();
 291         if (next == '.')
 292             return makeTokenAndAdvance(DOTDOT, 2);
 293         if (next >= '0' && next <= '9')
 294             return lexNumber();
 295         return makeTokenAndAdvance('.');
 296     }
 297     case '/':
 298         if (peekAheadHelper() == '/')
 299             return makeTokenAndAdvance(SLASHSLASH, 2);
 300         return makeTokenAndAdvance('/');
 301     case '+':
 302         return makeTokenAndAdvance(PLUS);
 303     case '-':
 304         return makeTokenAndAdvance(MINUS);
 305     case '=':
 306         return makeTokenAndAdvance(EQOP, EqTestOp::OP_EQ);
 307     case '!':
 308         if (peekAheadHelper() == '=')
 309             return makeTokenAndAdvance(EQOP, EqTestOp::OP_NE, 2);
 310         return Token(XPATH_ERROR);
 311     case '<':
 312         if (peekAheadHelper() == '=')
 313             return makeTokenAndAdvance(RELOP, EqTestOp::OP_LE, 2);
 314         return makeTokenAndAdvance(RELOP, EqTestOp::OP_LT);
 315     case '>':
 316         if (peekAheadHelper() == '=')
 317             return makeTokenAndAdvance(RELOP, EqTestOp::OP_GE, 2);
 318         return makeTokenAndAdvance(RELOP, EqTestOp::OP_GT);
 319     case '*':
 320         if (isBinaryOperatorContext())
 321             return makeTokenAndAdvance(MULOP, NumericOp::OP_Mul);
 322         ++m_nextPos;
 323         return Token(NAMETEST, "*");
 324     case '$': { // $ QName
 325         m_nextPos++;
 326         String name;
 327         if (!lexQName(name))
 328             return Token(XPATH_ERROR);
 329         return Token(VARIABLEREFERENCE, name);
 330     }
 331     }
 332
 333     String name;
 334     if (!lexNCName(name))
 335         return Token(XPATH_ERROR);
 336
 337     skipWS();
 338     // If we're in an operator context, check for any operator names
 339     if (isBinaryOperatorContext()) {
 340         if (name == "and") //### hash?
 341             return Token(AND);
 342         if (name == "or")
 343             return Token(OR);
 344         if (name == "mod")
 345             return Token(MULOP, NumericOp::OP_Mod);
 346         if (name == "div")
 347             return Token(MULOP, NumericOp::OP_Div);
 348     }
 349
 350     // See whether we are at a :
 351     if (peekCurHelper() == ':') {
 352         m_nextPos++;
 353         // Any chance it's an axis name?
 354         if (peekCurHelper() == ':') {
 355             m_nextPos++;
 356
 357             //It might be an axis name.
 358             Step::Axis axis;
 359             if (isAxisName(name, axis))
 360                 return Token(AXISNAME, axis);
 361             // Ugh, :: is only valid in axis names -> error
 362             return Token(XPATH_ERROR);
 363         }
 364
 365         // Seems like this is a fully qualified qname, or perhaps the * modified one from NameTest
 366         skipWS();
 367         if (peekCurHelper() == '*') {
 368             m_nextPos++;
 369             return Token(NAMETEST, name + ":*");
 370         }
 371
 372         // Make a full qname.
 373         String n2;
 374         if (!lexNCName(n2))
 375             return Token(XPATH_ERROR);
 376
 377         name = name + ":" + n2;
 378     }
 379
 380     skipWS();
 381     if (peekCurHelper() == '(') {
 382         //note: we don't swallow the (here!
 383
 384         //either node type of function name
 385         if (isNodeTypeName(name)) {
 386             if (name == "processing-instruction")
 387                 return Token(PI, name);
 388
 389             return Token(NODETYPE, name);
 390         }
 391         //must be a function name.
 392         return Token(FUNCTIONNAME, name);
 393     }
 394
 395     // At this point, it must be NAMETEST.
 396     return Token(NAMETEST, name);
 397 }
 398
 399 Token Parser::nextToken()
 400 {
 401     Token toRet = nextTokenInternal();
 402     m_lastTokenType = toRet.type;
 403     return toRet;
 404 }
 405
 406 Parser::Parser()
 407 {
 408     reset(String());
 409 }
 410
 411 Parser::~Parser()
 412 {
 413 }
 414
 415 void Parser::reset(const String& data)
 416 {
 417     m_nextPos = 0;
 418     m_data = data;
 419     m_lastTokenType = 0;
 420
 421     m_topExpr = 0;
 422     m_gotNamespaceError = false;
 423 }
 424
 425 int Parser::lex(void* data)
 426 {
 427     YYSTYPE* yylval = static_cast<YYSTYPE*>(data);
 428     Token tok = nextToken();
 429
 430     switch (tok.type) {
 431     case AXISNAME:
 432         yylval->axis = tok.axis;
 433         break;
 434     case MULOP:
 435         yylval->numop = tok.numop;
 436         break;
 437     case RELOP:
 438     case EQOP:
 439         yylval->eqop = tok.eqop;
 440         break;
 441     case NODETYPE:
 442     case PI:
 443     case FUNCTIONNAME:
 444     case LITERAL:
 445     case VARIABLEREFERENCE:
 446     case NUMBER:
 447     case NAMETEST:
 448         yylval->str = new String(tok.str);
 449         registerString(yylval->str);
 450         break;
 451     }
 452
 453     return tok.type;
 454 }
 455
 456 bool Parser::expandQName(const String& qName, String& localName, String& namespaceURI)
 457 {
 458     size_t colon = qName.find(':');
 459     if (colon != notFound) {
 460         if (!m_resolver)
 461             return false;
 462         namespaceURI = m_resolver->lookupNamespaceURI(qName.left(colon));
 463         if (namespaceURI.isNull())
 464             return false;
 465         localName = qName.substring(colon + 1);
 466     } else
 467         localName = qName;
 468
 469     return true;
 470 }
 471
 472 Expression* Parser::parseStatement(const String& statement, PassRefPtr<XPathNSResolver> resolver, ExceptionCode& ec)
 473 {
 474     reset(statement);
 475
 476     m_resolver = resolver;
 477
 478     Parser* oldParser = currentParser;
 479     currentParser = this;
 480     int parseError = xpathyyparse(this);
 481     currentParser = oldParser;
 482
 483     if (parseError) {
 484         deleteAllValues(m_parseNodes);
 485         m_parseNodes.clear();
 486
 487         HashSet<Vector<Predicate*>*>::iterator pend = m_predicateVectors.end();
 488         for (HashSet<Vector<Predicate*>*>::iterator it = m_predicateVectors.begin(); it != pend; ++it) {
 489             deleteAllValues(**it);
 490             delete *it;
 491         }
 492         m_predicateVectors.clear();
 493
 494         HashSet<Vector<Expression*>*>::iterator eend = m_expressionVectors.end();
 495         for (HashSet<Vector<Expression*>*>::iterator it = m_expressionVectors.begin(); it != eend; ++it) {
 496             deleteAllValues(**it);
 497             delete *it;
 498         }
 499         m_expressionVectors.clear();
 500
 501         deleteAllValues(m_strings);
 502         m_strings.clear();
 503
 504         deleteAllValues(m_nodeTests);
 505         m_nodeTests.clear();
 506
 507         m_topExpr = 0;
 508
 509         if (m_gotNamespaceError)
 510             ec = NAMESPACE_ERR;
 511         else
 512             ec = XPathException::INVALID_EXPRESSION_ERR;
 513         return 0;
 514     }
 515
 516     ASSERT(m_parseNodes.size() == 1);
 517     ASSERT(*m_parseNodes.begin() == m_topExpr);
 518     ASSERT(m_expressionVectors.size() == 0);
 519     ASSERT(m_predicateVectors.size() == 0);
 520     ASSERT(m_strings.size() == 0);
 521     ASSERT(m_nodeTests.size() == 0);
 522
 523     m_parseNodes.clear();
 524     Expression* result = m_topExpr;
 525     m_topExpr = 0;
 526
 527     return result;
 528 }
 529
 530 void Parser::registerParseNode(ParseNode* node)
 531 {
 532     if (node == 0)
 533         return;
 534
 535     ASSERT(!m_parseNodes.contains(node));
 536
 537     m_parseNodes.add(node);
 538 }
 539
 540 void Parser::unregisterParseNode(ParseNode* node)
 541 {
 542     if (node == 0)
 543         return;
 544
 545     ASSERT(m_parseNodes.contains(node));
 546
 547     m_parseNodes.remove(node);
 548 }
 549
 550 void Parser::registerPredicateVector(Vector<Predicate*>* vector)
 551 {
 552     if (vector == 0)
 553         return;
 554
 555     ASSERT(!m_predicateVectors.contains(vector));
 556
 557     m_predicateVectors.add(vector);
 558 }
 559
 560 void Parser::deletePredicateVector(Vector<Predicate*>* vector)
 561 {
 562     if (vector == 0)
 563         return;
 564
 565     ASSERT(m_predicateVectors.contains(vector));
 566
 567     m_predicateVectors.remove(vector);
 568     delete vector;
 569 }
 570
 571
 572 void Parser::registerExpressionVector(Vector<Expression*>* vector)
 573 {
 574     if (vector == 0)
 575         return;
 576
 577     ASSERT(!m_expressionVectors.contains(vector));
 578
 579     m_expressionVectors.add(vector);
 580 }
 581
 582 void Parser::deleteExpressionVector(Vector<Expression*>* vector)
 583 {
 584     if (vector == 0)
 585         return;
 586
 587     ASSERT(m_expressionVectors.contains(vector));
 588
 589     m_expressionVectors.remove(vector);
 590     delete vector;
 591 }
 592
 593 void Parser::registerString(String* s)
 594 {
 595     if (s == 0)
 596         return;
 597
 598     ASSERT(!m_strings.contains(s));
 599
 600     m_strings.add(s);
 601 }
 602
 603 void Parser::deleteString(String* s)
 604 {
 605     if (s == 0)
 606         return;
 607
 608     ASSERT(m_strings.contains(s));
 609
 610     m_strings.remove(s);
 611     delete s;
 612 }
 613
 614 void Parser::registerNodeTest(Step::NodeTest* t)
 615 {
 616     if (t == 0)
 617         return;
 618
 619     ASSERT(!m_nodeTests.contains(t));
 620
 621     m_nodeTests.add(t);
 622 }
 623
 624 void Parser::deleteNodeTest(Step::NodeTest* t)
 625 {
 626     if (t == 0)
 627         return;
 628
 629     ASSERT(m_nodeTests.contains(t));
 630
 631     m_nodeTests.remove(t);
 632     delete t;
 633 }
 634
 635 }
 636 }
 637
 638 #endif // ENABLE(XPATH)