/*
 * Copyright (C) 2003 Apple Computer, Inc.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#import "KWQRegExp.h"
#import "KWQLogging.h"

#import <sys/types.h>

#import <JavaScriptCore/pcre.h>
#import <JavaScriptCore/ustring.h>

using KJS::convertUTF16OffsetsToUTF8Offsets;
using KJS::convertUTF8OffsetsToUTF16Offsets;

// Shared state behind a QRegExp: the source pattern, the compiled pcre
// program, and the position/length of the most recent match. QRegExp copies
// share one instance of this class through their `d` pointer, so the
// last-match state is shared between copies as well.
class QRegExp::KWQRegExpPrivate
{
public:
    KWQRegExpPrivate();
    KWQRegExpPrivate(QString pattern, bool caseSensitive, bool glob);
    ~KWQRegExpPrivate();

    void compile(bool caseSensitive, bool glob);

    QString pattern;
    pcre *regex;

    uint refCount;

    // Both are -1 when there is no current match.
    int lastMatchPos;
    int lastMatchLength;
};

// Builds the private state for an empty, case-sensitive, non-glob pattern.
// The last-match fields start at -1 so that pos() and matchedLength() are
// well defined before the first match attempt.
QRegExp::KWQRegExpPrivate::KWQRegExpPrivate()
    : pattern(""), refCount(0), lastMatchPos(-1), lastMatchLength(-1)
{
    compile(true, false);
}

// Builds the private state for pattern `p` and compiles it immediately.
QRegExp::KWQRegExpPrivate::KWQRegExpPrivate(QString p, bool caseSensitive, bool glob)
    : pattern(p), refCount(0), lastMatchPos(-1), lastMatchLength(-1)
{
    compile(caseSensitive, glob);
}

// Translates a glob pattern into an anchored regular expression: `*` becomes
// `.*`, `?` becomes `.`, and regexp metacharacters that have no meaning in a
// glob are backslash-escaped so they match literally.
static QString RegExpFromGlob(QString glob)
{
    QString result = glob;

    // escape regexp metacharacters which are NOT glob metacharacters
    // (the backslash must be handled first so the escapes added below are
    // not themselves escaped)

    result.replace(QRegExp("\\\\"), "\\\\");
    result.replace(QRegExp("\\."), "\\.");
    result.replace(QRegExp("\\+"), "\\+");
    result.replace(QRegExp("\\$"), "\\$");
    // FIXME: incorrect for ^ inside bracket group
    result.replace(QRegExp("\\^"), "\\^");
    // Grouping, alternation and brace quantifiers are regexp-only syntax too;
    // without these a glob such as "a(b)" would compile as a capture group.
    result.replace(QRegExp("\\("), "\\(");
    result.replace(QRegExp("\\)"), "\\)");
    result.replace(QRegExp("\\|"), "\\|");
    result.replace(QRegExp("\\{"), "\\{");
    result.replace(QRegExp("\\}"), "\\}");

    // translate glob metacharacters into regexp metacharacters
    result.replace(QRegExp("\\*"), ".*");
    result.replace(QRegExp("\\?"), ".");

    // Require the glob to match the whole string
    result = "^" + result + "$";

    return result;
}

// Compiles `pattern` into `regex` with pcre. When `glob` is true the pattern
// is first translated by RegExpFromGlob. On failure the error is logged and
// `regex` is left NULL.
void QRegExp::KWQRegExpPrivate::compile(bool caseSensitive, bool glob)
{
    QString p;

    if (glob) {
        p = RegExpFromGlob(pattern);
    } else {
        p = pattern;
    }

    // Note we don't honor the Qt syntax for various character classes.  If we convert
    // to a different underlying engine, we may need to change client code that relies
    // on the regex syntax (see KWQKHTMLPart.mm for a couple examples).

    // pcre is always run in UTF-8 mode; pure-ASCII patterns skip the
    // conversion because their ASCII form is already valid UTF-8.
    QCString asUTF8;
    const char *cpattern;
    if (p.isAllASCII()) {
        cpattern = p.ascii();
    } else {
        asUTF8 = p.utf8();
        cpattern = asUTF8;
    }

    const char *errorMessage;
    int errorOffset;
    regex = pcre_compile(cpattern, PCRE_UTF8 | (caseSensitive ? 0 : PCRE_CASELESS), &errorMessage, &errorOffset, NULL);
    if (regex == NULL) {
        ERROR("KWQRegExp: pcre_compile failed with '%s'", errorMessage);
    }
}

// Releases the compiled pcre program.
QRegExp::KWQRegExpPrivate::~KWQRegExpPrivate()
{
    pcre_free(regex);
}


// Constructs a regexp for the empty pattern.
QRegExp::QRegExp() : d(new QRegExp::KWQRegExpPrivate())
{
}

// Constructs a regexp from `pattern`; `glob` selects glob (wildcard) syntax.
QRegExp::QRegExp(const QString &pattern, bool caseSensitive, bool glob) : d(new QRegExp::KWQRegExpPrivate(pattern, caseSensitive, glob))
{
}

// Constructs a case-sensitive, non-glob regexp from a C string.
QRegExp::QRegExp(const char *cpattern) : d(new QRegExp::KWQRegExpPrivate(cpattern, true, false))
{
}

// Copies share the same private state as `re`.
QRegExp::QRegExp(const QRegExp &re) : d (re.d)
{
}

QRegExp::~QRegExp()
{
}

// Assignment by copy-and-swap: `tmp` ends up holding the old private state
// and drops its reference when it goes out of scope.
QRegExp &QRegExp::operator=(const QRegExp &re)
{
    QRegExp tmp(re);
    KWQRefPtr<QRegExp::KWQRegExpPrivate> tmpD = tmp.d;

    tmp.d = d;
    d = tmpD;

    return *this;
}

// Returns the pattern string this regexp was constructed with.
QString QRegExp::pattern() const
{
    return d->pattern;
}

// Looks for the first match in `str` at or after `startFrom`.
//
// Returns the position of the match, or -1 when there is none (including when
// `startFrom` lies outside the string or pcre reports an error). When
// `matchLength` is non-NULL and a match is found, the match length is stored
// through it. The result is also remembered for pos() and matchedLength().
// Positions and lengths are passed through the UTF-16 <-> UTF-8 offset
// converters so that callers deal in QString offsets.
int QRegExp::match(const QString &str, int startFrom, int *matchLength) const
{
    // No match can begin before the start or past the end of the string, so
    // don't hand such an offset to the offset converter or to pcre_exec.
    if (startFrom < 0 || startFrom > static_cast<int>(str.length())) {
        d->lastMatchPos = -1;
        d->lastMatchLength = -1;
        return -1;
    }

    QCString asUTF8;
    const char *cstring;
    if (str.isAllASCII()) {
        cstring = str.ascii();
    } else {
        asUTF8 = str.utf8();
        cstring = asUTF8;
    }

    // first 2 offsets are start and end offsets; 3rd entry is used internally by pcre
    int offsets[3];
    convertUTF16OffsetsToUTF8Offsets(cstring, &startFrom, 1);
    // PCRE_NOTBOL is passed whenever the search does not start at offset 0.
    int result = pcre_exec(d->regex, NULL, cstring, strlen(cstring), startFrom,
                           startFrom == 0 ? 0 : PCRE_NOTBOL, offsets, 3);

    if (result < 0) {
        if (result != PCRE_ERROR_NOMATCH) {
            ERROR("KWQRegExp: pcre_exec() failed with result %d", result);
        }
        d->lastMatchPos = -1;
        d->lastMatchLength = -1;
        return -1;
    }

    ASSERT(result < 2);
    // 1 means 1 match; 0 means more than one match, first one is recorded in offsets
    convertUTF8OffsetsToUTF16Offsets(cstring, offsets, 2);
    d->lastMatchPos = offsets[0];
    d->lastMatchLength = offsets[1] - offsets[0];
    if (matchLength != NULL) {
        *matchLength = d->lastMatchLength;
    }
    return d->lastMatchPos;
}

// Searches `str` forward from `startFrom` and returns the match position, or
// -1 when nothing matches. A negative `startFrom` counts back from the end of
// the string (-1 is the last character).
int QRegExp::search(const QString &str, int startFrom) const
{
    if (startFrom < 0) {
        startFrom = static_cast<int>(str.length()) + startFrom;
    }
    return match(str, startFrom, NULL);
}

// Returns the position of the last match in `str`, or -1 when there is none,
// and records it as the current match.
int QRegExp::searchRev(const QString &str) const
{
    // FIXME: Total hack for now.  Search forward, return the last, greedy match
    int start = 0;
    int pos;
    int lastPos = -1;
    int lastMatchLength = -1;
    do {
        int matchLength;
        pos = match(str, start, &matchLength);
        if (pos >= 0) {
            if ((pos+matchLength) > (lastPos+lastMatchLength)) {
                // replace last match if this one is later and not a subset of the last match
                lastPos = pos;
                lastMatchLength = matchLength;
            }
            // Resume one position past this match's start; match() reports
            // no match once this runs past the end of the string.
            start = pos + 1;
        }
    } while (pos != -1);
    // The final, failed match() call reset the shared state; restore the
    // winning match so pos() and matchedLength() describe it.
    d->lastMatchPos = lastPos;
    d->lastMatchLength = lastMatchLength;
    return lastPos;
}

// Returns the position of the most recent match, or -1 when there is none.
// Only n == 0 (the whole match) is supported.
int QRegExp::pos(int n)
{
    ASSERT(n == 0);
    return d->lastMatchPos;
}

// Returns the length of the most recent match, or -1 when there is none.
int QRegExp::matchedLength() const
{
    return d->lastMatchLength;
}