// ContentSearchUtilities.cpp [plain text]
#include "config.h"
#include "ContentSearchUtilities.h"
#if ENABLE(INSPECTOR)
#include "InspectorJSTypeBuilders.h"
#include "InspectorValues.h"
#include "RegularExpression.h"
#include "Yarr.h"
#include <wtf/BumpPointerAllocator.h>
#include <wtf/StdLibExtras.h>
#include <wtf/text/StringBuilder.h>
using namespace JSC::Yarr;
namespace Inspector {
namespace ContentSearchUtilities {
// Characters that receive a backslash prefix when a plain-text query is turned
// into a regular expression source.
static const char regexSpecialCharacters[] = "[](){}+-*.,?\\^$|";

// Builds a regular expression source from |text| by inserting a backslash in
// front of every ASCII character that appears in regexSpecialCharacters.
// All other characters are copied through unchanged.
//
// Note: strchr() treats the terminating NUL of the table as part of the
// string, so a U+0000 character in |text| is prefixed with a backslash too.
static String createSearchRegexSource(const String& text)
{
    StringBuilder escaped;
    const unsigned length = text.length();
    for (unsigned index = 0; index != length; ++index) {
        const UChar current = text[index];
        const bool needsEscape = isASCII(current) && strchr(regexSpecialCharacters, current);
        if (needsEscape)
            escaped.append('\\');
        escaped.append(current);
    }
    return escaped.toString();
}
// Key extractor handed to approximateBinarySearch(): yields the size_t stored
// at |value|.
static inline size_t sizetExtractor(const size_t* value)
{
    return value[0];
}
// Converts the character |offset| into a zero-based (line, column)
// TextPosition, using the offsets stored in |lineEndings|.
//
// The entry located by approximateBinarySearch() is used as the line index;
// the index is advanced by one when |offset| is at or beyond that entry. The
// column is measured from the preceding entry, or from 0 for the first line.
//
// NOTE(review): element 0 of |lineEndings| is accessed unconditionally, so
// callers presumably must pass a non-empty vector - confirm.
TextPosition textPositionFromOffset(size_t offset, const Vector<size_t>& lineEndings)
{
    const size_t* nearest = approximateBinarySearch<size_t, size_t>(lineEndings, lineEndings.size(), offset, sizetExtractor);
    size_t line = nearest - &lineEndings.at(0);
    if (*nearest <= offset)
        ++line;

    size_t lineStart = 0;
    if (line > 0)
        lineStart = lineEndings.at(line - 1);

    return TextPosition(OrdinalNumber::fromZeroBasedInt(line), OrdinalNumber::fromZeroBasedInt(offset - lineStart));
}
// Splits |text| at the offsets returned by lineEndings() and tests each piece
// against |regex|. Returns one (zero-based line number, line text) pair for
// every piece in which the regex matches; an empty |text| yields no pairs.
// Each piece is the substring between two consecutive offsets.
static Vector<std::pair<size_t, String>> getRegularExpressionMatchesByLines(const JSC::Yarr::RegularExpression& regex, const String& text)
{
    Vector<std::pair<size_t, String>> matchedLines;
    if (text.isEmpty())
        return matchedLines;

    const std::unique_ptr<Vector<size_t>> boundaries(lineEndings(text));
    size_t lineBegin = 0;
    size_t lineIndex = 0;
    for (size_t boundary : *boundaries) {
        String lineText = text.substring(lineBegin, boundary - lineBegin);

        // Only whether a match exists matters here; its length is not used.
        int ignoredMatchLength;
        if (regex.match(lineText, 0, &ignoredMatchLength) != -1)
            matchedLines.append(std::pair<size_t, String>(lineIndex, lineText));

        lineBegin = boundary;
        ++lineIndex;
    }
    return matchedLines;
}
// Returns the list of offsets that String::findNextLineStart() reports for
// |text| when walking it from the beginning, followed by text.length().
//
// When findNextLineStart() reports notFound, text.length() is appended inside
// the loop and then once more after it, so the length appears twice on that
// path. An empty |text| yields a single entry (0).
std::unique_ptr<Vector<size_t>> lineEndings(const String& text)
{
    auto endings = std::make_unique<Vector<size_t>>();

    const size_t textLength = text.length();
    for (size_t cursor = 0; cursor < textLength;) {
        const size_t lineStart = text.findNextLineStart(cursor);
        if (lineStart != notFound) {
            endings->append(lineStart);
            cursor = lineStart;
            continue;
        }

        // No further line start: record the end of the text and stop scanning.
        endings->append(textLength);
        break;
    }

    endings->append(textLength);
    return endings;
}
// Creates a SearchMatch protocol object carrying |lineNumber| and
// |lineContent|.
static PassRefPtr<Inspector::TypeBuilder::GenericTypes::SearchMatch> buildObjectForSearchMatch(size_t lineNumber, const String& lineContent)
{
    typedef Inspector::TypeBuilder::GenericTypes::SearchMatch SearchMatch;
    return SearchMatch::create().setLineNumber(lineNumber).setLineContent(lineContent).release();
}
// Builds the regular expression used for a content search.
//   query         - the text typed by the user.
//   caseSensitive - selects TextCaseSensitive vs. TextCaseInsensitive.
//   isRegex       - when true |query| is used verbatim as the pattern; when
//                   false it is first passed through createSearchRegexSource()
//                   so that it is matched as literal text.
JSC::Yarr::RegularExpression createSearchRegex(const String& query, bool caseSensitive, bool isRegex)
{
    String pattern;
    if (isRegex)
        pattern = query;
    else
        pattern = createSearchRegexSource(query);

    if (caseSensitive)
        return JSC::Yarr::RegularExpression(pattern, TextCaseSensitive);
    return JSC::Yarr::RegularExpression(pattern, TextCaseInsensitive);
}
// Counts the matches of |regex| in |content| that have a non-zero length.
// Returns 0 for empty content.
//
// After each match the search resumes one character past the position where
// that match started (not past its end). The loop stops when no further match
// is found or when the search start has reached the end of |content|.
int countRegularExpressionMatches(const JSC::Yarr::RegularExpression& regex, const String& content)
{
    if (content.isEmpty())
        return 0;

    int matchCount = 0;
    unsigned searchFrom = 0;
    for (;;) {
        int matchLength;
        const int matchOffset = regex.match(content, searchFrom, &matchLength);
        if (matchOffset == -1 || searchFrom >= content.length())
            break;

        // Zero-length matches advance the scan but are not counted.
        if (matchLength > 0)
            ++matchCount;
        searchFrom = matchOffset + 1;
    }
    return matchCount;
}
// Searches |text| piece by piece (as split by getRegularExpressionMatchesByLines)
// for |query| and returns an array with one SearchMatch object per matching
// piece. |caseSensitive| and |isRegex| are forwarded to createSearchRegex().
PassRefPtr<Inspector::TypeBuilder::Array<Inspector::TypeBuilder::GenericTypes::SearchMatch>> searchInTextByLines(const String& text, const String& query, const bool caseSensitive, const bool isRegex)
{
    typedef Inspector::TypeBuilder::GenericTypes::SearchMatch SearchMatch;
    typedef Inspector::TypeBuilder::Array<SearchMatch> SearchMatchArray;

    RefPtr<SearchMatchArray> matchArray = SearchMatchArray::create();

    JSC::Yarr::RegularExpression searchRegex = ContentSearchUtilities::createSearchRegex(query, caseSensitive, isRegex);
    Vector<std::pair<size_t, String>> matchedLines = getRegularExpressionMatchesByLines(searchRegex, text);

    for (size_t i = 0; i < matchedLines.size(); ++i) {
        const std::pair<size_t, String>& entry = matchedLines[i];
        matchArray->addItem(buildObjectForSearchMatch(entry.first, entry.second));
    }
    return matchArray;
}
// Returns the regular expression source for a script magic comment of the form
// "//# <name>=<value>" or "//@ <name>=<value>", anchored with '$'. The single
// capture group holds <value>: a run of characters that are neither whitespace
// nor quote characters.
static String scriptCommentPattern(const String& name)
{
    const char* const beforeName = "//[#@][\040\t]";
    const char* const afterName = "=[\040\t]*([^\\s\'\"]*)[\040\t]*$";
    return beforeName + name + afterName;
}
// Returns the regular expression source for a stylesheet magic comment of the
// form "/*# <name>=<value> */" or "/*@ <name>=<value> */". The single capture
// group holds <value>: a run of characters that are neither whitespace nor
// quote characters.
static String stylesheetCommentPattern(const String& name)
{
    const char* const beforeName = "/\\*[#@][\040\t]";
    const char* const afterName = "=[\040\t]*([^\\s\'\"]*)[\040\t]*\\*/";
    return beforeName + name + afterName;
}
// Runs the Yarr pattern |patternString| against |content| and returns the text
// captured by its single subpattern, or a null String when the pattern does
// not match.
//
// |patternString| is expected to contain exactly one capture group (asserted
// below), as produced by scriptCommentPattern() / stylesheetCommentPattern().
static String findMagicComment(const String& content, const String& patternString)
{
    const char* error = nullptr;
    // NOTE(review): the two boolean arguments are presumably ignoreCase=false
    // and multiline=true - confirm against the YarrPattern constructor.
    JSC::Yarr::YarrPattern pattern(patternString, false, true, &error);
    ASSERT(!error);

    BumpPointerAllocator regexAllocator;
    OwnPtr<JSC::Yarr::BytecodePattern> bytecodePattern = JSC::Yarr::byteCompile(pattern, &regexAllocator);
    ASSERT(bytecodePattern);

    // Four output slots: two for the whole match and two for the single
    // subpattern; slots 2 and 3 are read below as the capture's start and end.
    ASSERT(pattern.m_numSubpatterns == 1);
    Vector<int, 4> matches;
    matches.resize(4);
    unsigned result = JSC::Yarr::interpret(bytecodePattern.get(), content, 0, reinterpret_cast<unsigned*>(matches.data()));
    if (result == JSC::Yarr::offsetNoMatch)
        return String();

    ASSERT(matches[2] > 0 && matches[3] > 0);
    return content.substring(matches[2], matches[3] - matches[2]);
}
// Returns the value of the "sourceURL" script magic comment found in
// |content|, or whatever findMagicComment() yields when there is none.
String findScriptSourceURL(const String& content)
{
    const String pattern = scriptCommentPattern(ASCIILiteral("sourceURL"));
    return findMagicComment(content, pattern);
}
// Returns the value of the "sourceMappingURL" script magic comment found in
// |content|, or whatever findMagicComment() yields when there is none.
String findScriptSourceMapURL(const String& content)
{
    const String pattern = scriptCommentPattern(ASCIILiteral("sourceMappingURL"));
    return findMagicComment(content, pattern);
}
// Returns the value of the "sourceMappingURL" stylesheet magic comment found
// in |content|, or whatever findMagicComment() yields when there is none.
String findStylesheetSourceMapURL(const String& content)
{
    const String pattern = stylesheetCommentPattern(ASCIILiteral("sourceMappingURL"));
    return findMagicComment(content, pattern);
}
} }
#endif // ENABLE(INSPECTOR)