#include "internal.h"
/*
 * Mark character c in the start-character bit map. When caseless matching is
 * requested and the ctypes table flags c as a letter, the character obtained
 * from the fcc table via MAPCHAR is marked as well.
 *
 * Arguments:
 *   start_bits   bit map being built; bit (c & 7) of byte c/8 represents c
 *   c            the character to mark
 *   caseless     TRUE if the other-case character must also be marked
 *   cd           supplies the ctypes and fcc tables
 *
 * NOTE(review): nothing here range-checks c or the mapped character. The
 * caller in this file supplies a 32-byte map, so both values are presumably
 * expected to be below 256 -- confirm for builds where ichar is wider than
 * one byte.
 */
static void
set_bit(uschar *start_bits, int c, BOOL caseless, compile_data *cd)
{
ichar other_case;

start_bits[c/8] |= (1 << (c&7));

/* Nothing more to do unless a caseless letter is being recorded. */
if (!caseless || (cd->ctypes[c] & ctype_letter) == 0) return;

other_case = MAPCHAR(cd->fcc, c);
start_bits[other_case/8] |= (1 << (other_case&7));
}
/*
 * OR one 32-byte character-type bit map from cd->cbits into start_bits.
 * The map occupies cd->cbits[offset] .. cd->cbits[offset+31]; when negate is
 * TRUE the complement of each byte is ORed in instead.
 */
static void
or_ctype_bits(uschar *start_bits, compile_data *cd, int offset, BOOL negate)
{
int i;
for (i = 0; i < 32; i++)
  {
  if (negate) start_bits[i] |= ~cd->cbits[i + offset];
    else start_bits[i] |= cd->cbits[i + offset];
  }
}

/*
 * Add the bits for a single character-type opcode to start_bits.
 * Returns TRUE if op is one of the six digit/whitespace/word types handled
 * here, or FALSE (leaving start_bits untouched) for any other opcode.
 */
static BOOL
set_type_bits(int op, uschar *start_bits, compile_data *cd)
{
switch (op)
  {
  case OP_NOT_DIGIT:
  or_ctype_bits(start_bits, cd, cbit_digit, TRUE);
  return TRUE;

  case OP_DIGIT:
  or_ctype_bits(start_bits, cd, cbit_digit, FALSE);
  return TRUE;

  case OP_NOT_WHITESPACE:
  or_ctype_bits(start_bits, cd, cbit_space, TRUE);
  return TRUE;

  case OP_WHITESPACE:
  or_ctype_bits(start_bits, cd, cbit_space, FALSE);
  return TRUE;

  case OP_NOT_WORDCHAR:
  or_ctype_bits(start_bits, cd, cbit_word, TRUE);
  return TRUE;

  case OP_WORDCHAR:
  or_ctype_bits(start_bits, cd, cbit_word, FALSE);
  return TRUE;

  default:
  return FALSE;
  }
}

/*
 * Record the literal character stored at p as a possible starting character.
 * In a PCRE_UTF16 build the character occupies two bytes and can only be
 * recorded when the first of them is zero; otherwise FALSE is returned and
 * nothing is set. In other builds the single byte at p is always recorded.
 */
static BOOL
set_char_bit(const uschar *p, uschar *start_bits, BOOL caseless,
  compile_data *cd)
{
#if PCRE_UTF16
if (p[0]) return FALSE;
set_bit(start_bits, p[1], caseless, cd);
#else
set_bit(start_bits, p[0], caseless, cd);
#endif
return TRUE;
}

/*
 * Scan a compiled bracketed group and set, in start_bits, a bit for every
 * character that can begin a match of any of its alternatives.
 *
 * Arguments:
 *   code         points to the opcode that opens the group
 *   start_bits   32-byte bit map to be ORed into
 *   caseless     the current caseless state
 *   cd           character tables
 *
 * Returns:       TRUE if every alternative was analysed and its bits set;
 *                FALSE if some alternative begins with an item for which no
 *                set of starting characters can be determined here (the
 *                contents of start_bits are then not meaningful).
 */
static BOOL
set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless,
  compile_data *cd)
{
int c;

/* NOTE(review): dummy is only ever written, never read. The volatile store
below looks like a workaround for a compiler optimisation problem; it is kept
exactly as found -- confirm before removing. */

volatile int dummy;

do
  {
  /* Step over the group's opcode and its two-byte link. */

  const uschar *tcode = code + 3;
  BOOL try_next = TRUE;

  while (try_next)
    {
    /* Opcodes at or above OP_BRA, and OP_ASSERT, are handled by recursing
    into them; that completes this alternative. */

    if ((int)*tcode >= OP_BRA || *tcode == OP_ASSERT)
      {
      if (!set_start_bits(tcode, start_bits, caseless, cd))
        return FALSE;
      try_next = FALSE;
      }

    else switch(*tcode)
      {
      /* Anything not listed below cannot be analysed. */

      default:
      return FALSE;

      /* Three-byte item that contributes no starting characters. */

      case OP_BRANUMBER:
      tcode += 3;
      break;

      /* These assertions contribute no starting characters: follow the
      chain of alternatives to the end of the group and step past it. */

      case OP_ASSERT_NOT:
      case OP_ASSERTBACK:
      case OP_ASSERTBACK_NOT:
      do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT);
      tcode += 3;
      break;

      /* Option setting: pick up the new caseless state and carry on. */

      case OP_OPT:
      caseless = (tcode[1] & PCRE_CASELESS) != 0;
      tcode += 2;
      break;

      /* The group that follows may match nothing, so collect its starting
      characters and then continue with whatever comes after it. */

      case OP_BRAZERO:
      case OP_BRAMINZERO:
      if (!set_start_bits(++tcode, start_bits, caseless, cd))
        return FALSE;
      dummy = 1;
      do tcode += (tcode[1] << 8) + tcode[2]; while (*tcode == OP_ALT);
      tcode += 3;
      break;

      /* Optional single character: record it and look at the next item. */

      case OP_STAR:
      case OP_MINSTAR:
      case OP_QUERY:
      case OP_MINQUERY:
      if (!set_char_bit(tcode + 1, start_bits, caseless, cd)) return FALSE;
      tcode += sizeof(ichar) + 1;
      break;

      /* As above, but the character follows a two-byte count. */

      case OP_UPTO:
      case OP_MINUPTO:
      if (!set_char_bit(tcode + 3, start_bits, caseless, cd)) return FALSE;
      tcode += sizeof(ichar) + 3;
      break;

      /* Items that require at least one literal character: record the first
      character and stop. The increments line tcode up so that the character
      is at tcode + 1 in every case. */

      case OP_EXACT:
      tcode++;
      /* Fall through */

      case OP_CHARS:
      tcode++;
      /* Fall through */

      case OP_PLUS:
      case OP_MINPLUS:
      if (!set_char_bit(tcode + 1, start_bits, caseless, cd)) return FALSE;
      try_next = FALSE;
      break;

      /* A single character type: record its whole set and stop. */

      case OP_NOT_DIGIT:
      case OP_DIGIT:
      case OP_NOT_WHITESPACE:
      case OP_WHITESPACE:
      case OP_NOT_WORDCHAR:
      case OP_WORDCHAR:
      (void)set_type_bits(*tcode, start_bits, cd);
      try_next = FALSE;
      break;

      /* Repeats that need at least one character of a type: advance to the
      type opcode itself and go round again so that it is handled as a single
      character type (or rejected by the default case). */

      case OP_TYPEPLUS:
      case OP_TYPEMINPLUS:
      tcode++;
      break;

      case OP_TYPEEXACT:
      tcode += 3;
      break;

      /* Optional repeats of a character type: record the set and continue
      with the next item. A type that is not recognised (for example an
      any-character type) could start a match with characters that are not
      being recorded, so the map would be wrong; give up instead. */

      case OP_TYPEUPTO:
      case OP_TYPEMINUPTO:
      tcode += 2;
      /* Fall through */

      case OP_TYPESTAR:
      case OP_TYPEMINSTAR:
      case OP_TYPEQUERY:
      case OP_TYPEMINQUERY:
      if (!set_type_bits(tcode[1], start_bits, cd)) return FALSE;
      tcode += 2;
      break;

      /* Character class: OR in its 32-byte map, then either carry on or
      stop, depending on whether the repeat that follows allows zero
      occurrences. */

      case OP_CLASS:
        {
        tcode++;
        for (c = 0; c < 32; c++) start_bits[c] |= tcode[c];
        tcode += 32;
        switch (*tcode)
          {
          case OP_CRSTAR:
          case OP_CRMINSTAR:
          case OP_CRQUERY:
          case OP_CRMINQUERY:
          tcode++;
          break;

          /* A range whose two-byte minimum is zero is optional too. */

          case OP_CRRANGE:
          case OP_CRMINRANGE:
          if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5;
            else try_next = FALSE;
          break;

          default:
          try_next = FALSE;
          break;
          }
        }
      break;
      }
    }

  /* Follow the two-byte link to the next alternative, if there is one. */

  code += (code[1] << 8) + code[2];
  }
while (*code == OP_ALT);
return TRUE;
}
pcre_extra *
pcre_study(const pcre *external_re, int options, const char **errorptr)
{
uschar start_bits[32];
real_pcre_extra *extra;
const real_pcre *re = (const real_pcre *)external_re;
compile_data compile_block;
*errorptr = NULL;
if (re == NULL || re->magic_number != MAGIC_NUMBER)
{
*errorptr = "argument is not a compiled regular expression";
return NULL;
}
if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
{
*errorptr = "unknown or incorrect option bit(s) set";
return NULL;
}
if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0)
return NULL;
compile_block.lcc = (const ichar *)(re->tables + lcc_offset);
compile_block.fcc = (const ichar *)(re->tables + fcc_offset);
compile_block.cbits = re->tables + cbits_offset;
compile_block.ctypes = re->tables + ctypes_offset;
memset(start_bits, 0, 32 * sizeof(uschar));
if (!set_start_bits(re->code, start_bits, (re->options & PCRE_CASELESS) != 0,
&compile_block)) return NULL;
extra = (real_pcre_extra *)(pcre_malloc)(sizeof(real_pcre_extra));
if (extra == NULL)
{
*errorptr = "failed to get memory";
return NULL;
}
extra->options = PCRE_STUDY_MAPPED;
memcpy(extra->start_bits, start_bits, sizeof(start_bits));
return (pcre_extra *)extra;
}