#include "config.h"
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif
#include <stdio.h>
#include <ctype.h>
#include <sys/types.h>
#ifndef PARAMS
# if defined __GNUC__ || (defined __STDC__ && __STDC__)
# define PARAMS(args) args
# else
# define PARAMS(args) ()
# endif
#endif
#if defined(STDC_HEADERS)
# include <stddef.h>
#else
# include <sys/types.h>
#endif
#ifdef HAVE_STDLIB_H
# include <stdlib.h>
#endif
#if !defined(__STDC__) && !defined(_MSC_VER)
# define volatile
#endif
#ifdef HAVE_PROTOTYPES
# define _(args) args
#else
# define _(args) ()
#endif
#ifdef RUBY_PLATFORM
#include "defines.h"
#undef xmalloc
#undef xrealloc
#undef xcalloc
#undef xfree
# define RUBY
extern int rb_prohibit_interrupt;
extern int rb_trap_pending;
void rb_trap_exec _((void));
# define CHECK_INTS do {\
if (!rb_prohibit_interrupt) {\
if (rb_trap_pending) rb_trap_exec();\
}\
} while (0)
#endif
#ifdef __GNUC__
# ifndef atarist
# ifndef alloca
# define alloca __builtin_alloca
# endif
# endif
#else
# ifdef HAVE_ALLOCA_H
# include <alloca.h>
# else
# ifdef _AIX
#pragma alloca
# else
# ifndef alloca
void *alloca ();
# endif
# endif
# endif
#endif
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif
#define xmalloc malloc
#define xrealloc realloc
#define xcalloc calloc
#define xfree free
#ifdef C_ALLOCA
#define FREE_VARIABLES() alloca(0)
#else
#define FREE_VARIABLES()
#endif
/*
 * Leave the enclosing function, releasing the work stack first.
 * `stackb` is freed only when it no longer equals `stacka`, a variable
 * these macros expect to find in the enclosing scope (the initial,
 * non-heap stack storage).  FREE_VARIABLES() is alloca(0) when C_ALLOCA
 * is defined and expands to nothing otherwise.
 */
#define FREE_AND_RETURN_VOID(stackb) do { \
FREE_VARIABLES(); \
if (stackb != stacka) xfree(stackb); \
return; \
} while(0)
/* Same as FREE_AND_RETURN_VOID, but returns `val` to the caller. */
#define FREE_AND_RETURN(stackb,val) do { \
FREE_VARIABLES(); \
if (stackb != stacka) xfree(stackb); \
return(val); \
} while(0)
/*
 * Double the capacity of a stack of `type` elements described by the
 * enclosing scope's `stackb` (base), `stackp` (top) and `stacke` (end).
 * The first growth copies the contents out of `stacka` into a fresh
 * xmalloc'ed block; later growths use xrealloc.  On allocation failure
 * control jumps to the `memory_exhausted` label of the enclosing function,
 * leaving stackb/stackp/stacke untouched.
 */
#define DOUBLE_STACK(type) do { \
type *stackx; \
unsigned int xlen = stacke - stackb; \
if (stackb == stacka) { \
stackx = (type*)xmalloc(2 * xlen * sizeof(type)); \
if (!stackx) goto memory_exhausted; \
memcpy(stackx, stackb, xlen * sizeof (type)); \
} \
else { \
stackx = (type*)xrealloc(stackb, 2 * xlen * sizeof(type)); \
if (!stackx) goto memory_exhausted; \
} \
\
stackp = stackx + (stackp - stackb); \
stackb = stackx; \
stacke = stackb + 2 * xlen; \
} while (0)
/* Typed allocation helpers: n elements of type t via alloca / xmalloc. */
#define RE_TALLOC(n,t) ((t*)alloca((n)*sizeof(t)))
#define TMALLOC(n,t) ((t*)xmalloc((n)*sizeof(t)))
/* Resize s to n elements of type t; the xrealloc result is assigned to s. */
#define TREALLOC(s,n,t) (s=((t*)xrealloc(s,(n)*sizeof(t))))
/* The failure stack holds `unsigned char*` entries. */
#define EXPAND_FAIL_STACK() DOUBLE_STACK(unsigned char*)
/*
 * Grow the failure stack when `stacke - stackp <= (n)`.
 * Only one doubling is performed per invocation.
 * NOTE(review): presumably n never exceeds the current capacity, so a
 * single doubling is always enough -- confirm against the callers.
 */
#define ENSURE_FAIL_STACK(n) \
do { \
if (stacke - stackp <= (n)) { \
\
\
\
EXPAND_FAIL_STACK(); \
} \
} while (0)
#include "regex.h"
/* Forward declarations of file-local helpers used by the pattern compiler. */
static void store_jump _((char*, int, char*));
static void insert_jump _((int, char*, char*, char*));
static void store_jump_n _((char*, int, char*, unsigned));
static void insert_jump_n _((int, char*, char*, char*, unsigned));
static void insert_op_2 _((int, char*, char*, int, int));
static int memcmp_translate _((unsigned char*, unsigned char*, int));
/*
 * Syntax classes stored in re_syntax_table (filled by init_syntax_once()):
 * Sword marks ASCII alphanumerics and '_', Sword2 marks bytes in
 * 0x80..0xff for which isalnum() is true.  All other entries are 0.
 */
#define Sword 1
#define Sword2 2
#define SYNTAX(c) re_syntax_table[c]
static char re_syntax_table[256];
static void init_syntax_once _((void));
/* Case-translation table installed by re_set_casetable(); 0 means none. */
static const unsigned char *translate = 0;
static void init_regs _((struct re_registers*, unsigned int));
static void bm_init_skip _((int *, unsigned char*, int, const unsigned char*));
/* Currently selected multibyte encoding; starts out as plain ASCII. */
static int current_mbctype = MBCTYPE_ASCII;
#undef P
#ifdef RUBY
#include "util.h"
void rb_warn _((const char*, ...));
/* When built as part of Ruby, warnings are forwarded to rb_warn(). */
# define re_warning(x) rb_warn(x)
#endif
/* Otherwise re_warning() expands to nothing. */
#ifndef re_warning
# define re_warning(x)
#endif
/*
 * Fill re_syntax_table on the first call; later calls return immediately.
 *
 * Alphanumeric bytes below 0x80 and '_' are classified as Sword,
 * alphanumeric bytes in 0x80..0xff as Sword2, everything else as 0.
 */
static void
init_syntax_once()
{
    static int initialized = 0;
    int ch;

    if (initialized)
        return;

    memset(re_syntax_table, 0, sizeof re_syntax_table);

    for (ch = 0; ch <= 0xff; ch++) {
        if (!isalnum(ch))
            continue;
        re_syntax_table[ch] = (ch <= 0x7f) ? Sword : Sword2;
    }
    /* The underscore counts as a word character as well. */
    re_syntax_table['_'] = Sword;

    initialized = 1;
}
/*
 * Install `table` as the case-translation table consulted when a pattern
 * is compiled or matched with case folding.  The pointer is stored as-is
 * (viewed as unsigned bytes); no copy is made.
 */
void
re_set_casetable(table)
    const char *table;
{
    const unsigned char *as_bytes = (const unsigned char*)table;

    translate = as_bytes;
}
#undef ISASCII
#if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
# define ISASCII(c) 1
#else
# define ISASCII(c) isascii(c)
#endif
#ifdef isblank
# define ISBLANK(c) (ISASCII(c) && isblank(c))
#else
# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
#endif
#ifdef isgraph
# define ISGRAPH(c) (ISASCII(c) && isgraph(c))
#else
# define ISGRAPH(c) (ISASCII(c) && isprint(c) && !isspace(c))
#endif
#undef ISPRINT
#define ISPRINT(c) (ISASCII(c) && isprint(c))
#define ISDIGIT(c) (ISASCII(c) && isdigit(c))
#define ISALNUM(c) (ISASCII(c) && isalnum(c))
#define ISALPHA(c) (ISASCII(c) && isalpha(c))
#define ISCNTRL(c) (ISASCII(c) && iscntrl(c))
#define ISLOWER(c) (ISASCII(c) && islower(c))
#define ISPUNCT(c) (ISASCII(c) && ispunct(c))
#define ISSPACE(c) (ISASCII(c) && isspace(c))
#define ISUPPER(c) (ISASCII(c) && isupper(c))
#define ISXDIGIT(c) (ISASCII(c) && isxdigit(c))
#ifndef NULL
# define NULL (void *)0
#endif
#undef SIGN_EXTEND_CHAR
#if __STDC__
# define SIGN_EXTEND_CHAR(c) ((signed char)(c))
#else
# define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
#endif
/*
 * Opcodes of the compiled pattern.  The compiler emits each opcode as a
 * single byte with BUFPUSH() and the walkers in this file read it back
 * with `(enum regexpcode)*p++`.
 *
 * Operand bytes following an opcode, as skipped by calculate_must_string():
 *   exactn                          one count byte, then that many bytes
 *   start_memory, stop_memory       2 bytes
 *   duplicate, option_set           1 byte
 *   charset, charset_not            one bitmap-length byte, the bitmap, a
 *                                   2-byte range count, 8 bytes per range
 *   on_failure_jump, jump, try_next,
 *   dummy_failure_jump, succeed_n   2-byte number read with EXTRACT_NUMBER
 *   start_nowidth, stop_nowidth, stop_backtrack, finalize_jump,
 *   maybe_finalize_jump, finalize_push
 *                                   2 bytes
 *   jump_n, set_number_at, finalize_push_n
 *                                   4 bytes
 * The remaining opcodes are skipped there without operands.
 */
enum regexpcode
{
unused=0,
exactn=1,
begline,
endline,
begbuf,
endbuf,
endbuf2,
begpos,
jump,
jump_past_alt,
on_failure_jump,
finalize_jump,
maybe_finalize_jump,
dummy_failure_jump,
push_dummy_failure,
succeed_n,
jump_n,
try_next,
finalize_push,
finalize_push_n,
set_number_at,
anychar,
anychar_repeat,
charset,
charset_not,
start_memory,
stop_memory,
start_paren,
stop_paren,
casefold_on,
casefold_off,
option_set,
start_nowidth,
stop_nowidth,
pop_and_fail,
stop_backtrack,
duplicate,
wordchar,
notwordchar,
wordbeg,
wordend,
wordbound,
notwordbound
};
/* Build-time overridable constant; its use is outside this section. */
#ifndef NFAILURES
#define NFAILURES 160
#endif
/*
 * Store `number` as two bytes at `destination`, low byte first.
 * `number` is evaluated twice.
 */
#define STORE_NUMBER(destination, number) \
do { (destination)[0] = (number) & 0377; \
(destination)[1] = (number) >> 8; } while (0)
/* As STORE_NUMBER, then advance `destination` past the two bytes. */
#define STORE_NUMBER_AND_INCR(destination, number) \
do { STORE_NUMBER(destination, number); \
(destination) += 2; } while (0)
/*
 * Read the two-byte number at `source` into `destination`.  The low byte
 * is taken unsigned and the high byte is sign-extended, so the result is
 * a signed 16-bit quantity (jump offsets may be negative).
 */
#define EXTRACT_NUMBER(destination, source) \
do { (destination) = *(source) & 0377; \
(destination) += SIGN_EXTEND_CHAR(*(char*)((source) + 1)) << 8; } while (0)
/* As EXTRACT_NUMBER, then advance `source` past the two bytes. */
#define EXTRACT_NUMBER_AND_INCR(destination, source) \
do { EXTRACT_NUMBER(destination, source); \
(source) += 2; } while (0)
/*
 * Compatibility entry point: the requested syntax is ignored and 0 is
 * always returned.
 */
long
re_set_syntax(syntax)
    long syntax;
{
    (void)syntax;   /* accepted for interface compatibility only */
    return 0L;
}
/*
 * True when case folding is active and a translate table is installed.
 * TRANSLATE_P() looks at the local `options`, MAY_TRANSLATE() at
 * bufp->options (also accepting RE_MAY_IGNORECASE).
 */
#define TRANSLATE_P() ((options&RE_OPTION_IGNORECASE) && translate)
#define MAY_TRANSLATE() ((bufp->options&(RE_OPTION_IGNORECASE|RE_MAY_IGNORECASE)) && translate)
/*
 * Fetch the next pattern byte into c (as an unsigned char value) and
 * advance p.  Jumps to the enclosing function's `end_of_pattern` label
 * when p has reached pend.  The byte is mapped through `translate` when
 * TRANSLATE_P() holds.
 */
#define PATFETCH(c) \
do {if (p == pend) goto end_of_pattern; \
c = (unsigned char) *p++; \
if (TRANSLATE_P()) c = (unsigned char)translate[c]; \
} while (0)
/* Like PATFETCH but never applies the translate table. */
#define PATFETCH_RAW(c) \
do {if (p == pend) goto end_of_pattern; \
c = (unsigned char)*p++; \
} while (0)
/* Step back over the byte most recently fetched. */
#define PATUNFETCH p--
/*
 * Combine the lead byte already held in c with the trailing bytes at p
 * into one wide-character value, advancing p.
 * In UTF-8 mode the lead byte's marker bits are masked off and each of
 * the mbclen(c)-1 trailing bytes contributes its low 6 bits.  In the
 * other modes exactly one trailing byte is appended: (c << 8) | byte.
 */
#define MBC2WC(c, p) \
do { \
if (current_mbctype == MBCTYPE_UTF8) { \
int n = mbclen(c) - 1; \
c &= (1<<(BYTEWIDTH-2-n)) - 1; \
while (n--) { \
c = c << 6 | (*p++ & ((1<<6)-1)); \
} \
} \
else { \
c <<= 8; \
c |= (unsigned char)*(p)++; \
} \
} while (0)
/*
 * Complete a multibyte character whose lead byte is in c, jumping to
 * `end_of_pattern` when `p + mbclen(c) - 1 >= pend`.
 */
#define PATFETCH_MBC(c) \
do { \
if (p + mbclen(c) - 1 >= pend) goto end_of_pattern; \
MBC2WC(c, p); \
} while(0)
/*
 * First byte of the multibyte encoding of wide character c: outside UTF-8
 * mode that is c itself below 0x100, else bits 8..15; in UTF-8 mode it is
 * utf8_firstbyte(c).
 */
#define WC2MBC1ST(c) \
((current_mbctype != MBCTYPE_UTF8) ? ((c<0x100) ? (c) : (((c)>>8)&0xff)) : utf8_firstbyte(c))
/*
 * Per-encoding "start position" helpers, selected through a table indexed
 * by current_mbctype.  Each takes a string and a byte position and
 * returns an unsigned position.
 * NOTE(review): the table order (ascii, euc, sjis, utf8) presumably
 * mirrors the numeric values of the MBCTYPE_* constants -- confirm in
 * regex.h.
 */
typedef unsigned int (*mbc_startpos_func_t) _((const char *string, unsigned int pos));
static unsigned int asc_startpos _((const char *string, unsigned int pos));
static unsigned int euc_startpos _((const char *string, unsigned int pos));
static unsigned int sjis_startpos _((const char *string, unsigned int pos));
static unsigned int utf8_startpos _((const char *string, unsigned int pos));
static const mbc_startpos_func_t mbc_startpos_func[4] = {
asc_startpos, euc_startpos, sjis_startpos, utf8_startpos
};
/* Dispatch to the helper for the currently selected encoding. */
#define mbc_startpos(start, pos) (*mbc_startpos_func[current_mbctype])((start), (pos))
/*
 * Return the lead byte of the UTF-8 style encoding of wide character c.
 *
 * Values below 0x80 are returned unchanged.  Larger values yield the
 * appropriate length marker (0xc0, 0xe0, 0xf0, 0xf8, 0xfc) combined with
 * the most significant payload bits of c.  Anything above 0x7fffffff maps
 * to the marker 0xfe.
 *
 * The final return is unconditional: the previous version returned 0xfe
 * only inside a SIZEOF_INT-dependent #if and could run off the end of the
 * function without returning a value on builds where SIZEOF_INT > 4.
 */
static unsigned int
utf8_firstbyte(c)
    unsigned long c;
{
    if (c < 0x80) return c;
    if (c <= 0x7ff) return ((c>>6)&0xff)|0xc0;
    if (c <= 0xffff) return ((c>>12)&0xff)|0xe0;
    if (c <= 0x1fffff) return ((c>>18)&0xff)|0xf0;
    if (c <= 0x3ffffff) return ((c>>24)&0xff)|0xf8;
    if (c <= 0x7fffffff) return ((c>>30)&0xff)|0xfc;
    return 0xfe;
}
#if 0
/*
 * Debug helper: write wide character c to stdout in the current encoding.
 *
 * UTF-8 mode: the lead byte comes from utf8_firstbyte() and every
 * following byte carries 6 payload bits tagged with the 0x80 continuation
 * marker (the marker was previously missing, producing invalid output).
 * Other modes: a single byte (c <= 0xff, previously `< 0xff`, which sent
 * byte 0xff down the two-byte path) is shown as a backslash-octal escape,
 * anything larger as its two bytes, high byte first.
 */
static void
print_mbc(c)
    unsigned int c;
{
    if (current_mbctype == MBCTYPE_UTF8) {
        if (c < 0x80)
            printf("%c", (int)c);
        else if (c <= 0x7ff)
            printf("%c%c", (int)utf8_firstbyte(c),
                   (int)(0x80 | (c & 0x3f)));
        else if (c <= 0xffff)
            printf("%c%c%c", (int)utf8_firstbyte(c),
                   (int)(0x80 | ((c >> 6) & 0x3f)),
                   (int)(0x80 | (c & 0x3f)));
        else if (c <= 0x1fffff)
            printf("%c%c%c%c", (int)utf8_firstbyte(c),
                   (int)(0x80 | ((c >> 12) & 0x3f)),
                   (int)(0x80 | ((c >> 6) & 0x3f)),
                   (int)(0x80 | (c & 0x3f)));
        else if (c <= 0x3ffffff)
            printf("%c%c%c%c%c", (int)utf8_firstbyte(c),
                   (int)(0x80 | ((c >> 18) & 0x3f)),
                   (int)(0x80 | ((c >> 12) & 0x3f)),
                   (int)(0x80 | ((c >> 6) & 0x3f)),
                   (int)(0x80 | (c & 0x3f)));
        else if (c <= 0x7fffffff)
            printf("%c%c%c%c%c%c", (int)utf8_firstbyte(c),
                   (int)(0x80 | ((c >> 24) & 0x3f)),
                   (int)(0x80 | ((c >> 18) & 0x3f)),
                   (int)(0x80 | ((c >> 12) & 0x3f)),
                   (int)(0x80 | ((c >> 6) & 0x3f)),
                   (int)(0x80 | (c & 0x3f)));
    }
    else if (c <= 0xff) {
        printf("\\%o", (int)c);
    }
    else {
        printf("%c%c", (int)(c >> BYTEWIDTH), (int)(c &0xff));
    }
}
#endif
/* Initial size, in bytes, given to an unallocated pattern buffer. */
#define INIT_BUF_SIZE 28
/*
 * Make sure at least n more bytes fit after the write cursor `b`,
 * growing the buffer as often as needed.
 */
#define GET_BUFFER_SPACE(n) \
do { \
while (b - bufp->buffer + (n) >= bufp->allocated) \
EXTEND_BUFFER; \
} while (0)
/* Append one byte to the compiled pattern, growing the buffer if needed. */
#define BUFPUSH(ch) \
do { \
GET_BUFFER_SPACE(1); \
*b++ = (char)(ch); \
} while (0)
/*
 * Double bufp->allocated (capped at 1<<16 bytes) and reallocate the
 * buffer.  When the buffer is already at the cap, control jumps to the
 * `too_big` label; when xrealloc fails, to `memory_exhausted`.  After a
 * successful move every compiler pointer into the buffer (b,
 * fixup_alt_jump, laststart, begalt, pending_exact) is rebased onto the
 * new block.
 * NOTE(review): on xrealloc failure bufp->buffer has already been
 * overwritten with 0 while bufp->allocated keeps the doubled size; check
 * that the memory_exhausted path copes with that and with the old block.
 */
#define EXTEND_BUFFER \
do { char *old_buffer = bufp->buffer; \
if (bufp->allocated == (1L<<16)) goto too_big; \
bufp->allocated *= 2; \
if (bufp->allocated > (1L<<16)) bufp->allocated = (1L<<16); \
bufp->buffer = (char*)xrealloc(bufp->buffer, bufp->allocated); \
if (bufp->buffer == 0) \
goto memory_exhausted; \
b = (b - old_buffer) + bufp->buffer; \
if (fixup_alt_jump) \
fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer; \
if (laststart) \
laststart = (laststart - old_buffer) + bufp->buffer; \
begalt = (begalt - old_buffer) + bufp->buffer; \
if (pending_exact) \
pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
} while (0)
#define SET_LIST_BIT(c) \
(b[(unsigned char)(c) / BYTEWIDTH] \
|= 1 << ((unsigned char)(c) % BYTEWIDTH))
#define GET_UNSIGNED_NUMBER(num) \
do { if (p != pend) { \
PATFETCH(c); \
while (ISDIGIT(c)) { \
if (num < 0) \
num = 0; \
num = num * 10 + c - '0'; \
if (p == pend) \
break; \
PATFETCH(c); \
} \
} \
} while (0)
#define STREQ(s1, s2) ((strcmp(s1, s2) == 0))
#define CHAR_CLASS_MAX_LENGTH 6
#define IS_CHAR_CLASS(string) \
(STREQ(string, "alpha") || STREQ(string, "upper") \
|| STREQ(string, "lower") || STREQ(string, "digit") \
|| STREQ(string, "alnum") || STREQ(string, "xdigit") \
|| STREQ(string, "space") || STREQ(string, "print") \
|| STREQ(string, "punct") || STREQ(string, "graph") \
|| STREQ(string, "cntrl") || STREQ(string, "blank"))
/*
 * Store the 32-bit wide character c at p as four bytes, most significant
 * byte first.
 */
#define STORE_MBC(p, c) \
do { \
    (p)[0] = (unsigned char)(((c) >>24) & 0xff); \
    (p)[1] = (unsigned char)(((c) >>16) & 0xff); \
    (p)[2] = (unsigned char)(((c) >> 8) & 0xff); \
    (p)[3] = (unsigned char)(((c) >> 0) & 0xff); \
} while (0)
/* As STORE_MBC, advancing p past the four bytes written. */
#define STORE_MBC_AND_INCR(p, c) \
do { \
    *(p)++ = (unsigned char)(((c) >>24) & 0xff); \
    *(p)++ = (unsigned char)(((c) >>16) & 0xff); \
    *(p)++ = (unsigned char)(((c) >> 8) & 0xff); \
    *(p)++ = (unsigned char)(((c) >> 0) & 0xff); \
} while (0)
/*
 * Read back a value written by STORE_MBC as an unsigned int.
 * Each byte is widened to unsigned int BEFORE shifting: shifting the
 * int-promoted byte left by 24 is undefined once the byte is >= 0x80,
 * which happens for range bounds such as 0xffffffff.
 */
#define EXTRACT_MBC(p) \
    ((unsigned int)(unsigned char)(p)[0] << 24 | \
     (unsigned int)(unsigned char)(p)[1] << 16 | \
     (unsigned int)(unsigned char)(p)[2] <<  8 | \
     (unsigned int)(unsigned char)(p)[3])
/* As EXTRACT_MBC, advancing p past the four bytes read. */
#define EXTRACT_MBC_AND_INCR(p) \
    ((p) += 4, \
     ((unsigned int)(unsigned char)(p)[-4] << 24 | \
      (unsigned int)(unsigned char)(p)[-3] << 16 | \
      (unsigned int)(unsigned char)(p)[-2] <<  8 | \
      (unsigned int)(unsigned char)(p)[-1]))
/* Read an unsigned 16-bit value stored low byte first. */
#define EXTRACT_UNSIGNED(p) \
    ((unsigned char)(p)[0] | (unsigned char)(p)[1] << 8)
/* As EXTRACT_UNSIGNED, advancing p past the two bytes read. */
#define EXTRACT_UNSIGNED_AND_INCR(p) \
    ((p) += 2, (unsigned char)(p)[-2] | (unsigned char)(p)[-1] << 8)
/*
 * Add the inclusive wide-character range [c1, c2] to the multibyte range
 * table of a charset, merging it with any entries it overlaps or touches.
 *
 * b points at the first byte of the charset bitmap.  The layout read here:
 *   b[-1]                 bitmap length in bytes
 *   b[0 .. len-1]         the bitmap
 *   next 2 bytes          number of range entries (low byte first)
 *   8 bytes per entry     4-byte start, 4-byte end (STORE_MBC format)
 * Entries are kept sorted so they can be binary searched.  The caller is
 * responsible for having reserved room for one additional entry
 * (re_compile_pattern does so before each charset element).
 *
 * Nothing happens when c1 > c2.
 *
 * NOTE(review): the searches compare through (int) casts, so bounds at or
 * above 0x80000000 (callers pass 0xffffffff) compare as negative values;
 * confirm that this ordering is what is intended.
 */
static void
set_list_bits(c1, c2, b)
unsigned long c1, c2;
unsigned char *b;
{
unsigned char sbc_size = b[-1];
unsigned short mbc_size = EXTRACT_UNSIGNED(&b[sbc_size]);
unsigned short beg, end, upb;
if (c1 > c2)
return;
/* From here on b addresses the first range entry; the count sits at b[-2]. */
b = &b[sbc_size + 2];
/* beg: first entry whose end is not below c1 - 1 (adjacent entries merge). */
for (beg = 0, upb = mbc_size; beg < upb; ) {
unsigned short mid = (unsigned short)(beg + upb) >> 1;
if ((int)c1 - 1 > (int)EXTRACT_MBC(&b[mid*8+4]))
beg = mid + 1;
else
upb = mid;
}
/* end: first entry at or after beg whose start lies beyond c2 + 1. */
for (end = beg, upb = mbc_size; end < upb; ) {
unsigned short mid = (unsigned short)(end + upb) >> 1;
if ((int)c2 >= (int)EXTRACT_MBC(&b[mid*8]) - 1)
end = mid + 1;
else
upb = mid;
}
/* Entries beg..end-1 are absorbed: widen [c1, c2] to cover them. */
if (beg != end) {
if (c1 > EXTRACT_MBC(&b[beg*8]))
c1 = EXTRACT_MBC(&b[beg*8]);
if (c2 < EXTRACT_MBC(&b[(end - 1)*8+4]))
c2 = EXTRACT_MBC(&b[(end - 1)*8+4]);
}
/* Shift the tail so that exactly one slot (index beg) holds the result. */
if (end < mbc_size && end != beg + 1)
memmove(&b[(beg + 1)*8], &b[end*8], (mbc_size - end)*8);
STORE_MBC(&b[beg*8 + 0], c1);
STORE_MBC(&b[beg*8 + 4], c2);
/* (end - beg) entries were replaced by one. */
mbc_size += beg - end + 1;
STORE_NUMBER(&b[-2], mbc_size);
}
/*
 * Test character c against the single-byte bitmap of a charset.
 *
 * b points at the bitmap-length byte; the bitmap itself follows.
 * Returns 1 when c's bit lies inside the stored bitmap and is set,
 * 0 otherwise.
 */
static int
is_in_list_sbc(c, b)
    unsigned long c;
    const unsigned char *b;
{
    unsigned short nbytes = b[0];
    const unsigned char *bitmap = b + 1;

    if ((int)c / BYTEWIDTH >= (int)nbytes)
        return 0;
    return (bitmap[c / BYTEWIDTH] & (1 << c % BYTEWIDTH)) != 0;
}
/*
 * Test wide character c against the multibyte range table of a charset.
 *
 * b points at the bitmap-length byte.  The range table (2-byte entry
 * count followed by 8-byte start/end entries) sits right after the bitmap
 * and is binary searched for the first entry whose end is not below c.
 * Returns 1 when that entry also starts at or before c, else 0.
 */
static int
is_in_list_mbc(c, b)
    unsigned long c;
    const unsigned char *b;
{
    unsigned short bitmap_len = b[0];
    /* Skip the length byte, the bitmap and the 2-byte entry count. */
    const unsigned char *ranges = b + 1 + bitmap_len + 2;
    unsigned short nranges = EXTRACT_UNSIGNED(&ranges[-2]);
    unsigned short lo = 0;
    unsigned short hi = nranges;

    if (nranges == 0)
        return 0;

    while (lo < hi) {
        unsigned short mid = (unsigned short)(lo + hi) >> 1;

        if (c > EXTRACT_MBC(&ranges[mid*8+4]))
            lo = mid + 1;
        else
            hi = mid;
    }

    if (lo >= nranges)
        return 0;
    return (EXTRACT_MBC(&ranges[lo*8]) <= c) ? 1 : 0;
}
/*
 * Membership test for a compiled charset: the single-byte bitmap is
 * consulted first; the multibyte range table only when a multibyte
 * encoding is active (current_mbctype non-zero).  Returns 1 or 0.
 */
static int
is_in_list(c, b)
    unsigned long c;
    const unsigned char *b;
{
    if (is_in_list_sbc(c, b))
        return 1;
    if (!current_mbctype)
        return 0;
    return is_in_list_mbc(c, b) != 0;
}
#if 0
/*
 * Debug helper: dump the compiled pattern bytes in [start, end) to stdout
 * as a '/'-separated list of opcode names and operands, ending with
 * "/\n".  A NULL start prints "(null)".  Unknown opcodes are printed as
 * "?<number>".
 *
 * Fix: `case wordend` used to fall through into `case wordchar`, so every
 * wordend was printed as "/wordend/wordchar".
 */
static void
print_partial_compiled_pattern(start, end)
    unsigned char *start;
    unsigned char *end;
{
    int mcnt, mcnt2;
    unsigned char *p = start;
    unsigned char *pend = end;

    if (start == NULL) {
        printf("(null)\n");
        return;
    }

    while (p < pend) {
        switch ((enum regexpcode)*p++) {
          case unused:
            printf("/unused");
            break;

          case exactn:
            /* One count byte followed by that many literal bytes. */
            mcnt = *p++;
            printf("/exactn/%d", mcnt);
            do {
                putchar('/');
                printf("%c", *p++);
            }
            while (--mcnt);
            break;

          case start_memory:
            mcnt = *p++;
            printf("/start_memory/%d/%d", mcnt, *p++);
            break;

          case stop_memory:
            mcnt = *p++;
            printf("/stop_memory/%d/%d", mcnt, *p++);
            break;

          case start_paren:
            printf("/start_paren");
            break;

          case stop_paren:
            printf("/stop_paren");
            break;

          case casefold_on:
            printf("/casefold_on");
            break;

          case casefold_off:
            printf("/casefold_off");
            break;

          case option_set:
            printf("/option_set/%d", *p++);
            break;

          case start_nowidth:
            EXTRACT_NUMBER_AND_INCR(mcnt, p);
            printf("/start_nowidth//%d", mcnt);
            break;

          case stop_nowidth:
            printf("/stop_nowidth//");
            p += 2;
            break;

          case pop_and_fail:
            printf("/pop_and_fail");
            break;

          case stop_backtrack:
            printf("/stop_backtrack//");
            p += 2;
            break;

          case duplicate:
            printf("/duplicate/%d", *p++);
            break;

          case anychar:
            printf("/anychar");
            break;

          case anychar_repeat:
            printf("/anychar_repeat");
            break;

          case charset:
          case charset_not:
            {
                register int c;

                printf("/charset%s",
                       (enum regexpcode)*(p - 1) == charset_not ? "_not" : "");

                /* Bitmap: length byte, then one bit per single-byte char. */
                mcnt = *p++;
                printf("/%d", mcnt);
                for (c = 0; c < mcnt; c++) {
                    unsigned bit;
                    unsigned char map_byte = p[c];

                    putchar('/');
                    for (bit = 0; bit < BYTEWIDTH; bit++)
                        if (map_byte & (1 << bit))
                            printf("%c", c * BYTEWIDTH + bit);
                }
                p += mcnt;

                /* Range table: 2-byte count, then start/end pairs. */
                mcnt = EXTRACT_UNSIGNED_AND_INCR(p);
                putchar('/');
                while (mcnt--) {
                    print_mbc(EXTRACT_MBC_AND_INCR(p));
                    putchar('-');
                    print_mbc(EXTRACT_MBC_AND_INCR(p));
                }
                break;
            }

          case begline:
            printf("/begline");
            break;

          case endline:
            printf("/endline");
            break;

          case on_failure_jump:
            EXTRACT_NUMBER_AND_INCR(mcnt, p);
            printf("/on_failure_jump//%d", mcnt);
            break;

          case dummy_failure_jump:
            EXTRACT_NUMBER_AND_INCR(mcnt, p);
            printf("/dummy_failure_jump//%d", mcnt);
            break;

          case push_dummy_failure:
            printf("/push_dummy_failure");
            break;

          case finalize_jump:
            EXTRACT_NUMBER_AND_INCR(mcnt, p);
            printf("/finalize_jump//%d", mcnt);
            break;

          case maybe_finalize_jump:
            EXTRACT_NUMBER_AND_INCR(mcnt, p);
            printf("/maybe_finalize_jump//%d", mcnt);
            break;

          case jump_past_alt:
            EXTRACT_NUMBER_AND_INCR(mcnt, p);
            printf("/jump_past_alt//%d", mcnt);
            break;

          case jump:
            EXTRACT_NUMBER_AND_INCR(mcnt, p);
            printf("/jump//%d", mcnt);
            break;

          case succeed_n:
            EXTRACT_NUMBER_AND_INCR(mcnt, p);
            EXTRACT_NUMBER_AND_INCR(mcnt2, p);
            printf("/succeed_n//%d//%d", mcnt, mcnt2);
            break;

          case jump_n:
            EXTRACT_NUMBER_AND_INCR(mcnt, p);
            EXTRACT_NUMBER_AND_INCR(mcnt2, p);
            printf("/jump_n//%d//%d", mcnt, mcnt2);
            break;

          case set_number_at:
            EXTRACT_NUMBER_AND_INCR(mcnt, p);
            EXTRACT_NUMBER_AND_INCR(mcnt2, p);
            printf("/set_number_at//%d//%d", mcnt, mcnt2);
            break;

          case try_next:
            EXTRACT_NUMBER_AND_INCR(mcnt, p);
            printf("/try_next//%d", mcnt);
            break;

          case finalize_push:
            EXTRACT_NUMBER_AND_INCR(mcnt, p);
            printf("/finalize_push//%d", mcnt);
            break;

          case finalize_push_n:
            EXTRACT_NUMBER_AND_INCR(mcnt, p);
            EXTRACT_NUMBER_AND_INCR(mcnt2, p);
            printf("/finalize_push_n//%d//%d", mcnt, mcnt2);
            break;

          case wordbound:
            printf("/wordbound");
            break;

          case notwordbound:
            printf("/notwordbound");
            break;

          case wordbeg:
            printf("/wordbeg");
            break;

          case wordend:
            printf("/wordend");
            break;

          case wordchar:
            printf("/wordchar");
            break;

          case notwordchar:
            printf("/notwordchar");
            break;

          case begbuf:
            printf("/begbuf");
            break;

          case endbuf:
            printf("/endbuf");
            break;

          case endbuf2:
            printf("/endbuf2");
            break;

          case begpos:
            printf("/begpos");
            break;

          default:
            printf("?%d", *(p-1));
        }
    }
    printf("/\n");
}
/*
 * Debug helper: dump the whole compiled pattern held in bufp, i.e. the
 * first bufp->used bytes of bufp->buffer.
 */
static void
print_compiled_pattern(bufp)
    struct re_pattern_buffer *bufp;
{
    unsigned char *first = (unsigned char*)bufp->buffer;
    unsigned char *past_last = first + bufp->used;

    print_partial_compiled_pattern(first, past_last);
}
#endif
static char*
calculate_must_string(start, end)
char *start;
char *end;
{
int mcnt;
int max = 0;
unsigned char *p = (unsigned char *)start;
unsigned char *pend = (unsigned char *)end;
char *must = 0;
if (start == NULL) return 0;
while (p < pend) {
switch ((enum regexpcode)*p++) {
case unused:
break;
case exactn:
mcnt = *p;
if (mcnt > max) {
must = (char *)p;
max = mcnt;
}
p += mcnt+1;
break;
case start_memory:
case stop_memory:
p += 2;
break;
case duplicate:
case option_set:
p++;
break;
case casefold_on:
case casefold_off:
return 0;
case pop_and_fail:
case anychar:
case anychar_repeat:
case begline:
case endline:
case wordbound:
case notwordbound:
case wordbeg:
case wordend:
case wordchar:
case notwordchar:
case begbuf:
case endbuf:
case endbuf2:
case begpos:
case push_dummy_failure:
case start_paren:
case stop_paren:
break;
case charset:
case charset_not:
mcnt = *p++;
p += mcnt;
mcnt = EXTRACT_UNSIGNED_AND_INCR(p);
while (mcnt--) {
p += 8;
}
break;
case on_failure_jump:
EXTRACT_NUMBER_AND_INCR(mcnt, p);
if (mcnt > 0) p += mcnt;
if ((enum regexpcode)p[-3] == jump) {
p -= 2;
EXTRACT_NUMBER_AND_INCR(mcnt, p);
if (mcnt > 0) p += mcnt;
}
break;
case dummy_failure_jump:
case succeed_n:
case try_next:
case jump:
EXTRACT_NUMBER_AND_INCR(mcnt, p);
if (mcnt > 0) p += mcnt;
break;
case start_nowidth:
case stop_nowidth:
case stop_backtrack:
case finalize_jump:
case maybe_finalize_jump:
case finalize_push:
p += 2;
break;
case jump_n:
case set_number_at:
case finalize_push_n:
p += 4;
break;
default:
break;
}
}
return must;
}
/*
 * Translate the letter of a simple backslash escape into the character it
 * denotes: n, t, r, f, v, a (BEL), b (BS) and e (ESC).  Any other value
 * is returned unchanged.
 */
static unsigned int
read_backslash(c)
    int c;
{
    static const char letters[] = "ntrfvabe";
    static const char meaning[] = {
        '\n', '\t', '\r', '\f', '\v', '\007', '\010', '\033'
    };
    int i;

    for (i = 0; letters[i] != '\0'; i++) {
        if (c == letters[i])
            return meaning[i];
    }
    return c;
}
static unsigned int
read_special(p, pend, pp)
const char *p, *pend, **pp;
{
int c;
PATFETCH_RAW(c);
switch (c) {
case 'M':
PATFETCH_RAW(c);
if (c != '-') return -1;
PATFETCH_RAW(c);
*pp = p;
if (c == '\\') {
return read_special(--p, pend, pp) | 0x80;
}
else if (c == -1) return ~0;
else {
return ((c & 0xff) | 0x80);
}
case 'C':
PATFETCH_RAW(c);
if (c != '-') return ~0;
case 'c':
PATFETCH_RAW(c);
*pp = p;
if (c == '\\') {
c = read_special(--p, pend, pp);
}
else if (c == '?') return 0177;
else if (c == -1) return ~0;
return c & 0x9f;
default:
PATFETCH_RAW(c);
*pp = p;
return read_backslash(c);
}
end_of_pattern:
return ~0;
}
const char *
re_compile_pattern(pattern, size, bufp)
const char *pattern;
int size;
struct re_pattern_buffer *bufp;
{
register char *b = bufp->buffer;
register const char *p = pattern;
const char *nextp;
const char *pend = pattern + size;
register unsigned int c, c1 = 0;
const char *p0;
int numlen;
#define ERROR_MSG_MAX_SIZE 200
static char error_msg[ERROR_MSG_MAX_SIZE+1];
char *pending_exact = 0;
char *fixup_alt_jump = 0;
char *laststart = 0;
char zero_times_ok;
char many_times_ok;
char greedy;
char *begalt = b;
const char *beg_interval;
int lower_bound;
int upper_bound;
int stacka[40];
int *stackb = stacka;
int *stackp = stackb;
int *stacke = stackb + 40;
int regnum = 1;
int range = 0;
int had_mbchar = 0;
int had_num_literal = 0;
int had_char_class = 0;
int options = bufp->options;
bufp->fastmap_accurate = 0;
bufp->must = 0;
bufp->must_skip = 0;
init_syntax_once();
if (bufp->allocated == 0) {
bufp->allocated = INIT_BUF_SIZE;
bufp->buffer = (char*)xrealloc(bufp->buffer, INIT_BUF_SIZE);
if (!bufp->buffer) goto memory_exhausted;
begalt = b = bufp->buffer;
}
while (p != pend) {
PATFETCH(c);
switch (c) {
case '$':
if (bufp->options & RE_OPTION_SINGLELINE) {
BUFPUSH(endbuf);
}
else {
p0 = p;
while (p0 != pend) {
if (*p0 == '\\' && p0 + 1 != pend
&& (p0[1] == 'b' || p0[1] == 'B'))
p0 += 2;
else
break;
}
BUFPUSH(endline);
}
break;
case '^':
if (bufp->options & RE_OPTION_SINGLELINE)
BUFPUSH(begbuf);
else
BUFPUSH(begline);
break;
case '+':
case '?':
case '*':
if (!laststart) {
snprintf(error_msg, ERROR_MSG_MAX_SIZE,
"invalid regular expression; there's no previous pattern, to which '%c' would define cardinality at %d",
c, p-pattern);
FREE_AND_RETURN(stackb, error_msg);
}
zero_times_ok = c != '+';
many_times_ok = c != '?';
greedy = 1;
if (p != pend) {
PATFETCH(c);
switch (c) {
case '?':
greedy = 0;
break;
case '*':
case '+':
goto nested_meta;
default:
PATUNFETCH;
break;
}
}
repeat:
if (!laststart)
break;
if (greedy && many_times_ok && *laststart == anychar && b - laststart <= 2) {
if (b[-1] == stop_paren)
b--;
if (zero_times_ok)
*laststart = anychar_repeat;
else {
BUFPUSH(anychar_repeat);
}
break;
}
if (many_times_ok) {
GET_BUFFER_SPACE(3);
store_jump(b,greedy?maybe_finalize_jump:finalize_push,laststart-3);
b += 3;
}
GET_BUFFER_SPACE(3);
insert_jump(on_failure_jump, laststart, b + 3, b);
b += 3;
if (zero_times_ok) {
if (greedy == 0) {
GET_BUFFER_SPACE(3);
insert_jump(try_next, laststart, b + 3, b);
b += 3;
}
}
else {
GET_BUFFER_SPACE(3);
insert_jump(dummy_failure_jump, laststart, laststart + 6, b);
b += 3;
}
break;
case '.':
laststart = b;
BUFPUSH(anychar);
break;
case '[':
if (p == pend)
FREE_AND_RETURN(stackb, "invalid regular expression; '[' can't be the last character ie. can't start range at the end of pattern");
while ((b - bufp->buffer + 9 + (1 << BYTEWIDTH) / BYTEWIDTH)
> bufp->allocated)
EXTEND_BUFFER;
laststart = b;
if (*p == '^') {
BUFPUSH(charset_not);
p++;
}
else
BUFPUSH(charset);
p0 = p;
BUFPUSH((1 << BYTEWIDTH) / BYTEWIDTH);
memset(b, 0, (1 << BYTEWIDTH) / BYTEWIDTH + 2);
had_mbchar = 0;
had_num_literal = 0;
had_char_class = 0;
for (;;) {
int size;
unsigned last = (unsigned)-1;
if ((size = EXTRACT_UNSIGNED(&b[(1 << BYTEWIDTH) / BYTEWIDTH])) || current_mbctype) {
size = (1 << BYTEWIDTH) / BYTEWIDTH + 2 + size*8 + 8;
while (b + size + 1 > bufp->buffer + bufp->allocated)
EXTEND_BUFFER;
}
range_retry:
if (range && had_char_class) {
FREE_AND_RETURN(stackb, "invalid regular expression; can't use character class as an end value of range");
}
PATFETCH_RAW(c);
if (c == ']') {
if (p == p0 + 1) {
if (p == pend)
FREE_AND_RETURN(stackb, "invalid regular expression; empty character class");
re_warning("character class has `]' without escape");
}
else
break;
}
if (had_char_class && c == '-' && *p != ']')
FREE_AND_RETURN(stackb, "invalid regular expression; can't use character class as a start value of range");
if (ismbchar(c)) {
PATFETCH_MBC(c);
had_mbchar++;
}
had_char_class = 0;
if (c == '-' && ((p != p0 + 1 && *p != ']') ||
(p[0] == '-' && p[1] != ']') ||
range))
re_warning("character class has `-' without escape");
if (c == '[' && *p != ':')
re_warning("character class has `[' without escape");
if (c == '\\') {
PATFETCH_RAW(c);
switch (c) {
case 'w':
for (c = 0; c < (1 << BYTEWIDTH); c++) {
if (SYNTAX(c) == Sword ||
(!current_mbctype && SYNTAX(c) == Sword2))
SET_LIST_BIT(c);
}
if (current_mbctype) {
set_list_bits(0x80, 0xffffffff, b);
}
had_char_class = 1;
last = -1;
continue;
case 'W':
for (c = 0; c < (1 << BYTEWIDTH); c++) {
if (SYNTAX(c) != Sword &&
((current_mbctype && !re_mbctab[c]) ||
(!current_mbctype && SYNTAX(c) != Sword2)))
SET_LIST_BIT(c);
}
had_char_class = 1;
last = -1;
continue;
case 's':
for (c = 0; c < 256; c++)
if (ISSPACE(c))
SET_LIST_BIT(c);
had_char_class = 1;
last = -1;
continue;
case 'S':
for (c = 0; c < 256; c++)
if (!ISSPACE(c))
SET_LIST_BIT(c);
if (current_mbctype)
set_list_bits(0x80, 0xffffffff, b);
had_char_class = 1;
last = -1;
continue;
case 'd':
for (c = '0'; c <= '9'; c++)
SET_LIST_BIT(c);
had_char_class = 1;
last = -1;
continue;
case 'D':
for (c = 0; c < 256; c++)
if (!ISDIGIT(c))
SET_LIST_BIT(c);
if (current_mbctype)
set_list_bits(0x80, 0xffffffff, b);
had_char_class = 1;
last = -1;
continue;
case 'x':
c = scan_hex(p, 2, &numlen);
if (numlen == 0) goto invalid_escape;
p += numlen;
had_num_literal = 1;
break;
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
PATUNFETCH;
c = scan_oct(p, 3, &numlen);
p += numlen;
had_num_literal = 1;
break;
case 'M':
case 'C':
case 'c':
{
const char *pp;
--p;
c = read_special(p, pend, &pp);
if (c > 255) goto invalid_escape;
p = pp;
had_num_literal = 1;
}
break;
default:
c = read_backslash(c);
if (ismbchar(c)) {
PATFETCH_MBC(c);
had_mbchar++;
}
break;
}
}
else if (c == '[' && *p == ':') {
char str[CHAR_CLASS_MAX_LENGTH + 1];
PATFETCH_RAW(c);
c1 = 0;
if (p == pend)
FREE_AND_RETURN(stackb, "invalid regular expression; re can't end '[[:'");
for (;;) {
PATFETCH_RAW(c);
if (c == ':' || c == ']' || p == pend
|| c1 == CHAR_CLASS_MAX_LENGTH)
break;
str[c1++] = c;
}
str[c1] = '\0';
if (c == ':' && *p == ']') {
int ch;
char is_alnum = STREQ(str, "alnum");
char is_alpha = STREQ(str, "alpha");
char is_blank = STREQ(str, "blank");
char is_cntrl = STREQ(str, "cntrl");
char is_digit = STREQ(str, "digit");
char is_graph = STREQ(str, "graph");
char is_lower = STREQ(str, "lower");
char is_print = STREQ(str, "print");
char is_punct = STREQ(str, "punct");
char is_space = STREQ(str, "space");
char is_upper = STREQ(str, "upper");
char is_xdigit = STREQ(str, "xdigit");
if (!IS_CHAR_CLASS(str)){
snprintf(error_msg, ERROR_MSG_MAX_SIZE,
"invalid regular expression; [:%s:] is not a character class", str);
FREE_AND_RETURN(stackb, error_msg);
}
PATFETCH(c);
if (p == pend)
FREE_AND_RETURN(stackb, "invalid regular expression; range doesn't have ending ']' after a character class");
for (ch = 0; ch < 1 << BYTEWIDTH; ch++) {
if ( (is_alnum && ISALNUM(ch))
|| (is_alpha && ISALPHA(ch))
|| (is_blank && ISBLANK(ch))
|| (is_cntrl && ISCNTRL(ch))
|| (is_digit && ISDIGIT(ch))
|| (is_graph && ISGRAPH(ch))
|| (is_lower && ISLOWER(ch))
|| (is_print && ISPRINT(ch))
|| (is_punct && ISPUNCT(ch))
|| (is_space && ISSPACE(ch))
|| (is_upper && ISUPPER(ch))
|| (is_xdigit && ISXDIGIT(ch)))
SET_LIST_BIT(ch);
}
had_char_class = 1;
continue;
}
else {
c1 += 2;
while (c1--)
PATUNFETCH;
re_warning("character class has `[' without escape");
c = '[';
}
}
if (range) {
if (last > c)
goto invalid_pattern;
range = 0;
if (had_mbchar == 0) {
if (TRANSLATE_P()) {
for (;last<=c;last++)
SET_LIST_BIT(translate[last]);
}
else {
for (;last<=c;last++)
SET_LIST_BIT(last);
}
}
else if (had_mbchar == 2) {
set_list_bits(last, c, b);
}
else {
goto invalid_pattern;
}
}
else if (p[0] == '-' && p[1] != ']') {
last = c;
PATFETCH_RAW(c1);
range = 1;
goto range_retry;
}
else {
if (TRANSLATE_P() && c < 0x100) c = (unsigned char)translate[c];
if (had_mbchar == 0 && (!current_mbctype || !had_num_literal)) {
SET_LIST_BIT(c);
had_num_literal = 0;
}
else {
set_list_bits(c, c, b);
}
}
had_mbchar = 0;
}
while ((int)b[-1] > 0 && b[b[-1] - 1] == 0)
b[-1]--;
if (b[-1] != (1 << BYTEWIDTH) / BYTEWIDTH)
memmove(&b[(unsigned char)b[-1]], &b[(1 << BYTEWIDTH) / BYTEWIDTH],
2 + EXTRACT_UNSIGNED(&b[(1 << BYTEWIDTH) / BYTEWIDTH])*8);
b += b[-1] + 2 + EXTRACT_UNSIGNED(&b[(unsigned char)b[-1]])*8;
had_num_literal = 0;
break;
case '(':
{
int old_options = options;
int push_option = 0;
int casefold = 0;
PATFETCH(c);
if (c == '?') {
int negative = 0;
PATFETCH_RAW(c);
switch (c) {
case 'x': case 'm': case 'i': case '-':
for (;;) {
switch (c) {
case '-':
negative = 1;
break;
case ':':
case ')':
break;
case 'x':
if (negative)
options &= ~RE_OPTION_EXTENDED;
else
options |= RE_OPTION_EXTENDED;
break;
case 'm':
if (negative) {
if (options&RE_OPTION_MULTILINE) {
options &= ~RE_OPTION_MULTILINE;
}
}
else if (!(options&RE_OPTION_MULTILINE)) {
options |= RE_OPTION_MULTILINE;
}
push_option = 1;
break;
case 'i':
if (negative) {
if (options&RE_OPTION_IGNORECASE) {
options &= ~RE_OPTION_IGNORECASE;
}
}
else if (!(options&RE_OPTION_IGNORECASE)) {
options |= RE_OPTION_IGNORECASE;
}
casefold = 1;
break;
default:
FREE_AND_RETURN(stackb, "undefined (?...) inline option");
}
if (c == ')') {
c = '#';
break;
}
if (c == ':') break;
PATFETCH_RAW(c);
}
break;
case '#':
for (;;) {
PATFETCH(c);
if (c == ')') break;
}
c = '#';
break;
case ':':
case '=':
case '!':
case '>':
break;
default:
FREE_AND_RETURN(stackb, "undefined (?...) sequence");
}
}
else {
PATUNFETCH;
c = '(';
}
if (c == '#') {
if (push_option) {
BUFPUSH(option_set);
BUFPUSH(options);
}
if (casefold) {
if (options & RE_OPTION_IGNORECASE)
BUFPUSH(casefold_on);
else
BUFPUSH(casefold_off);
}
break;
}
if (stackp+8 >= stacke) {
DOUBLE_STACK(int);
}
*stackp++ = b - bufp->buffer;
*stackp++ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
*stackp++ = begalt - bufp->buffer;
switch (c) {
case '(':
BUFPUSH(start_memory);
BUFPUSH(regnum);
*stackp++ = regnum++;
*stackp++ = b - bufp->buffer;
BUFPUSH(0);
if (regnum >= RE_REG_MAX) goto too_big;
break;
case '=':
case '!':
case '>':
BUFPUSH(start_nowidth);
*stackp++ = b - bufp->buffer;
BUFPUSH(0);
BUFPUSH(0);
if (c != '!') break;
BUFPUSH(on_failure_jump);
*stackp++ = b - bufp->buffer;
BUFPUSH(0);
BUFPUSH(0);
break;
case ':':
BUFPUSH(start_paren);
pending_exact = 0;
default:
break;
}
if (push_option) {
BUFPUSH(option_set);
BUFPUSH(options);
}
if (casefold) {
if (options & RE_OPTION_IGNORECASE)
BUFPUSH(casefold_on);
else
BUFPUSH(casefold_off);
}
*stackp++ = c;
*stackp++ = old_options;
fixup_alt_jump = 0;
laststart = 0;
begalt = b;
}
break;
case ')':
if (stackp == stackb)
FREE_AND_RETURN(stackb, "unmatched )");
pending_exact = 0;
if (fixup_alt_jump) {
BUFPUSH(push_dummy_failure);
store_jump(fixup_alt_jump, jump, b);
}
if (options != stackp[-1]) {
if ((options ^ stackp[-1]) & RE_OPTION_IGNORECASE) {
BUFPUSH((options&RE_OPTION_IGNORECASE)?casefold_off:casefold_on);
}
if ((options ^ stackp[-1]) != RE_OPTION_IGNORECASE) {
BUFPUSH(option_set);
BUFPUSH(stackp[-1]);
}
}
p0 = b;
options = *--stackp;
switch (c = *--stackp) {
case '(':
{
char *loc = bufp->buffer + *--stackp;
*loc = regnum - stackp[-1];
BUFPUSH(stop_memory);
BUFPUSH(stackp[-1]);
BUFPUSH(regnum - stackp[-1]);
stackp--;
}
break;
case '!':
BUFPUSH(pop_and_fail);
STORE_NUMBER(bufp->buffer+stackp[-1], b - bufp->buffer - stackp[-1] - 2);
stackp--;
case '=':
BUFPUSH(stop_nowidth);
STORE_NUMBER(bufp->buffer+stackp[-1], b - bufp->buffer - stackp[-1] - 2);
BUFPUSH(0);
BUFPUSH(0);
stackp--;
break;
case '>':
BUFPUSH(stop_backtrack);
STORE_NUMBER(bufp->buffer+stackp[-1], b - bufp->buffer - stackp[-1] - 2);
BUFPUSH(0);
BUFPUSH(0);
stackp--;
break;
case ':':
BUFPUSH(stop_paren);
break;
default:
break;
}
begalt = *--stackp + bufp->buffer;
stackp--;
fixup_alt_jump = *stackp ? *stackp + bufp->buffer - 1 : 0;
laststart = *--stackp + bufp->buffer;
if (c == '!' || c == '=') laststart = b;
break;
case '|':
GET_BUFFER_SPACE(3);
insert_jump(on_failure_jump, begalt, b + 6, b);
pending_exact = 0;
b += 3;
if (fixup_alt_jump)
store_jump(fixup_alt_jump, jump_past_alt, b);
fixup_alt_jump = b;
GET_BUFFER_SPACE(3);
b += 3;
laststart = 0;
begalt = b;
break;
case '{':
if (!laststart) {
snprintf(error_msg, ERROR_MSG_MAX_SIZE,
"invalid regular expression; there's no previous pattern, to which '{' would define cardinality at %d",
p-pattern);
FREE_AND_RETURN(stackb, error_msg);
}
if( p == pend)
FREE_AND_RETURN(stackb, "invalid regular expression; '{' can't be last character" );
beg_interval = p - 1;
lower_bound = -1;
upper_bound = -1;
GET_UNSIGNED_NUMBER(lower_bound);
if (c == ',') {
GET_UNSIGNED_NUMBER(upper_bound);
}
else
upper_bound = lower_bound;
if (lower_bound < 0 || c != '}')
goto unfetch_interval;
if (lower_bound >= RE_DUP_MAX || upper_bound >= RE_DUP_MAX)
FREE_AND_RETURN(stackb, "too big quantifier in {,}");
if (upper_bound < 0) upper_bound = RE_DUP_MAX;
if (lower_bound > upper_bound)
FREE_AND_RETURN(stackb, "can't do {n,m} with n > m");
beg_interval = 0;
pending_exact = 0;
greedy = 1;
if (p != pend) {
PATFETCH(c);
if (c == '?') greedy = 0;
else PATUNFETCH;
}
if (lower_bound == 0) {
zero_times_ok = 1;
if (upper_bound == RE_DUP_MAX) {
many_times_ok = 1;
goto repeat;
}
if (upper_bound == 1) {
many_times_ok = 0;
goto repeat;
}
}
if (lower_bound == 1) {
if (upper_bound == 1) {
break;
}
if (upper_bound == RE_DUP_MAX) {
many_times_ok = 1;
zero_times_ok = 0;
goto repeat;
}
}
if (upper_bound == 0) {
GET_BUFFER_SPACE(3);
insert_jump(jump, laststart, b + 3, b);
b += 3;
break;
}
if (lower_bound == upper_bound) {
int mcnt;
int skip_stop_paren = 0;
if (b[-1] == stop_paren) {
skip_stop_paren = 1;
b--;
}
if (*laststart == exactn && laststart[1]+2 == b - laststart
&& laststart[1]*lower_bound < 256) {
mcnt = laststart[1];
GET_BUFFER_SPACE((lower_bound-1)*mcnt);
laststart[1] = lower_bound*mcnt;
while (--lower_bound) {
memcpy(b, laststart+2, mcnt);
b += mcnt;
}
if (skip_stop_paren) BUFPUSH(stop_paren);
break;
}
if (lower_bound < 5 && b - laststart < 10) {
mcnt = b - laststart;
GET_BUFFER_SPACE((lower_bound-1)*mcnt);
while (--lower_bound) {
memcpy(b, laststart, mcnt);
b += mcnt;
}
if (skip_stop_paren) BUFPUSH(stop_paren);
break;
}
if (skip_stop_paren) b++;
}
{
unsigned nbytes = upper_bound == 1 ? 10 : 20;
if (lower_bound == 0 && greedy == 0) {
GET_BUFFER_SPACE(3);
insert_jump(try_next, laststart, b + 3, b);
b += 3;
}
GET_BUFFER_SPACE(nbytes);
insert_jump_n(succeed_n, laststart, b + (nbytes/2),
b, lower_bound);
b += 5;
insert_op_2(set_number_at, laststart, b, 5, lower_bound);
b += 5;
if (upper_bound > 1) {
GET_BUFFER_SPACE(5);
store_jump_n(b, greedy?jump_n:finalize_push_n, laststart + 5,
upper_bound - 1);
b += 5;
insert_op_2(set_number_at, laststart, b, b - laststart,
upper_bound - 1);
b += 5;
}
}
break;
unfetch_interval:
re_warning("regexp has invalid interval");
p = beg_interval;
beg_interval = 0;
PATFETCH(c);
goto normal_char;
case '\\':
if (p == pend)
FREE_AND_RETURN(stackb, "invalid regular expression; '\\' can't be last character");
PATFETCH_RAW(c);
switch (c) {
case 's':
case 'S':
case 'd':
case 'D':
while (b - bufp->buffer + 9 + (1 << BYTEWIDTH) / BYTEWIDTH
> bufp->allocated)
EXTEND_BUFFER;
laststart = b;
if (c == 's' || c == 'd') {
BUFPUSH(charset);
}
else {
BUFPUSH(charset_not);
}
BUFPUSH((1 << BYTEWIDTH) / BYTEWIDTH);
memset(b, 0, (1 << BYTEWIDTH) / BYTEWIDTH + 2);
if (c == 's' || c == 'S') {
SET_LIST_BIT(' ');
SET_LIST_BIT('\t');
SET_LIST_BIT('\n');
SET_LIST_BIT('\r');
SET_LIST_BIT('\f');
}
else {
char cc;
for (cc = '0'; cc <= '9'; cc++) {
SET_LIST_BIT(cc);
}
}
while ((int)b[-1] > 0 && b[b[-1] - 1] == 0)
b[-1]--;
if (b[-1] != (1 << BYTEWIDTH) / BYTEWIDTH)
memmove(&b[(unsigned char)b[-1]], &b[(1 << BYTEWIDTH) / BYTEWIDTH],
2 + EXTRACT_UNSIGNED(&b[(1 << BYTEWIDTH) / BYTEWIDTH])*8);
b += b[-1] + 2 + EXTRACT_UNSIGNED(&b[(unsigned char)b[-1]])*8;
break;
case 'w':
laststart = b;
BUFPUSH(wordchar);
break;
case 'W':
laststart = b;
BUFPUSH(notwordchar);
break;
#ifndef RUBY
case '<':
BUFPUSH(wordbeg);
break;
case '>':
BUFPUSH(wordend);
break;
#endif
case 'b':
BUFPUSH(wordbound);
break;
case 'B':
BUFPUSH(notwordbound);
break;
case 'A':
BUFPUSH(begbuf);
break;
case 'Z':
if ((bufp->options & RE_OPTION_SINGLELINE) == 0) {
BUFPUSH(endbuf2);
break;
}
case 'z':
BUFPUSH(endbuf);
break;
case 'G':
BUFPUSH(begpos);
break;
case 'x':
had_mbchar = 0;
c = scan_hex(p, 2, &numlen);
if (numlen == 0) goto invalid_escape;
p += numlen;
had_num_literal = 1;
goto numeric_char;
case '0':
had_mbchar = 0;
c = scan_oct(p, 2, &numlen);
p += numlen;
had_num_literal = 1;
goto numeric_char;
case '1': case '2': case '3':
case '4': case '5': case '6':
case '7': case '8': case '9':
PATUNFETCH;
p0 = p;
had_mbchar = 0;
c1 = 0;
GET_UNSIGNED_NUMBER(c1);
if (!ISDIGIT(c)) PATUNFETCH;
if (9 < c1 && c1 >= regnum) {
c = scan_oct(p0, 3, &numlen) & 0xff;
p = p0 + numlen;
c1 = 0;
had_num_literal = 1;
goto numeric_char;
}
laststart = b;
BUFPUSH(duplicate);
BUFPUSH(c1);
break;
case 'M':
case 'C':
case 'c':
p0 = --p;
c = read_special(p, pend, &p0);
if (c > 255) goto invalid_escape;
p = p0;
had_num_literal = 1;
goto numeric_char;
default:
c = read_backslash(c);
goto normal_char;
}
break;
case '#':
if (options & RE_OPTION_EXTENDED) {
while (p != pend) {
PATFETCH(c);
if (c == '\n') break;
}
break;
}
goto normal_char;
case ' ':
case '\t':
case '\f':
case '\r':
case '\n':
if (options & RE_OPTION_EXTENDED)
break;
default:
if (c == ']')
re_warning("regexp has `]' without escape");
else if (c == '}')
re_warning("regexp has `}' without escape");
normal_char:
had_mbchar = 0;
if (ismbchar(c)) {
had_mbchar = 1;
c1 = p - pattern;
}
numeric_char:
nextp = p + mbclen(c) - 1;
if (!pending_exact || pending_exact + *pending_exact + 1 != b
|| *pending_exact >= (c1 ? 0176 : 0177)
|| (nextp < pend &&
( *nextp == '+' || *nextp == '?'
|| *nextp == '*' || *nextp == '^'
|| *nextp == '{'))) {
laststart = b;
BUFPUSH(exactn);
pending_exact = b;
BUFPUSH(0);
}
if (had_num_literal || c == 0xff) {
BUFPUSH(0xff);
(*pending_exact)++;
had_num_literal = 0;
}
BUFPUSH(c);
(*pending_exact)++;
if (had_mbchar) {
int len = mbclen(c) - 1;
while (len--) {
PATFETCH_RAW(c);
BUFPUSH(c);
(*pending_exact)++;
}
}
}
}
if (fixup_alt_jump)
store_jump(fixup_alt_jump, jump, b);
if (stackp != stackb)
FREE_AND_RETURN(stackb, "unmatched (");
laststart = bufp->buffer;
if (laststart != b) {
if (*laststart == dummy_failure_jump) laststart += 3;
else if (*laststart == try_next) laststart += 3;
if (*laststart == anychar_repeat) {
bufp->options |= RE_OPTIMIZE_ANCHOR;
}
}
bufp->used = b - bufp->buffer;
bufp->re_nsub = regnum;
laststart = bufp->buffer;
if (laststart != b) {
if (*laststart == start_memory) laststart += 3;
if (*laststart == exactn) {
bufp->options |= RE_OPTIMIZE_EXACTN;
bufp->must = laststart+1;
}
}
if (!bufp->must) {
bufp->must = calculate_must_string(bufp->buffer, b);
}
if (current_mbctype == MBCTYPE_SJIS) bufp->options |= RE_OPTIMIZE_NO_BM;
else if (bufp->must) {
int i;
int len = (unsigned char)bufp->must[0];
for (i=1; i<len; i++) {
if ((unsigned char)bufp->must[i] == 0xff ||
(current_mbctype && ismbchar(bufp->must[i]))) {
bufp->options |= RE_OPTIMIZE_NO_BM;
break;
}
}
if (!(bufp->options & RE_OPTIMIZE_NO_BM)) {
bufp->must_skip = (int *) xmalloc((1 << BYTEWIDTH)*sizeof(int));
bm_init_skip(bufp->must_skip, (unsigned char*)bufp->must+1,
(unsigned char)bufp->must[0],
(unsigned char*)(MAY_TRANSLATE()?translate:0));
}
}
bufp->regstart = TMALLOC(regnum, unsigned char*);
bufp->regend = TMALLOC(regnum, unsigned char*);
bufp->old_regstart = TMALLOC(regnum, unsigned char*);
bufp->old_regend = TMALLOC(regnum, unsigned char*);
bufp->reg_info = TMALLOC(regnum, register_info_type);
bufp->best_regstart = TMALLOC(regnum, unsigned char*);
bufp->best_regend = TMALLOC(regnum, unsigned char*);
FREE_AND_RETURN(stackb, 0);
invalid_pattern:
FREE_AND_RETURN(stackb, "invalid regular expression");
end_of_pattern:
FREE_AND_RETURN(stackb, "premature end of regular expression");
too_big:
FREE_AND_RETURN(stackb, "regular expression too big");
memory_exhausted:
FREE_AND_RETURN(stackb, "memory exhausted");
nested_meta:
FREE_AND_RETURN(stackb, "nested *?+ in regexp");
invalid_escape:
FREE_AND_RETURN(stackb, "Invalid escape character syntax");
}
/*
 * Release a pattern buffer: every allocation hanging off `bufp` that this
 * routine knows about, and finally the structure itself.  `bufp` is
 * dangling once this returns.
 */
void
re_free_pattern(bufp)
    struct re_pattern_buffer *bufp;
{
    /* Per-register work arrays. */
    xfree(bufp->reg_info);
    xfree(bufp->best_regend);
    xfree(bufp->best_regstart);
    xfree(bufp->old_regend);
    xfree(bufp->old_regstart);
    xfree(bufp->regend);
    xfree(bufp->regstart);

    /* The skip table is only released when one is present. */
    if (bufp->must_skip)
        xfree(bufp->must_skip);

    /* Fastmap and compiled byte code. */
    xfree(bufp->fastmap);
    xfree(bufp->buffer);

    xfree(bufp);
}
/*
 * Write a jump instruction at `from`: the opcode byte, then the distance
 * to `to` stored with STORE_NUMBER.  The distance is counted from
 * `from + 3`, i.e. from just past the instruction.
 */
static void
store_jump(from, opcode, to)
    char *from, *to;
    int opcode;
{
    char *operand = from + 1;

    *from = (char)opcode;
    STORE_NUMBER(operand, to - (from + 3));
}
/*
 * Open a three-byte gap at `from` and write a jump to `to` into it.
 * Everything in [from, current_end) is moved up by three bytes first; the
 * caller must already have reserved that room after `current_end`.
 */
static void
insert_jump(op, from, to, current_end)
    int op;
    char *from, *to, *current_end;
{
    /* The regions overlap, hence memmove rather than memcpy. */
    memmove(from + 3, from, (size_t)(current_end - from));
    store_jump(from, op, to);
}
/*
 * Write a counted jump at `from`: opcode byte, the distance to `to`
 * (counted from `from + 3`) at `from + 1`, and the number `n` at
 * `from + 3`.
 */
static void
store_jump_n(from, opcode, to, n)
    char *from, *to;
    int opcode;
    unsigned n;
{
    char *offset_field = from + 1;
    char *count_field = from + 3;

    *from = (char)opcode;
    STORE_NUMBER(offset_field, to - (from + 3));
    STORE_NUMBER(count_field, n);
}
/*
 * Open a five-byte gap at `from` and write a counted jump (see
 * store_jump_n) into it.  Everything in [from, current_end) is moved up by
 * five bytes first; the caller must already have reserved that room.
 */
static void
insert_jump_n(op, from, to, current_end, n)
    int op;
    char *from, *to, *current_end;
    unsigned n;
{
    /* The regions overlap, hence memmove rather than memcpy. */
    memmove(from + 5, from, (size_t)(current_end - from));
    store_jump_n(from, op, to, n);
}
#if 0
/*
 * Disabled (compiled out by the surrounding #if 0).
 *
 * Would open a one-byte gap at `there` by moving [there, current_end) up
 * by one byte, then store `op` in the gap.
 */
static void
insert_op(op, there, current_end)
int op;
char *there, *current_end;
{
register char *pfrom = current_end;
register char *pto = current_end + 1;
/* Copy backwards so the overlapping move does not clobber its source. */
while (pfrom != there)
*--pto = *--pfrom;
there[0] = (char)op;
}
#endif
/*
 * Open a five-byte gap at `there` and write an instruction with two
 * numeric operands into it: `op` at offset 0, `num_1` at offset 1 and
 * `num_2` at offset 3 (both via STORE_NUMBER).  Everything in
 * [there, current_end) is moved up by five bytes first; the caller must
 * already have reserved that room.
 */
static void
insert_op_2(op, there, current_end, num_1, num_2)
    int op;
    char *there, *current_end;
    int num_1, num_2;
{
    char *first_operand = there + 1;
    char *second_operand = there + 3;

    /* The regions overlap, hence memmove rather than memcpy. */
    memmove(there + 5, there, (size_t)(current_end - there));

    *there = (char)op;
    STORE_NUMBER(first_operand, num_1);
    STORE_NUMBER(second_operand, num_2);
}
/* Byte equality, compared through `translate` when a table is supplied. */
#define trans_eq(c1, c2, translate) (translate?(translate[c1]==translate[c2]):((c1)==(c2)))

/*
 * Test whether the pattern bytes [little, lend) match at the start of
 * [big, bend).
 *
 * A 0xff byte in the pattern is an escape prefix: it is skipped and the
 * byte after it is the one compared.  `translate` may be a null pointer,
 * in which case bytes are compared directly.
 *
 * Returns 1 when the whole pattern was consumed and every byte matched,
 * 0 otherwise (including when the subject ends first).
 */
static int
slow_match(little, lend, big, bend, translate)
    const unsigned char *little, *lend;
    const unsigned char *big, *bend;
    const unsigned char *translate;
{
    int c;

    while (little < lend && big < bend) {
        c = *little++;
        if (c == 0xff)
            c = *little++;
        /* Fail right here.  `little` has already been advanced, so on the
           last pattern byte it equals `lend`; testing `little == lend`
           after merely breaking out would report a mismatch as a match. */
        if (!trans_eq(*big++, c, translate))
            return 0;
    }
    return little == lend;
}
/*
 * Search [big, big + blen) for the pattern bytes [little, little + llen)
 * without a skip table.
 *
 * The first pattern byte is used to find candidate positions, and
 * slow_match() then checks the whole pattern at each candidate.  A
 * leading 0xff in the pattern is an escape prefix: the byte after it is
 * the one searched for, and it is compared without translation and
 * without skipping over multibyte characters.  Otherwise, candidates are
 * only looked for at character starts as determined by ismbchar()/mbclen().
 * `translate` may be a null pointer.
 *
 * Returns the offset of the first candidate accepted by slow_match(),
 * or -1 when there is none.
 */
static int
slow_search(little, llen, big, blen, translate)
    const unsigned char *little;
    int llen;
    const unsigned char *big;
    int blen;
    const char *translate;
{
    const unsigned char *bsave = big;
    const unsigned char *bend = big + blen;
    register int c;
    int fescape = 0;

    c = *little;
    if (c == 0xff) {
        c = little[1];
        fescape = 1;
    }
    else if (translate && !ismbchar(c)) {
        c = translate[c];
    }

    while (big < bend) {
        /* Advance `big` to the next position whose first byte fits. */
        if (fescape) {
            while (big < bend) {
                if (*big == c) break;
                big++;
            }
        }
        else if (translate && !ismbchar(c)) {
            while (big < bend) {
                if (ismbchar(*big)) big += mbclen(*big) - 1;
                else if (translate[*big] == c) break;
                big++;
            }
        }
        else {
            while (big < bend) {
                if (*big == c) break;
                if (ismbchar(*big)) big += mbclen(*big) - 1;
                big++;
            }
        }

        if (slow_match(little, little + llen, big, bend, (unsigned char *)translate))
            return big - bsave;

        /* The scan above may have run off the end of the subject; do not
           read *big in that case just to compute a step that would leave
           the loop anyway. */
        if (big >= bend)
            break;
        big += mbclen(*big);
    }
    return -1;
}
/*
 * Fill the 256-entry shift table `skip` for the `m`-byte pattern `pat`.
 *
 * Every entry starts at `m`.  Then, for each pattern byte except the last
 * one, the entry for that byte (looked up through `translate` when a table
 * is given) is set to its distance from the end of the pattern; later
 * occurrences overwrite earlier ones.
 */
static void
bm_init_skip(skip, pat, m, translate)
    int *skip;
    unsigned char *pat;
    int m;
    const unsigned char *translate;
{
    const int last = m - 1;
    int *slot;
    int pos;

    for (slot = skip; slot != skip + 256; slot++)
        *slot = m;

    for (pos = 0; pos < last; pos++) {
        unsigned char key = translate ? translate[pat[pos]] : pat[pos];
        skip[key] = last - pos;
    }
}
/*
 * Search [big, big + blen) for the `llen`-byte pattern `little` using the
 * shift table `skip` (as produced by bm_init_skip).
 *
 * The pattern is compared right to left against the window ending at
 * `tail`; on a mismatch the window is shifted by the table entry for the
 * subject byte under the pattern's last position.  When `translate` is
 * given, both sides of every comparison and the table lookup go through it.
 *
 * Returns the offset of the first match, or -1 when there is none.
 */
static int
bm_search(little, llen, big, blen, skip, translate)
    const unsigned char *little;
    int llen;
    const unsigned char *big;
    int blen;
    int *skip;
    const unsigned char *translate;
{
    const int last = llen - 1;
    int tail = last;

    while (tail < blen) {
        int bi = tail;      /* cursor in the subject */
        int li = last;      /* cursor in the pattern */

        if (translate) {
            while (li >= 0 && translate[big[bi]] == translate[little[li]]) {
                bi--;
                li--;
            }
        }
        else {
            while (li >= 0 && big[bi] == little[li]) {
                bi--;
                li--;
            }
        }
        if (li < 0)
            return bi + 1;

        tail += skip[translate ? translate[big[tail]] : big[tail]];
    }
    return -1;
}
/*
 * Build bufp->fastmap from the compiled pattern in bufp->buffer.
 *
 * The fastmap has one entry per byte value.  An entry is made non-zero
 * when a match may begin with that byte; the charset and escaped-literal
 * cases store 2 instead of 1 and also set RE_OPTIMIZE_BMATCH.  The routine
 * walks the byte code from the start, following jumps, and keeps a work
 * stack of alternative positions (targets of on_failure_jump / succeed_n)
 * that still have to be explored.
 *
 * Side effects on bufp: fastmap is rewritten, fastmap_accurate is set to 1,
 * can_be_null is recomputed, and option bits may be OR-ed into options.
 *
 * Returns 0 on success and -2 when the work stack could not be grown.
 */
static int
re_compile_fastmap0(bufp)
    struct re_pattern_buffer *bufp;
{
    unsigned char *pattern = (unsigned char*)bufp->buffer;
    int size = bufp->used;
    register char *fastmap = bufp->fastmap;
    register unsigned char *p = pattern;
    register unsigned char *pend = pattern + size;
    register int j, k;
    unsigned is_a_succeed_n;

    /* Work stack.  A push is `*++stackp = x` and the stack is empty when
       stackp == stackb, so slot 0 is never written and the highest usable
       slot is stacke[-1]. */
    unsigned char *stacka[NFAILURES];
    unsigned char **stackb = stacka;
    unsigned char **stackp = stackb;
    unsigned char **stacke = stackb + NFAILURES;
    int options = bufp->options;

    memset(fastmap, 0, (1 << BYTEWIDTH));
    bufp->fastmap_accurate = 1;
    bufp->can_be_null = 0;

    while (p) {
        is_a_succeed_n = 0;
        if (p == pend) {
            /* Reached the end of the pattern without consuming a byte. */
            bufp->can_be_null = 1;
            break;
        }
#ifdef SWITCH_ENUM_BUG
        switch ((int)((enum regexpcode)*p++))
#else
        switch ((enum regexpcode)*p++)
#endif
        {
          case exactn:
            if (p[1] == 0xff) {
                /* 0xff is an escape prefix; the real byte follows it. */
                if (TRANSLATE_P())
                    fastmap[translate[p[2]]] = 2;
                else
                    fastmap[p[2]] = 2;
                bufp->options |= RE_OPTIMIZE_BMATCH;
            }
            else if (TRANSLATE_P())
                fastmap[translate[p[1]]] = 1;
            else
                fastmap[p[1]] = 1;
            break;

          /* Opcodes without operands that consume no input. */
          case begline:
          case begbuf:
          case begpos:
          case endbuf:
          case endbuf2:
          case wordbound:
          case notwordbound:
          case wordbeg:
          case wordend:
          case pop_and_fail:
          case push_dummy_failure:
          case start_paren:
          case stop_paren:
            continue;

          case casefold_on:
            bufp->options |= RE_MAY_IGNORECASE;
            options |= RE_OPTION_IGNORECASE;
            continue;

          case casefold_off:
            options &= ~RE_OPTION_IGNORECASE;
            continue;

          case option_set:
            options = *p++;
            continue;

          case endline:
            if (TRANSLATE_P())
                fastmap[translate['\n']] = 1;
            else
                fastmap['\n'] = 1;
            if ((options & RE_OPTION_SINGLELINE) == 0 && bufp->can_be_null == 0)
                bufp->can_be_null = 2;
            break;

          case jump_n:
          case finalize_jump:
          case maybe_finalize_jump:
          case jump:
          case jump_past_alt:
          case dummy_failure_jump:
          case finalize_push:
          case finalize_push_n:
            EXTRACT_NUMBER_AND_INCR(j, p);
            p += j;
            if (j > 0)
                continue;
            /* Backward jump: if it lands on a failure-pushing opcode,
               step over that opcode's target as well and drop the stack
               entry that refers to the same place, if it is on top. */
            if ((enum regexpcode)*p != on_failure_jump
                && (enum regexpcode)*p != try_next
                && (enum regexpcode)*p != succeed_n)
                continue;
            p++;
            EXTRACT_NUMBER_AND_INCR(j, p);
            p += j;
            if (stackp != stackb && *stackp == p)
                stackp--;
            continue;

          case try_next:
          case start_nowidth:
          case stop_nowidth:
          case stop_backtrack:
            p += 2;
            continue;

          case succeed_n:
            is_a_succeed_n = 1;
            /* Read the counter operand. */
            EXTRACT_NUMBER(k, p + 2);
            if (k != 0) {
                p += 4;
                continue;
            }
            /* A zero count behaves like on_failure_jump. */
            /* fall through */

          case on_failure_jump:
            EXTRACT_NUMBER_AND_INCR(j, p);
            if (p + j < pend) {
                /* The push below writes stackp[1].  Grow the stack while
                   that slot would be stacke itself; testing
                   stackp == stacke would let the push write one element
                   past the end of the stack first. */
                if (stackp + 1 >= stacke) {
                    EXPAND_FAIL_STACK();
                }
                *++stackp = p + j;
            }
            else {
                bufp->can_be_null = 1;
            }
            if (is_a_succeed_n)
                EXTRACT_NUMBER_AND_INCR(k, p);
            continue;

          case set_number_at:
            p += 4;
            continue;

          case start_memory:
          case stop_memory:
            p += 2;
            continue;

          case duplicate:
            bufp->can_be_null = 1;
            if (*p >= bufp->re_nsub) break;
            fastmap['\n'] = 1;
            /* fall through */

          case anychar_repeat:
          case anychar:
            for (j = 0; j < (1 << BYTEWIDTH); j++) {
                if (j != '\n' || (options & RE_OPTION_MULTILINE))
                    fastmap[j] = 1;
            }
            if (bufp->can_be_null) {
                FREE_AND_RETURN(stackb, 0);
            }
            if ((enum regexpcode)p[-1] == anychar_repeat) {
                continue;
            }
            break;

          case wordchar:
            for (j = 0; j < 0x80; j++) {
                if (SYNTAX(j) == Sword)
                    fastmap[j] = 1;
            }
            switch (current_mbctype) {
              case MBCTYPE_ASCII:
                for (j = 0x80; j < (1 << BYTEWIDTH); j++) {
                    if (SYNTAX(j) == Sword2)
                        fastmap[j] = 1;
                }
                break;
              case MBCTYPE_EUC:
              case MBCTYPE_SJIS:
              case MBCTYPE_UTF8:
                for (j = 0x80; j < (1 << BYTEWIDTH); j++) {
                    if (re_mbctab[j])
                        fastmap[j] = 1;
                }
                break;
            }
            break;

          case notwordchar:
            for (j = 0; j < 0x80; j++)
                if (SYNTAX(j) != Sword)
                    fastmap[j] = 1;
            switch (current_mbctype) {
              case MBCTYPE_ASCII:
                for (j = 0x80; j < (1 << BYTEWIDTH); j++) {
                    if (SYNTAX(j) != Sword2)
                        fastmap[j] = 1;
                }
                break;
              case MBCTYPE_EUC:
              case MBCTYPE_SJIS:
              case MBCTYPE_UTF8:
                for (j = 0x80; j < (1 << BYTEWIDTH); j++) {
                    if (!re_mbctab[j])
                        fastmap[j] = 1;
                }
                break;
            }
            break;

          case charset:
            /* Single-byte part: the bitmap that follows the length byte. */
            for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
                if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) {
                    int tmp = TRANSLATE_P()?translate[j]:j;
                    fastmap[tmp] = 1;
                }
            {
                /* Range table that follows the bitmap: `size` entries of
                   eight bytes each. */
                unsigned short size;
                unsigned long c, beg, end;

                p += p[-1] + 2;
                size = EXTRACT_UNSIGNED(&p[-2]);
                for (j = 0; j < (int)size; j++) {
                    c = EXTRACT_MBC(&p[j*8]);
                    beg = WC2MBC1ST(c);
                    c = EXTRACT_MBC(&p[j*8+4]);
                    end = WC2MBC1ST(c);
                    while (beg <= end) {
                        if (c < 0x100) {
                            fastmap[beg] = 2;
                            bufp->options |= RE_OPTIMIZE_BMATCH;
                        }
                        else if (ismbchar(beg))
                            fastmap[beg] = 1;
                        beg++;
                    }
                }
            }
            break;

          case charset_not:
            /* Bytes beyond the stored bitmap. */
            for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
                if (!ismbchar(j))
                    fastmap[j] = 1;
            /* Bytes inside the bitmap whose bit is clear. */
            for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
                if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) {
                    if (!ismbchar(j))
                        fastmap[j] = 1;
                }
            {
                unsigned short size;
                unsigned long c, beg;
                int num_literal = 0;

                p += p[-1] + 2;
                size = EXTRACT_UNSIGNED(&p[-2]);
                if (size == 0) {
                    /* No range table: every multibyte lead byte is allowed. */
                    for (j = 0x80; j < (1 << BYTEWIDTH); j++)
                        if (ismbchar(j))
                            fastmap[j] = 1;
                    break;
                }
                for (j = 0,c = 0;j < (int)size; j++) {
                    unsigned int cc = EXTRACT_MBC(&p[j*8]);
                    beg = WC2MBC1ST(cc);
                    while (c <= beg) {
                        if (ismbchar(c))
                            fastmap[c] = 1;
                        c++;
                    }

                    cc = EXTRACT_MBC(&p[j*8+4]);
                    if (cc < 0xff) {
                        num_literal = 1;
                        while (c <= cc) {
                            if (ismbchar(c))
                                fastmap[c] = 1;
                            c++;
                        }
                    }
                    c = WC2MBC1ST(cc);
                }

                for (j = c; j < (1 << BYTEWIDTH); j++) {
                    if (num_literal)
                        fastmap[j] = 1;
                    if (ismbchar(j))
                        fastmap[j] = 1;
                }
            }
            break;

          case unused:
            break;
        }

        /* This path is finished: resume at the most recently saved
           alternative, or stop when none is left. */
        if (stackp != stackb)
            p = *stackp--;
        else
            break;
    }
    FREE_AND_RETURN(stackb, 0);
  memory_exhausted:
    FREE_AND_RETURN(stackb, -2);
}
/*
 * Public entry point for building the fastmap of `bufp`.  The status
 * reported by re_compile_fastmap0() is deliberately dropped.
 */
void
re_compile_fastmap(bufp)
    struct re_pattern_buffer *bufp;
{
    int ignored_status = re_compile_fastmap0(bufp);

    (void)ignored_status;
}
/*
 * Adjust `startpos` relative to the position `head` that mbc_startpos()
 * reports for it (presumably the start of the multibyte character
 * containing `startpos` -- confirm against mbc_startpos).
 *
 * If `head` is not before `startpos`, `startpos` is returned unchanged.
 * Otherwise, with `next` being `head` plus mbclen() of the byte there:
 *   - range > 0:  returns `next`;
 *   - otherwise:  returns `next` if it does not lie beyond `startpos`,
 *                 else `head`.
 *
 * `size` is not used.
 */
int
re_mbc_startpos(string, size, startpos, range)
    const char *string;
    int size, startpos, range;
{
    int head = mbc_startpos(string, startpos);
    int next;

    if (head >= startpos)
        return startpos;

    next = head + mbclen(string[head]);
    if (range > 0)
        return next;
    return (next <= startpos) ? next : head;
}
/*
 * Return the position at which a search beginning at `startpos` should
 * really start.
 *
 * The fastmap is (re)built first when it is not marked accurate; a
 * non-zero status from that step is returned as is.  The position is then
 * passed through re_mbc_startpos() only when a multibyte mode is active,
 * `startpos` is positive and the pattern does not carry
 * RE_OPTIMIZE_BMATCH; in every other case `startpos` is returned
 * unchanged.
 */
int
re_adjust_startpos(bufp, string, size, startpos, range)
    struct re_pattern_buffer *bufp;
    const char *string;
    int size, startpos, range;
{
    if (!bufp->fastmap_accurate) {
        int status = re_compile_fastmap0(bufp);
        if (status != 0)
            return status;
    }

    /* Read the option bits only now: building the fastmap can set them. */
    if (!current_mbctype || startpos <= 0
        || (bufp->options & RE_OPTIMIZE_BMATCH))
        return startpos;

    return re_mbc_startpos(string, size, startpos, range);
}
/* Forward declaration: the matcher proper, defined further down. */
static int re_match_exec _((struct re_pattern_buffer *, const char *, int, int, int,
struct re_registers *));
/*
 * Search `string` (of length `size`) for a match of the compiled pattern
 * `bufp`.
 *
 * Candidate start positions begin at `startpos` and move forward while
 * `range` is positive, backward while it is negative; `range` is counted
 * down (or up) to zero as positions are tried.  When `regs` is non-null it
 * is handed to the matcher.
 *
 * Returns the start position of the match, -1 when nothing matched (also
 * for an out-of-range `startpos` or a null `string` with non-zero `size`),
 * or another negative value passed through from the fastmap builder or
 * the matcher (-2 in the code visible here).
 */
int
re_search(bufp, string, size, startpos, range, regs)
struct re_pattern_buffer *bufp;
const char *string;
int size, startpos, range;
struct re_registers *regs;
{
register char *fastmap = bufp->fastmap;
int val, anchor = 0, initpos = startpos;
/* Reject start positions outside the string. */
if (startpos < 0 || startpos > size)
return -1;
/* A null string is accepted only as the empty string. */
if (!string) {
if (size == 0) string = "";
else return -1;
}
/* Bring the fastmap up to date if one is in use. */
if (fastmap && !bufp->fastmap_accurate) {
int ret = re_compile_fastmap0(bufp);
if (ret) return ret;
}
/* Shortcuts keyed on the first opcode of the pattern. */
if (bufp->used > 0) {
switch ((enum regexpcode)bufp->buffer[0]) {
case begbuf:
begbuf_match:
/* Searching forward, a match can only start at position 0. */
if (range > 0) {
if (startpos > 0) return -1;
else {
val = re_match(bufp, string, size, 0, regs);
if (val >= 0) return 0;
return val;
}
}
break;
case begline:
anchor = 1;
break;
case begpos:
/* Only the start position itself is tried. */
val = re_match(bufp, string, size, startpos, regs);
if (val >= 0) return startpos;
return val;
default:
break;
}
}
/* RE_OPTIMIZE_ANCHOR: treated like begbuf for a forward multiline search,
   otherwise like a line anchor. */
if (bufp->options & RE_OPTIMIZE_ANCHOR) {
if (bufp->options&RE_OPTION_MULTILINE && range > 0) {
goto begbuf_match;
}
anchor = 1;
}
/* Pre-filter on the "must" string: byte 0 of bufp->must is its length and
   the text follows.  If it does not occur in the searched part of the
   string, give up without running the matcher. */
if (bufp->must) {
int len = ((unsigned char*)bufp->must)[0];
int pos, pbeg, pend;
pbeg = startpos;
pend = startpos + range;
if (pbeg > pend) {
pos = pend; pend = pbeg; pbeg = pos;
}
/* The scan always runs to the end of the string. */
pend = size;
if (bufp->options & RE_OPTIMIZE_NO_BM) {
pos = slow_search((unsigned char *)(bufp->must+1), len,
(unsigned char*)(string+pbeg), pend-pbeg,
(char *)(MAY_TRANSLATE()?translate:0));
}
else {
pos = bm_search((unsigned char *)(bufp->must+1), len,
(unsigned char *)(string+pbeg), pend-pbeg,
bufp->must_skip,
MAY_TRANSLATE()?translate:0);
}
if (pos == -1) return -1;
/* With RE_OPTIMIZE_EXACTN on a forward search, start at the position
   that was found. */
if (range > 0 && (bufp->options & RE_OPTIMIZE_EXACTN)) {
startpos += pos;
range -= pos;
if (range < 0) return -1;
}
}
/* Main loop: one candidate start position per iteration. */
for (;;) {
/* Use the fastmap to skip positions whose first byte cannot begin a
   match.  Not done when can_be_null is 1 or when sitting at position 0
   of an anchored pattern. */
if (fastmap && startpos < size
&& bufp->can_be_null != 1 && !(anchor && startpos == 0)) {
if (range > 0) {
register unsigned char *p, c;
int irange = range;
p = (unsigned char*)string+startpos;
/* Forward: advance until a byte with a fastmap entry is found. */
while (range > 0) {
c = *p++;
if (ismbchar(c)) {
int len;
if (fastmap[c])
break;
len = mbclen(c) - 1;
/* Step over the rest of the multibyte character; a fastmap value of 2
   on one of those bytes stops the scan there. */
while (len--) {
c = *p++;
range--;
if (fastmap[c] == 2)
goto startpos_adjust;
}
}
else {
if (fastmap[MAY_TRANSLATE() ? translate[c] : c])
break;
}
range--;
}
startpos_adjust:
startpos += irange - range;
}
else {
/* Backward (or zero range): test just the byte at startpos. */
register unsigned char c;
c = string[startpos];
c &= 0xff;
if (MAY_TRANSLATE() ? !fastmap[translate[c]] : !fastmap[c])
goto advance;
}
}
if (startpos > size) return -1;
/* At the end of a non-empty string on a forward search, only a pattern
   that is unanchored and can match the empty string is still tried. */
if ((anchor || !bufp->can_be_null) && range > 0 && size > 0 && startpos == size)
return -1;
val = re_match_exec(bufp, string, size, startpos, initpos, regs);
if (val >= 0) return startpos;
if (val == -2) return -2;
#ifndef NO_ALLOCA
#ifdef C_ALLOCA
alloca(0);
#endif
#endif
/* Anchored forward search: unless already just after a newline (or at
   position 0), skip ahead to the next newline. */
if (range > 0) {
if (anchor && startpos < size &&
(startpos < 1 || string[startpos-1] != '\n')) {
while (range > 0 && string[startpos] != '\n') {
range--;
startpos++;
}
}
}
advance:
/* Move to the next candidate position, one whole character at a time. */
if (!range)
break;
else if (range > 0) {
const char *d = string + startpos;
if (ismbchar(*d)) {
int len = mbclen(*d) - 1;
range-=len, startpos+=len;
if (!range)
break;
}
range--, startpos++;
}
else {
range++, startpos--;
{
const char *s, *d, *p;
s = string; d = string + startpos;
/* Count the run of ismbchar() bytes immediately before the new position;
   depending on its parity, step back one more byte. */
for (p = d; p-- > s && ismbchar(*p); )
;
if (!((d - p) & 1)) {
if (!range)
break;
range++, startpos--;
}
}
}
}
return -1;
}
/* Accessors for the two flag bits of a register_info_type value. */
#define IS_ACTIVE(R) ((R).bits.is_active)
#define MATCHED_SOMETHING(R) ((R).bits.matched_something)
/* Stack slots pushed per register by PUSH_FAILURE_POINT
   (regstart, regend, reg_info word). */
#define NUM_REG_ITEMS 3
/* Stack slots pushed by PUSH_FAILURE_COUNT (the value and its address). */
#define NUM_COUNT_ITEMS 2
/* Trailing slots of a failure point: pattern place, string place, options,
   and a final marker slot. */
#define NUM_NONREG_ITEMS 4
/* Size estimate for one failure point; used to size the matcher's stack.
   Relies on the caller's local `num_regs`. */
#define MAX_NUM_FAILURE_ITEMS (num_regs * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
/* Room requested before pushing a failure point.  Relies on the local
   `last_used_reg` declared inside PUSH_FAILURE_POINT. */
#define NUM_FAILURE_ITEMS (last_used_reg * NUM_REG_ITEMS + NUM_NONREG_ITEMS + 1)
/* Save the number currently stored at `ptr` in the pattern, together with
   `ptr` itself, so that POP_FAILURE_COUNT can put it back.  Uses the
   caller's `stackp` and `num_failure_counts`. */
#define PUSH_FAILURE_COUNT(ptr) \
do { \
int c; \
EXTRACT_NUMBER(c, ptr); \
ENSURE_FAIL_STACK(NUM_COUNT_ITEMS); \
*stackp++ = (unsigned char*)(long)c; \
*stackp++ = (ptr); \
num_failure_counts++; \
} while (0)
/* Push a failure point.  From bottom to top it consists of: the number of
   saved counters pushed since the previous failure point (which is then
   reset to 0), the triples for registers 1..last_used_reg, last_used_reg
   itself, `pattern_place`, `string_place`, the current options, and a
   null marker slot.  last_used_reg is the highest register index whose
   regstart is set (0 when none is).  Uses the caller's `stackp`,
   `num_regs`, `regstart`, `regend`, `reg_info`, `options` and
   `num_failure_counts`. */
#define PUSH_FAILURE_POINT(pattern_place, string_place) \
do { \
long last_used_reg, this_reg; \
\
\
for (last_used_reg = num_regs-1; last_used_reg > 0; last_used_reg--)\
if (!REG_UNSET(regstart[last_used_reg])) \
break; \
\
ENSURE_FAIL_STACK(NUM_FAILURE_ITEMS); \
*stackp++ = (unsigned char*)(long)num_failure_counts; \
num_failure_counts = 0; \
\
\
for (this_reg = 1; this_reg <= last_used_reg; this_reg++) { \
*stackp++ = regstart[this_reg]; \
*stackp++ = regend[this_reg]; \
*stackp++ = reg_info[this_reg].word; \
} \
\
\
*stackp++ = (unsigned char*)last_used_reg; \
\
*stackp++ = pattern_place; \
*stackp++ = string_place; \
*stackp++ = (unsigned char*)(long)options; \
*stackp++ = (unsigned char*)0; \
} while(0)
/* Value stored in the marker slot (the top slot) of a failure point by the
   non-greedy opcodes. */
#define NON_GREEDY ((unsigned char*)1)
/* Undo one PUSH_FAILURE_COUNT: write the saved number back into the
   pattern at the saved address. */
#define POP_FAILURE_COUNT() \
do { \
unsigned char *ptr = *--stackp; \
int count = (long)*--stackp; \
STORE_NUMBER(ptr, count); \
} while (0)
/* Discard the top failure point without using its register values, then
   undo the saved counters recorded beneath it and reset
   `num_failure_counts` to 0. */
#define POP_FAILURE_POINT() \
do { \
long temp; \
stackp -= NUM_NONREG_ITEMS; \
temp = (long)*--stackp; \
temp *= NUM_REG_ITEMS; \
stackp -= temp; \
temp = (long)*--stackp; \
while (temp--) { \
POP_FAILURE_COUNT(); \
} \
num_failure_counts = 0; \
} while(0)
/* Sentinel stored in register start/end slots that hold no position. */
#define REG_UNSET_VALUE ((unsigned char*)-1)
#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
/* Fail the current match attempt when the subject is exhausted.  Uses the
   caller's `d`, `dend` and `fail` label. */
#define PREFETCH if (d == dend) goto fail
/* For every register, set matched_something to 1 if the register is
   currently active and to 0 otherwise.  Uses the caller's `num_regs` and
   `reg_info`. */
#define SET_REGS_MATCHED \
do { unsigned this_reg; \
for (this_reg = 0; this_reg < num_regs; this_reg++) { \
if (IS_ACTIVE(reg_info[this_reg])) \
MATCHED_SOMETHING(reg_info[this_reg]) = 1; \
else \
MATCHED_SOMETHING(reg_info[this_reg]) = 0; \
} \
} while(0)
/* Position tests against the caller's `string` and `dend`. */
#define AT_STRINGS_BEG(d) ((d) == string)
#define AT_STRINGS_END(d) ((d) == dend)
/* True when the byte at `d` has word syntax, or -- in a multibyte mode --
   is flagged in re_mbctab and its mbclen() bytes fit before `dend`, or --
   in single-byte mode -- has Sword2 syntax. */
#define IS_A_LETTER(d) (SYNTAX(*(d)) == Sword || \
(current_mbctype ? \
(re_mbctab[*(d)] && ((d)+mbclen(*(d)))<=dend): \
SYNTAX(*(d)) == Sword2))
/* Same test for the character before `d`.  In SJIS mode it steps back two
   bytes when the byte two positions back satisfies ismbchar(); in other
   multibyte modes any preceding byte >= 0x80 counts as a letter. */
#define PREV_IS_A_LETTER(d) ((current_mbctype == MBCTYPE_SJIS)? \
IS_A_LETTER((d)-(!AT_STRINGS_BEG((d)-1)&& \
ismbchar((d)[-2])?2:1)): \
((current_mbctype && ((d)[-1] >= 0x80)) || \
IS_A_LETTER((d)-1)))
/*
 * Prepare `regs` to receive the positions of `num_regs` registers.
 *
 * regs->num_regs records the count as given.  The beg/end arrays are then
 * allocated (when nothing is allocated yet) or grown (when too small) to
 * hold at least RE_NREGS entries, and that many entries are reset to -1.
 */
static void
init_regs(regs, num_regs)
    struct re_registers *regs;
    unsigned int num_regs;
{
    unsigned int capacity;
    int slot;

    regs->num_regs = num_regs;

    /* Never work with fewer than RE_NREGS slots. */
    capacity = (num_regs < RE_NREGS) ? RE_NREGS : num_regs;

    if (regs->allocated == 0) {
        regs->beg = TMALLOC(capacity, int);
        regs->end = TMALLOC(capacity, int);
        regs->allocated = capacity;
    }
    else if (regs->allocated < capacity) {
        TREALLOC(regs->beg, capacity, int);
        TREALLOC(regs->end, capacity, int);
        regs->allocated = capacity;
    }

    for (slot = 0; slot < capacity; slot++) {
        regs->end[slot] = -1;
        regs->beg[slot] = -1;
    }
}
/*
 * Try to match the compiled pattern `bufp` against `string_arg` (length
 * `size`) starting exactly at `pos`.
 *
 * This is re_match_exec() with `pos` also passed as its `beg` argument.
 * The result of re_match_exec() is returned unchanged.
 */
int
re_match(bufp, string_arg, size, pos, regs)
    struct re_pattern_buffer *bufp;
    const char *string_arg;
    int size, pos;
    struct re_registers *regs;
{
    const int search_origin = pos;

    return re_match_exec(bufp, string_arg, size, pos, search_origin, regs);
}
static int
re_match_exec(bufp, string_arg, size, pos, beg, regs)
struct re_pattern_buffer *bufp;
const char *string_arg;
int size, pos, beg;
struct re_registers *regs;
{
register unsigned char *p = (unsigned char*)bufp->buffer;
unsigned char *p1;
register unsigned char *pend = p + bufp->used;
unsigned num_regs = bufp->re_nsub;
unsigned char *string = (unsigned char*)string_arg;
register unsigned char *d, *dend;
register int mcnt;
int options = bufp->options;
unsigned char **const stacka = 0;
unsigned char **stackb;
unsigned char **stackp;
unsigned char **stacke;
unsigned char **regstart = bufp->regstart;
unsigned char **regend = bufp->regend;
unsigned char **old_regstart = bufp->old_regstart;
unsigned char **old_regend = bufp->old_regend;
register_info_type *reg_info = bufp->reg_info;
unsigned best_regs_set = 0;
unsigned char **best_regstart = bufp->best_regstart;
unsigned char **best_regend = bufp->best_regend;
int num_failure_counts = 0;
if (regs) {
init_regs(regs, num_regs);
}
stackb = TMALLOC(MAX_NUM_FAILURE_ITEMS * NFAILURES, unsigned char*);
stackp = stackb;
stacke = &stackb[MAX_NUM_FAILURE_ITEMS * NFAILURES];
#ifdef DEBUG_REGEX
fprintf(stderr, "Entering re_match(%s)\n", string_arg);
#endif
for (mcnt = 0; mcnt < num_regs; mcnt++) {
regstart[mcnt] = regend[mcnt]
= old_regstart[mcnt] = old_regend[mcnt]
= best_regstart[mcnt] = best_regend[mcnt] = REG_UNSET_VALUE;
#ifdef __CHECKER__
reg_info[mcnt].word = 0;
#endif
IS_ACTIVE (reg_info[mcnt]) = 0;
MATCHED_SOMETHING (reg_info[mcnt]) = 0;
}
d = string + pos, dend = string + size;
for (;;) {
#ifdef DEBUG_REGEX
fprintf(stderr,
"regex loop(%d): matching 0x%02d\n",
p - (unsigned char*)bufp->buffer,
*p);
#endif
if (p == pend) {
if ((bufp->options & RE_OPTION_LONGEST) && d != dend) {
if (best_regs_set)
goto restore_best_regs;
while (stackp != stackb && stackp[-1] == NON_GREEDY) {
if (best_regs_set)
goto restore_best_regs;
POP_FAILURE_POINT();
}
if (stackp != stackb) {
if (! best_regs_set || (d > best_regend[0])) {
best_regs_set = 1;
best_regend[0] = d;
for (mcnt = 1; mcnt < num_regs; mcnt++) {
best_regstart[mcnt] = regstart[mcnt];
best_regend[mcnt] = regend[mcnt];
}
}
goto fail;
}
else if (best_regs_set) {
restore_best_regs:
d = best_regend[0];
for (mcnt = 0; mcnt < num_regs; mcnt++) {
regstart[mcnt] = best_regstart[mcnt];
regend[mcnt] = best_regend[mcnt];
}
}
}
if (regs) {
regs->beg[0] = pos;
regs->end[0] = d - string;
for (mcnt = 1; mcnt < num_regs; mcnt++) {
if (REG_UNSET(regend[mcnt])) {
regs->beg[mcnt] = -1;
regs->end[mcnt] = -1;
continue;
}
regs->beg[mcnt] = regstart[mcnt] - string;
regs->end[mcnt] = regend[mcnt] - string;
}
}
FREE_AND_RETURN(stackb, (d - pos - string));
}
#ifdef SWITCH_ENUM_BUG
switch ((int)((enum regexpcode)*p++))
#else
switch ((enum regexpcode)*p++)
#endif
{
case start_memory:
old_regstart[*p] = regstart[*p];
regstart[*p] = d;
IS_ACTIVE(reg_info[*p]) = 1;
MATCHED_SOMETHING(reg_info[*p]) = 0;
p += 2;
continue;
case stop_memory:
old_regend[*p] = regend[*p];
regend[*p] = d;
IS_ACTIVE(reg_info[*p]) = 0;
p += 2;
continue;
case start_paren:
case stop_paren:
break;
case duplicate:
{
int regno = *p++;
register unsigned char *d2, *dend2;
if (regno >= num_regs) goto fail;
if (IS_ACTIVE(reg_info[regno])) goto fail;
d2 = regstart[regno];
if (REG_UNSET(d2)) goto fail;
dend2 = regend[regno];
if (REG_UNSET(dend2)) goto fail;
for (;;) {
if (d2 == dend2) break;
PREFETCH;
mcnt = dend - d;
if (mcnt > dend2 - d2)
mcnt = dend2 - d2;
if ((options & RE_OPTION_IGNORECASE)
? memcmp_translate(d, d2, mcnt)
: memcmp((char*)d, (char*)d2, mcnt))
goto fail;
d += mcnt, d2 += mcnt;
}
}
break;
case start_nowidth:
PUSH_FAILURE_POINT(0, d);
if (stackp - stackb > RE_DUP_MAX) {
FREE_AND_RETURN(stackb,(-2));
}
EXTRACT_NUMBER_AND_INCR(mcnt, p);
STORE_NUMBER(p+mcnt, stackp - stackb);
continue;
case stop_nowidth:
EXTRACT_NUMBER_AND_INCR(mcnt, p);
stackp = stackb + mcnt;
d = stackp[-3];
POP_FAILURE_POINT();
continue;
case stop_backtrack:
EXTRACT_NUMBER_AND_INCR(mcnt, p);
stackp = stackb + mcnt;
POP_FAILURE_POINT();
continue;
case pop_and_fail:
EXTRACT_NUMBER(mcnt, p+1);
stackp = stackb + mcnt;
POP_FAILURE_POINT();
goto fail;
case anychar:
PREFETCH;
if (ismbchar(*d)) {
if (d + mbclen(*d) > dend)
goto fail;
SET_REGS_MATCHED;
d += mbclen(*d);
break;
}
if (!(options&RE_OPTION_MULTILINE)
&& (TRANSLATE_P() ? translate[*d] : *d) == '\n')
goto fail;
SET_REGS_MATCHED;
d++;
break;
case anychar_repeat:
for (;;) {
PUSH_FAILURE_POINT(p, d);
PREFETCH;
if (ismbchar(*d)) {
if (d + mbclen(*d) > dend)
goto fail;
SET_REGS_MATCHED;
d += mbclen(*d);
continue;
}
if (!(options&RE_OPTION_MULTILINE) &&
(TRANSLATE_P() ? translate[*d] : *d) == '\n')
goto fail;
SET_REGS_MATCHED;
d++;
}
break;
case charset:
case charset_not:
{
int not;
int part = 0;
unsigned char *dsave = d + 1;
int cc, c;
PREFETCH;
c = (unsigned char)*d++;
if (ismbchar(c)) {
if (d + mbclen(c) - 1 <= dend) {
cc = c;
MBC2WC(c, d);
not = is_in_list_mbc(c, p);
if (!not) {
part = not = is_in_list_sbc(cc, p);
}
} else {
not = is_in_list(c, p);
}
}
else {
if (TRANSLATE_P())
c = (unsigned char)translate[c];
not = is_in_list(c, p);
}
if (*(p - 1) == (unsigned char)charset_not) {
not = !not;
}
if (!not) goto fail;
p += 1 + *p + 2 + EXTRACT_UNSIGNED(&p[1 + *p])*8;
SET_REGS_MATCHED;
if (part) d = dsave;
break;
}
case begline:
if (size == 0 || AT_STRINGS_BEG(d))
break;
if (d[-1] == '\n' && !AT_STRINGS_END(d))
break;
goto fail;
case endline:
if (AT_STRINGS_END(d)) {
break;
}
else if (*d == '\n')
break;
goto fail;
case begbuf:
if (AT_STRINGS_BEG(d))
break;
goto fail;
case endbuf:
if (AT_STRINGS_END(d))
break;
goto fail;
case endbuf2:
if (AT_STRINGS_END(d)) {
break;
}
if (*d == '\n' && AT_STRINGS_END(d+1))
break;
goto fail;
case begpos:
if (d - string == beg)
break;
goto fail;
case on_failure_jump:
on_failure:
EXTRACT_NUMBER_AND_INCR(mcnt, p);
PUSH_FAILURE_POINT(p + mcnt, d);
continue;
case maybe_finalize_jump:
EXTRACT_NUMBER_AND_INCR(mcnt, p);
p1 = p;
while (p1 + 2 < pend) {
if ((enum regexpcode)*p1 == stop_memory ||
(enum regexpcode)*p1 == start_memory)
p1 += 3;
else if (
(enum regexpcode)*p1 == stop_paren)
p1 += 1;
else
break;
}
if (p1 == pend)
p[-3] = (unsigned char)finalize_jump;
else if (*p1 == (unsigned char)exactn ||
*p1 == (unsigned char)endline) {
register int c = *p1 == (unsigned char)endline ? '\n' : p1[2];
register unsigned char *p2 = p + mcnt;
if (p2[3] == (unsigned char)exactn && p2[5] != c)
p[-3] = (unsigned char)finalize_jump;
else if (p2[3] == (unsigned char)charset ||
p2[3] == (unsigned char)charset_not) {
int not;
if (ismbchar(c)) {
unsigned char *pp = p1+3;
MBC2WC(c, pp);
}
not = is_in_list(c, p2 + 4);
if (p2[3] == (unsigned char)charset_not)
not = !not;
if (!not)
p[-3] = (unsigned char)finalize_jump;
}
}
p -= 2;
if (p[-1] != (unsigned char)finalize_jump) {
p[-1] = (unsigned char)jump;
goto nofinalize;
}
case finalize_jump:
if (stackp > stackb && stackp[-3] == d) {
p = stackp[-4];
POP_FAILURE_POINT();
continue;
}
POP_FAILURE_POINT();
case jump_past_alt:
case jump:
nofinalize:
EXTRACT_NUMBER_AND_INCR(mcnt, p);
if (mcnt < 0 && stackp > stackb && stackp[-3] == d)
goto fail;
p += mcnt;
continue;
case dummy_failure_jump:
PUSH_FAILURE_POINT(0, 0);
goto nofinalize;
case push_dummy_failure:
p1 = p;
while (p1 + 2 < pend) {
if ((enum regexpcode)*p1 == stop_memory ||
(enum regexpcode)*p1 == start_memory)
p1 += 3;
else if (
(enum regexpcode)*p1 == stop_paren)
p1 += 1;
else
break;
}
if (p1 < pend && (enum regexpcode)*p1 == jump)
p[-1] = unused;
else
PUSH_FAILURE_POINT(0, 0);
break;
case succeed_n:
EXTRACT_NUMBER(mcnt, p + 2);
if (mcnt != 0) {
mcnt--;
p += 2;
PUSH_FAILURE_COUNT(p);
STORE_NUMBER_AND_INCR(p, mcnt);
PUSH_FAILURE_POINT(0, 0);
}
else {
goto on_failure;
}
continue;
case jump_n:
EXTRACT_NUMBER(mcnt, p + 2);
if (mcnt) {
mcnt--;
PUSH_FAILURE_COUNT(p + 2);
STORE_NUMBER(p + 2, mcnt);
goto nofinalize;
}
else
p += 4;
continue;
case set_number_at:
EXTRACT_NUMBER_AND_INCR(mcnt, p);
p1 = p + mcnt;
EXTRACT_NUMBER_AND_INCR(mcnt, p);
STORE_NUMBER(p1, mcnt);
continue;
case try_next:
EXTRACT_NUMBER_AND_INCR(mcnt, p);
if (p + mcnt < pend) {
PUSH_FAILURE_POINT(p, d);
stackp[-1] = NON_GREEDY;
}
p += mcnt;
continue;
case finalize_push:
POP_FAILURE_POINT();
EXTRACT_NUMBER_AND_INCR(mcnt, p);
if (mcnt < 0 && stackp > stackb && stackp[-3] == d)
goto fail;
PUSH_FAILURE_POINT(p + mcnt, d);
stackp[-1] = NON_GREEDY;
continue;
case finalize_push_n:
EXTRACT_NUMBER(mcnt, p + 2);
if (mcnt) {
int pos, i;
mcnt--;
STORE_NUMBER(p + 2, mcnt);
EXTRACT_NUMBER(pos, p);
EXTRACT_NUMBER(i, p+pos+5);
if (i > 0) goto nofinalize;
POP_FAILURE_POINT();
EXTRACT_NUMBER_AND_INCR(mcnt, p);
PUSH_FAILURE_POINT(p + mcnt, d);
stackp[-1] = NON_GREEDY;
p += 2;
}
else
p += 4;
continue;
case unused:
continue;
case casefold_on:
options |= RE_OPTION_IGNORECASE;
continue;
case casefold_off:
options &= ~RE_OPTION_IGNORECASE;
continue;
case option_set:
options = *p++;
continue;
case wordbound:
if (AT_STRINGS_BEG(d)) {
if (AT_STRINGS_END(d)) goto fail;
if (IS_A_LETTER(d)) break;
else goto fail;
}
if (AT_STRINGS_END(d)) {
if (PREV_IS_A_LETTER(d)) break;
else goto fail;
}
if (PREV_IS_A_LETTER(d) != IS_A_LETTER(d))
break;
goto fail;
case notwordbound:
if (AT_STRINGS_BEG(d)) {
if (IS_A_LETTER(d)) goto fail;
else break;
}
if (AT_STRINGS_END(d)) {
if (PREV_IS_A_LETTER(d)) goto fail;
else break;
}
if (PREV_IS_A_LETTER(d) != IS_A_LETTER(d))
goto fail;
break;
case wordbeg:
if (IS_A_LETTER(d) && (AT_STRINGS_BEG(d) || !PREV_IS_A_LETTER(d)))
break;
goto fail;
case wordend:
if (!AT_STRINGS_BEG(d) && PREV_IS_A_LETTER(d)
&& (!IS_A_LETTER(d) || AT_STRINGS_END(d)))
break;
goto fail;
case wordchar:
PREFETCH;
if (!IS_A_LETTER(d))
goto fail;
if (ismbchar(*d) && d + mbclen(*d) - 1 < dend)
d += mbclen(*d) - 1;
d++;
SET_REGS_MATCHED;
break;
case notwordchar:
PREFETCH;
if (IS_A_LETTER(d))
goto fail;
if (ismbchar(*d) && d + mbclen(*d) - 1 < dend)
d += mbclen(*d) - 1;
d++;
SET_REGS_MATCHED;
break;
case exactn:
mcnt = *p++;
if (TRANSLATE_P()) {
do {
unsigned char c;
PREFETCH;
if (*p == 0xff) {
p++;
if (!--mcnt
|| AT_STRINGS_END(d)
|| (unsigned char)*d++ != (unsigned char)*p++)
goto fail;
continue;
}
c = *d++;
if (ismbchar(c)) {
int n;
if (c != (unsigned char)*p++)
goto fail;
for (n = mbclen(c) - 1; n > 0; n--)
if (!--mcnt
|| AT_STRINGS_END(d)
|| (unsigned char)*d++ != (unsigned char)*p++)
goto fail;
continue;
}
if ((unsigned char)translate[c] != (unsigned char)translate[*p++])
goto fail;
}
while (--mcnt);
}
else {
do {
PREFETCH;
if (*p == 0xff) {p++; mcnt--;}
if (*d++ != *p++) goto fail;
}
while (--mcnt);
}
SET_REGS_MATCHED;
break;
}
#ifdef RUBY
CHECK_INTS;
#endif
continue;
fail:
if (stackp != stackb) {
short last_used_reg, this_reg;
if (stackp[-4] == 0 || (best_regs_set && stackp[-1] == NON_GREEDY)) {
POP_FAILURE_POINT();
goto fail;
}
stackp--;
options = (long)*--stackp;
d = *--stackp;
p = *--stackp;
last_used_reg = (long)*--stackp;
for (this_reg = num_regs - 1; this_reg > last_used_reg; this_reg--) {
regend[this_reg] = REG_UNSET_VALUE;
regstart[this_reg] = REG_UNSET_VALUE;
IS_ACTIVE(reg_info[this_reg]) = 0;
MATCHED_SOMETHING(reg_info[this_reg]) = 0;
}
for ( ; this_reg > 0; this_reg--) {
reg_info[this_reg].word = *--stackp;
regend[this_reg] = *--stackp;
regstart[this_reg] = *--stackp;
}
mcnt = (long)*--stackp;
while (mcnt--) {
POP_FAILURE_COUNT();
}
if (p < pend) {
int is_a_jump_n = 0;
int failed_paren = 0;
p1 = p;
switch ((enum regexpcode)*p1) {
case jump_n:
case finalize_push_n:
is_a_jump_n = 1;
case maybe_finalize_jump:
case finalize_jump:
case finalize_push:
case jump:
p1++;
EXTRACT_NUMBER_AND_INCR(mcnt, p1);
if (mcnt >= 0) break;
p1 += mcnt;
if (( is_a_jump_n && (enum regexpcode)*p1 == succeed_n) ||
(!is_a_jump_n && (enum regexpcode)*p1 == on_failure_jump)) {
if (failed_paren) {
p1++;
EXTRACT_NUMBER_AND_INCR(mcnt, p1);
PUSH_FAILURE_POINT(p1 + mcnt, d);
}
goto fail;
}
break;
default:
;
}
}
}
else
break;
}
if (best_regs_set)
goto restore_best_regs;
FREE_AND_RETURN(stackb,(-1));
memory_exhausted:
FREE_AND_RETURN(stackb,(-2));
}
/*
 * Compare LEN bytes of S1 and S2 under the current translate table.
 *
 * Single-byte characters are compared after mapping both sides through
 * `translate`.  When a byte of S1 is a multibyte lead byte (per ismbchar),
 * the lead byte and the trailing bytes announced by mbclen are compared
 * verbatim, without translation.
 *
 * Returns 0 when the two ranges are considered equal and 1 otherwise.
 * A multibyte character whose trailing bytes would run past LEN also
 * yields 1.
 */
static int
memcmp_translate(s1, s2, len)
    unsigned char *s1, *s2;
    register int len;
{
    register unsigned char *lhs = s1;
    register unsigned char *rhs = s2;

    for (; len != 0; len--) {
        unsigned char lead = *lhs++;
        int trailing;

        if (!ismbchar(lead)) {
            /* Plain byte: equality is decided after translation. */
            if (translate[lead] != translate[*rhs++])
                return 1;
            continue;
        }

        /* Multibyte character: bytes must match exactly. */
        if (lead != *rhs++)
            return 1;
        trailing = mbclen(lead) - 1;
        while (trailing > 0) {
            /* Each trailing byte consumes one unit of LEN; running out
               in the middle of a character counts as a mismatch. */
            if (--len == 0)
                return 1;
            if (*lhs++ != *rhs++)
                return 1;
            trailing--;
        }
    }
    return 0;
}
/*
 * Copy the register set REGS2 into REGS1.
 *
 * Nothing happens when both arguments are the same object.  Otherwise the
 * beg/end arrays of REGS1 are created (when REGS1->allocated is 0) or
 * enlarged (when REGS1->allocated is smaller than REGS2->num_regs) via
 * TMALLOC / TREALLOC, then the first REGS2->num_regs entries of both
 * arrays are copied and REGS1->num_regs is updated.
 *
 * NOTE(review): the results of TMALLOC / TREALLOC are not checked here;
 * confirm how those macros behave on allocation failure.
 */
void
re_copy_registers(regs1, regs2)
    struct re_registers *regs1, *regs2;
{
    int idx;

    if (regs1 == regs2)
        return;

    /* Make sure the destination arrays can hold every source register. */
    if (regs1->allocated == 0 || regs1->allocated < regs2->num_regs) {
        if (regs1->allocated == 0) {
            regs1->beg = TMALLOC(regs2->num_regs, int);
            regs1->end = TMALLOC(regs2->num_regs, int);
        }
        else {
            TREALLOC(regs1->beg, regs2->num_regs, int);
            TREALLOC(regs1->end, regs2->num_regs, int);
        }
        regs1->allocated = regs2->num_regs;
    }

    idx = 0;
    while (idx < regs2->num_regs) {
        regs1->beg[idx] = regs2->beg[idx];
        regs1->end[idx] = regs2->end[idx];
        idx++;
    }
    regs1->num_regs = regs2->num_regs;
}
/*
 * Release the beg/end arrays owned by REGS.
 *
 * Does nothing when REGS->allocated is 0.  Only non-null arrays are passed
 * to xfree.
 *
 * NOTE(review): the fields of REGS (allocated, beg, end) are left
 * untouched after freeing, so the struct must not be freed a second time
 * or reused without being reinitialised by the caller.
 */
void
re_free_registers(regs)
    struct re_registers *regs;
{
    if (regs->allocated != 0) {
        if (regs->beg) {
            xfree(regs->beg);
        }
        if (regs->end) {
            xfree(regs->end);
        }
    }
}
/*
 * Lead-byte table for single-byte (ASCII) mode.
 *
 * One entry per byte value (256 in total).  Every entry is zero, i.e. no
 * byte is flagged as the first byte of a multibyte character.
 */
static const unsigned char mbctab_ascii[256] = { 0 };
/*
 * Lead-byte table for EUC.
 *
 * Indexed by byte value; each entry is the number of bytes that follow the
 * indexed byte within one character (0 for a single-byte character):
 *   0x8E        -> 1
 *   0x8F        -> 2
 *   0xA1..0xFE  -> 1
 *   all others  -> 0
 */
static const unsigned char mbctab_euc[256] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,
    /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xA0 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xB0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xC0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xD0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xE0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xF0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
};
/*
 * Lead-byte table for Shift_JIS.
 *
 * Indexed by byte value; an entry of 1 marks a byte that is followed by
 * one more byte in the same character:
 *   0x81..0x9F and 0xE0..0xFC -> 1
 *   all others                -> 0
 */
static const unsigned char mbctab_sjis[256] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x80 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xE0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xF0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0
};
/*
 * Trail-byte table for Shift_JIS.
 *
 * Indexed by byte value; an entry of 1 marks a byte value that is flagged
 * as a possible second byte of a two-byte character:
 *   0x40..0x7E and 0x80..0xFC -> 1
 *   all others                -> 0
 */
static const unsigned char mbctab_sjis_trail[256] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
    /* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xA0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xB0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xC0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xD0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xE0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xF0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0
};
/*
 * Lead-byte table for UTF-8.
 *
 * Indexed by byte value; each entry is the number of bytes that follow the
 * indexed byte within one character:
 *   0xC0..0xDF -> 1
 *   0xE0..0xEF -> 2
 *   0xF0..0xF7 -> 3
 *   0xF8..0xFB -> 4
 *   0xFC..0xFD -> 5
 *   all others -> 0 (including 0x80..0xBF, 0xFE and 0xFF)
 */
static const unsigned char mbctab_utf8[256] = {
    /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /* 0xC0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xD0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xE0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xF0 */ 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 0, 0
};
/* Lead-byte table currently in effect.  Starts out as the ASCII table and
   is repointed by re_mbcinit() when a different encoding is selected. */
const unsigned char *re_mbctab = mbctab_ascii;
/*
 * Select the multibyte encoding used by the matcher.
 *
 * MBCTYPE is one of MBCTYPE_ASCII, MBCTYPE_EUC, MBCTYPE_SJIS or
 * MBCTYPE_UTF8.  The matching lead-byte table is installed in re_mbctab
 * and current_mbctype is updated.  Any other value leaves both globals
 * untouched.
 */
void
re_mbcinit(mbctype)
    int mbctype;
{
    const unsigned char *table;

    switch (mbctype) {
      case MBCTYPE_ASCII:
        table = mbctab_ascii;
        break;
      case MBCTYPE_EUC:
        table = mbctab_euc;
        break;
      case MBCTYPE_SJIS:
        table = mbctab_sjis;
        break;
      case MBCTYPE_UTF8:
        table = mbctab_utf8;
        break;
      default:
        /* Unknown encoding id: keep the current settings. */
        return;
    }

    re_mbctab = table;
    current_mbctype = mbctype;
}
/* Entry of lead-byte table T for byte C (C is forced to unsigned char so
   that negative `char` values index correctly).  A nonzero entry is what
   sjis_isfirst() treats as "first byte of a multibyte character". */
#define mbc_isfirst(t, c) (t)[(unsigned char)(c)]
/* Table entry for byte C plus one.  The tables store the number of bytes
   that follow a lead byte, so this is the total byte length of the
   character that starts with C (1 for a single-byte character). */
#define mbc_len(t, c) ((t)[(unsigned char)(c)]+1)
/*
 * Character-start lookup for single-byte mode.
 *
 * Every byte is its own character, so the start position of the character
 * containing byte POS is POS itself.  STRING is not examined.
 */
static unsigned int
asc_startpos(string, pos)
    const char *string;
    unsigned int pos;
{
    (void)string;   /* unused in single-byte mode */
    return pos;
}
/* True when byte C lies OUTSIDE the range 0xA1..0xFE (the subtraction is
   reduced to unsigned char, so values below 0xA1 wrap around to large
   numbers).  Despite the name, this does not test for a lead byte as such:
   euc_startpos() uses it as the stop condition of its backward scan, i.e.
   to find a byte that cannot be in the 0xA1..0xFE range. */
#define euc_islead(c) ((unsigned char)((c) - 0xa1) > 0xfe - 0xa1)
/* Total byte length of the EUC character starting with byte C, taken from
   mbctab_euc. */
#define euc_mbclen(c) mbc_len(mbctab_euc, (c))
/*
 * Find the offset at which the EUC character containing byte POS of STRING
 * begins.
 *
 * The scan walks backwards from POS until it reaches offset 0 or a byte
 * outside 0xA1..0xFE (see euc_islead).  If the character starting there
 * extends past POS, that offset is the answer.  Otherwise the remaining
 * bytes up to POS are all in 0xA1..0xFE and are treated as consecutive
 * two-byte characters, so the result is POS rounded down to an even
 * distance from the end of the first character.
 */
static unsigned int
euc_startpos(string, pos)
    const char *string;
    unsigned int pos;
{
    unsigned int start = pos;
    unsigned int width;

    /* Back up to a byte that cannot be part of a 0xA1..0xFE run. */
    for (; start > 0; --start) {
        if (euc_islead(string[start]))
            break;
    }

    if (start == pos)
        return start;

    width = euc_mbclen(string[start]);
    if (start + width > pos)
        return start;

    /* Skip the first character, then step in pairs of bytes. */
    start += width;
    return start + ((pos - start) & ~1);
}
/* Nonzero when byte C is flagged in mbctab_sjis, i.e. it is the first byte
   of a two-byte Shift_JIS character. */
#define sjis_isfirst(c) mbc_isfirst(mbctab_sjis, (c))
/* Nonzero when byte C is flagged in mbctab_sjis_trail, i.e. its value is
   one that may appear as the second byte of a two-byte character. */
#define sjis_istrail(c) mbctab_sjis_trail[(unsigned char)(c)]
/* Total byte length of the Shift_JIS character starting with byte C,
   taken from mbctab_sjis. */
#define sjis_mbclen(c) mbc_len(mbctab_sjis, (c))
/*
 * Find the offset at which the Shift_JIS character containing byte POS of
 * STRING begins.
 *
 * If the byte at POS has a value that can be a trail byte, the scan moves
 * backwards over the unbroken run of lead-byte values immediately before
 * it.  If the character starting at the resulting offset extends past POS,
 * that offset is returned.  Otherwise the bytes between the end of that
 * character and POS are treated as consecutive two-byte characters and
 * POS is rounded down to an even distance from that point.
 */
static unsigned int
sjis_startpos(string, pos)
    const char *string;
    unsigned int pos;
{
    unsigned int start = pos;
    unsigned int width;

    if (start > 0 && sjis_istrail(string[start])) {
        /* Step back while the preceding byte still looks like a lead byte. */
        while (start > 0 && sjis_isfirst(string[start - 1]))
            --start;
    }

    if (start == pos)
        return start;

    width = sjis_mbclen(string[start]);
    if (start + width > pos)
        return start;

    /* Skip the first character, then step in pairs of bytes. */
    start += width;
    return start + ((pos - start) & ~1);
}
/* True when the top two bits of byte C are not `10`, i.e. C is not a
   UTF-8 continuation byte (it is either a single-byte character or the
   first byte of a multibyte sequence). */
#define utf8_islead(c) ((unsigned char)((c) & 0xc0) != 0x80)
/* Total byte length of the UTF-8 character starting with byte C, taken
   from mbctab_utf8. */
#define utf8_mbclen(c) mbc_len(mbctab_utf8, (c))
/*
 * Find the offset at which the UTF-8 character containing byte POS of
 * STRING begins.
 *
 * The scan walks backwards from POS over continuation bytes until it
 * reaches offset 0 or a non-continuation byte.  If the character starting
 * there extends past POS, that offset is returned.  Otherwise (more
 * continuation bytes than the lead byte announces) the offset just past
 * that character is returned.
 */
static unsigned int
utf8_startpos(string, pos)
    const char *string;
    unsigned int pos;
{
    unsigned int start = pos;
    unsigned int width;

    /* Back up over continuation bytes (10xxxxxx). */
    for (; start > 0; --start) {
        if (utf8_islead(string[start]))
            break;
    }

    if (start == pos)
        return start;

    width = utf8_mbclen(string[start]);
    if (start + width > pos)
        return start;

    return start + width;
}