#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include <sys/types.h>
#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC
# define MBS_SUPPORT
# include <wchar.h>
# include <wctype.h>
#endif
#include "system.h"
#include "grep.h"
#include "regex.h"
#include "dfa.h"
#include "kwset.h"
#include "error.h"
#include "xalloc.h"
#ifdef HAVE_LIBPCRE
# include <pcre.h>
#endif
/* Number of distinct values an unsigned char can take; used below to size
   the case-folding translation table. */
#define NCHAR (UCHAR_MAX + 1)
/* Nonzero if C is a word constituent: alphanumeric according to ISALNUM,
   or an underscore.  C is evaluated twice, so it must be free of side
   effects; the call sites in this file cast their argument to
   unsigned char first. */
#define WCHAR(C) (ISALNUM(C) || (C) == '_')
/* DFA compiled from the whole pattern by Gcompile/Ecompile (via dfacomp)
   and run by EGexecute (via dfaexec). */
static struct dfa dfa;
/* One compiled regex together with the register set re_search/re_match
   fill in for it.  patterns0 is the template copied into every newly
   added array slot before compilation. */
static struct patterns
{
struct re_pattern_buffer regexbuf;
struct re_registers regs;
} patterns0;
/* Array of compiled sub-patterns, one per newline-separated pattern line;
   grown with realloc by Gcompile/Ecompile. */
struct patterns *patterns;
/* Number of valid entries in the patterns array. */
size_t pcount;
/* Keyword set allocated by kwsinit; left unset when the G/E matchers find
   no "must" strings (EGexecute tests it for non-null before use). */
static kwset_t kwset;
/* Number of keywords added from dfa "must" entries flagged exact.  They
   are added to kwset first, and EGexecute accepts a keyword hit whose
   index is below this count without running the DFA. */
static int kwset_exact_matches;
/* Forward declarations for the file-local helpers and for the
   compile/execute pairs referenced from the matchers table at the end of
   the file.  PARAMS comes from a project header (presumably a K&R
   compatibility wrapper for prototypes -- confirm in system.h). */
#if defined(MBS_SUPPORT)
static char* check_multibyte_string PARAMS ((char const *buf, size_t size));
#endif
static void kwsinit PARAMS ((void));
static void kwsmusts PARAMS ((void));
static void Gcompile PARAMS ((char const *, size_t));
static void Ecompile PARAMS ((char const *, size_t));
static size_t EGexecute PARAMS ((char const *, size_t, size_t *, int ));
static void Fcompile PARAMS ((char const *, size_t));
static size_t Fexecute PARAMS ((char const *, size_t, size_t *, int));
static void Pcompile PARAMS ((char const *, size_t ));
static size_t Pexecute PARAMS ((char const *, size_t, size_t *, int));
/* Error hook for the DFA code: report MESG through error () with
   status 2.

   MESG is passed as data behind a literal "%s" format.  It can contain
   arbitrary text (including '%' characters), so it must never be used as
   the format string itself.  */
void
dfaerror (char const *mesg)
{
  error (2, 0, "%s", mesg);
}
/* Allocate the file-level keyword set `kwset'.

   When case-insensitive matching is requested (match_icase), a static
   translation table mapping every byte value to TOLOWER of itself is
   filled in and handed to kwsalloc; otherwise a null table is passed.
   A failed allocation is reported through error () with status 2.  */
static void
kwsinit (void)
{
  static char trans[NCHAR];
  char *table = (char *) 0;

  if (match_icase)
    {
      int c;

      for (c = 0; c < NCHAR; ++c)
        trans[c] = TOLOWER (c);
      table = trans;
    }

  kwset = kwsalloc (table);
  if (!kwset)
    error (2, 0, _("memory exhausted"));
}
static void
kwsmusts (void)
{
struct dfamust const *dm;
char const *err;
if (dfa.musts)
{
kwsinit ();
for (dm = dfa.musts; dm; dm = dm->next)
{
if (!dm->exact)
continue;
++kwset_exact_matches;
if ((err = kwsincr (kwset, dm->must, strlen (dm->must))) != 0)
error (2, 0, err);
}
for (dm = dfa.musts; dm; dm = dm->next)
{
if (dm->exact)
continue;
if ((err = kwsincr (kwset, dm->must, strlen (dm->must))) != 0)
error (2, 0, err);
}
if ((err = kwsprep (kwset)) != 0)
error (2, 0, err);
}
}
#ifdef MBS_SUPPORT
/* Build a per-byte map of the multibyte character boundaries in the SIZE
   bytes starting at BUF.

   The result is a heap-allocated array of SIZE chars.  The entry at the
   first byte of each character holds that character's length in bytes as
   reported by mbrlen; every other entry is 0.  Bytes for which mbrlen
   reports an invalid sequence, an incomplete sequence, or a NUL
   character are treated as one-byte characters.

   The caller owns the returned array and must free () it.  If the array
   cannot be allocated the process is aborted instead of writing through
   a null pointer.  (For SIZE == 0 the result may legitimately be NULL.)  */
static char*
check_multibyte_string(char const *buf, size_t size)
{
  /* calloc gives the zero fill that marks "not a character start".  */
  char *mb_properties = calloc (size, sizeof *mb_properties);
  mbstate_t cur_state;
  size_t i;   /* size_t, not int: it is compared with and advanced by size_t.  */

  if (mb_properties == NULL && size != 0)
    abort ();

  memset (&cur_state, 0, sizeof cur_state);
  for (i = 0; i < size; )
    {
      size_t mbclen = mbrlen (buf + i, size - i, &cur_state);

      if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
        {
          /* Invalid, incomplete or NUL: step over a single byte.  */
          mbclen = 1;
        }
      mb_properties[i] = mbclen;
      i += mbclen;
    }
  return mb_properties;
}
#endif
/* Compile PATTERN (SIZE bytes) for the "grep"/"default" matchers.

   PATTERN may hold several patterns separated by newlines.  Each one is
   compiled with re_compile_pattern into a new slot appended to the
   file-level `patterns' array.  The complete pattern text is then
   compiled once more into the DFA, after which kwsmusts builds the
   keyword set.

   When match_words or match_lines is set, the text handed to the DFA is
   first wrapped in a basic-regex prefix/suffix expressing that
   constraint; the per-line regexes are compiled unwrapped.

   Every failure (out of memory, invalid regex) is reported through
   error () with status 2.  Regex error strings are passed as data behind
   a "%s" format, never as the format itself.  */
static void
Gcompile (char const *pattern, size_t size)
{
  const char *err;
  char const *sep;
  size_t total = size;          /* bytes of PATTERN not yet consumed */
  char const *motif = pattern;  /* start of the current sub-pattern */

  re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE);
  dfasyntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte);

  do
    {
      size_t len;

      sep = memchr (motif, '\n', total);
      if (sep)
        {
          len = sep - motif;
          sep++;
          total -= (len + 1);
        }
      else
        {
          len = total;
          total = 0;
        }

      patterns = realloc (patterns, (pcount + 1) * sizeof (*patterns));
      if (patterns == NULL)
        error (2, errno, _("memory exhausted"));

      patterns[pcount] = patterns0;
      if ((err = re_compile_pattern (motif, len,
                                     &(patterns[pcount].regexbuf))) != 0)
        error (2, 0, "%s", err);
      pcount++;

      motif = sep;
    } while (sep && total != 0);

  if (match_words || match_lines)
    {
      static char const line_beg[] = "^\\(";
      static char const line_end[] = "\\)$";
      static char const word_beg[] = "\\(^\\|[^[:alnum:]_]\\)\\(";
      static char const word_end[] = "\\)\\([^[:alnum:]_]\\|$\\)";
      /* Sized for the longer (word) wrapper plus the terminating NUL,
         which also covers the shorter line wrapper.  */
      char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end);
      size_t i;

      if (n == NULL)
        error (2, errno, _("memory exhausted"));

      strcpy (n, match_lines ? line_beg : word_beg);
      i = strlen (n);
      memcpy (n + i, pattern, size);
      i += size;
      strcpy (n + i, match_lines ? line_end : word_end);
      i += strlen (n + i);
      /* NOTE(review): the wrapper buffer is not freed here; whether
         dfacomp keeps a reference to it is not visible from this file.  */
      pattern = n;
      size = i;
    }

  dfacomp (pattern, size, &dfa, 1);
  kwsmusts ();
}
/* Compile PATTERN (SIZE bytes) for the "egrep" and "awk" matchers.

   The syntax is RE_SYNTAX_AWK when the global `matcher' is "awk" and
   RE_SYNTAX_POSIX_EGREP otherwise.  PATTERN may hold several patterns
   separated by newlines.  Each one is compiled with re_compile_pattern
   into a new slot appended to the file-level `patterns' array.  The
   complete pattern text is then compiled once more into the DFA, after
   which kwsmusts builds the keyword set.

   When match_words or match_lines is set, the text handed to the DFA is
   first wrapped in an extended-regex prefix/suffix expressing that
   constraint; the per-line regexes are compiled unwrapped.

   Every failure (out of memory, invalid regex) is reported through
   error () with status 2.  Regex error strings are passed as data behind
   a "%s" format, never as the format itself.  */
static void
Ecompile (char const *pattern, size_t size)
{
  const char *err;
  const char *sep;
  size_t total = size;          /* bytes of PATTERN not yet consumed */
  char const *motif = pattern;  /* start of the current sub-pattern */

  if (strcmp (matcher, "awk") == 0)
    {
      re_set_syntax (RE_SYNTAX_AWK);
      dfasyntax (RE_SYNTAX_AWK, match_icase, eolbyte);
    }
  else
    {
      re_set_syntax (RE_SYNTAX_POSIX_EGREP);
      dfasyntax (RE_SYNTAX_POSIX_EGREP, match_icase, eolbyte);
    }

  do
    {
      size_t len;

      sep = memchr (motif, '\n', total);
      if (sep)
        {
          len = sep - motif;
          sep++;
          total -= (len + 1);
        }
      else
        {
          len = total;
          total = 0;
        }

      patterns = realloc (patterns, (pcount + 1) * sizeof (*patterns));
      if (patterns == NULL)
        error (2, errno, _("memory exhausted"));

      patterns[pcount] = patterns0;
      if ((err = re_compile_pattern (motif, len,
                                     &(patterns[pcount].regexbuf))) != 0)
        error (2, 0, "%s", err);
      pcount++;

      motif = sep;
    } while (sep && total != 0);

  if (match_words || match_lines)
    {
      static char const line_beg[] = "^(";
      static char const line_end[] = ")$";
      static char const word_beg[] = "(^|[^[:alnum:]_])(";
      static char const word_end[] = ")([^[:alnum:]_]|$)";
      /* Sized for the longer (word) wrapper plus the terminating NUL,
         which also covers the shorter line wrapper.  */
      char *n = malloc (sizeof word_beg - 1 + size + sizeof word_end);
      size_t i;

      if (n == NULL)
        error (2, errno, _("memory exhausted"));

      strcpy (n, match_lines ? line_beg : word_beg);
      i = strlen (n);
      memcpy (n + i, pattern, size);
      i += size;
      strcpy (n + i, match_lines ? line_end : word_end);
      i += strlen (n + i);
      /* NOTE(review): the wrapper buffer is not freed here; whether
         dfacomp keeps a reference to it is not visible from this file.  */
      pattern = n;
      size = i;
    }

  dfacomp (pattern, size, &dfa, 1);
  kwsmusts ();
}
/* Search the SIZE bytes at BUF with the patterns compiled by Gcompile or
   Ecompile.

   Normal mode (EXACT == 0): candidate lines are located with the keyword
   set if there is one, otherwise with the DFA, and are then confirmed
   with the compiled regexes where necessary.  match_lines/match_words
   are enforced on the regex result.  On success the return value is the
   offset from BUF of the start of the matching line and *MATCH_SIZE is
   that line's length including its terminating eol byte.

   Exact mode (EXACT != 0): the filters are skipped, the regexes are run
   over the first SIZE - 1 bytes of BUF, and the start reported by
   re_search is returned with *MATCH_SIZE set to the length of the match
   itself.

   Returns (size_t) -1 when nothing matches.

   BUF is expected to end with the eol byte: the `end - beg - 1'
   arithmetic below drops a trailing terminator.  If no eol is found
   after a candidate, the end of the buffer is used as the line end
   instead of advancing a null pointer.

   The multibyte boundary map, when one was built, is released on every
   exit path, including the exact-mode return.  */
static size_t
EGexecute (char const *buf, size_t size, size_t *match_size, int exact)
{
  register char const *buflim, *beg, *end;
  char eol = eolbyte;
  int backref, start, len;
  struct kwsmatch kwsm;
  size_t i;
#ifdef MBS_SUPPORT
  /* Stays NULL unless built below, so free () is always safe.  */
  char *mb_properties = NULL;
#endif

#ifdef MBS_SUPPORT
  if (MB_CUR_MAX > 1 && kwset)
    mb_properties = check_multibyte_string (buf, size);
#endif

  buflim = buf + size;

  for (beg = end = buf; end < buflim; beg = end)
    {
      if (!exact)
        {
          if (kwset)
            {
              /* Find a possible match using the keyword set.  */
              size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm);
              if (offset == (size_t) -1)
                {
#ifdef MBS_SUPPORT
                  free (mb_properties);
#endif
                  return (size_t) -1;
                }
              beg += offset;
              /* Narrow down to the line containing the candidate.  */
              end = memchr (beg, eol, buflim - beg);
              end = end ? end + 1 : buflim;
#ifdef MBS_SUPPORT
              /* A hit that starts inside a multibyte character is not a
                 real hit; move on to the next line.  */
              if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0)
                continue;
#endif
              while (beg > buf && beg[-1] != eol)
                --beg;
              /* Keywords below this index came from exact musts.  */
              if (kwsm.index < kwset_exact_matches)
                goto success;
              if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
                continue;
            }
          else
            {
              /* No keyword set: let the DFA find the candidate.  */
              size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref);
              if (offset == (size_t) -1)
                break;
              beg += offset;
              /* Narrow down to the line containing the candidate.  */
              end = memchr (beg, eol, buflim - beg);
              end = end ? end + 1 : buflim;
              while (beg > buf && beg[-1] != eol)
                --beg;
            }
          /* The DFA result stands unless it flagged `backref'
             (presumably: the pattern needs the full regex matcher).  */
          if (!backref)
            goto success;
        }
      else
        end = beg + size;

      /* Confirm the candidate with each compiled regex in turn.  */
      for (i = 0; i < pcount; i++)
        {
          patterns[i].regexbuf.not_eol = 0;
          if (0 <= (start = re_search (&(patterns[i].regexbuf), beg,
                                       end - beg - 1, 0,
                                       end - beg - 1, &(patterns[i].regs))))
            {
              len = patterns[i].regs.end[0] - start;
              if (exact)
                {
                  *match_size = len;
#ifdef MBS_SUPPORT
                  free (mb_properties);
#endif
                  return start;
                }
              if ((!match_lines && !match_words)
                  || (match_lines && len == end - beg - 1))
                goto success;
              /* For word matching, try shorter matches at this start
                 first, then later starts, until one is delimited by
                 non-word characters on both sides.  */
              if (match_words)
                while (start >= 0)
                  {
                    if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1]))
                        && (len == end - beg - 1
                            || !WCHAR ((unsigned char) beg[start + len])))
                      goto success;
                    if (len > 0)
                      {
                        /* Try a shorter length anchored at the same place.  */
                        --len;
                        patterns[i].regexbuf.not_eol = 1;
                        len = re_match (&(patterns[i].regexbuf), beg,
                                        start + len, start,
                                        &(patterns[i].regs));
                      }
                    if (len <= 0)
                      {
                        /* Try looking further on.  */
                        if (start == end - beg - 1)
                          break;
                        ++start;
                        patterns[i].regexbuf.not_eol = 0;
                        start = re_search (&(patterns[i].regexbuf), beg,
                                           end - beg - 1,
                                           start, end - beg - 1 - start,
                                           &(patterns[i].regs));
                        len = patterns[i].regs.end[0] - start;
                      }
                  }
            }
        }
    }

#ifdef MBS_SUPPORT
  free (mb_properties);
#endif
  return (size_t) -1;

 success:
#ifdef MBS_SUPPORT
  free (mb_properties);
#endif
  *match_size = end - beg;
  return beg - buf;
}
static void
Fcompile (char const *pattern, size_t size)
{
char const *beg, *lim, *err;
kwsinit ();
beg = pattern;
do
{
for (lim = beg; lim < pattern + size && *lim != '\n'; ++lim)
;
if ((err = kwsincr (kwset, beg, lim - beg)) != 0)
error (2, 0, err);
if (lim < pattern + size)
++lim;
beg = lim;
}
while (beg < pattern + size);
if ((err = kwsprep (kwset)) != 0)
error (2, 0, err);
}
/* Search the SIZE bytes at BUF for any keyword compiled by Fcompile.

   Normal mode (EXACT == 0): on success, returns the offset from BUF of
   the start of the line containing the match and stores that line's
   length, including its terminating eol byte, in *MATCH_SIZE.
   match_lines requires the keyword to span a whole line; match_words
   requires non-word characters (or the buffer edge) on both sides.

   Exact mode (EXACT != 0): returns the offset of the first keyword hit
   and stores the keyword length in *MATCH_SIZE.

   Returns (size_t) -1 when nothing matches.

   In word mode, a candidate followed by a word character is retried
   with a shorter length.  If that retry finds nothing, the scan
   continues from the next byte; the candidate's failure must not end
   the search of the whole buffer, because a later occurrence may still
   qualify (for example "foo" in "foobar foo").

   If no eol byte follows the match, the end of the buffer is used as
   the line end instead of advancing a null pointer.  */
static size_t
Fexecute (char const *buf, size_t size, size_t *match_size, int exact)
{
  register char const *beg, *try, *end;
  register size_t len;
  char eol = eolbyte;
  struct kwsmatch kwsmatch;
#ifdef MBS_SUPPORT
  /* Stays NULL in single-byte locales, so free () is always safe.  */
  char *mb_properties = NULL;
  if (MB_CUR_MAX > 1)
    mb_properties = check_multibyte_string (buf, size);
#endif

  for (beg = buf; beg <= buf + size; ++beg)
    {
      size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch);
      if (offset == (size_t) -1)
        {
#ifdef MBS_SUPPORT
          free (mb_properties);
#endif
          return offset;
        }
#ifdef MBS_SUPPORT
      /* Ignore a hit that starts inside a multibyte character.  */
      if (MB_CUR_MAX > 1 && mb_properties[offset + beg - buf] == 0)
        continue;
#endif
      beg += offset;
      len = kwsmatch.size[0];
      if (exact)
        {
          *match_size = len;
#ifdef MBS_SUPPORT
          free (mb_properties);
#endif
          return beg - buf;
        }
      if (match_lines)
        {
          if (beg > buf && beg[-1] != eol)
            continue;
          if (beg + len < buf + size && beg[len] != eol)
            continue;
          goto success;
        }
      else if (match_words)
        for (try = beg; len; )
          {
            if (try > buf && WCHAR ((unsigned char) try[-1]))
              break;
            if (try + len < buf + size && WCHAR ((unsigned char) try[len]))
              {
                /* Followed by a word character: look for a shorter
                   keyword inside the current candidate.  */
                offset = kwsexec (kwset, beg, --len, &kwsmatch);
                if (offset == (size_t) -1)
                  break;        /* give up on this candidate only */
                try = beg + offset;
                len = kwsmatch.size[0];
              }
            else
              goto success;
          }
      else
        goto success;
    }

#ifdef MBS_SUPPORT
  free (mb_properties);
#endif
  return -1;

 success:
  /* Widen the match to the whole line that contains it.  */
  end = memchr (beg + len, eol, (buf + size) - (beg + len));
  end = end ? end + 1 : buf + size;
  while (buf < beg && beg[-1] != eol)
    --beg;
  *match_size = end - beg;
#ifdef MBS_SUPPORT
  free (mb_properties);
#endif
  return beg - buf;
}
#if HAVE_LIBPCRE
/* Compiled PCRE pattern, set by Pcompile and used by Pexecute. */
static pcre *cre;
/* Result of pcre_study on `cre', passed along to pcre_exec. */
static pcre_extra *extra;
#endif
/* Compile PATTERN (SIZE bytes) for the "perl" matcher.

   Without PCRE support this only reports that -P is unsupported.
   Otherwise a NUL-terminated copy of PATTERN is built, in which every
   NUL byte is rewritten as the escape \000 (dropping a backslash that
   directly escaped the NUL), optionally wrapped for match_lines or
   match_words.  The copy is compiled and studied into the file-level
   `cre' and `extra'.

   When both match_lines and match_words are set, the line wrapper wins
   at both ends, so the result is always "^(" ... ")$".  The prefix and
   suffix must agree.

   Failures are reported through error () with status 2.  PCRE error
   strings are passed as data behind a "%s" format, never as the format
   itself.  */
static void
Pcompile (char const *pattern, size_t size)
{
#if !HAVE_LIBPCRE
  error (2, 0, _("The -P option is not supported"));
#else
  int e;
  char const *ep;
  /* Worst case: each byte becomes a 4-byte "\000"; plus up to 3 bytes of
     prefix, 3 bytes of suffix and the terminating NUL.  */
  char *re = xmalloc (4 * size + 7);
  int flags = PCRE_MULTILINE | (match_icase ? PCRE_CASELESS : 0);
  char const *patlim = pattern + size;
  char *n = re;
  char const *p;
  char const *pnul;

  if (eolbyte != '\n')
    error (2, 0, _("The -P and -z options cannot be combined"));

  /* Prefix: written in the same order as the suffix below, so that the
     line wrapper overrides the word wrapper at both ends.  */
  *n = '\0';
  if (match_words)
    strcpy (n, "\\b(");
  if (match_lines)
    strcpy (n, "^(");
  n += strlen (n);

  /* Copy the pattern, replacing each NUL byte with "\000".  */
  for (p = pattern; (pnul = memchr (p, '\0', patlim - p)); p = pnul + 1)
    {
      memcpy (n, p, pnul - p);
      n += pnul - p;
      /* Count the backslashes directly before the NUL.  An odd count
         means the last one escaped the NUL, so drop it.  */
      for (p = pnul; pattern < p && p[-1] == '\\'; p--)
        continue;
      n -= (pnul - p) & 1;
      strcpy (n, "\\000");
      n += 4;
    }
  memcpy (n, p, patlim - p);
  n += patlim - p;

  /* Suffix.  */
  *n = '\0';
  if (match_words)
    strcpy (n, ")\\b");
  if (match_lines)
    strcpy (n, ")$");

  cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ());
  if (!cre)
    error (2, 0, "%s", ep);

  extra = pcre_study (cre, 0, &ep);
  if (ep)
    error (2, 0, "%s", ep);

  free (re);
#endif
}
/* Search the SIZE bytes at BUF with the pattern compiled by Pcompile.

   Without PCRE support this aborts: Pcompile has already reported that
   -P is unsupported.

   On a match with EXACT == 0, returns the offset from BUF of the start
   of the line containing the match and stores that line's length,
   including its terminating eol byte, in *MATCH_SIZE.  With EXACT != 0
   the offset and length of the match itself are reported.

   Returns (size_t) -1 when nothing matches.  Running out of memory is
   reported through error () with status 2; any other pcre_exec result
   that is <= 0 aborts.

   If no eol byte follows the match, the end of the buffer is used as
   the line end instead of advancing a null pointer.  */
static size_t
Pexecute (char const *buf, size_t size, size_t *match_size, int exact)
{
#if !HAVE_LIBPCRE
  abort ();
  return -1;
#else
  /* Output vector handed to pcre_exec; entries 0 and 1 receive the
     bounds of the whole match.  */
  int sub[300];
  int e = pcre_exec (cre, extra, buf, size, 0, 0,
                     sub, sizeof sub / sizeof *sub);

  if (e <= 0)
    {
      switch (e)
        {
        case PCRE_ERROR_NOMATCH:
          return -1;

        case PCRE_ERROR_NOMEMORY:
          error (2, 0, _("Memory exhausted"));
          /* NOTE(review): error () with a nonzero status is expected not
             to return; if it did, control would reach abort () below.  */

        default:
          abort ();
        }
    }
  else
    {
      char const *beg = buf + sub[0];
      char const *end = buf + sub[1];
      char const *buflim = buf + size;
      char eol = eolbyte;

      if (!exact)
        {
          /* Widen the match to the whole line that contains it.  */
          end = memchr (end, eol, buflim - end);
          end = end ? end + 1 : buflim;
          while (buf < beg && beg[-1] != eol)
            --beg;
        }

      *match_size = end - beg;
      return beg - buf;
    }
#endif
}
/* Table of available matchers.  Each entry pairs a matcher name with the
   compile and execute functions defined above.  "default" and "grep"
   share Gcompile, "egrep" and "awk" share Ecompile (which checks the
   global `matcher' to pick the awk syntax), and all four use EGexecute.
   The final entry has an empty name and null function pointers;
   presumably it is the end-of-table sentinel -- confirm against the code
   that walks this table. */
struct matcher const matchers[] = {
{ "default", Gcompile, EGexecute },
{ "grep", Gcompile, EGexecute },
{ "egrep", Ecompile, EGexecute },
{ "awk", Ecompile, EGexecute },
{ "fgrep", Fcompile, Fexecute },
{ "perl", Pcompile, Pexecute },
{ "", 0, 0 },
};