#include "m4.h"
#ifdef ENABLE_CHANGEWORD
#include "regex.h"
#endif
/* The kinds of entry that can appear on the input stack.  */
typedef enum input_type
{
  INPUT_FILE,                   /* characters come from a FILE stream */
  INPUT_STRING,                 /* characters come from a string in memory */
  INPUT_MACRO                   /* the entry carries a builtin function */
} input_type;
/* One entry of the input stack.  `type' tells which member of the
   union `u' is in use.  */
typedef struct input_block
{
  struct input_block *prev;     /* entry underneath this one, or NULL */
  input_type type;              /* selects the active member of `u' */
  union
    {
      /* Used when type is INPUT_STRING.  */
      struct
        {
          char *string;         /* next unread character of the string */
        }
      u_s;
      /* Used when type is INPUT_FILE.  Apart from `file', the fields
         hold the values that were current when the file was pushed,
         so that pop_input can put them back.  */
      struct
        {
          FILE *file;           /* stream being read */
          const char *name;     /* saved current_file */
          int lineno;           /* saved current_line */
          int out_lineno;       /* saved output_current_line */
          boolean advance_line; /* saved start_of_input_line */
        }
      u_f;
      /* Used when type is INPUT_MACRO.  */
      builtin_func *func;
    }
  u;
} input_block;
/* Name of the file currently being read.  push_file points it at a copy
   of the new file's title; pop_input restores the value saved in the
   popped block.  */
const char *current_file;
/* Line number within current_file.  next_char_1 increments it on the
   first call after a newline has been read from a file.  */
int current_line;
/* Obstack on which next_token assembles the text of a token.  */
static struct obstack token_stack;
/* Obstack holding the blocks queued by push_wrapup.  pop_wrapup turns it
   into current_input and allocates a fresh one.  */
static struct obstack *wrapup_stack;
/* Obstack from which the blocks of the active input stack are allocated.  */
static struct obstack *current_input;
/* Bottom-most object on token_stack; next_token frees back to it before
   it starts collecting a new token.  */
static char *token_bottom;
/* Top of the active input stack; NULL when no input is left.  */
static input_block *isp;
/* Top of the stack of blocks queued by push_wrapup.  */
static input_block *wsp;
/* A block that has been allocated but is not (or no longer) on the input
   stack: set by push_string_init while a string is being collected, and
   by pop_input when the last file block is popped.  The push functions
   free it before allocating anything new.  */
static input_block *next;
/* Set when a newline has just been read from a file, so that the next
   call of next_char_1 advances current_line.  */
static boolean start_of_input_line;
/* Pseudo-characters with values above 255, returned by peek_input and
   next_char: CHAR_EOF when the input stack is empty, CHAR_MACRO when the
   top block is of type INPUT_MACRO.  */
#define CHAR_EOF 256
#define CHAR_MACRO 257
/* Current quote and comment delimiters.  input_init sets the defaults;
   set_quotes and set_comment replace them.  */
STRING rquote;
STRING lquote;
STRING bcomm;
STRING ecomm;
#ifdef ENABLE_CHANGEWORD
/* Regular expression that set_word_regexp treats as a request for the
   built-in word syntax.  */
# define DEFAULT_WORD_REGEXP "[_a-zA-Z][_a-zA-Z0-9]*"
/* 256-entry table built by set_word_regexp: word_start[c] is non-zero
   when the single character c is matched by word_regexp.  */
static char *word_start;
/* Compiled form of the user supplied word regexp.  */
static struct re_pattern_buffer word_regexp;
/* Non-zero while the built-in word syntax is in effect.  */
static int default_word_regexp;
/* Match registers attached to word_regexp by set_word_regexp and filled
   in by the re_search call in next_token.  */
static struct re_registers regs;
#else
/* Without changeword support the built-in word syntax is always used.  */
# define default_word_regexp 1
#endif
#ifdef DEBUG_INPUT
static const char *token_type_string (token_type);
#endif
/* Make the stream FP the current source of input.  TITLE is copied and
   becomes current_file; the previous file name, line number, output
   line number and start-of-line flag are stored in the new block so
   that pop_input can restore them.  */
void
push_file (FILE *fp, const char *title)
{
  input_block *block;

  /* Release a block still recorded in `next' (an unfinished string, or
     the last file block kept by pop_input) before allocating.  */
  if (next != NULL)
    {
      obstack_free (current_input, next);
      next = NULL;
    }

  if (debug_level & DEBUG_TRACE_INPUT)
    DEBUG_MESSAGE1 ("input read from %s", title);

  block = (input_block *) obstack_alloc (current_input,
                                         sizeof (struct input_block));
  block->prev = isp;
  block->type = INPUT_FILE;
  block->u.u_f.file = fp;

  /* Snapshot of the state in effect before this file.  */
  block->u.u_f.name = current_file;
  block->u.u_f.lineno = current_line;
  block->u.u_f.out_lineno = output_current_line;
  block->u.u_f.advance_line = start_of_input_line;

  /* The copy of the title is allocated after the block, on the same
     obstack.  */
  current_file = obstack_copy0 (current_input, title, strlen (title));
  current_line = 1;
  output_current_line = -1;

  isp = block;
}
/* Push a block of type INPUT_MACRO carrying the builtin function FUNC
   onto the input stack.  */
void
push_macro (builtin_func *func)
{
  input_block *block;

  /* Release a block still recorded in `next' before allocating.  */
  if (next != NULL)
    {
      obstack_free (current_input, next);
      next = NULL;
    }

  block = (input_block *) obstack_alloc (current_input,
                                         sizeof (struct input_block));
  block->prev = isp;
  block->type = INPUT_MACRO;
  block->u.func = func;

  isp = block;
}
struct obstack *
push_string_init (void)
{
if (next != NULL)
{
M4ERROR ((warning_status, 0,
"INTERNAL ERROR: recursive push_string!"));
abort ();
}
next = (input_block *) obstack_alloc (current_input,
sizeof (struct input_block));
next->type = INPUT_STRING;
return current_input;
}
const char *
push_string_finish (void)
{
const char *ret = NULL;
if (next == NULL)
return NULL;
if (obstack_object_size (current_input) > 0)
{
obstack_1grow (current_input, '\0');
next->u.u_s.string = obstack_finish (current_input);
next->prev = isp;
isp = next;
ret = isp->u.u_s.string;
}
else
obstack_free (current_input, next);
next = NULL;
return ret;
}
/* Queue a copy of the string S on the wrapup stack.  The queued blocks
   become the input stack when pop_wrapup is called.  */
void
push_wrapup (const char *s)
{
  input_block *entry;

  entry = (input_block *) obstack_alloc (wrapup_stack,
                                         sizeof (struct input_block));
  entry->type = INPUT_STRING;
  entry->u.u_s.string = obstack_copy0 (wrapup_stack, s, strlen (s));
  entry->prev = wsp;

  wsp = entry;
}
/* Remove the top block from the input stack.  For a file block the
   stream is closed (read and close errors are reported and recorded in
   retcode) and the file name, line numbers and start-of-line flag saved
   by push_file are restored.  A block of unknown type is an internal
   error and aborts.  */
static void
pop_input (void)
{
  input_block *below = isp->prev;

  if (isp->type == INPUT_FILE)
    {
      FILE *stream = isp->u.u_f.file;

      if (debug_level & DEBUG_TRACE_INPUT)
        {
          if (isp->u.u_f.lineno)
            DEBUG_MESSAGE2 ("input reverted to %s, line %d",
                            isp->u.u_f.name, isp->u.u_f.lineno);
          else
            DEBUG_MESSAGE ("input exhausted");
        }

      if (ferror (stream))
        {
          M4ERROR ((warning_status, 0, "read error"));
          fclose (stream);
          retcode = EXIT_FAILURE;
        }
      else if (fclose (stream) == EOF)
        {
          M4ERROR ((warning_status, errno, "error reading file"));
          retcode = EXIT_FAILURE;
        }

      start_of_input_line = isp->u.u_f.advance_line;
      output_current_line = isp->u.u_f.out_lineno;
      current_line = isp->u.u_f.lineno;
      current_file = isp->u.u_f.name;

      if (below == NULL)
        {
          /* This was the last block.  Its storage is not released here;
             it is recorded in `next' so that a later push frees it.  */
          next = isp;
          isp = NULL;
          return;
        }
      output_current_line = -1;
    }
  else if (isp->type != INPUT_STRING && isp->type != INPUT_MACRO)
    {
      M4ERROR ((warning_status, 0,
                "INTERNAL ERROR: input stack botch in pop_input ()"));
      abort ();
    }

  obstack_free (current_input, isp);
  next = NULL;
  isp = below;
}
/* Discard the exhausted input obstack and switch to the text queued by
   push_wrapup.  Returns TRUE when wrapup text was queued: the wrapup
   obstack becomes current_input, its blocks become the input stack,
   and a fresh wrapup obstack is created.  Returns FALSE when nothing
   was queued; in that case the wrapup obstack is released as well.  */
boolean
pop_wrapup (void)
{
  next = NULL;
  obstack_free (current_input, NULL);
  free (current_input);

  if (wsp != NULL)
    {
      current_input = wrapup_stack;
      isp = wsp;
      wsp = NULL;

      wrapup_stack = (struct obstack *) xmalloc (sizeof (struct obstack));
      obstack_init (wrapup_stack);
      return TRUE;
    }

  obstack_free (wrapup_stack, NULL);
  free (wrapup_stack);
  return FALSE;
}
/* Fill TD with the builtin function carried by the top input block,
   which must be of type INPUT_MACRO; anything else is an internal
   error and aborts.  The block itself is left on the stack.  */
static void
init_macro_token (token_data *td)
{
  if (isp->type == INPUT_MACRO)
    {
      TOKEN_DATA_FUNC (td) = isp->u.func;
      TOKEN_DATA_TYPE (td) = TOKEN_FUNC;
      return;
    }

  M4ERROR ((warning_status, 0,
            "INTERNAL ERROR: bad call to init_macro_token ()"));
  abort ();
}
/* Return the next input character without consuming it.  Exhausted
   string and file blocks are popped along the way.  Returns CHAR_EOF
   when the input stack is empty and CHAR_MACRO when the top block is
   of type INPUT_MACRO.  */
static int
peek_input (void)
{
  while (isp != NULL)
    {
      int c;

      if (isp->type == INPUT_MACRO)
        return CHAR_MACRO;

      if (isp->type == INPUT_STRING)
        {
          c = to_uchar (isp->u.u_s.string[0]);
          if (c != '\0')
            return c;
        }
      else if (isp->type == INPUT_FILE)
        {
          c = getc (isp->u.u_f.file);
          if (c != EOF)
            {
              /* Put it back: this was only a look ahead.  */
              ungetc (c, isp->u.u_f.file);
              return c;
            }
        }
      else
        {
          M4ERROR ((warning_status, 0,
                    "INTERNAL ERROR: input stack botch in peek_input ()"));
          abort ();
        }

      /* The top block has nothing left; try the one underneath.  */
      pop_input ();
    }

  return CHAR_EOF;
}
/* Read and consume the next input character.  When the top block is a
   string that still has unread characters, the character is taken
   directly from it and the string pointer is advanced; every other
   case (empty stack, exhausted string, file, macro) is handed to
   next_char_1.  */
#define next_char() \
(isp && isp->type == INPUT_STRING && isp->u.u_s.string[0] \
? to_uchar (*isp->u.u_s.string++) \
: next_char_1 ())
/* Slow path of next_char: read and consume one character.  If the
   previous character read from a file was a newline, current_line is
   advanced first.  Exhausted blocks are popped.  Returns CHAR_EOF when
   the input stack is empty; for a block of type INPUT_MACRO the block
   is popped and CHAR_MACRO is returned.  */
static int
next_char_1 (void)
{
  if (start_of_input_line)
    {
      current_line++;
      start_of_input_line = FALSE;
    }

  /* Each pass that does not return has found the top block exhausted,
     so the block is popped before the next pass.  */
  for (; isp != NULL; pop_input ())
    {
      int c;

      if (isp->type == INPUT_STRING)
        {
          c = to_uchar (*isp->u.u_s.string++);
          if (c != '\0')
            return c;
        }
      else if (isp->type == INPUT_FILE)
        {
          c = getc (isp->u.u_f.file);
          if (c != EOF)
            {
              if (c == '\n')
                start_of_input_line = TRUE;
              return c;
            }
        }
      else if (isp->type == INPUT_MACRO)
        {
          pop_input ();
          return CHAR_MACRO;
        }
      else
        {
          M4ERROR ((warning_status, 0,
                    "INTERNAL ERROR: input stack botch in next_char ()"));
          abort ();
        }
    }

  return CHAR_EOF;
}
/* Consume input up to and including the next newline.  If the input
   ends first, a warning is issued that refers to the file and line
   where the skipping started.  */
void
skip_line (void)
{
  const char *start_file = current_file;
  int start_line = current_line;
  int c;

  do
    {
      c = next_char ();
    }
  while (c != CHAR_EOF && c != '\n');

  if (c == CHAR_EOF)
    M4ERROR_AT_LINE ((warning_status, 0, start_file, start_line,
                      "Warning: end of file treated as newline"));
}
/* Test whether the upcoming input starts with the string S.  Returns
   TRUE when all of S is matched, FALSE otherwise.  If CONSUME is true
   and the match succeeds, the matched characters are removed from the
   input.  In every other case the characters that were read while
   comparing are pushed back as a string block, so the input is left as
   it was found.  */
static boolean
match_input (const char *s, boolean consume)
{
int n;
int ch;
const char *t;
boolean result = FALSE;
ch = peek_input ();
if (ch != to_uchar (*s))
return FALSE;
/* S is a single character: the peek above decided the match.  */
if (s[1] == '\0')
{
if (consume)
(void) next_char ();
return TRUE;
}
/* Longer S: consume the first character, then compare one character at
   a time.  T keeps the start of S and N counts the characters consumed
   so far, so that they can be pushed back below.  */
(void) next_char ();
for (n = 1, t = s++; (ch = peek_input ()) == to_uchar (*s++); )
{
(void) next_char ();
n++;
/* Reached the end of S: complete match.  */
if (*s == '\0')
{
if (consume)
return TRUE;
result = TRUE;
break;
}
}
/* Either the match failed or it must not be consumed: push the N
   characters that were read (they equal the first N characters of the
   original S) back onto the input.  */
{
struct obstack *h = push_string_init ();
obstack_grow (h, t, n);
}
push_string_finish ();
return result;
}
/* Test whether the input, whose first character is CH, starts with the
   delimiter S.  An empty S never matches, and a one-character S is
   decided by comparing CH alone.  For a longer S the rest is checked by
   match_input, starting at S + CONSUME: next_token passes TRUE after it
   has already consumed CH, so matching resumes at the second character
   of S, while peek_token passes FALSE after only peeking at CH, so
   matching starts at the beginning of S.  This relies on CONSUME
   evaluating to 0 or 1.  CH and S are evaluated more than once.  */
#define MATCH(ch, s, consume) \
(to_uchar ((s)[0]) == (ch) \
&& (ch) != '\0' \
&& ((s)[1] == '\0' || (match_input ((s) + (consume), consume))))
/* Initialise the input module: empty input and wrapup stacks, fresh
   obstacks for input, wrapup text and tokens, and the default quote and
   comment delimiters.  With changeword support the word regexp is set
   from user_word_regexp.  */
void
input_init (void)
{
  /* No file is being read yet.  */
  current_file = "";
  current_line = 0;
  start_of_input_line = FALSE;

  isp = NULL;
  wsp = NULL;
  next = NULL;

  /* The token obstack starts with a one-byte object that marks its
     bottom.  */
  obstack_init (&token_stack);
  obstack_1grow (&token_stack, '\0');
  token_bottom = obstack_finish (&token_stack);

  current_input = (struct obstack *) xmalloc (sizeof (struct obstack));
  obstack_init (current_input);
  wrapup_stack = (struct obstack *) xmalloc (sizeof (struct obstack));
  obstack_init (wrapup_stack);

  lquote.string = xstrdup (DEF_LQUOTE);
  rquote.string = xstrdup (DEF_RQUOTE);
  bcomm.string = xstrdup (DEF_BCOMM);
  ecomm.string = xstrdup (DEF_ECOMM);
  lquote.length = strlen (lquote.string);
  rquote.length = strlen (rquote.string);
  bcomm.length = strlen (bcomm.string);
  ecomm.length = strlen (ecomm.string);

#ifdef ENABLE_CHANGEWORD
  set_word_regexp (user_word_regexp);
#endif
}
/* Replace the quote delimiters with copies of LQ and RQ.  A NULL
   argument selects the corresponding default delimiter.  */
void
set_quotes (const char *lq, const char *rq)
{
  const char *left = (lq != NULL) ? lq : DEF_LQUOTE;
  const char *right = (rq != NULL) ? rq : DEF_RQUOTE;

  free (lquote.string);
  free (rquote.string);

  lquote.string = xstrdup (left);
  rquote.string = xstrdup (right);
  lquote.length = strlen (lquote.string);
  rquote.length = strlen (rquote.string);
}
/* Replace the comment delimiters with copies of BC and EC.  A NULL
   argument selects the corresponding default delimiter.  */
void
set_comment (const char *bc, const char *ec)
{
  const char *begin = (bc != NULL) ? bc : DEF_BCOMM;
  const char *end = (ec != NULL) ? ec : DEF_ECOMM;

  free (bcomm.string);
  free (ecomm.string);

  bcomm.string = xstrdup (begin);
  ecomm.string = xstrdup (end);
  bcomm.length = strlen (bcomm.string);
  ecomm.length = strlen (ecomm.string);
}
#ifdef ENABLE_CHANGEWORD
/* Put BUF into an empty state (no compiled code, no fastmap, no
   translation table) before a pattern is compiled into it.  */
static void
init_pattern_buffer (struct re_pattern_buffer *buf)
{
  buf->buffer = NULL;
  buf->allocated = 0;
  buf->fastmap = NULL;
  buf->translate = NULL;
}
/* Install REGEXP as the regular expression that recognises words.  An
   empty string, or the text of DEFAULT_WORD_REGEXP, selects the
   built-in word syntax.  A regexp that does not compile is reported
   and the current setting is left unchanged.  On success the table
   word_start is rebuilt so that word_start[c] is non-zero for every
   character c which, on its own, is matched by the new regexp.  */
void
set_word_regexp (const char *regexp)
{
  int i;
  char test[2];
  const char *msg;
  struct re_pattern_buffer new_word_regexp;

  if (!*regexp || !strcmp (regexp, DEFAULT_WORD_REGEXP))
    {
      default_word_regexp = TRUE;
      return;
    }

  /* Trial compilation into a scratch buffer, so that a bad expression
     does not disturb word_regexp.  */
  init_pattern_buffer (&new_word_regexp);
  msg = re_compile_pattern (regexp, strlen (regexp), &new_word_regexp);
  regfree (&new_word_regexp);

  if (msg != NULL)
    {
      M4ERROR ((warning_status, 0,
                "bad regular expression `%s': %s", regexp, msg));
      return;
    }

  /* The expression is known to compile; now compile it for real and
     attach the file-scope registers to it.  */
  regfree (&word_regexp);
  msg = re_compile_pattern (regexp, strlen (regexp), &word_regexp);
  re_set_registers (&word_regexp, &regs, regs.num_regs, regs.start, regs.end);

  if (msg != NULL)
    {
      M4ERROR ((EXIT_FAILURE, 0,
                "INTERNAL ERROR: expression recompilation `%s': %s",
                regexp, msg));
    }

  default_word_regexp = FALSE;

  if (word_start == NULL)
    word_start = xmalloc (256);

  /* Probe every non-NUL character as a one-character string; NUL never
     starts a word.  */
  word_start[0] = '\0';
  test[1] = '\0';
  for (i = 1; i < 256; i++)
    {
      test[0] = i;
      word_start[i] = re_search (&word_regexp, test, 1, 0, 0, NULL) >= 0;
    }
}
#endif
/* Read the next token from the input and return its class.

   TOKEN_EOF is returned, with TD untouched, when the input is
   exhausted.  TOKEN_MACDEF is returned for a block of type
   INPUT_MACRO; TD then carries the builtin function.  For every other
   class TD is a text token whose text lives on token_stack and stays
   valid until the next call:

     TOKEN_STRING  a comment (delimiters included) or a quoted string
                   (outermost quotes removed)
     TOKEN_WORD    a word according to the current word syntax
     TOKEN_OPEN, TOKEN_COMMA, TOKEN_CLOSE
                   the single characters `(', `,' and `)'
     TOKEN_SIMPLE  any other single character

   End of input inside a comment or a quoted string is reported through
   M4ERROR_AT_LINE with EXIT_FAILURE as status, citing the file and line
   where the token began.  */
token_type
next_token (token_data *td)
{
  int ch;
  int quote_level;
  token_type type;
#ifdef ENABLE_CHANGEWORD
  int startpos;
  char *orig_text = NULL;
#endif
  const char *file = current_file;
  int line = current_line;

  /* Throw away the previous token and re-establish the bottom marker.  */
  obstack_free (&token_stack, token_bottom);
  obstack_1grow (&token_stack, '\0');
  token_bottom = obstack_finish (&token_stack);

  ch = peek_input ();
  if (ch == CHAR_EOF)
    {
#ifdef DEBUG_INPUT
      fprintf (stderr, "next_token -> EOF\n");
#endif
      return TOKEN_EOF;
    }
  if (ch == CHAR_MACRO)
    {
      init_macro_token (td);
      (void) next_char ();
#ifdef DEBUG_INPUT
      fprintf (stderr, "next_token -> MACDEF (%s)\n",
               find_builtin_by_addr (TOKEN_DATA_FUNC (td))->name);
#endif
      return TOKEN_MACDEF;
    }

  /* CH is consumed here; the MATCH calls below therefore pass TRUE.  */
  (void) next_char ();
  if (MATCH (ch, bcomm.string, TRUE))
    {
      /* Comment: copied verbatim, delimiters included.  */
      obstack_grow (&token_stack, bcomm.string, bcomm.length);
      while ((ch = next_char ()) != CHAR_EOF
             && !MATCH (ch, ecomm.string, TRUE))
        obstack_1grow (&token_stack, ch);
      if (ch != CHAR_EOF)
        obstack_grow (&token_stack, ecomm.string, ecomm.length);
      else
        M4ERROR_AT_LINE ((EXIT_FAILURE, 0, file, line,
                          "ERROR: end of file in comment"));

      type = TOKEN_STRING;
    }
  else if (default_word_regexp && (isalpha (ch) || ch == '_'))
    {
      /* Word in the built-in syntax: letter or underscore followed by
         letters, digits and underscores.  */
      obstack_1grow (&token_stack, ch);
      while ((ch = peek_input ()) != CHAR_EOF && (isalnum (ch) || ch == '_'))
        {
          obstack_1grow (&token_stack, ch);
          (void) next_char ();
        }
      type = TOKEN_WORD;
    }

#ifdef ENABLE_CHANGEWORD

  else if (!default_word_regexp && word_start[ch])
    {
      /* Word according to the user supplied regexp: keep appending
         characters for as long as the whole collected text is still
         matched from its first to its last character.  */
      obstack_1grow (&token_stack, ch);
      while (1)
        {
          ch = peek_input ();
          if (ch == CHAR_EOF)
            break;
          obstack_1grow (&token_stack, ch);
          startpos = re_search (&word_regexp, obstack_base (&token_stack),
                                obstack_object_size (&token_stack), 0, 0,
                                &regs);
          if (startpos != 0 ||
              regs.end [0] != obstack_object_size (&token_stack))
            {
              /* The last character broke the match: overwrite it with
                 a NUL and leave it in the input.  */
              *(((char *) obstack_base (&token_stack)
                 + obstack_object_size (&token_stack)) - 1) = '\0';
              break;
            }
          next_char ();
        }

      obstack_1grow (&token_stack, '\0');
      orig_text = obstack_finish (&token_stack);

      /* The token text is the first parenthesised group if it took
         part in the match, otherwise the whole match.  */
      if (regs.start[1] != -1)
        obstack_grow (&token_stack, orig_text + regs.start[1],
                      regs.end[1] - regs.start[1]);
      else
        obstack_grow (&token_stack, orig_text, regs.end[0]);

      type = TOKEN_WORD;
    }

#endif /* ENABLE_CHANGEWORD */

  else if (!MATCH (ch, lquote.string, TRUE))
    {
      /* A single character that starts none of the above.  */
      switch (ch)
        {
        case '(':
          type = TOKEN_OPEN;
          break;
        case ',':
          type = TOKEN_COMMA;
          break;
        case ')':
          type = TOKEN_CLOSE;
          break;
        default:
          type = TOKEN_SIMPLE;
          break;
        }
      obstack_1grow (&token_stack, ch);
    }
  else
    {
      /* Quoted string: collect up to the matching right quote.  Nested
         quotes are kept in the text, the outermost pair is dropped.  */
      quote_level = 1;
      while (1)
        {
          ch = next_char ();
          if (ch == CHAR_EOF)
            M4ERROR_AT_LINE ((EXIT_FAILURE, 0, file, line,
                              "ERROR: end of file in string"));

          if (MATCH (ch, rquote.string, TRUE))
            {
              if (--quote_level == 0)
                break;
              obstack_grow (&token_stack, rquote.string, rquote.length);
            }
          else if (MATCH (ch, lquote.string, TRUE))
            {
              quote_level++;
              obstack_grow (&token_stack, lquote.string, lquote.length);
            }
          else
            obstack_1grow (&token_stack, ch);
        }
      type = TOKEN_STRING;
    }

  obstack_1grow (&token_stack, '\0');

  TOKEN_DATA_TYPE (td) = TOKEN_TEXT;
  TOKEN_DATA_TEXT (td) = obstack_finish (&token_stack);
#ifdef ENABLE_CHANGEWORD
  /* Only the changeword branch sets orig_text; for all other tokens
     the original text is the token text itself.  */
  if (orig_text == NULL)
    orig_text = TOKEN_DATA_TEXT (td);
  TOKEN_DATA_ORIG_TEXT (td) = orig_text;
#endif
#ifdef DEBUG_INPUT
  fprintf (stderr, "next_token -> %s (%s)\n",
           token_type_string (type), TOKEN_DATA_TEXT (td));
#endif
  return type;
}
/* Return the class of the token that next_token would read next,
   without consuming any input.  Comments and quoted strings are both
   reported as TOKEN_STRING.  */
token_type
peek_token (void)
{
  token_type kind;
  int c = peek_input ();

  if (c == CHAR_EOF)
    {
#ifdef DEBUG_INPUT
      fprintf (stderr, "peek_token -> EOF\n");
#endif
      kind = TOKEN_EOF;
    }
  else if (c == CHAR_MACRO)
    {
#ifdef DEBUG_INPUT
      fprintf (stderr, "peek_token -> MACDEF\n");
#endif
      kind = TOKEN_MACDEF;
    }
  else if (MATCH (c, bcomm.string, FALSE))
    {
#ifdef DEBUG_INPUT
      fprintf (stderr, "peek_token -> COMMENT\n");
#endif
      kind = TOKEN_STRING;
    }
  else if ((default_word_regexp && (isalpha (c) || c == '_'))
#ifdef ENABLE_CHANGEWORD
           || (! default_word_regexp && word_start[c])
#endif
           )
    {
#ifdef DEBUG_INPUT
      fprintf (stderr, "peek_token -> WORD\n");
#endif
      kind = TOKEN_WORD;
    }
  else if (MATCH (c, lquote.string, FALSE))
    {
#ifdef DEBUG_INPUT
      fprintf (stderr, "peek_token -> QUOTE\n");
#endif
      kind = TOKEN_STRING;
    }
  else if (c == '(')
    {
#ifdef DEBUG_INPUT
      fprintf (stderr, "peek_token -> OPEN\n");
#endif
      kind = TOKEN_OPEN;
    }
  else if (c == ',')
    {
#ifdef DEBUG_INPUT
      fprintf (stderr, "peek_token -> COMMA\n");
#endif
      kind = TOKEN_COMMA;
    }
  else if (c == ')')
    {
#ifdef DEBUG_INPUT
      fprintf (stderr, "peek_token -> CLOSE\n");
#endif
      kind = TOKEN_CLOSE;
    }
  else
    {
#ifdef DEBUG_INPUT
      fprintf (stderr, "peek_token -> SIMPLE\n");
#endif
      kind = TOKEN_SIMPLE;
    }

  return kind;
}
#ifdef DEBUG_INPUT
/* Return a printable name for the token class T.  A value that is not
   one of the known classes aborts.  */
static const char *
token_type_string (token_type t)
{
  const char *label = NULL;

  switch (t)
    {
    case TOKEN_EOF:
      label = "EOF";
      break;
    case TOKEN_STRING:
      label = "STRING";
      break;
    case TOKEN_WORD:
      label = "WORD";
      break;
    case TOKEN_OPEN:
      label = "OPEN";
      break;
    case TOKEN_COMMA:
      label = "COMMA";
      break;
    case TOKEN_CLOSE:
      label = "CLOSE";
      break;
    case TOKEN_SIMPLE:
      label = "SIMPLE";
      break;
    case TOKEN_MACDEF:
      label = "MACDEF";
      break;
    default:
      abort ();
    }

  return label;
}
/* Print a description of the token TD of class T on stderr, prefixed
   with S.  Text tokens are shown as their kind followed by the quoted
   text.  TOKEN_MACDEF and TOKEN_EOF are complete after their own line:
   next_token stores no text for them, so the text of TD must not be
   read.  */
static void
print_token (const char *s, token_type t, token_data *td)
{
  fprintf (stderr, "%s: ", s);
  switch (t)
    {
    case TOKEN_OPEN:
    case TOKEN_COMMA:
    case TOKEN_CLOSE:
    case TOKEN_SIMPLE:
      fprintf (stderr, "char:");
      break;

    case TOKEN_WORD:
      fprintf (stderr, "word:");
      break;

    case TOKEN_STRING:
      fprintf (stderr, "string:");
      break;

    case TOKEN_MACDEF:
      fprintf (stderr, "macro: %p\n", TOKEN_DATA_FUNC (td));
      return;

    case TOKEN_EOF:
      fprintf (stderr, "eof\n");
      return;
    }
  fprintf (stderr, "\t\"%s\"\n", TOKEN_DATA_TEXT (td));
}
/* Debugging aid: read tokens until end of input, printing each one
   with print_token.  */
static void M4_GNUC_UNUSED
lex_debug (void)
{
  token_data td;
  token_type t;

  for (t = next_token (&td); t != TOKEN_EOF; t = next_token (&td))
    print_token ("lex", t, &td);
}
#endif