#include "render.h"
#include "htmltable.h"
#include "htmlparse.h"
#include "htmllex.h"
#include "utils.h"
#ifdef HAVE_LIBEXPAT
#include <expat.h>
#endif
#ifndef XML_STATUS_ERROR
#define XML_STATUS_ERROR 0
#endif
/*
 * Lexer state shared by the expat callbacks and htmllex().
 */
typedef struct {
#ifdef HAVE_LIBEXPAT
/* expat parser instance, created in initHTMLlexer() */
XML_Parser parser;
#endif
/* next unread position in the label source text */
char* ptr;
/* token set by the most recent callback; 0 means none produced yet */
int tok;
/* buffer receiving character data; also reused to build error context */
agxbuf* xb;
/* non-zero once a warning has been issued */
char warn;
/* non-zero once an error has been reported */
char error;
/* non-zero while character data should be collected */
char inCell;
/* 0 = "<HTML>" not yet fed, 1 = scanning the source, 2 = "</HTML>" fed */
char mode;
/* start of the chunk most recently handed to the parser */
char* currtok;
/* start of the chunk handed to the parser before currtok */
char* prevtok;
/* length of currtok */
int currtoklen;
/* length of prevtok */
int prevtoklen;
} lexstate_t;
/* Single instance used by every function in this file. */
static lexstate_t state;
static void
error_context ()
{
agxbclear (state.xb);
if (state.prevtoklen > 0)
agxbput_n (state.xb, state.prevtok, state.prevtoklen);
agxbput_n (state.xb, state.currtok, state.currtoklen);
agerr (AGPREV, "... %s ...\n", agxbuse (state.xb));
}
void
htmlerror(const char *msg)
{
if (state.error) return;
state.error = 1;
agerr (AGERR, "%s in line %d \n", msg, htmllineno());
error_context();
}
#ifdef HAVE_LIBEXPAT
static void
lexerror (const char* name)
{
state.tok = T_error;
state.error = 1;
agerr(AGERR, "Unknown HTML element <%s> on line %d \n",
name, htmllineno());
}
/*
 * Attribute handler: receives the object being filled in and the
 * attribute's value string. The handlers in this file return 0 when the
 * value was accepted and 1 when it was rejected with a warning.
 */
typedef int (*attrFn)(void*, char*);
/* Comparison function type as expected by bsearch(). */
typedef int (*bcmpfn)(const void *, const void *);
/*
 * Integer limits used to range-check attribute values.
 * With 8-bit chars MAX_CHAR is 127 and MIN_CHAR is -128.
 */
#define MAX_CHAR (((unsigned char)(~0)) >> 1)
#define MIN_CHAR ((signed char)(~MAX_CHAR))
#define MAX_UCHAR ((unsigned char)(~0))
#define MAX_USHORT ((unsigned short)(~0))
/* One entry of an attribute table: attribute name and its handler. */
typedef struct {
char* name;
attrFn action;
} attr_item;
/* Size of one table entry; used for element counts and by bsearch(). */
#define ISIZE (sizeof(attr_item))
/*
 * icmp:
 * Order two attribute table entries by name, ignoring case.
 * Used as the bsearch() comparison function.
 */
static int
icmp (attr_item* i, attr_item* j)
{
    const char* lhs = i->name;
    const char* rhs = j->name;

    return strcasecmp(lhs, rhs);
}
/*
 * bgcolorfn:
 * Store a private copy of the BGCOLOR value. Always returns 0.
 */
static int
bgcolorfn (htmldata_t* p, char* v)
{
    char* copy = strdup(v);

    p->bgcolor = copy;
    return 0;
}
/*
 * hreffn:
 * Store a private copy of the HREF value. Always returns 0.
 */
static int
hreffn (htmldata_t* p, char* v)
{
    char* copy = strdup(v);

    p->href = copy;
    return 0;
}
/*
 * portfn:
 * Store a private copy of the PORT value. Always returns 0.
 */
static int
portfn (htmldata_t* p, char* v)
{
    char* copy = strdup(v);

    p->port = copy;
    return 0;
}
/*
 * doInt:
 * Parse v as a base-10 integer and check that it lies in [min, max].
 *   v   - attribute value text
 *   s   - attribute name, used only in warnings
 *   ul  - receives the parsed value on success; untouched on failure
 * Returns 0 on success. Returns 1, after issuing a warning, when no
 * digits could be parsed or the value is out of range.
 */
static int
doInt (char* v, char* s, int min, int max, long* ul)
{
    char* end;
    long val = strtol(v, &end, 10);

    if (end == v) {
        agerr(AGWARN, "Improper %s value %s - ignored", s, v);
        return 1;
    }
    if (val > max) {
        agerr(AGWARN, "%s value %s > %d - too large - ignored", s, v, max);
        return 1;
    }
    if (val < min) {
        agerr(AGWARN, "%s value %s < %d - too small - ignored", s, v, min);
        return 1;
    }
    *ul = val;
    return 0;
}
/*
 * borderfn:
 * Handle BORDER: accept 0..MAX_UCHAR, store it and set BORDER_SET.
 * Returns 0 on success, 1 if the value was rejected.
 */
static int
borderfn (htmldata_t* p, char* v)
{
    long val;

    if (doInt(v, "BORDER", 0, MAX_UCHAR, &val) != 0) {
        return 1;
    }
    p->border = (unsigned char)val;
    p->flags |= BORDER_SET;
    return 0;
}
/*
 * cellpaddingfn:
 * Handle CELLPADDING: accept 0..MAX_UCHAR, store it and set PAD_SET.
 * Returns 0 on success, 1 if the value was rejected.
 */
static int
cellpaddingfn (htmldata_t* p, char* v)
{
    long val;

    if (doInt(v, "CELLPADDING", 0, MAX_UCHAR, &val) != 0) {
        return 1;
    }
    p->pad = (unsigned char)val;
    p->flags |= PAD_SET;
    return 0;
}
/*
 * cellspacingfn:
 * Handle CELLSPACING: accept MIN_CHAR..MAX_CHAR (negative values are
 * allowed), store it and set SPACE_SET.
 * Returns 0 on success, 1 if the value was rejected.
 */
static int
cellspacingfn (htmldata_t* p, char* v)
{
    long val;

    if (doInt(v, "CELLSPACING", MIN_CHAR, MAX_CHAR, &val) != 0) {
        return 1;
    }
    p->space = (signed char)val;
    p->flags |= SPACE_SET;
    return 0;
}
/*
 * cellborderfn:
 * Handle the table attribute CELLBORDER: accept 0..MAX_CHAR and store it
 * in the table's cb field.
 * Returns 0 on success, 1 if the value was rejected.
 */
static int
cellborderfn (htmltbl_t* p, char* v)
{
    long u;

    /* Use the real attribute name so the warning matches what the user
     * wrote (the table entry is "cellborder"). */
    if (doInt(v, "CELLBORDER", 0, MAX_CHAR, &u)) return 1;
    p->cb = (unsigned char)u;
    return 0;
}
/*
 * fixedsizefn:
 * Handle FIXEDSIZE. "TRUE" (any case) sets FIXED_FLAG; "FALSE" (any
 * case) is accepted and changes nothing; anything else is rejected with
 * a warning.
 * Returns 0 on success, 1 if the value was rejected.
 */
static int
fixedsizefn (htmldata_t* p, char* v)
{
    int rv = 0;
    /* toupper() requires a value representable as unsigned char; a plain
     * char may be negative for non-ASCII bytes. */
    char c = toupper((unsigned char)*v);

    if ((c == 'T') && !strcasecmp(v+1,"RUE"))
        p->flags |= FIXED_FLAG;
    else if ((c != 'F') || strcasecmp(v+1,"ALSE")) {
        agerr(AGWARN, "Illegal value %s for FIXEDSIZE - ignored\n", v);
        rv = 1;
    }
    return rv;
}
/*
 * valignfn:
 * Handle VALIGN. "BOTTOM" sets VALIGN_BOTTOM, "TOP" sets VALIGN_TOP,
 * "MIDDLE" is accepted and sets no flag (all case-insensitive); anything
 * else is rejected with a warning.
 * Returns 0 on success, 1 if the value was rejected.
 */
static int
valignfn (htmldata_t* p, char* v)
{
    int rv = 0;
    /* toupper() requires a value representable as unsigned char; a plain
     * char may be negative for non-ASCII bytes. */
    char c = toupper((unsigned char)*v);

    if ((c == 'B') && !strcasecmp(v+1,"OTTOM"))
        p->flags |= VALIGN_BOTTOM;
    else if ((c == 'T') && !strcasecmp(v+1,"OP"))
        p->flags |= VALIGN_TOP;
    else if ((c != 'M') || strcasecmp(v+1,"IDDLE")) {
        agerr(AGWARN, "Illegal value %s for VALIGN - ignored\n", v);
        rv = 1;
    }
    return rv;
}
/*
 * halignfn:
 * Handle ALIGN on tables and cells. "LEFT" sets HALIGN_LEFT, "RIGHT"
 * sets HALIGN_RIGHT, "CENTER" is accepted and sets no flag (all
 * case-insensitive); anything else is rejected with a warning.
 * Returns 0 on success, 1 if the value was rejected.
 */
static int
halignfn (htmldata_t* p, char* v)
{
    int rv = 0;
    /* toupper() requires a value representable as unsigned char; a plain
     * char may be negative for non-ASCII bytes. */
    char c = toupper((unsigned char)*v);

    if ((c == 'L') && !strcasecmp(v+1,"EFT"))
        p->flags |= HALIGN_LEFT;
    else if ((c == 'R') && !strcasecmp(v+1,"IGHT"))
        p->flags |= HALIGN_RIGHT;
    else if ((c != 'C') || strcasecmp(v+1,"ENTER")) {
        agerr(AGWARN, "Illegal value %s for ALIGN - ignored\n", v);
        rv = 1;
    }
    return rv;
}
/*
 * heightfn:
 * Handle HEIGHT: accept 0..MAX_USHORT and store it as the height.
 * Returns 0 on success, 1 if the value was rejected.
 */
static int
heightfn (htmldata_t* p, char* v)
{
    long u;

    if (doInt(v, "HEIGHT", 0, MAX_USHORT, &u)) return 1;
    /* Store into height (parallel to widthfn), not into the cell
     * spacing field. */
    p->height = (unsigned short)u;
    return 0;
}
/*
 * widthfn:
 * Handle WIDTH: accept 0..MAX_USHORT and store it as the width.
 * Returns 0 on success, 1 if the value was rejected.
 */
static int
widthfn (htmldata_t* p, char* v)
{
    long val;

    if (doInt(v, "WIDTH", 0, MAX_USHORT, &val) != 0) {
        return 1;
    }
    p->width = (unsigned short)val;
    return 0;
}
/*
 * rowspanfn:
 * Handle ROWSPAN: accept 1..MAX_UCHAR and store it in the cell.
 * A value of 0 passes the range check but is rejected separately with
 * its own warning.
 * Returns 0 on success, 1 if the value was rejected.
 */
static int
rowspanfn (htmlcell_t* p, char* v)
{
    long span;

    if (doInt(v, "ROWSPAN", 0, MAX_UCHAR, &span) != 0) {
        return 1;
    }
    if (span != 0) {
        p->rspan = (unsigned char)span;
        return 0;
    }
    agerr(AGWARN, "ROWSPAN value cannot be 0 - ignored\n");
    return 1;
}
/*
 * colspanfn:
 * Handle COLSPAN: accept 1..MAX_UCHAR and store it in the cell.
 * A value of 0 passes the range check but is rejected separately with
 * its own warning.
 * Returns 0 on success, 1 if the value was rejected.
 */
static int
colspanfn (htmlcell_t* p, char* v)
{
    long span;

    if (doInt(v, "COLSPAN", 0, MAX_UCHAR, &span) != 0) {
        return 1;
    }
    if (span != 0) {
        p->cspan = (unsigned char)span;
        return 0;
    }
    agerr(AGWARN, "COLSPAN value cannot be 0 - ignored\n");
    return 1;
}
/*
 * alignfn:
 * Handle ALIGN on <BR>. "RIGHT" stores 'r' in *p, "LEFT" stores 'l',
 * "CENTER" is accepted and leaves *p unchanged (all case-insensitive);
 * anything else is rejected with a warning.
 * Returns 0 on success, 1 if the value was rejected.
 */
static int
alignfn (int* p, char* v)
{
    int rv = 0;
    /* toupper() requires a value representable as unsigned char; a plain
     * char may be negative for non-ASCII bytes. */
    char c = toupper((unsigned char)*v);

    /* Each keyword must match in full: the first letter AND the rest.
     * Testing the first letter before touching v+1 also keeps an empty
     * value from being read past its terminator. */
    if ((c == 'R') && !strcasecmp(v+1,"IGHT"))
        *p = 'r';
    else if ((c == 'L') && !strcasecmp(v+1,"EFT"))
        *p = 'l';
    else if ((c != 'C') || strcasecmp(v+1,"ENTER")) {
        agerr(AGWARN, "Illegal value %s for ALIGN - ignored\n", v);
        rv = 1;
    }
    return rv;
}
/*
 * Attribute tables. doAttrs() looks names up with bsearch() using a
 * case-insensitive comparison, so each table must be kept sorted by name.
 */
/* Attributes accepted on <TABLE>. */
static attr_item tbl_items[] = {
{"align", (attrFn)halignfn},
{"bgcolor", (attrFn)bgcolorfn},
{"border", (attrFn)borderfn},
{"cellborder", (attrFn)cellborderfn},
{"cellpadding", (attrFn)cellpaddingfn},
{"cellspacing", (attrFn)cellspacingfn},
{"fixedsize", (attrFn)fixedsizefn},
{"height", (attrFn)heightfn},
{"href", (attrFn)hreffn},
{"port", (attrFn)portfn},
{"valign", (attrFn)valignfn},
{"width", (attrFn)widthfn},
};
/* Attributes accepted on <TD>. */
static attr_item cell_items[] = {
{"align", (attrFn)halignfn},
{"bgcolor", (attrFn)bgcolorfn},
{"border", (attrFn)borderfn},
{"cellpadding", (attrFn)cellpaddingfn},
{"cellspacing", (attrFn)cellspacingfn},
{"colspan", (attrFn)colspanfn},
{"fixedsize", (attrFn)fixedsizefn},
{"height", (attrFn)heightfn},
{"href", (attrFn)hreffn},
{"port", (attrFn)portfn},
{"rowspan", (attrFn)rowspanfn},
{"valign", (attrFn)valignfn},
{"width", (attrFn)widthfn},
};
/* Attributes accepted on <BR>. */
static attr_item br_items[] = {
{"align", (attrFn)alignfn},
};
/*
 * doAttrs:
 * Apply a NULL-terminated list of name/value pairs to the object tp.
 *   tp    - object passed through to each attribute handler
 *   items - sorted attribute table with nel entries
 *   atts  - alternating names and values, terminated by a NULL name
 *   s     - element name, used only in warnings
 * Unknown attribute names produce a warning. state.warn becomes non-zero
 * if any name is unknown or any handler rejects its value.
 */
static void
doAttrs (void* tp, attr_item* items, int nel, char **atts, char* s)
{
    attr_item probe;
    attr_item* hit;
    char** cur = atts;

    while (*cur != NULL) {
        char* attr_name = cur[0];
        char* attr_val = cur[1];

        cur += 2;
        probe.name = attr_name;
        hit = (attr_item*)bsearch(&probe,items,nel,ISIZE, (bcmpfn)icmp);
        if (hit == NULL) {
            agerr(AGWARN, "Illegal attribute %s in %s - ignored\n", attr_name, s);
            state.warn = 1;
        }
        else {
            state.warn |= hit->action (tp, attr_val);
        }
    }
}
static void
mkBR (char **atts)
{
htmllval.i = 'n';
doAttrs (&htmllval.i,br_items, sizeof(br_items)/ISIZE, atts, "<BR>");
}
static htmlcell_t*
mkCell (char **atts)
{
htmlcell_t* cell = NEW(htmlcell_t);
cell->cspan = 1;
cell->rspan = 1;
doAttrs (cell,cell_items, sizeof(cell_items)/ISIZE, atts, "<TD>");
return cell;
}
static htmltbl_t*
mkTbl (char **atts)
{
htmltbl_t* tbl = NEW(htmltbl_t);
tbl->rc = -1;
tbl->cb = -1;
doAttrs (tbl,tbl_items, sizeof(tbl_items)/ISIZE, atts, "<TABLE>");
return tbl;
}
/*
 * startElement:
 * expat start-tag callback. Maps the element name (case-insensitive) to
 * a token in state.tok, builds the semantic value for TABLE, TD and BR,
 * and updates state.inCell. Unknown elements are reported via lexerror().
 */
static void
startElement (void* user, const char *name, char **atts)
{
    if (!strcasecmp (name, "TABLE")) {
        htmllval.tbl = mkTbl (atts);
        state.inCell = 0;
        state.tok = T_table;
        return;
    }
    /* TH is treated exactly like TR */
    if (!strcasecmp (name, "TR") || !strcasecmp (name, "TH")) {
        state.inCell = 0;
        state.tok = T_row;
        return;
    }
    if (!strcasecmp (name, "TD")) {
        state.inCell = 1;
        htmllval.cell = mkCell (atts);
        state.tok = T_cell;
        return;
    }
    if (!strcasecmp (name, "BR")) {
        mkBR (atts);
        state.tok = T_br;
        return;
    }
    if (!strcasecmp (name, "HTML")) {
        state.tok = T_html;
        return;
    }
    lexerror (name);
}
/*
 * endElement:
 * expat end-tag callback. Maps the element name (case-insensitive) to
 * the matching end token in state.tok and updates state.inCell.
 * Unknown elements are reported via lexerror().
 */
static void
endElement (void *user, const char *name)
{
    if (!strcasecmp (name, "TABLE")) {
        state.tok = T_end_table;
        state.inCell = 1;
        return;
    }
    /* TH is treated exactly like TR */
    if (!strcasecmp (name, "TR") || !strcasecmp (name, "TH")) {
        state.tok = T_end_row;
        return;
    }
    if (!strcasecmp (name, "TD")) {
        state.tok = T_end_cell;
        state.inCell = 0;
        return;
    }
    if (!strcasecmp (name, "HTML")) {
        state.tok = T_end_html;
        return;
    }
    if (!strcasecmp (name, "BR")) {
        /* If the start tag's token is still pending, fold start and end
         * into the single token T_BR. */
        state.tok = (state.tok == T_br) ? T_BR : T_end_br;
        return;
    }
    lexerror (name);
}
/*
 * characterData:
 * expat character-data callback. While state.inCell is set, append the
 * length bytes at s to state.xb, dropping bytes below ' ' (control
 * characters), and set the current token to T_string. Outside a cell
 * the data is ignored.
 */
static void
characterData (void *user, const char *s, int length)
{
    int i;
    /* Compare as unsigned so bytes >= 0x80 (e.g. UTF-8 sequences) are
     * kept rather than testing below ' ' as negative values when plain
     * char is signed. */
    unsigned char c;

    if (!state.inCell) return;
    for (i = length; i; i--) {
        c = (unsigned char)*s++;
        if (c >= ' ') {
            agxbputc(state.xb,c);
        }
    }
    state.tok = T_string;
}
#endif
/*
 * initHTMLlexer:
 * Prepare the lexer to scan src.
 *   src - label text to scan
 *   xb  - buffer that will receive character data
 * With libexpat this resets the lexer state and creates a parser with
 * the element and character-data callbacks installed. Without libexpat
 * it warns, once per process, that table formatting is unavailable.
 */
void
initHTMLlexer (char* src, agxbuf* xb)
{
#ifdef HAVE_LIBEXPAT
    state.xb = xb;
    state.ptr = src;
    state.mode = 0;
    state.warn = 0;
    state.error = 0;
    state.currtoklen = 0;
    state.prevtoklen = 0;
    state.inCell = 1;
    state.parser = XML_ParserCreate (NULL);
    XML_SetElementHandler (state.parser,
        (XML_StartElementHandler)startElement, endElement);
    XML_SetCharacterDataHandler (state.parser, characterData);
#else
    /* Zero-initialized; set after the first warning so it is emitted
     * exactly once. */
    static int first;
    if (!first) {
        agerr (AGWARN, "Not built with libexpat. Table formatting is not available.\n");
        first++;
    }
#endif
}
/*
 * clearHTMLlexer:
 * Release the parser created by initHTMLlexer().
 * Returns non-zero if any warning or error was recorded while lexing,
 * 0 otherwise. Without libexpat it always returns 1.
 */
int
clearHTMLlexer ()
{
#ifdef HAVE_LIBEXPAT
    /* Errors are tracked separately from warnings; report either one so
     * a failed scan is not seen as clean by the caller. */
    int rv = state.warn | state.error;
    XML_ParserFree (state.parser);
    return rv;
#else
    return 1;
#endif
}
#ifdef HAVE_LIBEXPAT
/*
 * eatComment:
 * Scan a comment body starting at p, tracking nested '<' ... '>' pairs.
 * Returns a pointer to the '>' that closes the comment, or to the
 * terminating '\0' if the text ends first. Warns when the closing '>'
 * is not immediately preceded by "--".
 */
static char*
eatComment (char* p)
{
    char* s = p;
    int depth = 1;

    while (*s != '\0') {
        if (*s == '<') {
            depth++;
        }
        else if (*s == '>') {
            depth--;
            if (depth == 0) break;   /* s rests on the closing '>' */
        }
        s++;
    }
    if (*s) {
        char* t = s-2;
        if ((t < p) || strncmp(t,"--",2)) {
            agerr (AGWARN, "Unclosed comment\n");
            state.warn = 1;
        }
    }
    return s;
}
/*
 * findNext:
 * Return a pointer just past the chunk that starts at s. A chunk is
 * either markup ('<' through its closing '>', or a whole comment) or a
 * run of text up to the next '<' or the end of the string. Warns if the
 * string ends before the markup is closed.
 */
static char*
findNext (char* s)
{
    char* t = s+1;

    if (*s != '<') {
        /* plain text: stop at the next markup or at the end */
        while (*t && (*t != '<')) t++;
        return t;
    }
    if ((*t == '!') && !strncmp(t+1,"--",2)) {
        t = eatComment (t+3);
    }
    else {
        while (*t && (*t != '>')) t++;
    }
    if (*t == '>') {
        t++;
    }
    else {
        agerr (AGWARN, "Label closed before end of HTML element\n");
        state.warn = 1;
    }
    return t;
}
#endif
/*
 * htmllineno:
 * Return the parser's current line number, or 0 when not built with
 * libexpat.
 */
int
htmllineno ()
{
    int line = 0;
#ifdef HAVE_LIBEXPAT
    line = XML_GetCurrentLineNumber (state.parser);
#endif
    return line;
}
#ifdef DEBUG
/*
 * printTok:
 * Debug helper: write the symbolic name of tok to stderr. For T_string
 * the current contents of state.xb are printed in quotes as well.
 */
static void
printTok (int tok)
{
    char* label = "<unknown>";

    switch (tok) {
    case T_BR :        label = "T_BR"; break;
    case T_br :        label = "T_br"; break;
    case T_end_br :    label = "T_end_br"; break;
    case T_end_table : label = "T_end_table"; break;
    case T_row :       label = "T_row"; break;
    case T_end_row :   label = "T_end_row"; break;
    case T_end_cell :  label = "T_end_cell"; break;
    case T_html :      label = "T_html"; break;
    case T_end_html :  label = "T_end_html"; break;
    case T_string :    label = "T_string"; break;
    case T_error :     label = "T_error"; break;
    case T_table :     label = "T_table"; break;
    case T_cell :      label = "T_cell"; break;
    default :          break;
    }
    if (tok != T_string) {
        fprintf (stderr, "%s\n", label);
        return;
    }
    fprintf (stderr, "%s \"", label);
    fwrite (agxbstart(state.xb), 1, agxblen(state.xb), stderr);
    fprintf (stderr, "\"\n");
}
#endif
/*
 * htmllex:
 * Return the next token of the label being scanned.
 * The source text is handed to the parser one chunk at a time (as split
 * by findNext()), preceded by a synthetic "<HTML>" and followed by a
 * synthetic "</HTML>". The parser callbacks record the resulting token
 * in state.tok; chunks that yield no token are skipped.
 * Returns EOF once "</HTML>" has already been fed, and always when not
 * built with libexpat.
 */
int
htmllex ()
{
#ifdef HAVE_LIBEXPAT
static char* begin_html = "<HTML>";
static char* end_html = "</HTML>";
char* s;
char* endp = 0;
int len;
int rv;
state.tok = 0;
do {
/* mode 2: the closing wrapper was fed on an earlier pass */
if (state.mode == 2) return EOF;
if (state.mode == 0) {
/* first call: feed the opening wrapper */
state.mode = 1;
s = begin_html;
len = strlen (s);
endp = 0;
}
else {
s = state.ptr;
if (*s == '\0') {
/* source exhausted: feed the closing wrapper */
state.mode = 2;
s = end_html;
len = strlen (s);
}
else {
/* feed the next chunk of the source */
endp = findNext (s);
len = endp-s;
}
}
/* remember the last two chunks for error_context() */
state.prevtok = state.currtok;
state.prevtoklen = state.currtoklen;
state.currtok = s;
state.currtoklen = len;
/* the final-chunk flag is set only when len is 0 */
rv = XML_Parse (state.parser, s, len, (len ? 0 : 1));
if (rv == XML_STATUS_ERROR) {
/* only the first error is reported */
if (!state.error) {
agerr (AGERR, "%s in line %d \n",
XML_ErrorString (XML_GetErrorCode (state.parser)),
htmllineno());
error_context ();
state.error = 1;
state.tok = T_error;
}
}
/* advance past the chunk only if it came from the source text */
if (endp) state.ptr = endp;
} while (state.tok == 0);
return state.tok;
#else
return EOF;
#endif
}