#include <config.h>
#include <ctype.h>
#ifdef HAVE_STRING_H
# include <string.h>
#else
# include <strings.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <errno.h>
#include "wget.h"
#include "url.h"
#include "utils.h"
#include "ftp.h"
#include "html.h"
#ifndef errno
extern int errno;
#endif
/* Parser state shared by htmlfindurl () and html_base ().  htmlfindurl ()
   zeroes it when called with a non-zero INIT and again when it runs out
   of input.  */
static state_t global_state;
/* One TAG/ATTR pair.  Tables of these are terminated by an entry whose
   TAG is NULL.  The strings are only ever read, so they are const; this
   lets tables be initialised from string literals safely.  */
struct tag_attr {
  const char *tag;
  const char *attr;
};

/* Return 1 if the pair TAG/ATTR appears in the NULL-terminated table
   TAGS, comparing both names case-insensitively; return 0 otherwise.
   A NULL TAG or ATTR never matches.  */
static int
idmatch (const struct tag_attr *tags, const char *tag, const char *attr)
{
  const struct tag_attr *entry;

  if (!tag || !attr)
    return 0;
  for (entry = tags; entry->tag; entry++)
    if (!strcasecmp (entry->tag, tag) && !strcasecmp (entry->attr, attr))
      return 1;
  return 0;
}
const char *
htmlfindurl (const char *buf, int bufsize, int *size, int init)
{
const char *p, *ph;
state_t *s;
static struct tag_attr html_allow[] = {
{ "a", "href" },
{ "img", "src" },
{ "img", "href" },
{ "body", "background" },
{ "frame", "src" },
{ "iframe", "src" },
{ "fig", "src" },
{ "overlay", "src" },
{ "applet", "code" },
{ "script", "src" },
{ "embed", "src" },
{ "bgsound", "src" },
{ "area", "href" },
{ "img", "lowsrc" },
{ "input", "src" },
{ "layer", "src" },
{ "table", "background"},
{ "th", "background"},
{ "td", "background"},
{ "base", "href" },
{ "meta", "content" },
{ NULL, NULL }
};
s = &global_state;
if (init)
{
DEBUGP (("Resetting a parser state.\n"));
memset (s, 0, sizeof (*s));
}
while (1)
{
if (!bufsize)
break;
if (!s->at_value)
{
if (*buf != '<')
for (; bufsize && *buf != '<'; ++buf, --bufsize);
if (!bufsize)
break;
for (++buf, --bufsize; bufsize && ISSPACE (*buf) && *buf != '>';
++buf, --bufsize);
if (!bufsize)
break;
p = buf;
for (; bufsize && !ISSPACE (*buf) && *buf != '>' && *buf != '=';
++buf, --bufsize);
if (!bufsize)
break;
if (*buf == '=')
{
++buf, --bufsize;
continue;
}
if (p == buf)
{
++buf, --bufsize;
continue;
}
s->tag = strdupdelim (p, buf);
if (*buf == '>')
{
free (s->tag);
s->tag = NULL;
++buf, --bufsize;
continue;
}
}
else
{
s->at_value = 0;
if (s->in_quote)
{
s->in_quote = 0;
for (; bufsize && *buf != s->quote_char; ++buf, --bufsize);
if (!bufsize)
break;
++buf, --bufsize;
}
if (!bufsize)
break;
if (*buf == '>')
{
FREE_MAYBE (s->tag);
FREE_MAYBE (s->attr);
s->tag = s->attr = NULL;
continue;
}
}
do
{
FREE_MAYBE (s->attr);
s->attr = NULL;
if (!bufsize)
break;
if (ISSPACE (*buf))
for (++buf, --bufsize; bufsize && ISSPACE (*buf) && *buf != '>';
++buf, --bufsize);
if (!bufsize || *buf == '>')
break;
if (*buf == '=')
{
++buf, --bufsize;
continue;
}
p = buf;
for (; bufsize && !ISSPACE (*buf) && *buf != '>' && *buf != '=';
++buf, --bufsize);
if (!bufsize || *buf == '>')
break;
s->attr = strdupdelim (p, buf);
if (*buf != '=')
{
for (; bufsize && ISSPACE (*buf) && *buf != '>'; ++buf, --bufsize);
if (!bufsize || *buf == '>')
break;
}
if (*buf != '=')
continue;
++buf, --bufsize;
for (; bufsize && ISSPACE (*buf) && *buf != '>'; ++buf, --bufsize);
if (!bufsize || *buf == '>')
break;
ph = NULL;
if (*buf == '\"' || *buf == '\'')
{
s->in_quote = 1;
s->quote_char = *buf;
p = buf + 1;
for (++buf, --bufsize;
bufsize && *buf != s->quote_char && *buf != '\n';
++buf, --bufsize)
if (*buf == '#')
ph = buf;
if (!bufsize)
{
s->in_quote = 0;
break;
}
if (*buf == '\n')
{
s->in_quote = 0;
continue;
}
}
else
{
p = buf;
for (; bufsize && !ISSPACE (*buf) && *buf != '>'; ++buf, --bufsize)
if (*buf == '#')
ph = buf;
if (!bufsize)
break;
}
*size = (ph ? ph : buf) - p;
if (*size && idmatch (html_allow, s->tag, s->attr))
{
if (!strcasecmp (s->tag, "base") && !strcasecmp (s->attr, "href"))
{
FREE_MAYBE (s->base);
s->base = strdupdelim (p, buf);
}
else if (!strcasecmp (s->tag, "meta") && !strcasecmp (s->attr, "content"))
{
for (; *size && ISDIGIT (*p); p++, *size -= 1);
if (*p == ';')
{
for (p++, *size -= 1; *size && ISSPACE (*p); p++, *size -= 1) ;
if (!strncasecmp (p, "URL=", 4))
{
p += 4, *size -= 4;
s->at_value = 1;
return p;
}
}
}
else
{
s->at_value = 1;
return p;
}
}
if (*buf == s->quote_char)
{
s->in_quote = 0;
++buf, --bufsize;
}
} while (*buf != '>');
FREE_MAYBE (s->tag);
FREE_MAYBE (s->attr);
s->tag = s->attr = NULL;
if (!bufsize)
break;
}
FREE_MAYBE (s->tag);
FREE_MAYBE (s->attr);
FREE_MAYBE (s->base);
memset (s, 0, sizeof (*s));
DEBUGP (("HTML parser ends here (state destroyed).\n"));
return NULL;
}
/* Return the base string currently held in the shared parser state.
   htmlfindurl () stores it when it meets <base href=...>, and frees it
   and zeroes the state when it runs out of input, so the result is NULL
   when no base is recorded.  */
const char *
html_base (void)
{
  const char *base = global_state.base;

  return base;
}
/* Return a newly allocated copy of S in which '&', '<', '>' and '"' are
   replaced by "&amp;", "&lt;", "&gt;" and "&quot;".  All other
   characters are copied unchanged.  The result is obtained from
   xmalloc () and the callers in this file release it with free ().  */
static char *
html_quote_string (const char *s)
{
  const char *src;
  char *quoted, *out;
  int len = 0;

  /* First pass: work out how long the quoted string will be.  */
  for (src = s; *src; src++)
    {
      switch (*src)
        {
        case '&':
          len += 5;             /* &amp; */
          break;
        case '<': case '>':
          len += 4;             /* &lt; or &gt; */
          break;
        case '\"':
          len += 6;             /* &quot; */
          break;
        default:
          len += 1;
          break;
        }
    }
  quoted = (char *)xmalloc (len + 1);
  /* Second pass: copy, expanding the special characters.  */
  out = quoted;
  for (src = s; *src; src++)
    {
      const char *entity;

      switch (*src)
        {
        case '&':
          entity = "&amp;";
          break;
        case '<':
          entity = "&lt;";
          break;
        case '>':
          entity = "&gt;";
          break;
        case '\"':
          entity = "&quot;";
          break;
        default:
          /* Ordinary character: copy it and move on to the next one.  */
          *out++ = *src;
          continue;
        }
      while (*entity)
        *out++ = *entity++;
    }
  *out = '\0';
  return quoted;
}
/* Write an HTML index of the directory listing F.

   FILE - name of the file to create; used only when opt.dfp is not set.
          When opt.dfp is set the index is written to that stream
          instead, and the stream is flushed rather than closed.
   U    - URL the listing belongs to; its user, passwd, host, port and
          dir members are used to build the title and the links.
   F    - head of the list of entries, linked through `next'.

   Returns FOPENERR if FILE cannot be opened (the error is logged), and
   FTPOK otherwise.

   Everything taken from the listing or from the URL path (entry names,
   symlink targets, the directory) is passed through html_quote_string ()
   before it is written, so that markup characters in those strings
   cannot alter the structure of the generated page.  */
uerr_t
ftp_index (const char *file, struct urlinfo *u, struct fileinfo *f)
{
  FILE *fp;
  char *upwd;
  char *htcldir;
  char *htclfile;

  if (!opt.dfp)
    {
      fp = fopen (file, "wb");
      if (!fp)
        {
          logprintf (LOG_NOTQUIET, "%s: %s\n", file, strerror (errno));
          return FOPENERR;
        }
    }
  else
    fp = opt.dfp;
  if (u->user)
    {
      /* Build the "user[:password]@" prefix used in every link.
         NOTE(review): CLEANDUP presumably returns an allocated copy
         that is safe to embed in a URL -- confirm its definition.  */
      char *tmpu, *tmpp;

      tmpu = CLEANDUP (u->user);
      tmpp = u->passwd ? CLEANDUP (u->passwd) : NULL;
      /* Room for user, optional ':' and password, '@' and the NUL.  */
      upwd = (char *)xmalloc (strlen (tmpu)
                              + (tmpp ? (1 + strlen (tmpp)) : 0) + 2);
      sprintf (upwd, "%s%s%s@", tmpu, tmpp ? ":" : "", tmpp ? tmpp : "");
      free (tmpu);
      FREE_MAYBE (tmpp);
    }
  else
    upwd = xstrdup ("");
  /* The directory appears in the title, the heading and every link.  */
  htcldir = html_quote_string (u->dir);
  fprintf (fp, "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML 2.0//EN\">\n");
  fprintf (fp, "<html>\n<head>\n<title>");
  fprintf (fp, _("Index of /%s on %s:%d"), htcldir, u->host, u->port);
  fprintf (fp, "</title>\n</head>\n<body>\n<h1>");
  fprintf (fp, _("Index of /%s on %s:%d"), htcldir, u->host, u->port);
  fprintf (fp, "</h1>\n<hr>\n<pre>\n");
  while (f)
    {
      struct tm *ptm = NULL;

      fprintf (fp, " ");
      if (f->tstamp != -1)
        {
          /* Convert through a real time_t object instead of casting the
             address of the field, which is only valid when the field
             has exactly the type time_t.  */
          time_t tstamp = f->tstamp;

          ptm = localtime (&tstamp);
        }
      /* localtime () returns NULL when it cannot convert the value;
         treat that like an unknown time.  */
      if (ptm)
        {
          static const char *months[] = {
            "Jan", "Feb", "Mar", "Apr", "May", "Jun",
            "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
          };

          fprintf (fp, "%d %s %02d ", ptm->tm_year + 1900, months[ptm->tm_mon],
                   ptm->tm_mday);
          /* The time of day is printed only when the hour is non-zero.  */
          if (ptm->tm_hour)
            fprintf (fp, "%02d:%02d ", ptm->tm_hour, ptm->tm_min);
          else
            fprintf (fp, " ");
        }
      else
        fprintf (fp, _("time unknown "));
      switch (f->type)
        {
        case FT_PLAINFILE:
          fprintf (fp, _("File "));
          break;
        case FT_DIRECTORY:
          fprintf (fp, _("Directory "));
          break;
        case FT_SYMLINK:
          fprintf (fp, _("Link "));
          break;
        default:
          fprintf (fp, _("Not sure "));
          break;
        }
      htclfile = html_quote_string (f->name);
      fprintf (fp, "<a href=\"ftp://%s%s:%hu", upwd, u->host, u->port);
      if (*u->dir != '/')
        putc ('/', fp);
      fprintf (fp, "%s", htcldir);
      if (*u->dir)
        putc ('/', fp);
      fprintf (fp, "%s", htclfile);
      /* Directories get a trailing slash in the link and in its text.  */
      if (f->type == FT_DIRECTORY)
        putc ('/', fp);
      fprintf (fp, "\">%s", htclfile);
      if (f->type == FT_DIRECTORY)
        putc ('/', fp);
      fprintf (fp, "</a> ");
      if (f->type == FT_PLAINFILE)
        fprintf (fp, _(" (%s bytes)"), legible (f->size));
      else if (f->type == FT_SYMLINK)
        {
          if (f->linkto)
            {
              /* The target comes from the listing, so quote it too.  */
              char *htcllink = html_quote_string (f->linkto);

              fprintf (fp, "-> %s", htcllink);
              free (htcllink);
            }
          else
            fprintf (fp, "-> %s", "(nil)");
        }
      putc ('\n', fp);
      free (htclfile);
      f = f->next;
    }
  fprintf (fp, "</pre>\n</body>\n</html>\n");
  free (htcldir);
  free (upwd);
  if (!opt.dfp)
    fclose (fp);
  else
    fflush (fp);
  return FTPOK;
}