#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include <alloca.h>
#include "write-po.h"
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#if HAVE_ICONV
# include <iconv.h>
#endif
#include "c-ctype.h"
#include "po-charset.h"
#include "linebreak.h"
#include "msgl-ascii.h"
#include "write-properties.h"
#include "write-stringtable.h"
#include "xalloc.h"
#include "strstr.h"
#include "fwriteerror.h"
#include "exit.h"
#include "error-progname.h"
#include "error.h"
#include "xerror.h"
#include "gettext.h"
#define _(str) gettext (str)
#if HAVE_DECL_PUTC_UNLOCKED
# undef putc
# define putc putc_unlocked
#endif
/* Build the flag text for one format language as it is written in a "#,"
   comment line.

   IS_FORMAT selects the wording:
     possible (only when DEBUG is true)  -> " possible-LANG-format"
     possible (DEBUG false), yes,
     yes_according_to_context            -> " LANG-format"
     no                                  -> " no-LANG-format"
   Any other value aborts the program.

   The returned pointer refers to a static buffer that is overwritten by the
   next call.  The text is truncated to the buffer size instead of being
   written past its end.  */
const char *
make_format_description_string (enum is_format is_format, const char *lang,
                                bool debug)
{
  static char result[100];

  switch (is_format)
    {
    case possible:
      if (debug)
        {
          snprintf (result, sizeof result, " possible-%s-format", lang);
          break;
        }
      /* FALLTHROUGH: without debug output, "possible" is printed like
         "yes".  */
    case yes_according_to_context:
    case yes:
      snprintf (result, sizeof result, " %s-format", lang);
      break;
    case no:
      snprintf (result, sizeof result, " no-%s-format", lang);
      break;
    default:
      abort ();
    }

  return result;
}
/* Return true when IS_FORMAT carries information that should be written
   out, i.e. when it is neither undecided nor impossible.  */
bool
significant_format_p (enum is_format is_format)
{
  if (is_format == undecided || is_format == impossible)
    return false;
  return true;
}
static bool
has_significant_format_p (const enum is_format is_format[NFORMATS])
{
size_t i;
for (i = 0; i < NFORMATS; i++)
if (significant_format_p (is_format[i]))
return true;
return false;
}
/* Return the flag text describing DO_WRAP: " wrap" for yes, " no-wrap" for
   no.  Any other value aborts the program.  */
static const char *
make_c_width_description_string (enum is_wrap do_wrap)
{
  const char *text = NULL;

  if (do_wrap == yes)
    text = " wrap";
  else if (do_wrap == no)
    text = " no-wrap";
  else
    abort ();

  return text;
}
/* Write the translator comments of MP to FP.

   Every comment item is split at '\n'; each resulting line is written as
   '#', then a space unless the line is empty or already starts with a
   space, then the line text, then a newline.  Nothing is written when MP
   has no comment list.  */
void
message_print_comment (const message_ty *mp, FILE *fp)
{
  size_t idx;

  if (mp->comment == NULL)
    return;

  for (idx = 0; idx < mp->comment->nitems; idx++)
    {
      const char *line = mp->comment->item[idx];

      for (;;)
        {
          const char *nl;

          putc ('#', fp);
          if (!(*line == '\0' || *line == ' '))
            putc (' ', fp);

          nl = strchr (line, '\n');
          if (nl == NULL)
            {
              /* Last (or only) line of this item.  */
              fputs (line, fp);
              putc ('\n', fp);
              break;
            }

          /* Emit the text up to the embedded newline and carry on with
             whatever follows it (possibly an empty line).  */
          fwrite (line, 1, nl - line, fp);
          putc ('\n', fp);
          line = nl + 1;
        }
    }
}
/* Write the "#." comments of MP to FP, one output line per item.

   Each line is "#.", then a space unless the item is empty or already
   starts with a space, then the item text and a newline.  Nothing is
   written when MP has no such comment list.  */
void
message_print_comment_dot (const message_ty *mp, FILE *fp)
{
  size_t idx;

  if (mp->comment_dot == NULL)
    return;

  for (idx = 0; idx < mp->comment_dot->nitems; idx++)
    {
      const char *text = mp->comment_dot->item[idx];
      bool need_space = (*text != '\0' && *text != ' ');

      fputs ("#.", fp);
      if (need_space)
        putc (' ', fp);
      fputs (text, fp);
      putc ('\n', fp);
    }
}
/* Write the source file positions of MP to FP.

   Nothing is written when MP has no file positions.  Leading "./"
   components are stripped from every file name.

   UNIFORUM true:  one line per position, "# File: NAME, line: N".
   UNIFORUM false: "#:" lines holding " NAME:N" entries (just " NAME" when
                   the line number is (size_t)(-1)).  A new "#:" line is
                   started before an entry that would reach PAGE_WIDTH,
                   except for the first entry of a line.  */
void
message_print_comment_filepos (const message_ty *mp, FILE *fp,
                               bool uniforum, size_t page_width)
{
  size_t j;

  if (mp->filepos_count == 0)
    return;

  if (uniforum)
    {
      for (j = 0; j < mp->filepos_count; ++j)
        {
          const lex_pos_ty *pp = &mp->filepos[j];
          const char *cp = pp->file_name;

          while (cp[0] == '.' && cp[1] == '/')
            cp += 2;
          fprintf (fp, "# File: %s, line: %ld\n",
                   cp, (long) pp->line_number);
        }
    }
  else
    {
      size_t column;

      fputs ("#:", fp);
      column = 2;
      for (j = 0; j < mp->filepos_count; ++j)
        {
          const lex_pos_ty *pp = &mp->filepos[j];
          const char *cp = pp->file_name;
          /* Room for ':', a sign, the decimal digits of a long (fewer
             than 3 per byte) and the terminating NUL.  */
          char buffer[sizeof (long) * 3 + 3];
          size_t len;

          while (cp[0] == '.' && cp[1] == '/')
            cp += 2;

          if (pp->line_number == (size_t)(-1))
            buffer[0] = '\0';
          else
            snprintf (buffer, sizeof buffer, ":%ld",
                      (long) pp->line_number);

          /* Width of this entry including its leading space.  */
          len = strlen (cp) + strlen (buffer) + 1;
          if (column > 2 && column + len >= page_width)
            {
              fputs ("\n#:", fp);
              column = 2;
            }
          fprintf (fp, " %s%s", cp, buffer);
          column += len;
        }
      putc ('\n', fp);
    }
}
/* Write the "#," flags line of MP to FP, if there is anything to report.

   Flags, in order and separated by commas: " fuzzy" (only when the message
   is fuzzy and its msgstr is non-empty), one entry per significant format
   (text from make_format_description_string, with DEBUG passed through),
   and " no-wrap" when do_wrap is no.  No line is written when none of
   these apply.  */
void
message_print_comment_flags (const message_ty *mp, FILE *fp, bool debug)
{
  const bool show_fuzzy = (mp->is_fuzzy && mp->msgstr[0] != '\0');
  const bool show_no_wrap = (mp->do_wrap == no);
  bool need_comma = false;
  size_t k;

  if (!show_fuzzy
      && !has_significant_format_p (mp->is_format)
      && !show_no_wrap)
    return;

  fputs ("#,", fp);

  if (show_fuzzy)
    {
      fputs (" fuzzy", fp);
      need_comma = true;
    }

  for (k = 0; k < NFORMATS; k++)
    {
      if (!significant_format_p (mp->is_format[k]))
        continue;

      if (need_comma)
        putc (',', fp);
      fputs (make_format_description_string (mp->is_format[k],
                                             format_language[k], debug),
             fp);
      need_comma = true;
    }

  if (show_no_wrap)
    {
      if (need_comma)
        putc (',', fp);
      fputs (make_c_width_description_string (mp->do_wrap), fp);
    }

  putc ('\n', fp);
}
/* Output page width used by the printing functions in this file; starts
   out as PAGE_WIDTH.  */
static size_t page_width = PAGE_WIDTH;

/* Set the output page width to N.
   N == 0 selects INT_MAX; values below 20 are raised to 20.  */
void
message_page_width_set (size_t n)
{
  if (n == 0)
    page_width = INT_MAX;
  else
    page_width = (n < 20 ? 20 : n);
}
/* Whether wrap() takes page_width into account.  When false, wrap() uses
   INT_MAX as the page width.  */
static bool wrap_strings = true;

/* Make wrap() ignore the configured page width from now on.  */
void
message_page_width_ignore (void)
{
  wrap_strings = false;
}
/* Output style switches consulted by the printing code in this file.
   indent:   separate keyword and string with a tab and align continuation
             lines with a tab.
   uniforum: passed to message_print_comment_filepos and used to pick the
             blank-line form ("#" instead of an empty line).
   escape:   write non-printable characters as octal escapes.  */
static bool indent = false;
static bool uniforum = false;
static bool escape = false;

/* Turn on the indented output style.  */
void
message_print_style_indent (void)
{
  indent = true;
}

/* Turn on the uniforum output style.  */
void
message_print_style_uniforum (void)
{
  uniforum = true;
}

/* Enable (FLAG true) or disable (FLAG false) octal escaping of
   non-printable characters.  */
void
message_print_style_escape (bool flag)
{
  escape = flag;
}
/* When true, msgdomain_list_print() writes Java .properties syntax.  */
static bool use_syntax_properties = false;

/* Select Java .properties syntax for subsequent output.  */
void
message_print_syntax_properties (void)
{
  use_syntax_properties = true;
}
/* When true (and .properties syntax is not selected),
   msgdomain_list_print() writes NeXTstep/GNUstep .strings syntax.  */
static bool use_syntax_stringtable = false;

/* Select NeXTstep/GNUstep .strings syntax for subsequent output.  */
void
message_print_syntax_stringtable (void)
{
  use_syntax_stringtable = true;
}
/* Copy N bytes from SRC to DST, one byte at a time in ascending address
   order.  Neither pointer is dereferenced when N is 0.  */
static inline void
memcpy_small (void *dst, const void *src, size_t n)
{
  char *out = (char *) dst;
  const char *in = (const char *) src;

  for (; n > 0; n--)
    *out++ = *in++;
}
/* Write the entry `NAME "VALUE"' to FP, escaping VALUE as a C string and
   splitting it into several double-quoted pieces where needed.

   VALUE is handled one portion at a time; a portion extends up to and
   including the next '\n' (or to the end of the string).  For each portion
   the escaped text is built, mbs_width_linebreaks() decides where breaks
   go, and the pieces are written out.

   LINE_PREFIX, when non-NULL, is written at the start of every output line.
   DO_WRAP == no, or wrap_strings being false, replaces page_width by
   INT_MAX when computing the available width.
   CHARSET is canonicalized with po_charset_canonicalize(); if that yields
   NULL, po_charset_ascii is handed to the line breaking routine.  */
static void
wrap (FILE *fp, const char *line_prefix, const char *name, const char *value,
enum is_wrap do_wrap, const char *charset)
{
const char *canon_charset;
const char *s;
bool first_line;
#if HAVE_ICONV
const char *envval;
iconv_t conv;
#endif
bool weird_cjk;
canon_charset = po_charset_canonicalize (charset);
#if HAVE_ICONV
/* Decide whether an iconv converter (canon_charset -> UTF-8) is used.  It
   is not used when the environment variable OLD_PO_FILE_OUTPUT is set to a
   non-empty value, or when the charset could not be canonicalized.  */
envval = getenv ("OLD_PO_FILE_OUTPUT");
if (envval != NULL && *envval != '\0')
conv = (iconv_t)(-1);
else
if (canon_charset == NULL)
conv = (iconv_t)(-1);
else
/* On the platform/iconv combinations tested below, the listed charsets
   are never passed to iconv_open().  */
# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
if (strcmp (canon_charset, "EUC-KR") == 0)
conv = (iconv_t)(-1);
else
# endif
# if defined __sun && !defined _LIBICONV_VERSION
if ( strcmp (canon_charset, "GB2312") == 0
|| strcmp (canon_charset, "EUC-TW") == 0
|| strcmp (canon_charset, "BIG5") == 0
|| strcmp (canon_charset, "BIG5-HKSCS") == 0
|| strcmp (canon_charset, "GBK") == 0
|| strcmp (canon_charset, "GB18030") == 0)
conv = (iconv_t)(-1);
else
# endif
conv = iconv_open ("UTF-8", canon_charset);
/* With a working converter the weird_cjk heuristic is not needed;
   otherwise it is taken from po_is_charset_weird_cjk() (false when the
   charset is unknown).  */
if (conv != (iconv_t)(-1))
weird_cjk = false;
else
#endif
if (canon_charset == NULL)
weird_cjk = false;
else
weird_cjk = po_is_charset_weird_cjk (canon_charset);
if (canon_charset == NULL)
canon_charset = po_charset_ascii;
s = value;
first_line = true;
/* One iteration per portion of VALUE.  The body runs at least once, so an
   empty VALUE still produces `NAME ""'.  */
do
{
# define is_escape(c) \
((c) == '\b' || (c) == '\f' || (c) == '\n' || (c) == '\r' || (c) == '\t')
const char *es;
const char *ep;
size_t portion_len;
char *portion;
char *overrides;
char *linebreaks;
char *pp;
char *op;
int startcol, startcol_after_break, width;
size_t i;
/* Find the end ES of this portion: just past the next '\n', or at the
   terminating NUL.  */
for (es = s; *es != '\0'; )
if (*es++ == '\n')
break;
/* First pass: compute the length of the escaped portion.  */
for (ep = s, portion_len = 0; ep < es; ep++)
{
char c = *ep;
if (is_escape (c))
portion_len += 2;
else if (escape && !c_isprint ((unsigned char) c))
portion_len += 4;
else if (c == '\\' || c == '"')
portion_len += 2;
else
{
#if HAVE_ICONV
if (conv != (iconv_t)(-1))
{
/* Feed iconv 1, 2, 3, ... input bytes until it stops reporting an
   incomplete sequence (EINVAL); the number of bytes it consumed is the
   length of one multibyte character.  The converted output in scratchbuf
   is not used.  */
char scratchbuf[64];
const char *inptr = ep;
size_t insize;
char *outptr = &scratchbuf[0];
size_t outsize = sizeof (scratchbuf);
size_t res;
res = (size_t)(-1);
for (insize = 1; inptr + insize <= es; insize++)
{
res = iconv (conv,
(ICONV_CONST char **) &inptr, &insize,
&outptr, &outsize);
if (!(res == (size_t)(-1) && errno == EINVAL))
break;
if (inptr != ep)
abort ();
}
if (res == (size_t)(-1))
{
if (errno == EILSEQ)
{
/* Report the problem and skip this one byte.  */
error (0, 0, _("invalid multibyte sequence"));
continue;
}
else
abort ();
}
insize = inptr - ep;
portion_len += insize;
ep += insize - 1;
}
else
#endif
{
/* Without iconv: for a "weird CJK" charset, a byte >= 0x80 followed by a
   byte >= 0x30 is treated as one two-byte character.  */
if (weird_cjk
&& ep + 2 <= es
&& (unsigned char) ep[0] >= 0x80
&& (unsigned char) ep[1] >= 0x30)
{
portion_len += 2;
ep += 1;
}
else
portion_len += 1;
}
}
}
/* Second pass: fill PORTION with the escaped text and OVERRIDES with the
   per-byte break overrides.  It must classify every byte exactly like the
   first pass so that exactly portion_len bytes are produced.  */
portion = (char *) xmalloc (portion_len);
overrides = (char *) xmalloc (portion_len);
memset (overrides, UC_BREAK_UNDEFINED, portion_len);
for (ep = s, pp = portion, op = overrides; ep < es; ep++)
{
char c = *ep;
if (is_escape (c))
{
switch (c)
{
case '\b': c = 'b'; break;
case '\f': c = 'f'; break;
case '\n': c = 'n'; break;
case '\r': c = 'r'; break;
case '\t': c = 't'; break;
default: abort ();
}
*pp++ = '\\';
*pp++ = c;
/* Leave the backslash's override untouched and mark the bytes after it
   UC_BREAK_PROHIBITED (presumably so that an escape sequence is never
   split across lines -- exact semantics come from linebreak.h).  */
op++;
*op++ = UC_BREAK_PROHIBITED;
/* Escapes other than \n and \t trigger a (non-fatal) diagnostic.  */
if (c != 'n' && c != 't')
error (0, 0, _("\
internationalized messages should not contain the `\\%c' escape sequence"),
c);
}
else if (escape && !c_isprint ((unsigned char) c))
{
/* Three-digit octal escape.  */
*pp++ = '\\';
*pp++ = '0' + (((unsigned char) c >> 6) & 7);
*pp++ = '0' + (((unsigned char) c >> 3) & 7);
*pp++ = '0' + ((unsigned char) c & 7);
op++;
*op++ = UC_BREAK_PROHIBITED;
*op++ = UC_BREAK_PROHIBITED;
*op++ = UC_BREAK_PROHIBITED;
}
else if (c == '\\' || c == '"')
{
*pp++ = '\\';
*pp++ = c;
op++;
*op++ = UC_BREAK_PROHIBITED;
}
else
{
#if HAVE_ICONV
if (conv != (iconv_t)(-1))
{
/* Same character-length detection as in the first pass; the original
   bytes (not the converted ones) are copied to PORTION.  */
char scratchbuf[64];
const char *inptr = ep;
size_t insize;
char *outptr = &scratchbuf[0];
size_t outsize = sizeof (scratchbuf);
size_t res;
res = (size_t)(-1);
for (insize = 1; inptr + insize <= es; insize++)
{
res = iconv (conv,
(ICONV_CONST char **) &inptr, &insize,
&outptr, &outsize);
if (!(res == (size_t)(-1) && errno == EINVAL))
break;
if (inptr != ep)
abort ();
}
if (res == (size_t)(-1))
{
if (errno == EILSEQ)
{
error (0, 0, _("invalid multibyte sequence"));
continue;
}
else
abort ();
}
insize = inptr - ep;
memcpy_small (pp, ep, insize);
pp += insize;
op += insize;
ep += insize - 1;
}
else
#endif
{
if (weird_cjk
&& ep + 2 <= es
&& (unsigned char) c >= 0x80
&& (unsigned char) ep[1] >= 0x30)
{
*pp++ = c;
ep += 1;
*pp++ = *ep;
op += 2;
}
else
{
*pp++ = c;
op++;
}
}
}
}
/* If the portion ends with a newline, its escaped form ends in the two
   bytes `\' `n'; also mark the backslash position as
   UC_BREAK_PROHIBITED.  */
if (es > s && es[-1] == '\n')
overrides[portion_len - 2] = UC_BREAK_PROHIBITED;
linebreaks = (char *) xmalloc (portion_len);
/* Column bookkeeping for continuation lines: prefix length, rounded up to
   the next multiple of 8 when indenting, plus one more column.  WIDTH is
   what remains of the page (or of INT_MAX when wrapping is off).  */
startcol_after_break = (line_prefix ? strlen (line_prefix) : 0);
if (indent)
startcol_after_break = (startcol_after_break + 8) & ~7;
startcol_after_break++;
width = (wrap_strings && do_wrap != no ? page_width : INT_MAX) - 1;
width -= startcol_after_break;
recompute:
/* Start column of the current line, expressed relative to
   startcol_after_break.  On the first line it accounts for NAME as well.  */
startcol = (line_prefix ? strlen (line_prefix) : 0);
if (first_line)
{
startcol += strlen (name);
if (indent)
startcol = (startcol + 8) & ~7;
else
startcol++;
}
else
{
if (indent)
startcol = (startcol + 8) & ~7;
}
startcol++;
startcol -= startcol_after_break;
mbs_width_linebreaks (portion, portion_len, width, startcol, 0,
overrides, canon_charset, linebreaks);
/* On the first line in non-indented style, if the value spans several
   portions, does not fit, or would be broken, write `NAME ""' on a line of
   its own and redo the computation for a continuation line.  */
if (first_line && !indent
&& portion_len > 0
&& (*es != '\0'
|| startcol > width
|| memchr (linebreaks, UC_BREAK_POSSIBLE, portion_len) != NULL))
{
if (line_prefix != NULL)
fputs (line_prefix, fp);
fputs (name, fp);
fputs (" \"\"\n", fp);
first_line = false;
goto recompute;
}
/* Write the portion: prefix, NAME (first line only), opening quote.  */
if (line_prefix != NULL)
fputs (line_prefix, fp);
if (first_line)
{
fputs (name, fp);
putc (indent ? '\t' : ' ', fp);
first_line = false;
}
else
{
if (indent)
putc ('\t', fp);
}
putc ('"', fp);
for (i = 0; i < portion_len; i++)
{
/* A break marked before byte I: close the quote and start a new quoted
   piece on the next line.  */
if (linebreaks[i] == UC_BREAK_POSSIBLE)
{
fputs ("\"\n", fp);
if (line_prefix != NULL)
fputs (line_prefix, fp);
if (indent)
putc ('\t', fp);
putc ('"', fp);
}
putc (portion[i], fp);
}
fputs ("\"\n", fp);
free (linebreaks);
free (overrides);
free (portion);
s = es;
# undef is_escape
}
while (*s);
#if HAVE_ICONV
if (conv != (iconv_t)(-1))
iconv_close (conv);
#endif
}
/* Write the separator between two entries to FP: a lone "#" line in
   uniforum style, an empty line otherwise.  */
static void
print_blank_line (FILE *fp)
{
  if (!uniforum)
    {
      putc ('\n', fp);
      return;
    }

  fputs ("#\n", fp);
}
/* Write the non-obsolete message MP to FP.

   Output order: optional separator, translator comments, "#." comments,
   file positions, flags, msgid, then either msgstr or msgid_plural followed
   by one msgstr[N] per NUL-terminated string stored in MP->msgstr.

   BLANK_LINE requests a separator before the entry; in uniforum style the
   separator is omitted when the first translator comment is an empty
   string.  A warning is issued for a non-ASCII msgid unless CHARSET
   canonicalizes to UTF-8.  DEBUG is passed on to the flags printer.  */
static void
message_print (const message_ty *mp, FILE *fp, const char *charset,
               bool blank_line, bool debug)
{
  if (blank_line)
    {
      bool starts_with_empty_comment =
        (mp->comment != NULL
         && mp->comment->nitems != 0
         && mp->comment->item[0][0] == '\0');

      if (!uniforum || !starts_with_empty_comment)
        print_blank_line (fp);
    }

  /* All comment kinds, in their fixed order.  */
  message_print_comment (mp, fp);
  message_print_comment_dot (mp, fp);
  message_print_comment_filepos (mp, fp, uniforum, page_width);
  message_print_comment_flags (mp, fp, debug);

  if (!is_ascii_string (mp->msgid)
      && po_charset_canonicalize (charset) != po_charset_utf8)
    multiline_warning (xasprintf (_("warning: ")),
                       xasprintf (_("\
The following msgid contains non-ASCII characters.\n\
This will cause problems to translators who use a character encoding\n\
different from yours. Consider using a pure ASCII msgid instead.\n\
%s\n"), mp->msgid));

  wrap (fp, NULL, "msgid", mp->msgid, mp->do_wrap, charset);

  if (mp->msgid_plural == NULL)
    {
      wrap (fp, NULL, "msgstr", mp->msgstr, mp->do_wrap, charset);
    }
  else
    {
      const char *form = mp->msgstr;
      unsigned int form_index = 0;

      wrap (fp, NULL, "msgid_plural", mp->msgid_plural, mp->do_wrap,
            charset);

      /* MP->msgstr holds the plural forms back to back, each terminated
         by a NUL, msgstr_len bytes in total.  */
      while (form < mp->msgstr + mp->msgstr_len)
        {
          char label[20];

          sprintf (label, "msgstr[%u]", form_index);
          wrap (fp, NULL, label, form, mp->do_wrap, charset);
          form += strlen (form) + 1;
          form_index++;
        }
    }
}
/* Write the obsolete message MP to FP, with every msgid/msgstr line
   prefixed by "#~ ".

   Nothing at all is written when MP's msgstr is the empty string.
   Otherwise: optional separator (BLANK_LINE), translator comments, a
   "#, fuzzy" line when the message is fuzzy, msgid, then either msgstr or
   msgid_plural followed by one msgstr[N] per NUL-terminated string stored
   in MP->msgstr.  A warning is issued for a non-ASCII msgid unless CHARSET
   canonicalizes to UTF-8.  */
static void
message_print_obsolete (const message_ty *mp, FILE *fp, const char *charset,
                        bool blank_line)
{
  /* An obsolete entry without translation carries no information.  */
  if (mp->msgstr[0] == '\0')
    return;

  if (blank_line)
    print_blank_line (fp);

  message_print_comment (mp, fp);

  /* "fuzzy" is the only flag written for obsolete entries.  */
  if (mp->is_fuzzy)
    fputs ("#, fuzzy\n", fp);

  if (!is_ascii_string (mp->msgid)
      && po_charset_canonicalize (charset) != po_charset_utf8)
    multiline_warning (xasprintf (_("warning: ")),
                       xasprintf (_("\
The following msgid contains non-ASCII characters.\n\
This will cause problems to translators who use a character encoding\n\
different from yours. Consider using a pure ASCII msgid instead.\n\
%s\n"), mp->msgid));

  wrap (fp, "#~ ", "msgid", mp->msgid, mp->do_wrap, charset);
  if (mp->msgid_plural != NULL)
    wrap (fp, "#~ ", "msgid_plural", mp->msgid_plural, mp->do_wrap, charset);

  if (mp->msgid_plural == NULL)
    wrap (fp, "#~ ", "msgstr", mp->msgstr, mp->do_wrap, charset);
  else
    {
      char prefix_buf[20];
      unsigned int i;
      const char *p;

      /* MP->msgstr holds the plural forms back to back, each terminated
         by a NUL, msgstr_len bytes in total.  */
      for (p = mp->msgstr, i = 0;
           p < mp->msgstr + mp->msgstr_len;
           p += strlen (p) + 1, i++)
        {
          sprintf (prefix_buf, "msgstr[%u]", i);
          wrap (fp, "#~ ", prefix_buf, p, mp->do_wrap, charset);
        }
    }
}
/* Write all domains of MDLP to FP in PO syntax.

   For every domain: a `domain "NAME"' line (omitted for the first domain
   when it is MESSAGE_DOMAIN_DEFAULT), then all non-obsolete messages, then
   all obsolete messages.  Entries are separated by blank lines.

   The charset handed to the message printers is taken from the
   "charset=" field of the domain's header entry (the first non-obsolete
   message with an empty msgid); it is "ASCII" when there is no such field
   or when its value is the placeholder "CHARSET".  DEBUG is passed on to
   message_print.  */
static void
msgdomain_list_print_po (msgdomain_list_ty *mdlp, FILE *fp, bool debug)
{
  bool blank_line = false;
  size_t k;

  for (k = 0; k < mdlp->nitems; k++)
    {
      message_list_ty *mlp;
      const char *header = NULL;
      char *charset = "ASCII";
      size_t j;

      /* Domain directive, unless this is the leading default domain.  */
      if (k != 0
          || strcmp (mdlp->item[k]->domain, MESSAGE_DOMAIN_DEFAULT) != 0)
        {
          if (blank_line)
            print_blank_line (fp);
          fprintf (fp, "domain \"%s\"\n", mdlp->item[k]->domain);
          blank_line = true;
        }

      mlp = mdlp->item[k]->messages;

      /* Locate the header entry.  */
      for (j = 0; j < mlp->nitems; ++j)
        {
          const message_ty *candidate = mlp->item[j];

          if (candidate->msgid[0] == '\0' && !candidate->obsolete)
            {
              header = candidate->msgstr;
              break;
            }
        }

      /* Extract the charset name from the header, if present.  */
      if (header != NULL)
        {
          const char *field = strstr (header, "charset=");

          if (field != NULL)
            {
              const char *val = field + strlen ("charset=");
              size_t len = strcspn (val, " \t\n");
              char *copy = (char *) alloca (len + 1);

              memcpy (copy, val, len);
              copy[len] = '\0';
              /* "CHARSET" is treated like a missing charset.  */
              charset = (strcmp (copy, "CHARSET") == 0 ? "ASCII" : copy);
            }
        }

      /* Live messages first...  */
      for (j = 0; j < mlp->nitems; ++j)
        {
          if (mlp->item[j]->obsolete)
            continue;
          message_print (mlp->item[j], fp, charset, blank_line, debug);
          blank_line = true;
        }

      /* ...then the obsolete ones.  */
      for (j = 0; j < mlp->nitems; ++j)
        {
          if (!mlp->item[j]->obsolete)
            continue;
          message_print_obsolete (mlp->item[j], fp, charset, blank_line);
          blank_line = true;
        }
    }
}
/* Write the message domains MDLP to the file FILENAME.

   FILENAME NULL, "-" or "/dev/stdout" selects standard output.
   Unless FORCE is set, nothing is written (and no file is created) when
   every domain is empty or contains only a single entry with an empty
   msgid.
   The output syntax is .properties, .strings or PO, depending on the
   use_syntax_* switches.  For the first two, more than one domain or any
   message with a plural form is a fatal error.
   Failure to create the file or to write to it is a fatal error.
   DEBUG is passed on to the syntax-specific printer.  */
void
msgdomain_list_print (msgdomain_list_ty *mdlp, const char *filename,
                      bool force, bool debug)
{
  FILE *fp;
  bool to_stdout;

  /* Without FORCE, look for at least one domain that is worth writing.  */
  if (!force)
    {
      size_t k;

      for (k = 0; k < mdlp->nitems; k++)
        {
          message_list_ty *mlp = mdlp->item[k]->messages;

          if (mlp->nitems > 1
              || (mlp->nitems == 1 && mlp->item[0]->msgid[0] != '\0'))
            break;
        }
      if (k == mdlp->nitems)
        return;
    }

  /* Restrictions of the non-PO output syntaxes.  */
  if (use_syntax_properties || use_syntax_stringtable)
    {
      if (mdlp->nitems > 1)
        {
          if (use_syntax_properties)
            error (EXIT_FAILURE, 0, _("Cannot output multiple translation domains into a single file with Java .properties syntax. Try using PO file syntax instead."));
          if (use_syntax_stringtable)
            error (EXIT_FAILURE, 0, _("Cannot output multiple translation domains into a single file with NeXTstep/GNUstep .strings syntax."));
        }
      if (mdlp->nitems == 1)
        {
          message_list_ty *mlp = mdlp->item[0]->messages;
          const lex_pos_ty *plural_pos = NULL;
          size_t j;

          /* Position of the first message that has a plural form.  */
          for (j = 0; j < mlp->nitems; j++)
            {
              message_ty *mp = mlp->item[j];

              if (mp->msgid_plural != NULL)
                {
                  plural_pos = &mp->pos;
                  break;
                }
            }

          if (plural_pos != NULL)
            {
              error_with_progname = false;
              if (use_syntax_properties)
                error_at_line (EXIT_FAILURE, 0,
                               plural_pos->file_name, plural_pos->line_number,
                               _("message catalog has plural form translations, but the output format does not support them. Try generating a Java class using \"msgfmt --java\", instead of a properties file."));
              if (use_syntax_stringtable)
                error_at_line (EXIT_FAILURE, 0,
                               plural_pos->file_name, plural_pos->line_number,
                               _("message catalog has plural form translations, but the output format does not support them."));
              error_with_progname = true;
            }
        }
    }

  /* Open the output stream.  */
  to_stdout = (filename == NULL
               || strcmp (filename, "-") == 0
               || strcmp (filename, "/dev/stdout") == 0);
  if (to_stdout)
    {
      fp = stdout;
      /* From here on FILENAME is only used in diagnostics.  */
      filename = _("standard output");
    }
  else
    {
      fp = fopen (filename, "w");
      if (fp == NULL)
        error (EXIT_FAILURE, errno, _("cannot create output file \"%s\""),
               filename);
    }

  /* Emit the catalog in the selected syntax.  */
  if (use_syntax_properties)
    msgdomain_list_print_properties (mdlp, fp, page_width, debug);
  else if (use_syntax_stringtable)
    msgdomain_list_print_stringtable (mdlp, fp, page_width, debug);
  else
    msgdomain_list_print_po (mdlp, fp, debug);

  /* Make sure nothing went wrong while writing.  */
  if (fwriteerror (fp))
    error (EXIT_FAILURE, errno, _("error while writing \"%s\" file"),
           filename);

  if (fp != stdout)
    fclose (fp);
}
/* qsort comparator for arrays of `message_ty *': orders the messages by
   strcmp() of their msgid.  */
static int
cmp_by_msgid (const void *va, const void *vb)
{
  const message_ty *lhs = *(const message_ty **) va;
  const message_ty *rhs = *(const message_ty **) vb;
  const char *lhs_id = lhs->msgid;
  const char *rhs_id = rhs->msgid;

  return strcmp (lhs_id, rhs_id);
}
/* Sort the messages of every domain in MDLP by msgid (see cmp_by_msgid).
   Empty message lists are left untouched.  */
void
msgdomain_list_sort_by_msgid (msgdomain_list_ty *mdlp)
{
  size_t domain_idx = 0;

  while (domain_idx < mdlp->nitems)
    {
      message_list_ty *mlp = mdlp->item[domain_idx]->messages;

      domain_idx++;
      if (mlp->nitems == 0)
        continue;
      qsort (mlp->item, mlp->nitems, sizeof (mlp->item[0]), cmp_by_msgid);
    }
}
/* qsort comparator for arrays of lex_pos_ty: orders by file name
   (strcmp), then by line number.

   Line numbers are compared after conversion to int, as before, but with
   relational operators instead of a subtraction, so the result cannot
   overflow.  */
static int
cmp_filepos (const void *va, const void *vb)
{
  const lex_pos_ty *a = (const lex_pos_ty *) va;
  const lex_pos_ty *b = (const lex_pos_ty *) vb;
  int cmp;

  cmp = strcmp (a->file_name, b->file_name);
  if (cmp == 0)
    {
      int line_a = (int) a->line_number;
      int line_b = (int) b->line_number;

      cmp = (line_a > line_b) - (line_a < line_b);
    }

  return cmp;
}
/* For every message of every domain in MDLP, sort the message's own list
   of file positions with cmp_filepos.  Messages without file positions are
   left untouched.  */
static void
msgdomain_list_sort_filepos (msgdomain_list_ty *mdlp)
{
  size_t domain_idx;

  for (domain_idx = 0; domain_idx < mdlp->nitems; domain_idx++)
    {
      message_list_ty *mlp = mdlp->item[domain_idx]->messages;
      size_t msg_idx;

      for (msg_idx = 0; msg_idx < mlp->nitems; msg_idx++)
        {
          message_ty *mp = mlp->item[msg_idx];

          if (mp->filepos_count == 0)
            continue;
          qsort (mp->filepos, mp->filepos_count, sizeof (mp->filepos[0]),
                 cmp_filepos);
        }
    }
}
/* qsort comparator for arrays of `message_ty *': orders messages by their
   first file position.

   A message without any file position sorts before every message that has
   one.  Messages that both have a position are ordered by file name
   (strcmp), then by line number (compared after conversion to int).  All
   remaining ties, including two messages that both lack a position, are
   broken by strcmp() of the msgid, so that the comparator is consistent:
   cmp (a, b) and cmp (b, a) never both report "greater".  */
static int
cmp_by_filepos (const void *va, const void *vb)
{
  const message_ty *a = *(const message_ty **) va;
  const message_ty *b = *(const message_ty **) vb;
  int cmp;

  /* No filepos is smaller than any other filepos.  */
  cmp = (a->filepos_count != 0) - (b->filepos_count != 0);
  if (cmp != 0)
    return cmp;

  if (a->filepos_count != 0)
    {
      int line_a;
      int line_b;

      /* Compare on the file names...  */
      cmp = strcmp (a->filepos[0].file_name, b->filepos[0].file_name);
      if (cmp != 0)
        return cmp;

      /* ...then on the line numbers, without relying on the truncation of
         an unsigned difference.  */
      line_a = (int) a->filepos[0].line_number;
      line_b = (int) b->filepos[0].line_number;
      cmp = (line_a > line_b) - (line_a < line_b);
      if (cmp != 0)
        return cmp;
    }

  /* Same position, or no position on either side: fall back to msgid.  */
  return strcmp (a->msgid, b->msgid);
}
/* Sort the messages of every domain in MDLP by file position.

   First each message's own position list is sorted
   (msgdomain_list_sort_filepos), so that filepos[0] is its smallest
   position; then each non-empty message list is sorted with
   cmp_by_filepos.  */
void
msgdomain_list_sort_by_filepos (msgdomain_list_ty *mdlp)
{
  size_t domain_idx = 0;

  msgdomain_list_sort_filepos (mdlp);

  while (domain_idx < mdlp->nitems)
    {
      message_list_ty *mlp = mdlp->item[domain_idx]->messages;

      domain_idx++;
      if (mlp->nitems == 0)
        continue;
      qsort (mlp->item, mlp->nitems, sizeof (mlp->item[0]), cmp_by_filepos);
    }
}