aboutsummaryrefslogtreecommitdiff
path: root/quotearg.c
diff options
context:
space:
mode:
Diffstat (limited to 'quotearg.c')
-rw-r--r--quotearg.c651
1 files changed, 578 insertions, 73 deletions
diff --git a/quotearg.c b/quotearg.c
index e4926e404f80..bffa14b7a0a7 100644
--- a/quotearg.c
+++ b/quotearg.c
@@ -1,5 +1,5 @@
-/* Shell command argument quoting.
- Copyright (C) 1994, 1995, 1997 Free Software Foundation, Inc.
+/* quotearg.c - quote arguments for output
+ Copyright (C) 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@@ -12,9 +12,8 @@
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
- along with this program; see the file COPYING.
- If not, write to the Free Software Foundation,
- 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
/* Written by Paul Eggert <eggert@twinsun.com> */
@@ -22,104 +21,610 @@
# include <config.h>
#endif
-#include <sys/types.h>
-#include <quotearg.h>
+#include "quotearg.h"
-/* Place into QUOTED a quoted version of ARG suitable for `system'.
- Return the length of the resulting string (which is not null-terminated).
- If QUOTED is null, return the length without any side effects. */
+#include "xalloc.h"
-size_t
-quote_system_arg (quoted, arg)
- char *quoted;
- char const *arg;
+#include <ctype.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+#define N_(msgid) msgid
+
+#if HAVE_WCHAR_H
+
+/* BSD/OS 4.1 wchar.h requires FILE and struct tm to be declared. */
+# include <stdio.h>
+# include <time.h>
+
+# include <wchar.h>
+#endif
+
+#if !HAVE_MBRTOWC
+/* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the
+ other macros are defined only for documentation and to satisfy C
+ syntax. */
+# undef MB_CUR_MAX
+# define MB_CUR_MAX 1
+# define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
+# define iswprint(wc) isprint ((unsigned char) (wc))
+# undef HAVE_MBSINIT
+#endif
+
+#if !defined mbsinit && !HAVE_MBSINIT
+# define mbsinit(ps) 1
+#endif
+
+#ifndef iswprint
+# if HAVE_WCTYPE_H
+# include <wctype.h>
+# endif
+# if !defined iswprint && !HAVE_ISWPRINT
+# define iswprint(wc) 1
+# endif
+#endif
+
+#ifndef SIZE_MAX
+# define SIZE_MAX ((size_t) -1)
+#endif
+
+#define INT_BITS (sizeof (int) * CHAR_BIT)
+
+struct quoting_options
+{
+ /* Basic quoting style. */
+ enum quoting_style style;
+
+ /* Quote the characters indicated by this bit vector even if the
+ quoting style would not normally require them to be quoted. */
+ int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
+};
+
+/* Names of quoting styles. */
+char const *const quoting_style_args[] =
+{
+ "literal",
+ "shell",
+ "shell-always",
+ "c",
+ "escape",
+ "locale",
+ "clocale",
+ 0
+};
+
+/* Correspondences to quoting style names. */
+enum quoting_style const quoting_style_vals[] =
+{
+ literal_quoting_style,
+ shell_quoting_style,
+ shell_always_quoting_style,
+ c_quoting_style,
+ escape_quoting_style,
+ locale_quoting_style,
+ clocale_quoting_style
+};
+
+/* The default quoting options. */
+static struct quoting_options default_quoting_options;
+
+/* Allocate a new set of quoting options, with contents initially identical
+ to O if O is not null, or to the default if O is null.
+ It is the caller's responsibility to free the result. */
+struct quoting_options *
+clone_quoting_options (struct quoting_options *o)
+{
+ int e = errno;
+ struct quoting_options *p = xmalloc (sizeof *p);
+ *p = *(o ? o : &default_quoting_options);
+ errno = e;
+ return p;
+}
+
+/* Get the value of O's quoting style. If O is null, use the default. */
+enum quoting_style
+get_quoting_style (struct quoting_options *o)
+{
+ return (o ? o : &default_quoting_options)->style;
+}
+
+/* In O (or in the default if O is null),
+ set the value of the quoting style to S. */
+void
+set_quoting_style (struct quoting_options *o, enum quoting_style s)
+{
+ (o ? o : &default_quoting_options)->style = s;
+}
+
+/* In O (or in the default if O is null),
+ set the value of the quoting options for character C to I.
+ Return the old value. Currently, the only values defined for I are
+ 0 (the default) and 1 (which means to quote the character even if
+ it would not otherwise be quoted). */
+int
+set_char_quoting (struct quoting_options *o, char c, int i)
{
- char const *a;
+ unsigned char uc = c;
+ int *p = (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
+ int shift = uc % INT_BITS;
+ int r = (*p >> shift) & 1;
+ *p ^= ((i & 1) ^ r) << shift;
+ return r;
+}
+
+/* MSGID approximates a quotation mark. Return its translation if it
+ has one; otherwise, return either it or "\"", depending on S. */
+static char const *
+gettext_quote (char const *msgid, enum quoting_style s)
+{
+ char const *translation = _(msgid);
+ if (translation == msgid && s == clocale_quoting_style)
+ translation = "\"";
+ return translation;
+}
+
+/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
+ argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
+ non-quoting-style part of O to control quoting.
+ Terminate the output with a null character, and return the written
+ size of the output, not counting the terminating null.
+ If BUFFERSIZE is too small to store the output string, return the
+ value that would have been returned had BUFFERSIZE been large enough.
+ If ARGSIZE is -1, use the string length of the argument for ARGSIZE.
+
+ This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
+ ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
+ style specified by O, and O may not be null. */
+
+static size_t
+quotearg_buffer_restyled (char *buffer, size_t buffersize,
+ char const *arg, size_t argsize,
+ enum quoting_style quoting_style,
+ struct quoting_options const *o)
+{
+ size_t i;
size_t len = 0;
+ char const *quote_string = 0;
+ size_t quote_string_len = 0;
+ int backslash_escapes = 0;
+ int unibyte_locale = MB_CUR_MAX == 1;
+
+#define STORE(c) \
+ do \
+ { \
+ if (len < buffersize) \
+ buffer[len] = (c); \
+ len++; \
+ } \
+ while (0)
+
+ switch (quoting_style)
+ {
+ case c_quoting_style:
+ STORE ('"');
+ backslash_escapes = 1;
+ quote_string = "\"";
+ quote_string_len = 1;
+ break;
+
+ case escape_quoting_style:
+ backslash_escapes = 1;
+ break;
+
+ case locale_quoting_style:
+ case clocale_quoting_style:
+ {
+ /* Get translations for open and closing quotation marks.
+
+ The message catalog should translate "`" to a left
+ quotation mark suitable for the locale, and similarly for
+ "'". If the catalog has no translation,
+ locale_quoting_style quotes `like this', and
+ clocale_quoting_style quotes "like this".
+
+ For example, an American English Unicode locale should
+ translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
+ should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
+ MARK). A British English Unicode locale should instead
+ translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
+ U+2019 (RIGHT SINGLE QUOTATION MARK), respectively. */
+
+ char const *left = gettext_quote (N_("`"), quoting_style);
+ char const *right = gettext_quote (N_("'"), quoting_style);
+ for (quote_string = left; *quote_string; quote_string++)
+ STORE (*quote_string);
+ backslash_escapes = 1;
+ quote_string = right;
+ quote_string_len = strlen (quote_string);
+ }
+ break;
- /* Scan ARG, copying it to QUOTED if QUOTED is not null,
- looking for shell metacharacters. */
+ case shell_always_quoting_style:
+ STORE ('\'');
+ quote_string = "'";
+ quote_string_len = 1;
+ break;
- for (a = arg; ; a++)
+ default:
+ break;
+ }
+
+ for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++)
{
- char c = *a;
+ unsigned char c;
+ unsigned char esc;
+
+ if (backslash_escapes
+ && quote_string_len
+ && i + quote_string_len <= argsize
+ && memcmp (arg + i, quote_string, quote_string_len) == 0)
+ STORE ('\\');
+
+ c = arg[i];
switch (c)
{
- case 0:
- /* ARG has no shell metacharacters. */
- return len;
+ case '\0':
+ if (backslash_escapes)
+ {
+ STORE ('\\');
+ STORE ('0');
+ STORE ('0');
+ c = '0';
+ }
+ break;
+
+ case '?':
+ switch (quoting_style)
+ {
+ case shell_quoting_style:
+ goto use_shell_always_quoting_style;
+
+ case c_quoting_style:
+ if (i + 2 < argsize && arg[i + 1] == '?')
+ switch (arg[i + 2])
+ {
+ case '!': case '\'':
+ case '(': case ')': case '-': case '/':
+ case '<': case '=': case '>':
+ /* Escape the second '?' in what would otherwise be
+ a trigraph. */
+ c = arg[i + 2];
+ i += 2;
+ STORE ('?');
+ STORE ('\\');
+ STORE ('?');
+ break;
+ }
+ break;
- case '=':
- if (*arg == '-')
+ default:
+ break;
+ }
+ break;
+
+ case '\a': esc = 'a'; goto c_escape;
+ case '\b': esc = 'b'; goto c_escape;
+ case '\f': esc = 'f'; goto c_escape;
+ case '\n': esc = 'n'; goto c_and_shell_escape;
+ case '\r': esc = 'r'; goto c_and_shell_escape;
+ case '\t': esc = 't'; goto c_and_shell_escape;
+ case '\v': esc = 'v'; goto c_escape;
+ case '\\': esc = c; goto c_and_shell_escape;
+
+ c_and_shell_escape:
+ if (quoting_style == shell_quoting_style)
+ goto use_shell_always_quoting_style;
+ c_escape:
+ if (backslash_escapes)
+ {
+ c = esc;
+ goto store_escape;
+ }
+ break;
+
+ case '#': case '~':
+ if (i != 0)
break;
/* Fall through. */
- case '\t': case '\n': case ' ':
- case '!': case '"': case '#': case '$': case '%': case '&': case '\'':
+ case ' ':
+ case '!': /* special in bash */
+ case '"': case '$': case '&':
case '(': case ')': case '*': case ';':
- case '<': case '>': case '?': case '[': case '\\':
- case '^': case '`': case '|': case '~':
+ case '<': case '>': case '[':
+ case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
+ case '`': case '|':
+ /* A shell special character. In theory, '$' and '`' could
+ be the first bytes of multibyte characters, which means
+ we should check them with mbrtowc, but in practice this
+ doesn't happen so it's not worth worrying about. */
+ if (quoting_style == shell_quoting_style)
+ goto use_shell_always_quoting_style;
+ break;
+
+ case '\'':
+ switch (quoting_style)
+ {
+ case shell_quoting_style:
+ goto use_shell_always_quoting_style;
+
+ case shell_always_quoting_style:
+ STORE ('\'');
+ STORE ('\\');
+ STORE ('\'');
+ break;
+
+ default:
+ break;
+ }
+ break;
+
+ case '%': case '+': case ',': case '-': case '.': case '/':
+ case '0': case '1': case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9': case ':': case '=':
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+ case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
+ case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
+ case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
+ case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
+ case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
+ case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
+ case 'o': case 'p': case 'q': case 'r': case 's': case 't':
+ case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
+ case '{': case '}':
+ /* These characters don't cause problems, no matter what the
+ quoting style is. They cannot start multibyte sequences. */
+ break;
+
+ default:
+ /* If we have a multibyte sequence, copy it until we reach
+ its end, find an error, or come back to the initial shift
+ state. For C-like styles, if the sequence has
+ unprintable characters, escape the whole sequence, since
+ we can't easily escape single characters within it. */
{
- /* ARG has a shell metacharacter.
- Start over, quoting it this time. */
+ /* Length of multibyte sequence found so far. */
+ size_t m;
- len = 0;
- c = *arg++;
+ int printable;
- /* If ARG is an option, quote just its argument.
- This is not necessary, but it looks nicer. */
- if (c == '-' && arg < a)
+ if (unibyte_locale)
{
- c = *arg++;
+ m = 1;
+ printable = isprint (c);
+ }
+ else
+ {
+ mbstate_t mbstate;
+ memset (&mbstate, 0, sizeof mbstate);
+
+ m = 0;
+ printable = 1;
+ if (argsize == SIZE_MAX)
+ argsize = strlen (arg);
- if (quoted)
+ do
{
- quoted[len] = '-';
- quoted[len + 1] = c;
- }
- len += 2;
-
- if (c == '-')
- while (arg < a)
- {
- c = *arg++;
- if (quoted)
- quoted[len] = c;
- len++;
- if (c == '=')
+ wchar_t w;
+ size_t bytes = mbrtowc (&w, &arg[i + m],
+ argsize - (i + m), &mbstate);
+ if (bytes == 0)
+ break;
+ else if (bytes == (size_t) -1)
+ {
+ printable = 0;
break;
- }
- c = *arg++;
+ }
+ else if (bytes == (size_t) -2)
+ {
+ printable = 0;
+ while (i + m < argsize && arg[i + m])
+ m++;
+ break;
+ }
+ else
+ {
+ if (! iswprint (w))
+ printable = 0;
+ m += bytes;
+ }
+ }
+ while (! mbsinit (&mbstate));
}
- if (quoted)
- quoted[len] = '\'';
- len++;
-
- for (; c; c = *arg++)
+ if (1 < m || (backslash_escapes && ! printable))
{
- if (c == '\'')
+ /* Output a multibyte sequence, or an escaped
+ unprintable unibyte character. */
+ size_t ilim = i + m;
+
+ for (;;)
{
- if (quoted)
+ if (backslash_escapes && ! printable)
{
- quoted[len] = '\'';
- quoted[len + 1] = '\\';
- quoted[len + 2] = '\'';
+ STORE ('\\');
+ STORE ('0' + (c >> 6));
+ STORE ('0' + ((c >> 3) & 7));
+ c = '0' + (c & 7);
}
- len += 3;
+ if (ilim <= i + 1)
+ break;
+ STORE (c);
+ c = arg[++i];
}
- if (quoted)
- quoted[len] = c;
- len++;
- }
- if (quoted)
- quoted[len] = '\'';
- return len + 1;
+ goto store_c;
+ }
}
}
- if (quoted)
- quoted[len] = c;
- len++;
+ if (! (backslash_escapes
+ && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
+ goto store_c;
+
+ store_escape:
+ STORE ('\\');
+
+ store_c:
+ STORE (c);
}
+
+ if (quote_string)
+ for (; *quote_string; quote_string++)
+ STORE (*quote_string);
+
+ if (len < buffersize)
+ buffer[len] = '\0';
+ return len;
+
+ use_shell_always_quoting_style:
+ return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
+ shell_always_quoting_style, o);
+}
+
+/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
+ argument ARG (of size ARGSIZE), using O to control quoting.
+ If O is null, use the default.
+ Terminate the output with a null character, and return the written
+ size of the output, not counting the terminating null.
+ If BUFFERSIZE is too small to store the output string, return the
+ value that would have been returned had BUFFERSIZE been large enough.
+ If ARGSIZE is -1, use the string length of the argument for ARGSIZE. */
+size_t
+quotearg_buffer (char *buffer, size_t buffersize,
+ char const *arg, size_t argsize,
+ struct quoting_options const *o)
+{
+ struct quoting_options const *p = o ? o : &default_quoting_options;
+ int e = errno;
+ size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
+ p->style, p);
+ errno = e;
+ return r;
+}
+
+/* Use storage slot N to return a quoted version of argument ARG.
+ ARG is of size ARGSIZE, but if that is -1, ARG is a null-terminated string.
+ OPTIONS specifies the quoting options.
+ The returned value points to static storage that can be
+ reused by the next call to this function with the same value of N.
+ N must be nonnegative. N is deliberately declared with type "int"
+ to allow for future extensions (using negative values). */
+static char *
+quotearg_n_options (int n, char const *arg, size_t argsize,
+ struct quoting_options const *options)
+{
+ int e = errno;
+
+ /* Preallocate a slot 0 buffer, so that the caller can always quote
+ one small component of a "memory exhausted" message in slot 0. */
+ static char slot0[256];
+ static unsigned int nslots = 1;
+ unsigned int n0 = n;
+ struct slotvec
+ {
+ size_t size;
+ char *val;
+ };
+ static struct slotvec slotvec0 = {sizeof slot0, slot0};
+ static struct slotvec *slotvec = &slotvec0;
+
+ if (n < 0)
+ abort ();
+
+ if (nslots <= n0)
+ {
+ unsigned int n1 = n0 + 1;
+ size_t s = n1 * sizeof *slotvec;
+
+ if (SIZE_MAX / UINT_MAX <= sizeof *slotvec
+ && n1 != s / sizeof *slotvec)
+ xalloc_die ();
+
+ if (slotvec == &slotvec0)
+ {
+ slotvec = xmalloc (sizeof *slotvec);
+ *slotvec = slotvec0;
+ }
+ slotvec = xrealloc (slotvec, s);
+ memset (slotvec + nslots, 0, (n1 - nslots) * sizeof *slotvec);
+ nslots = n1;
+ }
+
+ {
+ size_t size = slotvec[n].size;
+ char *val = slotvec[n].val;
+ size_t qsize = quotearg_buffer (val, size, arg, argsize, options);
+
+ if (size <= qsize)
+ {
+ slotvec[n].size = size = qsize + 1;
+ slotvec[n].val = val = xrealloc (val == slot0 ? 0 : val, size);
+ quotearg_buffer (val, size, arg, argsize, options);
+ }
+
+ errno = e;
+ return val;
+ }
+}
+
+char *
+quotearg_n (int n, char const *arg)
+{
+ return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options);
+}
+
+char *
+quotearg (char const *arg)
+{
+ return quotearg_n (0, arg);
+}
+
+/* Return quoting options for STYLE, with no extra quoting. */
+static struct quoting_options
+quoting_options_from_style (enum quoting_style style)
+{
+ struct quoting_options o;
+ o.style = style;
+ memset (o.quote_these_too, 0, sizeof o.quote_these_too);
+ return o;
+}
+
+char *
+quotearg_n_style (int n, enum quoting_style s, char const *arg)
+{
+ struct quoting_options const o = quoting_options_from_style (s);
+ return quotearg_n_options (n, arg, SIZE_MAX, &o);
+}
+
+char *
+quotearg_n_style_mem (int n, enum quoting_style s,
+ char const *arg, size_t argsize)
+{
+ struct quoting_options const o = quoting_options_from_style (s);
+ return quotearg_n_options (n, arg, argsize, &o);
+}
+
+char *
+quotearg_style (enum quoting_style s, char const *arg)
+{
+ return quotearg_n_style (0, s, arg);
+}
+
+char *
+quotearg_char (char const *arg, char ch)
+{
+ struct quoting_options options;
+ options = default_quoting_options;
+ set_char_quoting (&options, ch, 1);
+ return quotearg_n_options (0, arg, SIZE_MAX, &options);
+}
+
+char *
+quotearg_colon (char const *arg)
+{
+ return quotearg_char (arg, ':');
}