diff options
Diffstat (limited to 'contrib/gnu-sort/src/sort.c')
-rw-r--r-- | contrib/gnu-sort/src/sort.c | 662 |
1 files changed, 372 insertions, 290 deletions
diff --git a/contrib/gnu-sort/src/sort.c b/contrib/gnu-sort/src/sort.c index 5b75ebe45a92..b67582a03817 100644 --- a/contrib/gnu-sort/src/sort.c +++ b/contrib/gnu-sort/src/sort.c @@ -1,5 +1,5 @@ /* sort - sort lines of text (with all kinds of options). - Copyright (C) 88, 1991-2002 Free Software Foundation, Inc. + Copyright (C) 88, 1991-2004 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -27,12 +27,11 @@ #include <sys/types.h> #include <signal.h> #include <stdio.h> -#include <assert.h> #include "system.h" -#include "long-options.h" #include "error.h" #include "hard-locale.h" -#include "human.h" +#include "inttostr.h" +#include "long-options.h" #include "physmem.h" #include "posixver.h" #include "stdio-safer.h" @@ -50,9 +49,9 @@ struct rlimit { size_t rlim_cur; }; /* The official name of this program (e.g., no `g' prefix). */ #define PROGRAM_NAME "sort" -#define AUTHORS N_ ("Mike Haertel and Paul Eggert") +#define AUTHORS "Mike Haertel", "Paul Eggert" -#if HAVE_LANGINFO_H +#if HAVE_LANGINFO_CODESET # include <langinfo.h> #endif @@ -65,13 +64,6 @@ struct rlimit { size_t rlim_cur; }; double strtod (); #endif -/* Undefine, to avoid warning about redefinition on some systems. */ -/* FIXME: Remove these: use MIN/MAX from sys2.h. */ -#undef min -#define min(a, b) ((a) < (b) ? (a) : (b)) -#undef max -#define max(a, b) ((a) > (b) ? (a) : (b)) - #define UCHAR_LIM (UCHAR_MAX + 1) #define UCHAR(c) ((unsigned char) (c)) @@ -79,13 +71,17 @@ double strtod (); # define DEFAULT_TMPDIR "/tmp" #endif -/* Use this as exit status in case of error, not EXIT_FAILURE. This - is necessary because EXIT_FAILURE is usually 1 and POSIX requires - that sort exit with status 1 IFF invoked with -c and the input is - not properly sorted. Any other irregular exit must exit with a - status code greater than 1. */ -#define SORT_FAILURE 2 -#define SORT_OUT_OF_ORDER 1 +/* Exit statuses. */ +enum + { + /* POSIX says to exit with status 1 if invoked with -c and the + input is not properly sorted. */ + SORT_OUT_OF_ORDER = 1, + + /* POSIX says any other irregular exit must exit with a status + code greater than 1. */ + SORT_FAILURE = 2 + }; #define C_DECIMAL_POINT '.' #define NEGATION_SIGN '-' @@ -97,9 +93,9 @@ static char decimal_point; static int th_sep; /* if CHAR_MAX + 1, then there is no thousands separator */ /* Nonzero if the corresponding locales are hard. */ -static int hard_LC_COLLATE; +static bool hard_LC_COLLATE; # if HAVE_NL_LANGINFO -static int hard_LC_TIME; +static bool hard_LC_TIME; # endif # define IS_THOUSANDS_SEP(x) ((x) == th_sep) @@ -117,7 +113,7 @@ static int hard_LC_TIME; enum blanktype { bl_start, bl_end, bl_both }; /* The character marking end of line. Default to \n. */ -static int eolchar = '\n'; +static char eolchar = '\n'; /* Lines are held in core as counted strings. */ struct line @@ -141,32 +137,32 @@ struct buffer size_t alloc; /* Number of bytes allocated. */ size_t left; /* Number of bytes left from previous reads. */ size_t line_bytes; /* Number of bytes to reserve for each line. */ - int eof; /* An EOF has been read. */ + bool eof; /* An EOF has been read. */ }; struct keyfield { size_t sword; /* Zero-origin 'word' to start at. */ size_t schar; /* Additional characters to skip. */ - int skipsblanks; /* Skip leading white space at start. */ size_t eword; /* Zero-origin first word after field. */ size_t echar; /* Additional characters in field. */ - int skipeblanks; /* Skip trailing white space at finish. */ - int *ignore; /* Boolean array of characters to ignore. */ - char *translate; /* Translation applied to characters. */ - int numeric; /* Flag for numeric comparison. Handle + bool const *ignore; /* Boolean array of characters to ignore. */ + char const *translate; /* Translation applied to characters. */ + bool skipsblanks; /* Skip leading blanks at start. */ + bool skipeblanks; /* Skip trailing blanks at finish. */ + bool numeric; /* Flag for numeric comparison. Handle strings of digits with optional decimal point, but no exponential notation. */ - int general_numeric; /* Flag for general, numeric comparison. + bool general_numeric; /* Flag for general, numeric comparison. Handle numbers in exponential notation. */ - int month; /* Flag for comparison by month name. */ - int reverse; /* Reverse the sense of comparison. */ + bool month; /* Flag for comparison by month name. */ + bool reverse; /* Reverse the sense of comparison. */ struct keyfield *next; /* Next keyfield to try. */ }; struct month { - char *name; + char const *name; int val; }; @@ -174,20 +170,19 @@ struct month char *program_name; /* FIXME: None of these tables work with multibyte character sets. - Also, there are many other bugs when handling multibyte characters, - or even unibyte encodings where line boundaries are not in the - initial shift state. One way to fix this is to rewrite `sort' to - use wide characters internally, but doing this with good - performance is a bit tricky. */ + Also, there are many other bugs when handling multibyte characters. + One way to fix this is to rewrite `sort' to use wide characters + internally, but doing this with good performance is a bit + tricky. */ -/* Table of white space. */ -static int blanks[UCHAR_LIM]; +/* Table of blanks. */ +static bool blanks[UCHAR_LIM]; /* Table of non-printing characters. */ -static int nonprinting[UCHAR_LIM]; +static bool nonprinting[UCHAR_LIM]; /* Table of non-dictionary characters (not letters, digits, or blanks). */ -static int nondictionary[UCHAR_LIM]; +static bool nondictionary[UCHAR_LIM]; /* Translation table folding lower case to upper. */ static char fold_toupper[UCHAR_LIM]; @@ -242,36 +237,38 @@ static size_t temp_dir_count; /* Number of allocated slots in temp_dirs. */ static size_t temp_dir_alloc; -/* Our process ID. */ -static pid_t process_id; - /* Flag to reverse the order of all comparisons. */ -static int reverse; +static bool reverse; /* Flag for stable sort. This turns off the last ditch bytewise comparison of lines, and instead leaves lines in the same order they were read if all keys compare equal. */ -static int stable; +static bool stable; -/* Tab character separating fields. If NUL, then fields are separated - by the empty string between a non-whitespace character and a whitespace +/* If TAB has this value, blanks separate fields. */ +enum { TAB_DEFAULT = CHAR_MAX + 1 }; + +/* Tab character separating fields. If TAB_DEFAULT, then fields are + separated by the empty string between a non-blank character and a blank character. */ -static char tab; +static int tab = TAB_DEFAULT; /* Flag to remove consecutive duplicate lines from the output. Only the last of a sequence of equal lines will be output. */ -static int unique; +static bool unique; /* Nonzero if any of the input files are the standard input. */ -static int have_read_stdin; +static bool have_read_stdin; /* List of key field comparisons to be tried. */ static struct keyfield *keylist; +static void sortlines_temp (struct line *, size_t, struct line *); + void usage (int status) { - if (status != 0) + if (status != EXIT_SUCCESS) fprintf (stderr, _("Try `%s --help' for more information.\n"), program_name); else @@ -313,11 +310,11 @@ Other options:\n\ -S, --buffer-size=SIZE use SIZE for main memory buffer\n\ "), stdout); printf (_("\ - -t, --field-separator=SEP use SEP instead of non- to whitespace transition\n\ - -T, --temporary-directory=DIR use DIR for temporaries, not $TMPDIR or %s\n\ + -t, --field-separator=SEP use SEP instead of non-blank to blank transition\n\ + -T, --temporary-directory=DIR use DIR for temporaries, not $TMPDIR or %s;\n\ multiple options specify multiple directories\n\ - -u, --unique with -c: check for strict ordering\n\ - otherwise: output only the first of an equal run\n\ + -u, --unique with -c, check for strict ordering;\n\ + without -c, output only the first of an equal run\n\ "), DEFAULT_TMPDIR); fputs (_("\ -z, --zero-terminated end lines with 0 byte, not newline\n\ @@ -345,10 +342,7 @@ native byte values.\n\ "), stdout ); printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT); } - /* Don't use EXIT_FAILURE here in case it is defined to be 1. - POSIX requires that sort return 1 IFF invoked with -c and - the input is not properly sorted. */ - assert (status == 0 || status == SORT_FAILURE); + exit (status); } @@ -395,7 +389,7 @@ static struct tempnode *volatile temphead; static void cleanup (void) { - struct tempnode *node; + struct tempnode const *node; for (node = temphead; node; node = node->next) unlink (node->name); @@ -403,7 +397,7 @@ cleanup (void) /* Report MESSAGE for FILE, then clean up and exit. */ -static void die PARAMS ((char const *, char const *)) ATTRIBUTE_NORETURN; +static void die (char const *, char const *) ATTRIBUTE_NORETURN; static void die (char const *message, char const *file) { @@ -425,7 +419,7 @@ create_temp_file (FILE **pfp) char const *temp_dir = temp_dirs[temp_dir_index]; size_t len = strlen (temp_dir); struct tempnode *node = - (struct tempnode *) xmalloc (sizeof node->next + len + sizeof slashbase); + xmalloc (sizeof node->next + len + sizeof slashbase); char *file = node->name; memcpy (file, temp_dir, len); @@ -458,7 +452,7 @@ xfopen (const char *file, const char *how) { if (*how == 'r') { - have_read_stdin = 1; + have_read_stdin = true; fp = stdin; } else @@ -503,10 +497,7 @@ static void add_temp_dir (char const *dir) { if (temp_dir_count == temp_dir_alloc) - { - temp_dir_alloc = temp_dir_alloc ? temp_dir_alloc * 2 : 16; - temp_dirs = xrealloc (temp_dirs, sizeof (temp_dirs) * temp_dir_alloc); - } + temp_dirs = x2nrealloc (temp_dirs, &temp_dir_alloc, sizeof *temp_dirs); temp_dirs[temp_dir_count++] = dir; } @@ -525,7 +516,7 @@ zaptemp (const char *name) { unlink (name); *pnode = node->next; - free ((char *) node); + free (node); break; } } @@ -535,8 +526,9 @@ zaptemp (const char *name) static int struct_month_cmp (const void *m1, const void *m2) { - return strcmp (((const struct month *) m1)->name, - ((const struct month *) m2)->name); + struct month const *month1 = m1; + struct month const *month2 = m2; + return strcmp (month1->name, month2->name); } #endif @@ -550,16 +542,10 @@ inittables (void) for (i = 0; i < UCHAR_LIM; ++i) { - if (ISBLANK (i)) - blanks[i] = 1; - if (!ISPRINT (i)) - nonprinting[i] = 1; - if (!ISALNUM (i) && !ISBLANK (i)) - nondictionary[i] = 1; - if (ISLOWER (i)) - fold_toupper[i] = toupper (i); - else - fold_toupper[i] = i; + blanks[i] = !!ISBLANK (i); + nonprinting[i] = !ISPRINT (i); + nondictionary[i] = !ISALNUM (i) && !ISBLANK (i); + fold_toupper[i] = (ISLOWER (i) ? toupper (i) : i); } #if HAVE_NL_LANGINFO @@ -568,14 +554,14 @@ inittables (void) { for (i = 0; i < MONTHS_PER_YEAR; i++) { - char *s; + char const *s; size_t s_len; size_t j; char *name; s = (char *) nl_langinfo (ABMON_1 + i); s_len = strlen (s); - monthtab[i].name = name = (char *) xmalloc (s_len + 1); + monthtab[i].name = name = xmalloc (s_len + 1); monthtab[i].val = i + 1; for (j = 0; j < s_len; j++) @@ -583,7 +569,7 @@ inittables (void) name[j] = '\0'; } qsort ((void *) monthtab, MONTHS_PER_YEAR, - sizeof (struct month), struct_month_cmp); + sizeof *monthtab, struct_month_cmp); } #endif } @@ -631,6 +617,11 @@ specify_sort_size (char const *s) if (e == LONGINT_OK) { + /* If multiple sort sizes are specified, take the maximum, so + that option order does not matter. */ + if (n < sort_size) + return; + sort_size = n; if (sort_size == n) { @@ -689,13 +680,18 @@ default_sort_size (void) by FPS and FILES, which are alternate paths to the same files. NFILES gives the number of input files; NFPS may be less. Assume that each input line requires LINE_BYTES extra bytes' worth of line - information. Return at most SIZE_BOUND. */ + information. Do not exceed a bound on the size: if the bound is + not specified by the user, use a default. */ static size_t sort_buffer_size (FILE *const *fps, int nfps, char *const *files, int nfiles, - size_t line_bytes, size_t size_bound) + size_t line_bytes) { + /* A bound on the input size. If zero, the bound hasn't been + determined yet. */ + static size_t size_bound; + /* In the worst case, each input byte is a newline. */ size_t worst_case_per_input_byte = line_bytes + 1; @@ -717,7 +713,23 @@ sort_buffer_size (FILE *const *fps, int nfps, != 0) die (_("stat failed"), files[i]); - file_size = S_ISREG (st.st_mode) ? st.st_size : INPUT_FILE_SIZE_GUESS; + if (S_ISREG (st.st_mode)) + file_size = st.st_size; + else + { + /* The file has unknown size. If the user specified a sort + buffer size, use that; otherwise, guess the size. */ + if (sort_size) + return sort_size; + file_size = INPUT_FILE_SIZE_GUESS; + } + + if (! size_bound) + { + size_bound = sort_size; + if (! size_bound) + size_bound = default_sort_size (); + } /* Add the amount of memory needed to represent the worst case where the input consists entirely of newlines followed by a @@ -757,7 +769,7 @@ initbuf (struct buffer *buf, size_t line_bytes, size_t alloc) buf->line_bytes = line_bytes; buf->alloc = alloc; buf->used = buf->left = buf->nlines = 0; - buf->eof = 0; + buf->eof = false; } /* Return one past the limit of the line array. */ @@ -777,8 +789,12 @@ begfield (const struct line *line, const struct keyfield *key) register char *ptr = line->text, *lim = ptr + line->length - 1; register size_t sword = key->sword; register size_t schar = key->schar; + register size_t remaining_bytes; - if (tab) + /* The leading field separator itself is included in a field when -t + is absent. */ + + if (tab != TAB_DEFAULT) while (ptr < lim && sword--) { while (ptr < lim && *ptr != tab) @@ -799,7 +815,9 @@ begfield (const struct line *line, const struct keyfield *key) while (ptr < lim && blanks[UCHAR (*ptr)]) ++ptr; - if (schar < lim - ptr) + /* Advance PTR by SCHAR (if possible), but no further than LIM. */ + remaining_bytes = lim - ptr; + if (schar < remaining_bytes) ptr += schar; else ptr = lim; @@ -815,10 +833,7 @@ limfield (const struct line *line, const struct keyfield *key) { register char *ptr = line->text, *lim = ptr + line->length - 1; register size_t eword = key->eword, echar = key->echar; - - /* Note: from the POSIX spec: - The leading field separator itself is included in - a field when -t is not used. FIXME: move this comment up... */ + register size_t remaining_bytes; /* Move PTR past EWORD fields or to one past the last byte on LINE, whichever comes first. If there are more than EWORD fields, leave @@ -827,7 +842,7 @@ limfield (const struct line *line, const struct keyfield *key) `beginning' is the first character following the delimiting TAB. Otherwise, leave PTR pointing at the first `blank' character after the preceding field. */ - if (tab) + if (tab != TAB_DEFAULT) while (ptr < lim && eword--) { while (ptr < lim && *ptr != tab) @@ -876,7 +891,7 @@ limfield (const struct line *line, const struct keyfield *key) */ /* Make LIM point to the end of (one byte past) the current field. */ - if (tab) + if (tab != TAB_DEFAULT) { char *newlim; newlim = memchr (ptr, tab, lim - ptr); @@ -902,7 +917,8 @@ limfield (const struct line *line, const struct keyfield *key) ++ptr; /* Advance PTR by ECHAR (if possible), but no further than LIM. */ - if (echar < lim - ptr) + remaining_bytes = lim - ptr; + if (echar < remaining_bytes) ptr += echar; else ptr = lim; @@ -910,22 +926,24 @@ limfield (const struct line *line, const struct keyfield *key) return ptr; } -/* FIXME */ +/* Return the number of trailing blanks in FIELD, with LEN bytes. */ -static void -trim_trailing_blanks (const char *a_start, char **a_end) +static size_t +trailing_blanks (char const *field, size_t len) { - while (*a_end > a_start && blanks[UCHAR (*(*a_end - 1))]) - --(*a_end); + size_t i; + for (i = len; 0 < i && blanks[UCHAR (field[i - 1])]; i--) + continue; + return len - i; } /* Fill BUF reading from FP, moving buf->left bytes from the end of buf->buf to the beginning first. If EOF is reached and the file wasn't terminated by a newline, supply one. Set up BUF's line table too. FILE is the name of the file corresponding to FP. - Return nonzero if some input was read. */ + Return true if some input was read. */ -static int +static bool fillbuf (struct buffer *buf, register FILE *fp, char const *file) { struct keyfield const *key = keylist; @@ -934,7 +952,7 @@ fillbuf (struct buffer *buf, register FILE *fp, char const *file) size_t mergesize = merge_buffer_size - MIN_MERGE_BUFFER_SIZE; if (buf->eof) - return 0; + return false; if (buf->used != buf->left) { @@ -970,9 +988,9 @@ fillbuf (struct buffer *buf, register FILE *fp, char const *file) die (_("read failed"), file); if (feof (fp)) { - buf->eof = 1; + buf->eof = true; if (buf->buf == ptrlim) - return 0; + return false; if (ptrlim[-1] != eol) *ptrlim++ = eol; } @@ -992,9 +1010,11 @@ fillbuf (struct buffer *buf, register FILE *fp, char const *file) { /* Precompute the position of the first key for efficiency. */ - line->keylim = key->eword == -1 ? p : limfield (line, key); + line->keylim = (key->eword == SIZE_MAX + ? p + : limfield (line, key)); - if (key->sword != -1) + if (key->sword != SIZE_MAX) line->keybeg = begfield (line, key); else { @@ -1004,7 +1024,10 @@ fillbuf (struct buffer *buf, register FILE *fp, char const *file) line->keybeg = line_start; } if (key->skipeblanks) - trim_trailing_blanks (line->keybeg, &line->keylim); + { + size_t keylen = line->keylim - line->keybeg; + line->keylim -= trailing_blanks (line->keybeg, keylen); + } } line_start = ptr; @@ -1021,15 +1044,12 @@ fillbuf (struct buffer *buf, register FILE *fp, char const *file) { buf->left = ptr - line_start; merge_buffer_size = mergesize + MIN_MERGE_BUFFER_SIZE; - return 1; + return true; } /* The current input line is too long to fit in the buffer. Double the buffer size and try again. */ - if (2 * buf->alloc < buf->alloc) - xalloc_die (); - buf->alloc *= 2; - buf->buf = xrealloc (buf->buf, buf->alloc); + buf->buf = x2nrealloc (buf->buf, &buf->alloc, sizeof *(buf->buf)); } } @@ -1106,7 +1126,7 @@ static int numcompare (register const char *a, register const char *b) { register int tmpa, tmpb, tmp; - register size_t loga, logb; + register size_t log_a, log_b; tmpa = *a; tmpb = *b; @@ -1159,20 +1179,20 @@ numcompare (register const char *a, register const char *b) tmp = tmpb - tmpa; - for (loga = 0; ISDIGIT (tmpa); ++loga) + for (log_a = 0; ISDIGIT (tmpa); ++log_a) do tmpa = *++a; while (IS_THOUSANDS_SEP (tmpa)); - for (logb = 0; ISDIGIT (tmpb); ++logb) + for (log_b = 0; ISDIGIT (tmpb); ++log_b) do tmpb = *++b; while (IS_THOUSANDS_SEP (tmpb)); - if (loga != logb) - return loga < logb ? 1 : -1; + if (log_a != log_b) + return log_a < log_b ? 1 : -1; - if (!loga) + if (!log_a) return 0; return tmp; @@ -1221,20 +1241,20 @@ numcompare (register const char *a, register const char *b) tmp = tmpa - tmpb; - for (loga = 0; ISDIGIT (tmpa); ++loga) + for (log_a = 0; ISDIGIT (tmpa); ++log_a) do tmpa = *++a; while (IS_THOUSANDS_SEP (tmpa)); - for (logb = 0; ISDIGIT (tmpb); ++logb) + for (log_b = 0; ISDIGIT (tmpb); ++log_b) do tmpb = *++b; while (IS_THOUSANDS_SEP (tmpb)); - if (loga != logb) - return loga < logb ? -1 : 1; + if (log_a != log_b) + return log_a < log_b ? -1 : 1; - if (!loga) + if (!log_a) return 0; return tmp; @@ -1289,12 +1309,11 @@ getmonth (const char *s, size_t len) if (len == 0) return 0; - month = (char *) alloca (len + 1); + month = alloca (len + 1); for (i = 0; i < len; ++i) month[i] = fold_toupper[UCHAR (s[i])]; - while (blanks[UCHAR (month[i - 1])]) - --i; - month[i] = '\0'; + len -= trailing_blanks (month, len); + month[len] = '\0'; do { @@ -1319,7 +1338,7 @@ getmonth (const char *s, size_t len) static int keycompare (const struct line *a, const struct line *b) { - struct keyfield *key = keylist; + struct keyfield const *key = keylist; /* For the first iteration only, the key positions have been precomputed for us. */ @@ -1332,8 +1351,8 @@ keycompare (const struct line *a, const struct line *b) for (;;) { - register unsigned char *translate = (unsigned char *) key->translate; - register int *ignore = key->ignore; + register char const *translate = key->translate; + register bool const *ignore = key->ignore; /* Find the lengths. */ size_t lena = lima <= texta ? 0 : lima - texta; @@ -1341,12 +1360,8 @@ keycompare (const struct line *a, const struct line *b) if (key->skipeblanks) { - char *a_end = texta + lena; - char *b_end = textb + lenb; - trim_trailing_blanks (texta, &a_end); - trim_trailing_blanks (textb, &b_end); - lena = a_end - texta; - lenb = b_end - textb; + lena -= trailing_blanks (texta, lena); + lenb -= trailing_blanks (textb, lenb); } /* Actually compare the fields. */ @@ -1367,12 +1382,12 @@ keycompare (const struct line *a, const struct line *b) { if (ignore || translate) { - char *copy_a = (char *) alloca (lena + 1 + lenb + 1); + char *copy_a = alloca (lena + 1 + lenb + 1); char *copy_b = copy_a + lena + 1; size_t new_len_a, new_len_b, i; /* Ignore and/or translate chars before comparing. */ - for (new_len_a = new_len_b = i = 0; i < max (lena, lenb); i++) + for (new_len_a = new_len_b = i = 0; i < MAX (lena, lenb); i++) { if (i < lena) { @@ -1429,7 +1444,7 @@ keycompare (const struct line *a, const struct line *b) CMP_WITH_IGNORE (translate[UCHAR (*texta)], translate[UCHAR (*textb)]); else - CMP_WITH_IGNORE (UCHAR (*texta), UCHAR (*textb)); + CMP_WITH_IGNORE (*texta, *textb); } else if (lena == 0) diff = - NONZERO (lenb); @@ -1449,7 +1464,7 @@ keycompare (const struct line *a, const struct line *b) } else { - diff = memcmp (texta, textb, min (lena, lenb)); + diff = memcmp (texta, textb, MIN (lena, lenb)); if (diff) goto not_equal; } @@ -1464,12 +1479,12 @@ keycompare (const struct line *a, const struct line *b) break; /* Find the beginning and limit of the next field. */ - if (key->eword != -1) + if (key->eword != SIZE_MAX) lima = limfield (a, key), limb = limfield (b, key); else lima = a->text + a->length - 1, limb = b->text + b->length - 1; - if (key->sword != -1) + if (key->sword != SIZE_MAX) texta = begfield (a, key), textb = begfield (b, key); else { @@ -1508,7 +1523,7 @@ compare (register const struct line *a, register const struct line *b) { diff = keycompare (a, b); alloca (0); - if (diff != 0 || unique || stable) + if (diff | unique | stable) return diff; } @@ -1519,30 +1534,31 @@ compare (register const struct line *a, register const struct line *b) if (alen == 0) diff = - NONZERO (blen); else if (blen == 0) - diff = NONZERO (alen); + diff = 1; else if (HAVE_SETLOCALE && hard_LC_COLLATE) diff = xmemcoll (a->text, alen, b->text, blen); - else if (! (diff = memcmp (a->text, b->text, min (alen, blen)))) + else if (! (diff = memcmp (a->text, b->text, MIN (alen, blen)))) diff = alen < blen ? -1 : alen != blen; return reverse ? -diff : diff; } -/* Check that the lines read from the given FP come in order. Print a +/* Check that the lines read from FILE_NAME come in order. Print a diagnostic (FILE_NAME, line number, contents of line) to stderr and return - one if they are not in order. Otherwise, print no diagnostic - and return zero. */ + false if they are not in order. Otherwise, print no diagnostic + and return true. */ -static int -checkfp (FILE *fp, char *file_name) +static bool +check (char const *file_name) { + FILE *fp = xfopen (file_name, "r"); struct buffer buf; /* Input buffer. */ struct line temp; /* Copy of previous line. */ size_t alloc = 0; uintmax_t line_number = 0; - struct keyfield *key = keylist; - int nonunique = 1 - unique; - int disordered = 0; + struct keyfield const *key = keylist; + bool nonunique = ! unique; + bool ordered = true; initbuf (&buf, sizeof (struct line), MAX (merge_buffer_size, sort_size)); @@ -1559,16 +1575,16 @@ checkfp (FILE *fp, char *file_name) { found_disorder: { - char hr_buf[LONGEST_HUMAN_READABLE + 1]; struct line const *disorder_line = line - 1; uintmax_t disorder_line_number = buffer_linelim (&buf) - disorder_line + line_number; + char hr_buf[INT_BUFSIZE_BOUND (uintmax_t)]; fprintf (stderr, _("%s: %s:%s: disorder: "), program_name, file_name, - human_readable (disorder_line_number, hr_buf, 1, 1)); + umaxtostr (disorder_line_number, hr_buf)); write_bytes (disorder_line->text, disorder_line->length, stderr, _("standard error")); - disordered = 1; + ordered = false; break; } } @@ -1609,7 +1625,7 @@ checkfp (FILE *fp, char *file_name) free (buf.buf); if (temp.text) free (temp.text); - return disordered; + return ordered; } /* Merge lines from FILES onto OFP. NFILES cannot be greater than @@ -1633,7 +1649,7 @@ mergefps (char **files, register int nfiles, such that cur[ord[0]] is the smallest line and will be next output. */ register int i, j, t; - struct keyfield *key = keylist; + struct keyfield const *key = keylist; saved.text = NULL; /* Read initial lines from each input file. */ @@ -1777,16 +1793,57 @@ mergefps (char **files, register int nfiles, xfclose (ofp, output_file); } +/* Merge into T the two sorted arrays of lines LO (with NLO members) + and HI (with NHI members). T, LO, and HI point just past their + respective arrays, and the arrays are in reverse order. NLO and + NHI must be positive, and HI - NHI must equal T - (NLO + NHI). */ + +static inline void +mergelines (struct line *t, + struct line const *lo, size_t nlo, + struct line const *hi, size_t nhi) +{ + for (;;) + if (compare (lo - 1, hi - 1) <= 0) + { + *--t = *--lo; + if (! --nlo) + { + /* HI - NHI equalled T - (NLO + NHI) when this function + began. Therefore HI must equal T now, and there is no + need to copy from HI to T. */ + return; + } + } + else + { + *--t = *--hi; + if (! --nhi) + { + do + *--t = *--lo; + while (--nlo); + + return; + } + } +} + /* Sort the array LINES with NLINES members, using TEMP for temporary space. + NLINES must be at least 2. The input and output arrays are in reverse order, and LINES and - TEMP point just past the end of their respective arrays. */ + TEMP point just past the end of their respective arrays. + + Use a recursive divide-and-conquer algorithm, in the style + suggested by Knuth volume 3 (2nd edition), exercise 5.2.4-23. Use + the optimization suggested by exercise 5.2.4-10; this requires room + for only 1.5*N lines, rather than the usual 2*N lines. Knuth + writes that this memory optimization was originally published by + D. A. Bell, Comp J. 1 (1958), 75. */ static void sortlines (struct line *lines, size_t nlines, struct line *temp) { - register struct line *lo, *hi, *t; - register size_t nlo, nhi; - if (nlines == 2) { if (0 < compare (&lines[-1], &lines[-2])) @@ -1795,32 +1852,51 @@ sortlines (struct line *lines, size_t nlines, struct line *temp) lines[-1] = lines[-2]; lines[-2] = tmp; } - return; } + else + { + size_t nlo = nlines / 2; + size_t nhi = nlines - nlo; + struct line *lo = lines; + struct line *hi = lines - nlo; + struct line *sorted_lo = temp; + + sortlines (hi, nhi, temp); + if (1 < nlo) + sortlines_temp (lo, nlo, sorted_lo); + else + sorted_lo[-1] = lo[-1]; - nlo = nlines / 2; - lo = lines; - nhi = nlines - nlo; - hi = lines - nlo; - - if (nlo > 1) - sortlines (lo, nlo, temp); + mergelines (lines, sorted_lo, nlo, hi, nhi); + } +} - if (nhi > 1) - sortlines (hi, nhi, temp); +/* Like sortlines (LINES, NLINES, TEMP), except output into TEMP + rather than sorting in place. */ - t = temp; +static void +sortlines_temp (struct line *lines, size_t nlines, struct line *temp) +{ + if (nlines == 2) + { + bool swap = (0 < compare (&lines[-1], &lines[-2])); + temp[-1] = lines[-1 - swap]; + temp[-2] = lines[-2 + swap]; + } + else + { + size_t nlo = nlines / 2; + size_t nhi = nlines - nlo; + struct line *lo = lines; + struct line *hi = lines - nlo; + struct line *sorted_hi = temp - nlo; - while (nlo && nhi) - if (compare (lo - 1, hi - 1) <= 0) - *--t = *--lo, --nlo; - else - *--t = *--hi, --nhi; - while (nlo--) - *--t = *--lo; + sortlines_temp (hi, nhi, sorted_hi); + if (1 < nlo) + sortlines (lo, nlo, temp); - for (lo = lines, nlo = nlines - nhi, t = temp; nlo; --nlo) - *--lo = *--t; + mergelines (temp, lo, nlo, sorted_hi, nhi); + } } /* Return the index of the first of NFILES FILES that is the same file @@ -1829,22 +1905,22 @@ sortlines (struct line *lines, size_t nlines, struct line *temp) output of a command like "cat OUTFILE". */ static int -first_same_file (char **files, int nfiles, char const *outfile) +first_same_file (char * const *files, int nfiles, char const *outfile) { int i; - int got_outstat = 0; + bool got_outstat = false; struct stat instat, outstat; for (i = 0; i < nfiles; i++) { - int standard_input = STREQ (files[i], "-"); + bool standard_input = STREQ (files[i], "-"); if (STREQ (outfile, files[i]) && ! standard_input) return i; if (! got_outstat) { - got_outstat = 1; + got_outstat = true; if ((STREQ (outfile, "-") ? fstat (STDOUT_FILENO, &outstat) : stat (outfile, &outstat)) @@ -1863,23 +1939,6 @@ first_same_file (char **files, int nfiles, char const *outfile) return nfiles; } -/* Check that each of the NFILES FILES is ordered. - Return a count of disordered files. */ - -static int -check (char **files, int nfiles) -{ - int i, disorders = 0; - FILE *fp; - - for (i = 0; i < nfiles; ++i) - { - fp = xfopen (files[i], "r"); - disorders += checkfp (fp, files[i]); - } - return disorders; -} - /* Merge NFILES FILES onto OUTPUT_FILE. However, merge at most MAX_MERGE input files directly onto OUTPUT_FILE. MAX_MERGE cannot exceed NMERGE. */ @@ -1912,15 +1971,11 @@ merge (char **files, int nfiles, int max_merge, char const *output_file) /* Sort NFILES FILES onto OUTPUT_FILE. */ static void -sort (char **files, int nfiles, char const *output_file) +sort (char * const *files, int nfiles, char const *output_file) { struct buffer buf; int n_temp_files = 0; - int output_file_created = 0; - - static size_t size; - if (! size && ! (size = sort_size)) - size = default_sort_size (); + bool output_file_created = false; buf.alloc = 0; @@ -1930,12 +1985,13 @@ sort (char **files, int nfiles, char const *output_file) char const *file = *files; FILE *fp = xfopen (file, "r"); FILE *tfp; + size_t bytes_per_line = (2 * sizeof (struct line) + - sizeof (struct line) / 2); if (! buf.alloc) - initbuf (&buf, 2 * sizeof (struct line), - sort_buffer_size (&fp, 1, files, nfiles, - 2 * sizeof (struct line), size)); - buf.eof = 0; + initbuf (&buf, bytes_per_line, + sort_buffer_size (&fp, 1, files, nfiles, bytes_per_line)); + buf.eof = false; files++; nfiles--; @@ -1945,9 +2001,8 @@ sort (char **files, int nfiles, char const *output_file) struct line *linebase; if (buf.eof && nfiles - && (2 * sizeof (struct line) + 1 - < (buf.alloc - buf.used - - 2 * sizeof (struct line) * buf.nlines))) + && (bytes_per_line + 1 + < (buf.alloc - buf.used - bytes_per_line * buf.nlines))) { /* End of file, but there is more input and buffer room. Concatenate the next input file; this is faster in @@ -1958,13 +2013,14 @@ sort (char **files, int nfiles, char const *output_file) line = buffer_linelim (&buf); linebase = line - buf.nlines; - sortlines (line, buf.nlines, linebase); + if (1 < buf.nlines) + sortlines (line, buf.nlines, linebase); if (buf.eof && !nfiles && !n_temp_files && !buf.left) { xfclose (fp, file); tfp = xfopen (output_file, "w"); temp_output = output_file; - output_file_created = 1; + output_file_created = true; } else { @@ -1997,11 +2053,11 @@ sort (char **files, int nfiles, char const *output_file) { int i = n_temp_files; struct tempnode *node; - char **tempfiles = (char **) xmalloc (n_temp_files * sizeof (char *)); + char **tempfiles = xnmalloc (n_temp_files, sizeof *tempfiles); for (node = temphead; i > 0; node = node->next) tempfiles[--i] = node->name; merge (tempfiles, n_temp_files, NMERGE, output_file); - free ((char *) tempfiles); + free (tempfiles); } } @@ -2020,7 +2076,7 @@ insertkey (struct keyfield *key) /* Report a bad field specification SPEC, with extra info MSGID. */ -static void badfieldspec PARAMS ((char const *, char const *)) +static void badfieldspec (char const *, char const *) ATTRIBUTE_NORETURN; static void badfieldspec (char const *spec, char const *msgid) @@ -2050,6 +2106,7 @@ parse_field_count (char const *string, size_t *val, char const *msgid) break; /* Fall through. */ case LONGINT_OVERFLOW: + case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR: if (msgid) error (SORT_FAILURE, 0, _("%s: count `%.*s' too large"), _(msgid), (int) (suffix - string), string); @@ -2089,7 +2146,7 @@ sighandler (int sig) signal (sig, SIG_DFL); #endif - kill (process_id, sig); + raise (sig); } /* Set the ordering options for KEY specified in S. @@ -2107,9 +2164,9 @@ set_ordering (register const char *s, struct keyfield *key, { case 'b': if (blanktype == bl_start || blanktype == bl_both) - key->skipsblanks = 1; + key->skipsblanks = true; if (blanktype == bl_end || blanktype == bl_both) - key->skipeblanks = 1; + key->skipeblanks = true; break; case 'd': key->ignore = nondictionary; @@ -2118,19 +2175,22 @@ set_ordering (register const char *s, struct keyfield *key, key->translate = fold_toupper; break; case 'g': - key->general_numeric = 1; + key->general_numeric = true; break; case 'i': - key->ignore = nonprinting; + /* Option order should not matter, so don't let -i override + -d. -d implies -i, but -i does not imply -d. */ + if (! key->ignore) + key->ignore = nonprinting; break; case 'M': - key->month = 1; + key->month = true; break; case 'n': - key->numeric = 1; + key->numeric = true; break; case 'r': - key->reverse = 1; + key->reverse = true; break; default: return (char *) s; @@ -2143,8 +2203,8 @@ set_ordering (register const char *s, struct keyfield *key, static struct keyfield * new_key (void) { - struct keyfield *key = (struct keyfield *) xcalloc (1, sizeof *key); - key->eword = -1; + struct keyfield *key = xzalloc (sizeof *key); + key->eword = SIZE_MAX; return key; } @@ -2154,10 +2214,11 @@ main (int argc, char **argv) struct keyfield *key; struct keyfield gkey; char const *s; - int i; int c = 0; - int checkonly = 0, mergeonly = 0, nfiles = 0; - int posix_pedantic = (getenv ("POSIXLY_CORRECT") != NULL); + bool checkonly = false; + bool mergeonly = false; + int nfiles = 0; + bool posixly_correct = (getenv ("POSIXLY_CORRECT") != NULL); bool obsolete_usage = (posix2_version () < 200112); char const *short_options = (obsolete_usage ? COMMON_SHORT_OPTIONS "y::" @@ -2165,19 +2226,22 @@ main (int argc, char **argv) char *minus = "-", **files; char const *outfile = minus; static int const sigs[] = { SIGHUP, SIGINT, SIGPIPE, SIGTERM }; - int nsigs = sizeof sigs / sizeof *sigs; + unsigned int nsigs = sizeof sigs / sizeof *sigs; #ifdef SA_NOCLDSTOP struct sigaction oldact, newact; #endif + initialize_main (&argc, &argv); program_name = argv[0]; - process_id = getpid (); setlocale (LC_ALL, ""); bindtextdomain (PACKAGE, LOCALEDIR); textdomain (PACKAGE); atexit (cleanup); + initialize_exit_failure (SORT_FAILURE); + atexit (close_stdout); + hard_LC_COLLATE = hard_locale (LC_COLLATE); #if HAVE_NL_LANGINFO hard_LC_TIME = hard_locale (LC_TIME); @@ -2186,7 +2250,7 @@ main (int argc, char **argv) #if HAVE_SETLOCALE /* Let's get locale's representation of the decimal point */ { - struct lconv *lconvp = localeconv (); + struct lconv const *lconvp = localeconv (); /* If the locale doesn't define a decimal point, or if the decimal point is multibyte, use the C decimal point. We don't support @@ -2202,42 +2266,44 @@ main (int argc, char **argv) } #endif - have_read_stdin = 0; + have_read_stdin = false; inittables (); - /* Change the way library functions fail. */ - xalloc_exit_failure = SORT_FAILURE; - xmemcoll_exit_failure = SORT_FAILURE; - #ifdef SA_NOCLDSTOP - sigemptyset (&caught_signals); - for (i = 0; i < nsigs; i++) - sigaddset (&caught_signals, sigs[i]); - newact.sa_handler = sighandler; - newact.sa_mask = caught_signals; - newact.sa_flags = 0; + { + unsigned int i; + sigemptyset (&caught_signals); + for (i = 0; i < nsigs; i++) + sigaddset (&caught_signals, sigs[i]); + newact.sa_handler = sighandler; + newact.sa_mask = caught_signals; + newact.sa_flags = 0; + } #endif - for (i = 0; i < nsigs; i++) - { - int sig = sigs[i]; + { + unsigned int i; + for (i = 0; i < nsigs; i++) + { + int sig = sigs[i]; #ifdef SA_NOCLDSTOP - sigaction (sig, NULL, &oldact); - if (oldact.sa_handler != SIG_IGN) - sigaction (sig, &newact, NULL); + sigaction (sig, NULL, &oldact); + if (oldact.sa_handler != SIG_IGN) + sigaction (sig, &newact, NULL); #else - if (signal (sig, SIG_IGN) != SIG_IGN) - signal (sig, sighandler); + if (signal (sig, SIG_IGN) != SIG_IGN) + signal (sig, sighandler); #endif - } + } + } - gkey.sword = gkey.eword = -1; + gkey.sword = gkey.eword = SIZE_MAX; gkey.ignore = NULL; gkey.translate = NULL; - gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = 0; - gkey.skipsblanks = gkey.skipeblanks = 0; + gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = false; + gkey.skipsblanks = gkey.skipeblanks = false; - files = (char **) xmalloc (sizeof (char *) * argc); + files = xnmalloc (argc, sizeof *files); for (;;) { @@ -2247,7 +2313,7 @@ main (int argc, char **argv) "-o FILE" or "-oFILE". */ if (c == -1 - || (posix_pedantic && nfiles != 0 + || (posixly_correct && nfiles != 0 && ! (obsolete_usage && ! checkonly && optind != argc @@ -2257,7 +2323,7 @@ main (int argc, char **argv) long_options, NULL)) == -1)) { - if (optind == argc) + if (argc <= optind) break; files[nfiles++] = argv[optind++]; } @@ -2274,7 +2340,7 @@ main (int argc, char **argv) if (s && *s == '.') s = parse_field_count (s + 1, &key->schar, NULL); if (! (key->sword | key->schar)) - key->sword = -1; + key->sword = SIZE_MAX; if (! s || *set_ordering (s, key, bl_start)) { free (key); @@ -2319,7 +2385,7 @@ main (int argc, char **argv) break; case 'c': - checkonly = 1; + checkonly = true; break; case 'k': @@ -2344,11 +2410,11 @@ main (int argc, char **argv) } } if (! (key->sword | key->schar)) - key->sword = -1; + key->sword = SIZE_MAX; s = set_ordering (s, key, bl_start); if (*s != ',') { - key->eword = -1; + key->eword = SIZE_MAX; key->echar = 0; } else @@ -2377,15 +2443,17 @@ main (int argc, char **argv) break; case 'm': - mergeonly = 1; + mergeonly = true; break; case 'o': + if (outfile != minus && strcmp (outfile, optarg) != 0) + error (SORT_FAILURE, 0, _("multiple output files specified")); outfile = optarg; break; case 's': - stable = 1; + stable = true; break; case 'S': @@ -2393,15 +2461,28 @@ main (int argc, char **argv) break; case 't': - tab = optarg[0]; - if (tab && optarg[1]) - { - /* Provoke with `sort -txx'. Complain about - "multi-character tab" instead of "multibyte tab", so - that the diagnostic's wording does not need to be - changed once multibyte characters are supported. */ - error (SORT_FAILURE, 0, _("multi-character tab `%s'"), optarg); - } + { + int newtab = optarg[0]; + if (! newtab) + error (SORT_FAILURE, 0, _("empty tab")); + if (optarg[1]) + { + if (strcmp (optarg, "\\0") == 0) + newtab = '\0'; + else + { + /* Provoke with `sort -txx'. Complain about + "multi-character tab" instead of "multibyte tab", so + that the diagnostic's wording does not need to be + changed once multibyte characters are supported. */ + error (SORT_FAILURE, 0, _("multi-character tab `%s'"), + optarg); + } + } + if (tab != TAB_DEFAULT && tab != newtab) + error (SORT_FAILURE, 0, _("incompatible tabs")); + tab = newtab; + } break; case 'T': @@ -2409,7 +2490,7 @@ main (int argc, char **argv) break; case 'u': - unique = 1; + unique = true; break; case 'y': @@ -2433,9 +2514,10 @@ main (int argc, char **argv) /* Inheritance of global options to individual keys. */ for (key = keylist; key; key = key->next) - if (!key->ignore && !key->translate && !key->skipsblanks && !key->reverse - && !key->skipeblanks && !key->month && !key->numeric - && !key->general_numeric) + if (! (key->ignore || key->translate + || (key->skipsblanks | key->reverse + | key->skipeblanks | key->month | key->numeric + | key->general_numeric))) { key->ignore = gkey.ignore; key->translate = gkey.translate; @@ -2447,9 +2529,9 @@ main (int argc, char **argv) key->reverse = gkey.reverse; } - if (!keylist && (gkey.ignore || gkey.translate || gkey.skipsblanks - || gkey.skipeblanks || gkey.month || gkey.numeric - || gkey.general_numeric)) + if (!keylist && (gkey.ignore || gkey.translate + || (gkey.skipsblanks | gkey.skipeblanks | gkey.month + | gkey.numeric | gkey.general_numeric))) insertkey (&gkey); reverse = gkey.reverse; @@ -2473,7 +2555,7 @@ main (int argc, char **argv) /* POSIX requires that sort return 1 IFF invoked with -c and the input is not properly sorted. */ - exit (check (files, nfiles) == 0 ? EXIT_SUCCESS : SORT_OUT_OF_ORDER); + exit (check (files[0]) ? EXIT_SUCCESS : SORT_OUT_OF_ORDER); } if (mergeonly) |