diff options
author | Nate Williams <nate@FreeBSD.org> | 1996-02-12 06:39:49 +0000 |
---|---|---|
committer | Nate Williams <nate@FreeBSD.org> | 1996-02-12 06:39:49 +0000 |
commit | cbf3709a42260826fd383a9b22337ef07467f794 (patch) | |
tree | 4f807a6a1425be9c97846490b4fbd1609de41921 /gnu | |
parent | 4525250aaa310a3e7bf59aed94fd168dd1e9e8f1 (diff) | |
download | src-test2-cbf3709a42260826fd383a9b22337ef07467f794.tar.gz src-test2-cbf3709a42260826fd383a9b22337ef07467f794.zip |
Notes
Diffstat (limited to 'gnu')
-rw-r--r-- | gnu/usr.bin/sort/config.h | 123 | ||||
-rw-r--r-- | gnu/usr.bin/sort/error.c | 130 | ||||
-rw-r--r-- | gnu/usr.bin/sort/error.h | 44 | ||||
-rw-r--r-- | gnu/usr.bin/sort/getopt.c | 770 | ||||
-rw-r--r-- | gnu/usr.bin/sort/getopt.h | 129 | ||||
-rw-r--r-- | gnu/usr.bin/sort/getopt1.c | 180 | ||||
-rw-r--r-- | gnu/usr.bin/sort/long-options.c | 79 | ||||
-rw-r--r-- | gnu/usr.bin/sort/long-options.h | 10 | ||||
-rw-r--r-- | gnu/usr.bin/sort/sort.1 | 231 | ||||
-rw-r--r-- | gnu/usr.bin/sort/sort.c | 2078 | ||||
-rw-r--r-- | gnu/usr.bin/sort/system.h | 205 | ||||
-rw-r--r-- | gnu/usr.bin/sort/version.c | 3 | ||||
-rw-r--r-- | gnu/usr.bin/sort/version.h | 1 | ||||
-rw-r--r-- | gnu/usr.bin/sort/xstrtod.c | 48 | ||||
-rw-r--r-- | gnu/usr.bin/sort/xstrtod.h | 15 |
15 files changed, 4046 insertions, 0 deletions
diff --git a/gnu/usr.bin/sort/config.h b/gnu/usr.bin/sort/config.h new file mode 100644 index 000000000000..27f201bb570e --- /dev/null +++ b/gnu/usr.bin/sort/config.h @@ -0,0 +1,123 @@ +/* config.h. Generated automatically by configure. */ +/* config.h.in. Generated automatically from configure.in by autoheader. */ + +/* Define if on AIX 3. + System headers sometimes define this. + We just want to avoid a redefinition error message. */ +#ifndef _ALL_SOURCE +/* #undef _ALL_SOURCE */ +#endif + +/* Define if using alloca.c. */ +/* #undef C_ALLOCA */ + +/* Define to empty if the keyword does not work. */ +/* #undef const */ + +/* Define to one of _getb67, GETB67, getb67 for Cray-2 and Cray-YMP systems. + This function is required for alloca.c support on those systems. */ +/* #undef CRAY_STACKSEG_END */ + +/* Define if you have alloca, as a function or macro. */ +#define HAVE_ALLOCA 1 + +/* Define if you have <alloca.h> and it should be used (not on Ultrix). */ +/* #undef HAVE_ALLOCA_H */ + +/* Define if you don't have vprintf but do have _doprnt. */ +/* #undef HAVE_DOPRNT */ + +/* Define if the `long double' type works. */ +#define HAVE_LONG_DOUBLE 1 + +/* Define if your struct stat has st_blksize. */ +#define HAVE_ST_BLKSIZE 1 + +/* Define if you have the vprintf function. */ +#define HAVE_VPRINTF 1 + +/* Define as __inline if that's what the C compiler calls it. */ +/* #undef inline */ + +/* Define if on MINIX. */ +/* #undef _MINIX */ + +/* Define to `long' if <sys/types.h> doesn't define. */ +/* #undef off_t */ + +/* Define if the system does not provide POSIX.1 features except + with this defined. */ +/* #undef _POSIX_1_SOURCE */ + +/* Define if you need to in order for stat and other things to work. */ +/* #undef _POSIX_SOURCE */ + +/* Define as the return type of signal handlers (int or void). */ +#define RETSIGTYPE void + +/* Define to `unsigned' if <sys/types.h> doesn't define. 
*/ +/* #undef size_t */ + +/* If using the C implementation of alloca, define if you know the + direction of stack growth for your system; otherwise it will be + automatically deduced at run-time. + STACK_DIRECTION > 0 => grows toward higher addresses + STACK_DIRECTION < 0 => grows toward lower addresses + STACK_DIRECTION = 0 => direction of growth unknown + */ +/* #undef STACK_DIRECTION */ + +/* Define if the `S_IS*' macros in <sys/stat.h> do not work properly. */ +/* #undef STAT_MACROS_BROKEN */ + +/* Define if you have the ANSI C header files. */ +#define STDC_HEADERS 1 + +/* Define if your processor stores words with the most significant + byte first (like Motorola and SPARC, unlike Intel and VAX). */ +/* #undef WORDS_BIGENDIAN */ + +/* Define to 1 if ANSI function prototypes are usable. */ +#define PROTOTYPES 1 + +/* Define to 1 if GNU regex should be used instead of GNU rx. */ +/* #undef WITH_REGEX */ + +/* Define if you have the isascii function. */ +#define HAVE_ISASCII 1 + +/* Define if you have the strchr function. */ +#define HAVE_STRCHR 1 + +/* Define if you have the strerror function. */ +#define HAVE_STRERROR 1 + +/* Define if you have the strrchr function. */ +#define HAVE_STRRCHR 1 + +/* Define if you have the <dirent.h> header file. */ +#define HAVE_DIRENT_H 1 + +/* Define if you have the <fcntl.h> header file. */ +#define HAVE_FCNTL_H 1 + +/* Define if you have the <limits.h> header file. */ +#define HAVE_LIMITS_H 1 + +/* Define if you have the <memory.h> header file. */ +#define HAVE_MEMORY_H 1 + +/* Define if you have the <ndir.h> header file. */ +/* #undef HAVE_NDIR_H */ + +/* Define if you have the <string.h> header file. */ +#define HAVE_STRING_H 1 + +/* Define if you have the <sys/dir.h> header file. */ +/* #undef HAVE_SYS_DIR_H */ + +/* Define if you have the <sys/ndir.h> header file. */ +/* #undef HAVE_SYS_NDIR_H */ + +/* Define if you have the <unistd.h> header file. 
*/ +#define HAVE_UNISTD_H 1 diff --git a/gnu/usr.bin/sort/error.c b/gnu/usr.bin/sort/error.c new file mode 100644 index 000000000000..a36198b6b6e8 --- /dev/null +++ b/gnu/usr.bin/sort/error.c @@ -0,0 +1,130 @@ +/* error.c -- error handler for noninteractive utilities + Copyright (C) 1990, 91, 92, 93, 94, 95 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* Written by David MacKenzie <djm@gnu.ai.mit.edu>. */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <stdio.h> + +#if HAVE_VPRINTF || HAVE_DOPRNT || _LIBC +# if __STDC__ +# include <stdarg.h> +# define VA_START(args, lastarg) va_start(args, lastarg) +# else +# include <varargs.h> +# define VA_START(args, lastarg) va_start(args) +# endif +#else +# define va_alist a1, a2, a3, a4, a5, a6, a7, a8 +# define va_dcl char *a1, *a2, *a3, *a4, *a5, *a6, *a7, *a8; +#endif + +#if STDC_HEADERS || _LIBC +# include <stdlib.h> +# include <string.h> +#else +void exit (); +#endif + +/* This variable is incremented each time `error' is called. */ +unsigned int error_message_count; + +/* If NULL, error will flush stdout, then print on stderr the program + name, a colon and a space. Otherwise, error will call this + function without parameters instead. 
*/ +void (*error_print_progname) () = NULL; + +#ifdef _LIBC +#define program_name program_invocation_name +#endif + +/* The calling program should define program_name and set it to the + name of the executing program. */ +extern char *program_name; + +#if HAVE_STRERROR || _LIBC +# ifndef strerror /* On some systems, strerror is a macro */ +char *strerror (); +# endif +#else +static char * +private_strerror (errnum) + int errnum; +{ + extern char *sys_errlist[]; + extern int sys_nerr; + + if (errnum > 0 && errnum <= sys_nerr) + return sys_errlist[errnum]; + return "Unknown system error"; +} +#define strerror private_strerror +#endif + +/* Print the program name and error message MESSAGE, which is a printf-style + format string with optional args. + If ERRNUM is nonzero, print its corresponding system error message. + Exit with status STATUS if it is nonzero. */ +/* VARARGS */ + +void +#if defined(VA_START) && __STDC__ +error (int status, int errnum, const char *message, ...) +#else +error (status, errnum, message, va_alist) + int status; + int errnum; + char *message; + va_dcl +#endif +{ +#ifdef VA_START + va_list args; +#endif + + if (error_print_progname) + (*error_print_progname) (); + else + { + fflush (stdout); + fprintf (stderr, "%s: ", program_name); + } + +#ifdef VA_START + VA_START (args, message); +# if HAVE_VPRINTF || _LIBC + vfprintf (stderr, message, args); +# else + _doprnt (message, args, stderr); +# endif + va_end (args); +#else + fprintf (stderr, message, a1, a2, a3, a4, a5, a6, a7, a8); +#endif + + ++error_message_count; + + if (errnum) + fprintf (stderr, ": %s", strerror (errnum)); + putc ('\n', stderr); + fflush (stderr); + if (status) + exit (status); +} diff --git a/gnu/usr.bin/sort/error.h b/gnu/usr.bin/sort/error.h new file mode 100644 index 000000000000..481c54d2e11b --- /dev/null +++ b/gnu/usr.bin/sort/error.h @@ -0,0 +1,44 @@ +/* error.h -- declaration for error-reporting function + Copyright (C) 1995 Free Software Foundation, Inc. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#ifndef _error_h_ +#define _error_h_ + +#ifndef __attribute__ +/* This feature is available in gcc versions 2.5 and later. */ +# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 5) || __STRICT_ANSI__ +# define __attribute__(Spec) /* empty */ +# endif +/* The __-protected variants of `format' and `printf' attributes + are accepted by gcc versions 2.6.4 (effectively 2.7) and later. */ +# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 7) +# define __format__ format +# define __printf__ printf +# endif +#endif + +#if __STDC__ +void error (int, int, const char *, ...) \ + __attribute__ ((__format__ (__printf__, 3, 4))); +#else +void error (); +#endif + +/* This variable is incremented each time `error' is called. */ +extern unsigned int error_message_count; + +#endif /* _error_h_ */ diff --git a/gnu/usr.bin/sort/getopt.c b/gnu/usr.bin/sort/getopt.c new file mode 100644 index 000000000000..8bcf5594571c --- /dev/null +++ b/gnu/usr.bin/sort/getopt.c @@ -0,0 +1,770 @@ +/* Getopt for GNU. + NOTE: getopt is now part of the C library, so if you don't know what + "Keep this file name-space clean" means, talk to roland@gnu.ai.mit.edu + before changing it! + + Copyright (C) 1987, 88, 89, 90, 91, 92, 93, 94, 95 + Free Software Foundation, Inc. 
+ + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) any + later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* This tells Alpha OSF/1 not to define a getopt prototype in <stdio.h>. + Ditto for AIX 3.2 and <stdlib.h>. */ +#ifndef _NO_PROTO +#define _NO_PROTO +#endif + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#if !defined (__STDC__) || !__STDC__ +/* This is a separate conditional since some stdc systems + reject `defined (const)'. */ +#ifndef const +#define const +#endif +#endif + +#include <stdio.h> + +/* Comment out all this code if we are using the GNU C Library, and are not + actually compiling the library itself. This code is part of the GNU C + Library, but also included in many other GNU distributions. Compiling + and linking in this code is a waste when using the GNU C library + (especially if it is a shared library). Rather than having every GNU + program understand `configure --with-gnu-libc' and omit the object files, + it is simpler to just do this in the source for each such file. */ + +#if defined (_LIBC) || !defined (__GNU_LIBRARY__) + + +/* This needs to come after some library #include + to get __GNU_LIBRARY__ defined. */ +#ifdef __GNU_LIBRARY__ +/* Don't include stdlib.h for non-GNU C libraries because some of them + contain conflicting prototypes for getopt. */ +#include <stdlib.h> +#endif /* GNU C library. 
*/ + +#ifndef _ +/* This is for other GNU distributions with internationalized messages. + When compiling libc, the _ macro is predefined. */ +#ifdef HAVE_LIBINTL_H +# include <libintl.h> +# define _(msgid) gettext (msgid) +#else +# define _(msgid) (msgid) +#endif +#endif + +/* This version of `getopt' appears to the caller like standard Unix `getopt' + but it behaves differently for the user, since it allows the user + to intersperse the options with the other arguments. + + As `getopt' works, it permutes the elements of ARGV so that, + when it is done, all the options precede everything else. Thus + all application programs are extended to handle flexible argument order. + + Setting the environment variable POSIXLY_CORRECT disables permutation. + Then the behavior is completely standard. + + GNU application programs can use a third alternative mode in which + they can distinguish the relative order of options and other arguments. */ + +#include "getopt.h" + +/* For communication from `getopt' to the caller. + When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +char *optarg = NULL; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns EOF, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +/* XXX 1003.2 says this must be 1 before any call. */ +int optind = 0; + +/* The next char to be scanned in the option-element + in which the last option character we returned was found. + This allows us to pick up the scan where we left off. 
+ + If this is zero, or a null string, it means resume the scan + by advancing to the next ARGV-element. */ + +static char *nextchar; + +/* Callers store zero here to inhibit the error message + for unrecognized options. */ + +int opterr = 1; + +/* Set to an option character which was unrecognized. + This must be initialized on some systems to avoid linking in the + system's own getopt implementation. */ + +int optopt = '?'; + +/* Describe how to deal with options that follow non-option ARGV-elements. + + If the caller did not specify anything, + the default is REQUIRE_ORDER if the environment variable + POSIXLY_CORRECT is defined, PERMUTE otherwise. + + REQUIRE_ORDER means don't recognize them as options; + stop option processing when the first non-option is seen. + This is what Unix does. + This mode of operation is selected by either setting the environment + variable POSIXLY_CORRECT, or using `+' as the first character + of the list of option characters. + + PERMUTE is the default. We permute the contents of ARGV as we scan, + so that eventually all the non-options are at the end. This allows options + to be given in any order, even with programs that were not written to + expect this. + + RETURN_IN_ORDER is an option available to programs that were written + to expect options and other ARGV-elements in any order and that care about + the ordering of the two. We describe each non-option ARGV-element + as if it were the argument of an option with character code 1. + Using `-' as the first character of the list of option characters + selects this mode of operation. + + The special argument `--' forces an end of option-scanning regardless + of the value of `ordering'. In the case of RETURN_IN_ORDER, only + `--' can cause `getopt' to return EOF with `optind' != ARGC. */ + +static enum +{ + REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER +} ordering; + +/* Value of POSIXLY_CORRECT environment variable. 
/* Cached value of the POSIXLY_CORRECT environment variable (NULL when
   the variable is not set); it is filled in when getopt initializes
   itself.  */
static char *posixly_correct;

#ifdef	__GNU_LIBRARY__
/* We want to avoid inclusion of string.h with non-GNU libraries
   because there are many ways it can cause trouble.
   On some systems, it contains special magic macros that don't work
   in GCC.  */
#include <string.h>
#define	my_index	strchr
#else

/* Avoid depending on library functions or files
   whose names are inconsistent.  */

char *getenv ();

/* Return a pointer to the first occurrence of the character CHR in the
   string STR, or a null pointer if CHR does not occur before the
   terminating null character.  The terminator itself is never matched.  */

static char *
my_index (str, chr)
     const char *str;
     int chr;
{
  const char *scan;

  for (scan = str; *scan; scan++)
    if (*scan == chr)
      return (char *) scan;

  return 0;
}

/* If using GCC, we can safely declare strlen this way.
   If not using GCC, it is ok not to declare it.  */
#ifdef __GNUC__
/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h.
   That was relevant to code that was here before.  */
#if !defined (__STDC__) || !__STDC__
/* gcc with -traditional declares the built-in strlen to return int,
   and has done so at least since version 2.4.5. -- rms.  */
extern int strlen (const char *);
#endif /* not __STDC__ */
#endif /* __GNUC__ */

#endif /* not __GNU_LIBRARY__ */

/* Handle permutation of arguments.  */

/* Describe the part of ARGV that contains non-options that have
   been skipped.  `first_nonopt' is the index in ARGV of the first of them;
   `last_nonopt' is the index after the last of them.  */

static int first_nonopt;
static int last_nonopt;

/* Exchange two adjacent subsequences of ARGV.
   One subsequence is elements [first_nonopt,last_nonopt)
   which contains all the non-options that have been skipped so far.
   The other is elements [last_nonopt,optind), which contains all
   the options processed since those non-options were skipped.

   `first_nonopt' and `last_nonopt' are relocated so that they describe
   the new indices of the non-options in ARGV after they are moved.  */
*/ + +static void +exchange (argv) + char **argv; +{ + int bottom = first_nonopt; + int middle = last_nonopt; + int top = optind; + char *tem; + + /* Exchange the shorter segment with the far end of the longer segment. + That puts the shorter segment into the right place. + It leaves the longer segment in the right place overall, + but it consists of two parts that need to be swapped next. */ + + while (top > middle && middle > bottom) + { + if (top - middle > middle - bottom) + { + /* Bottom segment is the short one. */ + int len = middle - bottom; + register int i; + + /* Swap it with the top part of the top segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[top - (middle - bottom) + i]; + argv[top - (middle - bottom) + i] = tem; + } + /* Exclude the moved bottom segment from further swapping. */ + top -= len; + } + else + { + /* Top segment is the short one. */ + int len = top - middle; + register int i; + + /* Swap it with the bottom part of the bottom segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[middle + i]; + argv[middle + i] = tem; + } + /* Exclude the moved top segment from further swapping. */ + bottom += len; + } + } + + /* Update records for the slots the non-options now occupy. */ + + first_nonopt += (optind - last_nonopt); + last_nonopt = optind; +} + +/* Initialize the internal data when the first call is made. */ + +static const char * +_getopt_initialize (optstring) + const char *optstring; +{ + /* Start processing options with ARGV-element 1 (since ARGV-element 0 + is the program name); the sequence of previously skipped + non-option ARGV-elements is empty. */ + + first_nonopt = last_nonopt = optind = 1; + + nextchar = NULL; + + posixly_correct = getenv ("POSIXLY_CORRECT"); + + /* Determine how to handle the ordering of options and nonoptions. 
*/ + + if (optstring[0] == '-') + { + ordering = RETURN_IN_ORDER; + ++optstring; + } + else if (optstring[0] == '+') + { + ordering = REQUIRE_ORDER; + ++optstring; + } + else if (posixly_correct != NULL) + ordering = REQUIRE_ORDER; + else + ordering = PERMUTE; + + return optstring; +} + +/* Scan elements of ARGV (whose length is ARGC) for option characters + given in OPTSTRING. + + If an element of ARGV starts with '-', and is not exactly "-" or "--", + then it is an option element. The characters of this element + (aside from the initial '-') are option characters. If `getopt' + is called repeatedly, it returns successively each of the option characters + from each of the option elements. + + If `getopt' finds another option character, it returns that character, + updating `optind' and `nextchar' so that the next call to `getopt' can + resume the scan with the following option character or ARGV-element. + + If there are no more option characters, `getopt' returns `EOF'. + Then `optind' is the index in ARGV of the first ARGV-element + that is not an option. (The ARGV-elements have been permuted + so that those that are not options now come last.) + + OPTSTRING is a string containing the legitimate option characters. + If an option character is seen that is not listed in OPTSTRING, + return '?' after printing an error message. If you set `opterr' to + zero, the error message is suppressed but we still return '?'. + + If a char in OPTSTRING is followed by a colon, that means it wants an arg, + so the following text in the same ARGV-element, or the text of the following + ARGV-element, is returned in `optarg'. Two colons mean an option that + wants an optional arg; if there is text in the current ARGV-element, + it is returned in `optarg', otherwise `optarg' is set to zero. + + If OPTSTRING starts with `-' or `+', it requests different methods of + handling the non-option ARGV-elements. + See the comments about RETURN_IN_ORDER and REQUIRE_ORDER, above. 
+ + Long-named options begin with `--' instead of `-'. + Their names may be abbreviated as long as the abbreviation is unique + or is an exact match for some defined option. If they have an + argument, it follows the option name in the same ARGV-element, separated + from the option name by a `=', or else the in next ARGV-element. + When `getopt' finds a long-named option, it returns 0 if that option's + `flag' field is nonzero, the value of the option's `val' field + if the `flag' field is zero. + + The elements of ARGV aren't really const, because we permute them. + But we pretend they're const in the prototype to be compatible + with other systems. + + LONGOPTS is a vector of `struct option' terminated by an + element containing a name which is zero. + + LONGIND returns the index in LONGOPT of the long-named option found. + It is only valid when a long-named option has been found by the most + recent call. + + If LONG_ONLY is nonzero, '-' as well as '--' can introduce + long-named options. */ + +int +_getopt_internal (argc, argv, optstring, longopts, longind, long_only) + int argc; + char *const *argv; + const char *optstring; + const struct option *longopts; + int *longind; + int long_only; +{ + optarg = NULL; + + if (optind == 0) + { + optstring = _getopt_initialize (optstring); + optind = 1; /* Don't scan ARGV[0], the program name. */ + } + + if (nextchar == NULL || *nextchar == '\0') + { + /* Advance to the next ARGV-element. */ + + if (ordering == PERMUTE) + { + /* If we have just processed some options following some non-options, + exchange them so that the options come first. */ + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (last_nonopt != optind) + first_nonopt = optind; + + /* Skip any additional non-options + and extend the range of non-options previously skipped. 
*/ + + while (optind < argc + && (argv[optind][0] != '-' || argv[optind][1] == '\0')) + optind++; + last_nonopt = optind; + } + + /* The special ARGV-element `--' means premature end of options. + Skip it like a null option, + then exchange with previous non-options as if it were an option, + then skip everything else like a non-option. */ + + if (optind != argc && !strcmp (argv[optind], "--")) + { + optind++; + + if (first_nonopt != last_nonopt && last_nonopt != optind) + exchange ((char **) argv); + else if (first_nonopt == last_nonopt) + first_nonopt = optind; + last_nonopt = argc; + + optind = argc; + } + + /* If we have done all the ARGV-elements, stop the scan + and back over any non-options that we skipped and permuted. */ + + if (optind == argc) + { + /* Set the next-arg-index to point at the non-options + that we previously skipped, so the caller will digest them. */ + if (first_nonopt != last_nonopt) + optind = first_nonopt; + return EOF; + } + + /* If we have come to a non-option and did not permute it, + either stop the scan or describe it to the caller and pass it by. */ + + if ((argv[optind][0] != '-' || argv[optind][1] == '\0')) + { + if (ordering == REQUIRE_ORDER) + return EOF; + optarg = argv[optind++]; + return 1; + } + + /* We have found another option-ARGV-element. + Skip the initial punctuation. */ + + nextchar = (argv[optind] + 1 + + (longopts != NULL && argv[optind][1] == '-')); + } + + /* Decode the current option-ARGV-element. */ + + /* Check whether the ARGV-element is a long option. + + If long_only and the ARGV-element has the form "-f", where f is + a valid short option, don't consider it an abbreviated form of + a long option that starts with f. Otherwise there would be no + way to give the -f short option. + + On the other hand, if there's a long option "fubar" and + the ARGV-element is "-fu", do consider that an abbreviation of + the long option, just like "--fu", and not "-f" with arg "u". 
+ + This distinction seems to be the most useful approach. */ + + if (longopts != NULL + && (argv[optind][1] == '-' + || (long_only && (argv[optind][2] + || !my_index (optstring, argv[optind][1]))))) + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound; + int option_index; + + for (nameend = nextchar; *nameend && *nameend != '='; nameend++) + /* Do nothing. */ ; + +#ifdef lint + indfound = 0; /* Avoid spurious compiler warning. */ +#endif + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, nextchar, nameend - nextchar)) + { + if (nameend - nextchar == strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else + /* Second or later nonexact match found. */ + ambig = 1; + } + + if (ambig && !exact) + { + if (opterr) + fprintf (stderr, _("%s: option `%s' is ambiguous\n"), + argv[0], argv[optind]); + nextchar += strlen (nextchar); + optind++; + return '?'; + } + + if (pfound != NULL) + { + option_index = indfound; + optind++; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. 
*/ + if (pfound->has_arg) + optarg = nameend + 1; + else + { + if (opterr) + if (argv[optind - 1][1] == '-') + /* --option */ + fprintf (stderr, + _("%s: option `--%s' doesn't allow an argument\n"), + argv[0], pfound->name); + else + /* +option or -option */ + fprintf (stderr, + _("%s: option `%c%s' doesn't allow an argument\n"), + argv[0], argv[optind - 1][0], pfound->name); + + nextchar += strlen (nextchar); + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (optind < argc) + optarg = argv[optind++]; + else + { + if (opterr) + fprintf (stderr, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); + nextchar += strlen (nextchar); + return optstring[0] == ':' ? ':' : '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + + /* Can't find it as a long option. If this is not getopt_long_only, + or the option starts with '--' or is not a valid short + option, then it's an error. + Otherwise interpret it as a short option. */ + if (!long_only || argv[optind][1] == '-' + || my_index (optstring, *nextchar) == NULL) + { + if (opterr) + { + if (argv[optind][1] == '-') + /* --option */ + fprintf (stderr, _("%s: unrecognized option `--%s'\n"), + argv[0], nextchar); + else + /* +option or -option */ + fprintf (stderr, _("%s: unrecognized option `%c%s'\n"), + argv[0], argv[optind][0], nextchar); + } + nextchar = (char *) ""; + optind++; + return '?'; + } + } + + /* Look at and handle the next short option-character. */ + + { + char c = *nextchar++; + char *temp = my_index (optstring, c); + + /* Increment `optind' when we start to process its last character. */ + if (*nextchar == '\0') + ++optind; + + if (temp == NULL || c == ':') + { + if (opterr) + { + if (posixly_correct) + /* 1003.2 specifies the format of this message. 
*/ + fprintf (stderr, _("%s: illegal option -- %c\n"), + argv[0], c); + else + fprintf (stderr, _("%s: invalid option -- %c\n"), + argv[0], c); + } + optopt = c; + return '?'; + } + if (temp[1] == ':') + { + if (temp[2] == ':') + { + /* This is an option that accepts an argument optionally. */ + if (*nextchar != '\0') + { + optarg = nextchar; + optind++; + } + else + optarg = NULL; + nextchar = NULL; + } + else + { + /* This is an option that requires an argument. */ + if (*nextchar != '\0') + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (opterr) + { + /* 1003.2 specifies the format of this message. */ + fprintf (stderr, + _("%s: option requires an argument -- %c\n"), + argv[0], c); + } + optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. */ + optarg = argv[optind++]; + nextchar = NULL; + } + } + return c; + } +} + +int +getopt (argc, argv, optstring) + int argc; + char *const *argv; + const char *optstring; +{ + return _getopt_internal (argc, argv, optstring, + (const struct option *) 0, + (int *) 0, + 0); +} + +#endif /* _LIBC or not __GNU_LIBRARY__. */ + +#ifdef TEST + +/* Compile with -DTEST to make an executable for use in testing + the above definition of `getopt'. */ + +int +main (argc, argv) + int argc; + char **argv; +{ + int c; + int digit_optind = 0; + + while (1) + { + int this_option_optind = optind ? 
optind : 1; + + c = getopt (argc, argv, "abc:d:0123456789"); + if (c == EOF) + break; + + switch (c) + { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (digit_optind != 0 && digit_optind != this_option_optind) + printf ("digits occur in two different argv-elements.\n"); + digit_optind = this_option_optind; + printf ("option %c\n", c); + break; + + case 'a': + printf ("option a\n"); + break; + + case 'b': + printf ("option b\n"); + break; + + case 'c': + printf ("option c with value `%s'\n", optarg); + break; + + case '?': + break; + + default: + printf ("?? getopt returned character code 0%o ??\n", c); + } + } + + if (optind < argc) + { + printf ("non-option ARGV-elements: "); + while (optind < argc) + printf ("%s ", argv[optind++]); + printf ("\n"); + } + + exit (0); +} + +#endif /* TEST */ diff --git a/gnu/usr.bin/sort/getopt.h b/gnu/usr.bin/sort/getopt.h new file mode 100644 index 000000000000..4ac33b71824d --- /dev/null +++ b/gnu/usr.bin/sort/getopt.h @@ -0,0 +1,129 @@ +/* Declarations for getopt. + Copyright (C) 1989, 90, 91, 92, 93, 94 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) any + later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#ifndef _GETOPT_H +#define _GETOPT_H 1 + +#ifdef __cplusplus +extern "C" { +#endif + +/* For communication from `getopt' to the caller. 
+ When `getopt' finds an option that takes an argument, + the argument value is returned here. + Also, when `ordering' is RETURN_IN_ORDER, + each non-option ARGV-element is returned here. */ + +extern char *optarg; + +/* Index in ARGV of the next element to be scanned. + This is used for communication to and from the caller + and for communication between successive calls to `getopt'. + + On entry to `getopt', zero means this is the first call; initialize. + + When `getopt' returns EOF, this is the index of the first of the + non-option elements that the caller should itself scan. + + Otherwise, `optind' communicates from one call to the next + how much of ARGV has been scanned so far. */ + +extern int optind; + +/* Callers store zero here to inhibit the error message `getopt' prints + for unrecognized options. */ + +extern int opterr; + +/* Set to an option character which was unrecognized. */ + +extern int optopt; + +/* Describe the long-named options requested by the application. + The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector + of `struct option' terminated by an element containing a name which is + zero. + + The field `has_arg' is: + no_argument (or 0) if the option does not take an argument, + required_argument (or 1) if the option requires an argument, + optional_argument (or 2) if the option takes an optional argument. + + If the field `flag' is not NULL, it points to a variable that is set + to the value given in the field `val' when the option is found, but + left unchanged if the option is not found. + + To have a long-named option do something other than set an `int' to + a compiled-in constant, such as set a value from `optarg', set the + option's `flag' field to zero and its `val' field to a nonzero + value (the equivalent single-letter option character, if there is + one). For long options that have a zero `flag' field, `getopt' + returns the contents of the `val' field. 
*/ + +struct option +{ +#if defined (__STDC__) && __STDC__ + const char *name; +#else + char *name; +#endif + /* has_arg can't be an enum because some compilers complain about + type mismatches in all the code that assumes it is an int. */ + int has_arg; + int *flag; + int val; +}; + +/* Names for the values of the `has_arg' field of `struct option'. */ + +#define no_argument 0 +#define required_argument 1 +#define optional_argument 2 + +#if defined (__STDC__) && __STDC__ +#ifdef __GNU_LIBRARY__ +/* Many other libraries have conflicting prototypes for getopt, with + differences in the consts, in stdlib.h. To avoid compilation + errors, only prototype getopt for the GNU C library. */ +extern int getopt (int argc, char *const *argv, const char *shortopts); +#else /* not __GNU_LIBRARY__ */ +extern int getopt (); +#endif /* __GNU_LIBRARY__ */ +extern int getopt_long (int argc, char *const *argv, const char *shortopts, + const struct option *longopts, int *longind); +extern int getopt_long_only (int argc, char *const *argv, + const char *shortopts, + const struct option *longopts, int *longind); + +/* Internal only. Users should not call this directly. */ +extern int _getopt_internal (int argc, char *const *argv, + const char *shortopts, + const struct option *longopts, int *longind, + int long_only); +#else /* not __STDC__ */ +extern int getopt (); +extern int getopt_long (); +extern int getopt_long_only (); + +extern int _getopt_internal (); +#endif /* __STDC__ */ + +#ifdef __cplusplus +} +#endif + +#endif /* _GETOPT_H */ diff --git a/gnu/usr.bin/sort/getopt1.c b/gnu/usr.bin/sort/getopt1.c new file mode 100644 index 000000000000..4580211cfac3 --- /dev/null +++ b/gnu/usr.bin/sort/getopt1.c @@ -0,0 +1,180 @@ +/* getopt_long and getopt_long_only entry points for GNU getopt. + Copyright (C) 1987, 88, 89, 90, 91, 92, 1993, 1994 + Free Software Foundation, Inc. 
+ + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) any + later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include "getopt.h" + +#if !defined (__STDC__) || !__STDC__ +/* This is a separate conditional since some stdc systems + reject `defined (const)'. */ +#ifndef const +#define const +#endif +#endif + +#include <stdio.h> + +/* Comment out all this code if we are using the GNU C Library, and are not + actually compiling the library itself. This code is part of the GNU C + Library, but also included in many other GNU distributions. Compiling + and linking in this code is a waste when using the GNU C library + (especially if it is a shared library). Rather than having every GNU + program understand `configure --with-gnu-libc' and omit the object files, + it is simpler to just do this in the source for each such file. */ + +#if defined (_LIBC) || !defined (__GNU_LIBRARY__) + + +/* This needs to come after some library #include + to get __GNU_LIBRARY__ defined. 
*/ +#ifdef __GNU_LIBRARY__ +#include <stdlib.h> +#else +char *getenv (); +#endif + +#ifndef NULL +#define NULL 0 +#endif + +int +getopt_long (argc, argv, options, long_options, opt_index) + int argc; + char *const *argv; + const char *options; + const struct option *long_options; + int *opt_index; +{ + return _getopt_internal (argc, argv, options, long_options, opt_index, 0); +} + +/* Like getopt_long, but '-' as well as '--' can indicate a long option. + If an option that starts with '-' (not '--') doesn't match a long option, + but does match a short option, it is parsed as a short option + instead. */ + +int +getopt_long_only (argc, argv, options, long_options, opt_index) + int argc; + char *const *argv; + const char *options; + const struct option *long_options; + int *opt_index; +{ + return _getopt_internal (argc, argv, options, long_options, opt_index, 1); +} + + +#endif /* _LIBC or not __GNU_LIBRARY__. */ + +#ifdef TEST + +#include <stdio.h> + +int +main (argc, argv) + int argc; + char **argv; +{ + int c; + int digit_optind = 0; + + while (1) + { + int this_option_optind = optind ? 
optind : 1; + int option_index = 0; + static struct option long_options[] = + { + {"add", 1, 0, 0}, + {"append", 0, 0, 0}, + {"delete", 1, 0, 0}, + {"verbose", 0, 0, 0}, + {"create", 0, 0, 0}, + {"file", 1, 0, 0}, + {0, 0, 0, 0} + }; + + c = getopt_long (argc, argv, "abc:d:0123456789", + long_options, &option_index); + if (c == EOF) + break; + + switch (c) + { + case 0: + printf ("option %s", long_options[option_index].name); + if (optarg) + printf (" with arg %s", optarg); + printf ("\n"); + break; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (digit_optind != 0 && digit_optind != this_option_optind) + printf ("digits occur in two different argv-elements.\n"); + digit_optind = this_option_optind; + printf ("option %c\n", c); + break; + + case 'a': + printf ("option a\n"); + break; + + case 'b': + printf ("option b\n"); + break; + + case 'c': + printf ("option c with value `%s'\n", optarg); + break; + + case 'd': + printf ("option d with value `%s'\n", optarg); + break; + + case '?': + break; + + default: + printf ("?? getopt returned character code 0%o ??\n", c); + } + } + + if (optind < argc) + { + printf ("non-option ARGV-elements: "); + while (optind < argc) + printf ("%s ", argv[optind++]); + printf ("\n"); + } + + exit (0); +} + +#endif /* TEST */ diff --git a/gnu/usr.bin/sort/long-options.c b/gnu/usr.bin/sort/long-options.c new file mode 100644 index 000000000000..dd7a8ca14822 --- /dev/null +++ b/gnu/usr.bin/sort/long-options.c @@ -0,0 +1,79 @@ +/* Utility to accept --help and --version options as unobtrusively as possible. + Copyright (C) 1993, 1994 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* Jim Meyering (meyering@comco.com) */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <stdio.h> +#include <getopt.h> +#include "long-options.h" + +static struct option const long_options[] = +{ + {"help", no_argument, 0, 'h'}, + {"version", no_argument, 0, 'v'}, + {0, 0, 0, 0} +}; + +/* Process long options --help and --version, but only if argc == 2. + Be careful not to gobble up `--'. */ + +void +parse_long_options (argc, argv, command_name, version_string, usage) + int argc; + char **argv; + const char *command_name; + const char *version_string; + void (*usage)(); +{ + int c; + int saved_opterr; + int saved_optind; + + saved_opterr = opterr; + saved_optind = optind; + + /* Don't print an error message for unrecognized options. */ + opterr = 0; + + if (argc == 2 + && (c = getopt_long (argc, argv, "+", long_options, (int *) 0)) != EOF) + { + switch (c) + { + case 'h': + (*usage) (0); + + case 'v': + printf ("%s - %s\n", command_name, version_string); + exit (0); + + default: + /* Don't process any other long-named options. */ + break; + } + } + + /* Restore previous value. */ + opterr = saved_opterr; + + /* Restore optind in case it has advanced past a leading `--'. 
*/ + optind = saved_optind; +} diff --git a/gnu/usr.bin/sort/long-options.h b/gnu/usr.bin/sort/long-options.h new file mode 100644 index 000000000000..986a52d0d47c --- /dev/null +++ b/gnu/usr.bin/sort/long-options.h @@ -0,0 +1,10 @@ +#undef __P +#if defined (__STDC__) && __STDC__ +#define __P(args) args +#else +#define __P(args) () +#endif + +void + parse_long_options __P ((int _argc, char **_argv, const char *_command_name, + const char *_version_string, void (*_usage) (int))); diff --git a/gnu/usr.bin/sort/sort.1 b/gnu/usr.bin/sort/sort.1 new file mode 100644 index 000000000000..e9f4b1ed290c --- /dev/null +++ b/gnu/usr.bin/sort/sort.1 @@ -0,0 +1,231 @@ +.TH SORT 1 "GNU Text Utilities" "FSF" \" -*- nroff -*- +.SH NAME +sort \- sort lines of text files +.SH SYNOPSIS +.B sort +[\-cmus] [\-t separator] [\-o output-file] [\-T tempdir] [\-bdfiMnr] +[+POS1 [\-POS2]] [\-k POS1[,POS2]] [file...] +.br +.B sort +{\-\-help,\-\-version} +.SH DESCRIPTION +This manual page +documents the GNU version of +.BR sort . +.B sort +sorts, merges, or compares all the lines from the given files, or the standard +input if no files are given. A file name of `-' means standard input. +By default, +.B sort +writes the results to the standard output. +.PP +.B sort +has three modes of operation: sort (the default), merge, and check for +sortedness. The following options change the operation mode: +.TP +.I \-c +Check whether the given files are already sorted: if they are not all +sorted, print an error message and exit with a status of 1. +.TP +.I \-m +Merge the given files by sorting them as a group. Each input file +should already be individually sorted. It always works to sort +instead of merge; merging is provided because it is faster, in the +case where it works. 
+.PP +A pair of lines is compared as follows: +if any key fields have been specified, +.B sort +compares each pair of fields, in the order specified on the command +line, according to the associated ordering options, until a difference +is found or no fields are left. +.PP +If any of the global options +.I Mbdfinr +are given but no key fields are +specified, +.B sort +compares the entire lines according to the global options. +.PP +Finally, as a last resort when all keys compare equal +(or if no ordering options were specified at all), +.B sort +compares the lines byte by byte in machine collating sequence. +The last resort comparison honors the +.I -r +global option. +The +.I \-s +(stable) option disables this last-resort comparison so that +lines in which all fields compare equal are left in their original +relative order. If no fields or global options are specified, +.I \-s +has no effect. +.PP +GNU +.B sort +has no limits on input line length or restrictions on bytes allowed +within lines. In addition, if the final byte of an input file is not +a newline, GNU +.B sort +silently supplies one. +.PP +If the environment variable +.B TMPDIR +is set, +.B sort +uses it as the directory in which to put temporary files instead of +the default, /tmp. The +.I "\-T tempdir" +option is another way to select the directory for temporary files; it +overrides the environment variable. +.PP +The following options affect the ordering of output lines. They may +be specified globally or as part of a specific key field. If no key +fields are specified, global options apply to comparison of entire +lines; otherwise the global options are inherited by key fields that +do not specify any special options of their own. +.TP +.I \-b +Ignore leading blanks when finding sort keys in each line. +.TP +.I \-d +Sort in `phone directory' order: ignore all characters except letters, +digits and blanks when sorting. 
+.TP +.I \-f +Fold lower case characters into the equivalent upper case characters +when sorting so that, for example, `b' is sorted the same way `B' is. +.TP +.I \-i +Ignore characters outside the ASCII range 040-0176 octal (inclusive) +when sorting. +.TP +.I \-M +An initial string, consisting of any amount of white space, followed +by three letters abbreviating a month name, is folded to UPPER case +and compared in the order `JAN' < `FEB' < ... < `DEC.' Invalid names +compare low to valid names. +.TP +.I \-n +Compare according to arithmetic value an initial numeric string +consisting of optional white space, an optional \- sign, and zero or +more digits, optionally followed by a decimal point and zero or more +digits. +.TP +.I \-r +Reverse the result of comparison, so that lines with greater key +values appear earlier in the output instead of later. +.PP +Other options are: +.TP +.I "\-o output-file" +Write output to +.I output-file +instead of to the standard output. If +.I output-file +is one of the input files, +.B sort +copies it to a temporary file before sorting and writing the output to +.IR output-file . +.TP +.I "\-t separator" +Use character +.I separator +as the field separator when finding the sort keys in each line. By +default, fields are separated by the empty string between a +non-whitespace character and a whitespace character. That is to say, +given the input line ` foo bar', +.B sort +breaks it into fields ` foo' and ` bar'. The field separator is not +considered to be part of either the field preceding or the field +following it. +.TP +.I \-u +For the default case or the +.I \-m +option, only output the first of a sequence of lines that compare +equal. For the +.I \-c +option, check that no pair of consecutive lines compares equal. +.TP +.I "+POS1 [\-POS2]" +Specify a field within each line to use as a sorting key. 
The field +consists of the portion of the line starting at POS1 and up to (but +not including) POS2 (or to the end of the line if POS2 is not given). +The fields and character positions are numbered starting with 0. +.TP +.I "\-k POS1[,POS2]" +An alternate syntax for specifying sorting keys. +The fields and character positions are numbered starting with 1. +.PP +A position has the form \fIf\fP.\fIc\fP, where \fIf\fP is the number +of the field to use and \fIc\fP is the number of the first character +from the beginning of the field (for \fI+pos\fP) or from the end of +the previous field (for \fI\-pos\fP). The .\fIc\fP part of a position +may be omitted in which case it is taken to be the first character in +the field. If the +.I \-b +option has been given, the .\fIc\fP part of a field specification is +counted from the first nonblank character of the field (for +\fI+pos\fP) or from the first nonblank character following the +previous field (for \fI\-pos\fP). +.PP +A \fI+pos\fP or \fI-pos\fP argument may also have any of the option +letters +.I Mbdfinr +appended to it, in which case the global ordering options are not used +for that particular field. The +.I \-b +option may be independently attached to either or both of the +\fI+pos\fP and \fI\-pos\fP parts of a field specification, and if it +is inherited from the global options it will be attached to both. +If a +.I \-n +or +.I \-M +option is used, thus implying a +.I \-b +option, the +.I \-b +option is taken to apply to both the \fI+pos\fP and the \fI\-pos\fP +parts of a key specification. Keys may span multiple fields. +.PP +In addition, when GNU +.B sort +is invoked with exactly one argument, the following options are recognized: +.TP +.I "\-\-help" +Print a usage message on standard output and exit successfully. +.TP +.I "\-\-version" +Print version information on standard output then exit successfully. 
+.SH COMPATIBILITY +.PP +Historical (BSD and System V) implementations of +.B sort +have differed in their interpretation of some options, +particularly +.IR \-b , +.IR \-f , +and +.IR \-n . +GNU sort follows the POSIX behavior, which is +usually (but not always!) like the System V behavior. +According to POSIX +.I \-n +no longer implies +.IR \-b . +For consistency, +.I \-M +has been changed in the same way. +This may affect the meaning of character positions in field +specifications in obscure cases. +If this bites you the fix is to add an explicit +.IR \-b . +.SH BUGS +The different meaning of field numbers depending +on whether +.I -k +is used is confusing. +It's all POSIX's fault! diff --git a/gnu/usr.bin/sort/sort.c b/gnu/usr.bin/sort/sort.c new file mode 100644 index 000000000000..dc3addb6e408 --- /dev/null +++ b/gnu/usr.bin/sort/sort.c @@ -0,0 +1,2078 @@ +/* sort - sort lines of text (with all kinds of options). + Copyright (C) 1988, 1991, 1992, 1993, 1994, 1995 Free Software Foundation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + + Written December 1988 by Mike Haertel. + The author may be reached (Email) at the address mike@gnu.ai.mit.edu, + or (US mail) as Mike Haertel c/o Free Software Foundation. */ + +#include <config.h> + +/* Get isblank from GNU libc. 
*/ +#define _GNU_SOURCE + +#include <sys/types.h> +#include <signal.h> +#include <stdio.h> +#include "system.h" +#include "version.h" +#include "long-options.h" +#include "error.h" +#include "xstrtod.h" + +#ifdef HAVE_LIMITS_H +#include <limits.h> +#else +#ifndef UCHAR_MAX +#define UCHAR_MAX 255 +#endif +#endif +#ifndef STDC_HEADERS +char *malloc (); +char *realloc (); +void free (); +#endif + +/* Undefine, to avoid warning about redefinition on some systems. */ +#undef min +#define min(a, b) ((a) < (b) ? (a) : (b)) + +#define UCHAR_LIM (UCHAR_MAX + 1) +#define UCHAR(c) ((unsigned char) (c)) + +#ifndef DEFAULT_TMPDIR +#define DEFAULT_TMPDIR "/tmp" +#endif + +/* The kind of blanks for '-b' to skip in various options. */ +enum blanktype { bl_start, bl_end, bl_both }; + +/* Lines are held in core as counted strings. */ +struct line +{ + char *text; /* Text of the line. */ + int length; /* Length not including final newline. */ + char *keybeg; /* Start of first key. */ + char *keylim; /* Limit of first key. */ +}; + +/* Arrays of lines. */ +struct lines +{ + struct line *lines; /* Dynamically allocated array of lines. */ + int used; /* Number of slots used. */ + int alloc; /* Number of slots allocated. */ + int limit; /* Max number of slots to allocate. */ +}; + +/* Input buffers. */ +struct buffer +{ + char *buf; /* Dynamically allocated buffer. */ + int used; /* Number of bytes used. */ + int alloc; /* Number of bytes allocated. */ + int left; /* Number of bytes left after line parsing. */ +}; + +struct keyfield +{ + int sword; /* Zero-origin 'word' to start at. */ + int schar; /* Additional characters to skip. */ + int skipsblanks; /* Skip leading white space at start. */ + int eword; /* Zero-origin first word after field. */ + int echar; /* Additional characters in field. */ + int skipeblanks; /* Skip trailing white space at finish. */ + int *ignore; /* Boolean array of characters to ignore. */ + char *translate; /* Translation applied to characters. 
*/ + int numeric; /* Flag for numeric comparison. Handle + strings of digits with optional decimal + point, but no exponential notation. */ + int general_numeric; /* Flag for general, numeric comparison. + Handle numbers in exponential notation. */ + int month; /* Flag for comparison by month name. */ + int reverse; /* Reverse the sense of comparison. */ + struct keyfield *next; /* Next keyfield to try. */ +}; + +struct month +{ + char *name; + int val; +}; + +/* The name this program was run with. */ +char *program_name; + +/* Table of digits. */ +static int digits[UCHAR_LIM]; + +/* Table of white space. */ +static int blanks[UCHAR_LIM]; + +/* Table of non-printing characters. */ +static int nonprinting[UCHAR_LIM]; + +/* Table of non-dictionary characters (not letters, digits, or blanks). */ +static int nondictionary[UCHAR_LIM]; + +/* Translation table folding lower case to upper. */ +static char fold_toupper[UCHAR_LIM]; + +/* Table mapping 3-letter month names to integers. + Alphabetic order allows binary search. */ +static struct month const monthtab[] = +{ + {"APR", 4}, + {"AUG", 8}, + {"DEC", 12}, + {"FEB", 2}, + {"JAN", 1}, + {"JUL", 7}, + {"JUN", 6}, + {"MAR", 3}, + {"MAY", 5}, + {"NOV", 11}, + {"OCT", 10}, + {"SEP", 9} +}; + +/* During the merge phase, the number of files to merge at once. */ +#define NMERGE 16 + +/* Initial buffer size for in core sorting. Will not grow unless a + line longer than this is seen. */ +static int sortalloc = 512 * 1024; + +/* Initial buffer size for in core merge buffers. Bear in mind that + up to NMERGE * mergealloc bytes may be allocated for merge buffers. */ +static int mergealloc = 16 * 1024; + +/* Guess of average line length. */ +static int linelength = 30; + +/* Maximum number of elements for the array(s) of struct line's, in bytes. */ +#define LINEALLOC (256 * 1024) + +/* Prefix for temporary file names. */ +static char *temp_file_prefix; + +/* Flag to reverse the order of all comparisons. 
*/ +static int reverse; + +/* Flag for stable sort. This turns off the last ditch bytewise + comparison of lines, and instead leaves lines in the same order + they were read if all keys compare equal. */ +static int stable; + +/* Tab character separating fields. If NUL, then fields are separated + by the empty string between a non-whitespace character and a whitespace + character. */ +static char tab; + +/* Flag to remove consecutive duplicate lines from the output. + Only the first of a sequence of equal lines will be output. */ +static int unique; + +/* Nonzero if any of the input files are the standard input. */ +static int have_read_stdin; + +/* Lists of key field comparisons to be tried. */ +static struct keyfield keyhead; + +static void +usage (int status) +{ + if (status != 0) + fprintf (stderr, _("Try `%s --help' for more information.\n"), + program_name); + else + { + printf (_("\ +Usage: %s [OPTION]... [FILE]...\n\ +"), + program_name); + printf (_("\ +Write sorted concatenation of all FILE(s) to standard output.\n\ +\n\ + +POS1 [-POS2] start a key at POS1, end it before POS2\n\ + -M compare (unknown) < `JAN' < ... 
< `DEC', imply -b\n\ + -T DIRECT use DIRECT for temporary files, not $TMPDIR or %s\n\ + -b ignore leading blanks in sort fields or keys\n\ + -c check if given files already sorted, do not sort\n\ + -d consider only [a-zA-Z0-9 ] characters in keys\n\ + -f fold lower case to upper case characters in keys\n\ + -g compare according to general numerical value, imply -b\n\ + -i consider only [\\040-\\0176] characters in keys\n\ + -k POS1[,POS2] same as +POS1 [-POS2], but all positions counted from 1\n\ + -m merge already sorted files, do not sort\n\ + -n compare according to string numerical value, imply -b\n\ + -o FILE write result on FILE instead of standard output\n\ + -r reverse the result of comparisons\n\ + -s stabilize sort by disabling last resort comparison\n\ + -t SEP use SEParator instead of non- to whitespace transition\n\ + -u with -c, check for strict ordering\n\ + -u with -m, only output the first of an equal sequence\n\ + --help display this help and exit\n\ + --version output version information and exit\n\ +\n\ +POS is F[.C][OPTS], where F is the field number and C the character\n\ +position in the field, both counted from zero. OPTS is made up of one\n\ +or more of Mbdfinr, this effectively disable global -Mbdfinr settings\n\ +for that key. If no key given, use the entire line as key. With no\n\ +FILE, or when FILE is -, read standard input.\n\ +") + , DEFAULT_TMPDIR); + } + exit (status); +} + +/* The list of temporary files. */ +static struct tempnode +{ + char *name; + struct tempnode *next; +} temphead; + +/* Clean up any remaining temporary files. */ + +static void +cleanup (void) +{ + struct tempnode *node; + + for (node = temphead.next; node; node = node->next) + unlink (node->name); +} + +/* Allocate N bytes of memory dynamically, with error checking. 
*/ + +static char * +xmalloc (unsigned int n) +{ + char *p; + + p = malloc (n); + if (p == 0) + { + error (0, 0, _("virtual memory exhausted")); + cleanup (); + exit (2); + } + return p; +} + +/* Change the size of an allocated block of memory P to N bytes, + with error checking. + If P is NULL, run xmalloc. + If N is 0, run free and return NULL. */ + +static char * +xrealloc (char *p, unsigned int n) +{ + if (p == 0) + return xmalloc (n); + if (n == 0) + { + free (p); + return 0; + } + p = realloc (p, n); + if (p == 0) + { + error (0, 0, _("virtual memory exhausted")); + cleanup (); + exit (2); + } + return p; +} + +static FILE * +xtmpfopen (const char *file) +{ + FILE *fp; + int fd; + + fd = open (file, O_WRONLY | O_CREAT | O_TRUNC, 0600); + if (fd < 0 || (fp = fdopen (fd, "w")) == NULL) + { + error (0, errno, "%s", file); + cleanup (); + exit (2); + } + + return fp; +} + +static FILE * +xfopen (const char *file, const char *how) +{ + FILE *fp; + + if (strcmp (file, "-") == 0) + { + fp = stdin; + } + else + { + if ((fp = fopen (file, how)) == NULL) + { + error (0, errno, "%s", file); + cleanup (); + exit (2); + } + } + + if (fp == stdin) + have_read_stdin = 1; + return fp; +} + +static void +xfclose (FILE *fp) +{ + if (fp == stdin) + { + /* Allow reading stdin from tty more than once. */ + if (feof (fp)) + clearerr (fp); + } + else if (fp == stdout) + { + if (fflush (fp) != 0) + { + error (0, errno, _("flushing file")); + cleanup (); + exit (2); + } + } + else + { + if (fclose (fp) != 0) + { + error (0, errno, _("error closing file")); + cleanup (); + exit (2); + } + } +} + +static void +xfwrite (const char *buf, int size, int nelem, FILE *fp) +{ + if (fwrite (buf, size, nelem, fp) != nelem) + { + error (0, errno, _("write error")); + cleanup (); + exit (2); + } +} + +/* Return a name for a temporary file. 
*/ + +static char * +tempname (void) +{ + static unsigned int seq; + int len = strlen (temp_file_prefix); + char *name = xmalloc (len + 1 + sizeof ("sort") - 1 + 5 + 5 + 1); + struct tempnode *node; + + node = (struct tempnode *) xmalloc (sizeof (struct tempnode)); + sprintf (name, + "%s%ssort%5.5d%5.5d", + temp_file_prefix, + (len && temp_file_prefix[len - 1] != '/') ? "/" : "", + (unsigned int) getpid () & 0xffff, seq); + + /* Make sure that SEQ's value fits in 5 digits. */ + ++seq; + if (seq >= 100000) + seq = 0; + + node->name = name; + node->next = temphead.next; + temphead.next = node; + return name; +} + +/* Search through the list of temporary files for NAME; + remove it if it is found on the list. */ + +static void +zaptemp (char *name) +{ + struct tempnode *node, *temp; + + for (node = &temphead; node->next; node = node->next) + if (!strcmp (name, node->next->name)) + break; + if (node->next) + { + temp = node->next; + unlink (temp->name); + free (temp->name); + node->next = temp->next; + free ((char *) temp); + } +} + +/* Initialize the character class tables. */ + +static void +inittables (void) +{ + int i; + + for (i = 0; i < UCHAR_LIM; ++i) + { + if (ISBLANK (i)) + blanks[i] = 1; + if (ISDIGIT (i)) + digits[i] = 1; + if (!ISPRINT (i)) + nonprinting[i] = 1; + if (!ISALNUM (i) && !ISBLANK (i)) + nondictionary[i] = 1; + if (ISLOWER (i)) + fold_toupper[i] = toupper (i); + else + fold_toupper[i] = i; + } +} + +/* Initialize BUF, allocating ALLOC bytes initially. */ + +static void +initbuf (struct buffer *buf, int alloc) +{ + buf->alloc = alloc; + buf->buf = xmalloc (buf->alloc); + buf->used = buf->left = 0; +} + +/* Fill BUF reading from FP, moving buf->left bytes from the end + of buf->buf to the beginning first. If EOF is reached and the + file wasn't terminated by a newline, supply one. Return a count + of bytes buffered. 
*/ + +static int +fillbuf (struct buffer *buf, FILE *fp) +{ + int cc; + + memmove (buf->buf, buf->buf + buf->used - buf->left, buf->left); + buf->used = buf->left; + + while (!feof (fp) && (buf->used == 0 || !memchr (buf->buf, '\n', buf->used))) + { + if (buf->used == buf->alloc) + { + buf->alloc *= 2; + buf->buf = xrealloc (buf->buf, buf->alloc); + } + cc = fread (buf->buf + buf->used, 1, buf->alloc - buf->used, fp); + if (ferror (fp)) + { + error (0, errno, _("read error")); + cleanup (); + exit (2); + } + buf->used += cc; + } + + if (feof (fp) && buf->used && buf->buf[buf->used - 1] != '\n') + { + if (buf->used == buf->alloc) + { + buf->alloc *= 2; + buf->buf = xrealloc (buf->buf, buf->alloc); + } + buf->buf[buf->used++] = '\n'; + } + + return buf->used; +} + +/* Initialize LINES, allocating space for ALLOC lines initially. + LIMIT is the maximum possible number of lines to allocate space + for, ever. */ + +static void +initlines (struct lines *lines, int alloc, int limit) +{ + lines->alloc = alloc; + lines->lines = (struct line *) xmalloc (lines->alloc * sizeof (struct line)); + lines->used = 0; + lines->limit = limit; +} + +/* Return a pointer to the first character of the field specified + by KEY in LINE. */ + +static char * +begfield (const struct line *line, const struct keyfield *key) +{ + register char *ptr = line->text, *lim = ptr + line->length; + register int sword = key->sword, schar = key->schar; + + if (tab) + while (ptr < lim && sword--) + { + while (ptr < lim && *ptr != tab) + ++ptr; + if (ptr < lim) + ++ptr; + } + else + while (ptr < lim && sword--) + { + while (ptr < lim && blanks[UCHAR (*ptr)]) + ++ptr; + while (ptr < lim && !blanks[UCHAR (*ptr)]) + ++ptr; + } + + if (key->skipsblanks) + while (ptr < lim && blanks[UCHAR (*ptr)]) + ++ptr; + + if (ptr + schar <= lim) + ptr += schar; + else + ptr = lim; + + return ptr; +} + +/* Return the limit of (a pointer to the first character after) the field + in LINE specified by KEY. 
*/ + +static char * +limfield (const struct line *line, const struct keyfield *key) +{ + register char *ptr = line->text, *lim = ptr + line->length; + register int eword = key->eword, echar = key->echar; + + /* Note: from the POSIX spec: + The leading field separator itself is included in + a field when -t is not used. FIXME: move this comment up... */ + + /* Move PTR past EWORD fields or to one past the last byte on LINE, + whichever comes first. If there are more than EWORD fields, leave + PTR pointing at the beginning of the field having zero-based index, + EWORD. If a delimiter character was specified (via -t), then that + `beginning' is the first character following the delimiting TAB. + Otherwise, leave PTR pointing at the first `blank' character after + the preceding field. */ + if (tab) + while (ptr < lim && eword--) + { + while (ptr < lim && *ptr != tab) + ++ptr; + if (ptr < lim && (eword || echar > 0)) + ++ptr; + } + else + while (ptr < lim && eword--) + { + while (ptr < lim && blanks[UCHAR (*ptr)]) + ++ptr; + while (ptr < lim && !blanks[UCHAR (*ptr)]) + ++ptr; + } + + /* Make LIM point to the end of (one byte past) the current field. */ + if (tab) + { + char *newlim; + newlim = memchr (ptr, tab, lim - ptr); + if (newlim) + lim = newlim; + } + else + { + char *newlim; + newlim = ptr; + while (newlim < lim && blanks[UCHAR (*newlim)]) + ++newlim; + while (newlim < lim && !blanks[UCHAR (*newlim)]) + ++newlim; + lim = newlim; + } + + /* If we're skipping leading blanks, don't start counting characters + until after skipping past any leading blanks. */ + if (key->skipsblanks) + while (ptr < lim && blanks[UCHAR (*ptr)]) + ++ptr; + + /* Advance PTR by ECHAR (if possible), but no further than LIM. 
*/ + if (ptr + echar <= lim) + ptr += echar; + else + ptr = lim; + + return ptr; +} + +/* FIXME */ + +void +trim_trailing_blanks (const char *a_start, char **a_end) +{ + while (*a_end > a_start && blanks[UCHAR (*(*a_end - 1))]) + --(*a_end); +} + +/* Find the lines in BUF, storing pointers and lengths in LINES. + Also replace newlines in BUF with NULs. */ + +static void +findlines (struct buffer *buf, struct lines *lines) +{ + register char *beg = buf->buf, *lim = buf->buf + buf->used, *ptr; + struct keyfield *key = keyhead.next; + + lines->used = 0; + + while (beg < lim && (ptr = memchr (beg, '\n', lim - beg)) + && lines->used < lines->limit) + { + /* There are various places in the code that rely on a NUL + being at the end of in-core lines; NULs inside the lines + will not cause trouble, though. */ + *ptr = '\0'; + + if (lines->used == lines->alloc) + { + lines->alloc *= 2; + lines->lines = (struct line *) + xrealloc ((char *) lines->lines, + lines->alloc * sizeof (struct line)); + } + + lines->lines[lines->used].text = beg; + lines->lines[lines->used].length = ptr - beg; + + /* Precompute the position of the first key for efficiency. */ + if (key) + { + if (key->eword >= 0) + lines->lines[lines->used].keylim = + limfield (&lines->lines[lines->used], key); + else + lines->lines[lines->used].keylim = ptr; + + if (key->sword >= 0) + lines->lines[lines->used].keybeg = + begfield (&lines->lines[lines->used], key); + else + { + if (key->skipsblanks) + while (blanks[UCHAR (*beg)]) + ++beg; + lines->lines[lines->used].keybeg = beg; + } + if (key->skipeblanks) + { + trim_trailing_blanks (lines->lines[lines->used].keybeg, + &lines->lines[lines->used].keylim); + } + } + else + { + lines->lines[lines->used].keybeg = 0; + lines->lines[lines->used].keylim = 0; + } + + ++lines->used; + beg = ptr + 1; + } + + buf->left = lim - beg; +} + +/* Compare strings A and B containing decimal fractions < 1. 
Each string + should begin with a decimal point followed immediately by the digits + of the fraction. Strings not of this form are considered to be zero. */ + +static int +fraccompare (register const char *a, register const char *b) +{ + register tmpa = UCHAR (*a), tmpb = UCHAR (*b); + + if (tmpa == '.' && tmpb == '.') + { + do + tmpa = UCHAR (*++a), tmpb = UCHAR (*++b); + while (tmpa == tmpb && digits[tmpa]); + if (digits[tmpa] && digits[tmpb]) + return tmpa - tmpb; + if (digits[tmpa]) + { + while (tmpa == '0') + tmpa = UCHAR (*++a); + if (digits[tmpa]) + return 1; + return 0; + } + if (digits[tmpb]) + { + while (tmpb == '0') + tmpb = UCHAR (*++b); + if (digits[tmpb]) + return -1; + return 0; + } + return 0; + } + else if (tmpa == '.') + { + do + tmpa = UCHAR (*++a); + while (tmpa == '0'); + if (digits[tmpa]) + return 1; + return 0; + } + else if (tmpb == '.') + { + do + tmpb = UCHAR (*++b); + while (tmpb == '0'); + if (digits[tmpb]) + return -1; + return 0; + } + return 0; +} + +/* Compare strings A and B as numbers without explicitly converting them to + machine numbers. Comparatively slow for short strings, but asymptotically + hideously fast. */ + +static int +numcompare (register const char *a, register const char *b) +{ + register int tmpa, tmpb, loga, logb, tmp; + + tmpa = UCHAR (*a); + tmpb = UCHAR (*b); + + while (blanks[tmpa]) + tmpa = UCHAR (*++a); + while (blanks[tmpb]) + tmpb = UCHAR (*++b); + + if (tmpa == '-') + { + do + tmpa = UCHAR (*++a); + while (tmpa == '0'); + if (tmpb != '-') + { + if (tmpa == '.') + do + tmpa = UCHAR (*++a); + while (tmpa == '0'); + if (digits[tmpa]) + return -1; + while (tmpb == '0') + tmpb = UCHAR (*++b); + if (tmpb == '.') + do + tmpb = *++b; + while (tmpb == '0'); + if (digits[tmpb]) + return -1; + return 0; + } + do + tmpb = UCHAR (*++b); + while (tmpb == '0'); + + while (tmpa == tmpb && digits[tmpa]) + tmpa = UCHAR (*++a), tmpb = UCHAR (*++b); + + if ((tmpa == '.' && !digits[tmpb]) || (tmpb == '.' 
&& !digits[tmpa])) + return -fraccompare (a, b); + + if (digits[tmpa]) + for (loga = 1; digits[UCHAR (*++a)]; ++loga) + ; + else + loga = 0; + + if (digits[tmpb]) + for (logb = 1; digits[UCHAR (*++b)]; ++logb) + ; + else + logb = 0; + + if ((tmp = logb - loga) != 0) + return tmp; + + if (!loga) + return 0; + + return tmpb - tmpa; + } + else if (tmpb == '-') + { + do + tmpb = UCHAR (*++b); + while (tmpb == '0'); + if (tmpb == '.') + do + tmpb = *++b; + while (tmpb == '0'); + if (digits[tmpb]) + return 1; + while (tmpa == '0') + tmpa = UCHAR (*++a); + if (tmpa == '.') + do + tmpa = UCHAR (*++a); + while (tmpa == '0'); + if (digits[tmpa]) + return 1; + return 0; + } + else + { + while (tmpa == '0') + tmpa = UCHAR (*++a); + while (tmpb == '0') + tmpb = UCHAR (*++b); + + while (tmpa == tmpb && digits[tmpa]) + tmpa = UCHAR (*++a), tmpb = UCHAR (*++b); + + if ((tmpa == '.' && !digits[tmpb]) || (tmpb == '.' && !digits[tmpa])) + return fraccompare (a, b); + + if (digits[tmpa]) + for (loga = 1; digits[UCHAR (*++a)]; ++loga) + ; + else + loga = 0; + + if (digits[tmpb]) + for (logb = 1; digits[UCHAR (*++b)]; ++logb) + ; + else + logb = 0; + + if ((tmp = loga - logb) != 0) + return tmp; + + if (!loga) + return 0; + + return tmpa - tmpb; + } +} + +static int +general_numcompare (const char *sa, const char *sb) +{ + double a, b; + /* FIXME: add option to warn about failed conversions. */ + /* FIXME: maybe add option to try expensive FP conversion + only if A and B can't be compared more cheaply/accurately. */ + if (xstrtod (sa, NULL, &a)) + { + a = 0; + } + if (xstrtod (sb, NULL, &b)) + { + b = 0; + } + return a == b ? 0 : a < b ? -1 : 1; +} + +/* Return an integer <= 12 associated with month name S with length LEN, + 0 if the name in S is not recognized. 
*/ + +static int +getmonth (const char *s, int len) +{ + char month[4]; + register int i, lo = 0, hi = 12; + + while (len > 0 && blanks[UCHAR(*s)]) + ++s, --len; + + if (len < 3) + return 0; + + for (i = 0; i < 3; ++i) + month[i] = fold_toupper[UCHAR (s[i])]; + month[3] = '\0'; + + while (hi - lo > 1) + if (strcmp (month, monthtab[(lo + hi) / 2].name) < 0) + hi = (lo + hi) / 2; + else + lo = (lo + hi) / 2; + if (!strcmp (month, monthtab[lo].name)) + return monthtab[lo].val; + return 0; +} + +/* Compare two lines A and B trying every key in sequence until there + are no more keys or a difference is found. */ + +static int +keycompare (const struct line *a, const struct line *b) +{ + register char *texta, *textb, *lima, *limb, *translate; + register int *ignore; + struct keyfield *key; + int diff = 0, iter = 0, lena, lenb; + + for (key = keyhead.next; key; key = key->next, ++iter) + { + ignore = key->ignore; + translate = key->translate; + + /* Find the beginning and limit of each field. */ + if (iter || a->keybeg == NULL || b->keybeg == NULL) + { + if (key->eword >= 0) + lima = limfield (a, key), limb = limfield (b, key); + else + lima = a->text + a->length, limb = b->text + b->length; + + if (key->sword >= 0) + texta = begfield (a, key), textb = begfield (b, key); + else + { + texta = a->text, textb = b->text; + if (key->skipsblanks) + { + while (texta < lima && blanks[UCHAR (*texta)]) + ++texta; + while (textb < limb && blanks[UCHAR (*textb)]) + ++textb; + } + } + } + else + { + /* For the first iteration only, the key positions have + been precomputed for us. */ + texta = a->keybeg, lima = a->keylim; + textb = b->keybeg, limb = b->keylim; + } + + /* Find the lengths. 
*/ + lena = lima - texta, lenb = limb - textb; + if (lena < 0) + lena = 0; + if (lenb < 0) + lenb = 0; + + if (key->skipeblanks) + { + char *a_end = texta + lena; + char *b_end = textb + lenb; + trim_trailing_blanks (texta, &a_end); + trim_trailing_blanks (textb, &b_end); + lena = a_end - texta; + lenb = b_end - textb; + } + + /* Actually compare the fields. */ + if (key->numeric) + { + if (*lima || *limb) + { + char savea = *lima, saveb = *limb; + + *lima = *limb = '\0'; + diff = numcompare (texta, textb); + *lima = savea, *limb = saveb; + } + else + diff = numcompare (texta, textb); + + if (diff) + return key->reverse ? -diff : diff; + continue; + } + else if (key->general_numeric) + { + if (*lima || *limb) + { + char savea = *lima, saveb = *limb; + + *lima = *limb = '\0'; + diff = general_numcompare (texta, textb); + *lima = savea, *limb = saveb; + } + else + diff = general_numcompare (texta, textb); + + if (diff) + return key->reverse ? -diff : diff; + continue; + } + else if (key->month) + { + diff = getmonth (texta, lena) - getmonth (textb, lenb); + if (diff) + return key->reverse ? 
-diff : diff; + continue; + } + else if (ignore && translate) + +#define CMP_WITH_IGNORE(A, B) \ + do \ + { \ + while (texta < lima && textb < limb) \ + { \ + while (texta < lima && ignore[UCHAR (*texta)]) \ + ++texta; \ + while (textb < limb && ignore[UCHAR (*textb)]) \ + ++textb; \ + if (texta < lima && textb < limb) \ + { \ + if ((A) != (B)) \ + { \ + diff = (A) - (B); \ + break; \ + } \ + ++texta; \ + ++textb; \ + } \ + \ + if (texta == lima && textb < limb && !ignore[UCHAR (*textb)]) \ + diff = -1; \ + else if (texta < lima && textb == limb \ + && !ignore[UCHAR (*texta)]) \ + diff = 1; \ + } \ + \ + if (diff == 0) \ + { \ + while (texta < lima && ignore[UCHAR (*texta)]) \ + ++texta; \ + while (textb < limb && ignore[UCHAR (*textb)]) \ + ++textb; \ + \ + if (texta == lima && textb < limb) \ + diff = -1; \ + else if (texta < lima && textb == limb) \ + diff = 1; \ + } \ + /* Relative lengths are meaningless if characters were ignored. \ + Handling this case here avoids what might be an invalid length \ + comparison below. */ \ + if (diff == 0 && texta == lima && textb == limb) \ + return 0; \ + } \ + while (0) + + CMP_WITH_IGNORE (translate[UCHAR (*texta)], translate[UCHAR (*textb)]); + else if (ignore) + CMP_WITH_IGNORE (*texta, *textb); + else if (translate) + while (texta < lima && textb < limb) + { + if (translate[UCHAR (*texta++)] != translate[UCHAR (*textb++)]) + { + diff = (translate[UCHAR (*--texta)] + - translate[UCHAR (*--textb)]); + break; + } + } + else + diff = memcmp (texta, textb, min (lena, lenb)); + + if (diff) + return key->reverse ? -diff : diff; + if ((diff = lena - lenb) != 0) + return key->reverse ? -diff : diff; + } + + return 0; +} + +/* Compare two lines A and B, returning negative, zero, or positive + depending on whether A compares less than, equal to, or greater than B. 
*/ + +static int +compare (register const struct line *a, register const struct line *b) +{ + int diff, tmpa, tmpb, mini; + + /* First try to compare on the specified keys (if any). + The only two cases with no key at all are unadorned sort, + and unadorned sort -r. */ + if (keyhead.next) + { + diff = keycompare (a, b); + if (diff != 0) + return diff; + if (unique || stable) + return 0; + } + + /* If the keys all compare equal (or no keys were specified) + fall through to the default byte-by-byte comparison. */ + tmpa = a->length, tmpb = b->length; + mini = min (tmpa, tmpb); + if (mini == 0) + diff = tmpa - tmpb; + else + { + char *ap = a->text, *bp = b->text; + + diff = UCHAR (*ap) - UCHAR (*bp); + if (diff == 0) + { + diff = memcmp (ap, bp, mini); + if (diff == 0) + diff = tmpa - tmpb; + } + } + + return reverse ? -diff : diff; +} + +/* Check that the lines read from the given FP come in order. Return + 1 if they do and 0 if there is a disorder. + FIXME: return number of first out-of-order line if not sorted. */ + +static int +checkfp (FILE *fp) +{ + struct buffer buf; /* Input buffer. */ + struct lines lines; /* Lines scanned from the buffer. */ + struct line temp; /* Copy of previous line. */ + int cc; /* Character count. */ + int alloc, sorted = 1; + + initbuf (&buf, mergealloc); + initlines (&lines, mergealloc / linelength + 1, + LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line))); + alloc = linelength; + temp.text = xmalloc (alloc); + + cc = fillbuf (&buf, fp); + if (cc == 0) + goto finish; + + findlines (&buf, &lines); + + while (1) + { + struct line *prev_line; /* Pointer to previous line. */ + int cmp; /* Result of calling compare. */ + int i; + + /* Compare each line in the buffer with its successor. */ + for (i = 0; i < lines.used - 1; ++i) + { + cmp = compare (&lines.lines[i], &lines.lines[i + 1]); + if ((unique && cmp >= 0) || (cmp > 0)) + { + sorted = 0; + goto finish; + } + } + + /* Save the last line of the buffer and refill the buffer. 
*/ + prev_line = lines.lines + (lines.used - 1); + if (prev_line->length > alloc) + { + while (prev_line->length + 1 > alloc) + alloc *= 2; + temp.text = xrealloc (temp.text, alloc); + } + memcpy (temp.text, prev_line->text, prev_line->length + 1); + temp.length = prev_line->length; + temp.keybeg = temp.text + (prev_line->keybeg - prev_line->text); + temp.keylim = temp.text + (prev_line->keylim - prev_line->text); + + cc = fillbuf (&buf, fp); + if (cc == 0) + break; + + findlines (&buf, &lines); + /* Make sure the line saved from the old buffer contents is + less than or equal to the first line of the new buffer. */ + cmp = compare (&temp, &lines.lines[0]); + if ((unique && cmp >= 0) || (cmp > 0)) + { + sorted = 0; + break; + } + } + +finish: + xfclose (fp); + free (buf.buf); + free ((char *) lines.lines); + free (temp.text); + return sorted; +} + +/* Merge lines from FPS onto OFP. NFPS cannot be greater than NMERGE. + Close FPS before returning. */ + +static void +mergefps (FILE **fps, register int nfps, FILE *ofp) +{ + struct buffer buffer[NMERGE]; /* Input buffers for each file. */ + struct lines lines[NMERGE]; /* Line tables for each buffer. */ + struct line saved; /* Saved line for unique check. */ + int savedflag = 0; /* True if there is a saved line. */ + int savealloc; /* Size allocated for the saved line. */ + int cur[NMERGE]; /* Current line in each line table. */ + int ord[NMERGE]; /* Table representing a permutation of fps, + such that lines[ord[0]].lines[cur[ord[0]]] + is the smallest line and will be next + output. */ + register int i, j, t; + +#ifdef lint /* Suppress `used before initialized' warning. */ + savealloc = 0; +#endif + + /* Allocate space for a saved line if necessary. */ + if (unique) + { + savealloc = linelength; + saved.text = xmalloc (savealloc); + } + + /* Read initial lines from each input file. */ + for (i = 0; i < nfps; ++i) + { + initbuf (&buffer[i], mergealloc); + /* If a file is empty, eliminate it from future consideration. 
*/ + while (i < nfps && !fillbuf (&buffer[i], fps[i])) + { + xfclose (fps[i]); + --nfps; + for (j = i; j < nfps; ++j) + fps[j] = fps[j + 1]; + } + if (i == nfps) + free (buffer[i].buf); + else + { + initlines (&lines[i], mergealloc / linelength + 1, + LINEALLOC / ((NMERGE + NMERGE) * sizeof (struct line))); + findlines (&buffer[i], &lines[i]); + cur[i] = 0; + } + } + + /* Set up the ord table according to comparisons among input lines. + Since this only reorders two items if one is strictly greater than + the other, it is stable. */ + for (i = 0; i < nfps; ++i) + ord[i] = i; + for (i = 1; i < nfps; ++i) + if (compare (&lines[ord[i - 1]].lines[cur[ord[i - 1]]], + &lines[ord[i]].lines[cur[ord[i]]]) > 0) + t = ord[i - 1], ord[i - 1] = ord[i], ord[i] = t, i = 0; + + /* Repeatedly output the smallest line until no input remains. */ + while (nfps) + { + /* If uniqified output is turned on, output only the first of + an identical series of lines. */ + if (unique) + { + if (savedflag && compare (&saved, &lines[ord[0]].lines[cur[ord[0]]])) + { + xfwrite (saved.text, 1, saved.length, ofp); + putc ('\n', ofp); + savedflag = 0; + } + if (!savedflag) + { + if (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1) + { + while (savealloc < lines[ord[0]].lines[cur[ord[0]]].length + 1) + savealloc *= 2; + saved.text = xrealloc (saved.text, savealloc); + } + saved.length = lines[ord[0]].lines[cur[ord[0]]].length; + memcpy (saved.text, lines[ord[0]].lines[cur[ord[0]]].text, + saved.length + 1); + if (lines[ord[0]].lines[cur[ord[0]]].keybeg != NULL) + { + saved.keybeg = saved.text + + (lines[ord[0]].lines[cur[ord[0]]].keybeg + - lines[ord[0]].lines[cur[ord[0]]].text); + } + if (lines[ord[0]].lines[cur[ord[0]]].keylim != NULL) + { + saved.keylim = saved.text + + (lines[ord[0]].lines[cur[ord[0]]].keylim + - lines[ord[0]].lines[cur[ord[0]]].text); + } + savedflag = 1; + } + } + else + { + xfwrite (lines[ord[0]].lines[cur[ord[0]]].text, 1, + lines[ord[0]].lines[cur[ord[0]]].length, 
ofp); + putc ('\n', ofp); + } + + /* Check if we need to read more lines into core. */ + if (++cur[ord[0]] == lines[ord[0]].used) + if (fillbuf (&buffer[ord[0]], fps[ord[0]])) + { + findlines (&buffer[ord[0]], &lines[ord[0]]); + cur[ord[0]] = 0; + } + else + { + /* We reached EOF on fps[ord[0]]. */ + for (i = 1; i < nfps; ++i) + if (ord[i] > ord[0]) + --ord[i]; + --nfps; + xfclose (fps[ord[0]]); + free (buffer[ord[0]].buf); + free ((char *) lines[ord[0]].lines); + for (i = ord[0]; i < nfps; ++i) + { + fps[i] = fps[i + 1]; + buffer[i] = buffer[i + 1]; + lines[i] = lines[i + 1]; + cur[i] = cur[i + 1]; + } + for (i = 0; i < nfps; ++i) + ord[i] = ord[i + 1]; + continue; + } + + /* The new line just read in may be larger than other lines + already in core; push it back in the queue until we encounter + a line larger than it. */ + for (i = 1; i < nfps; ++i) + { + t = compare (&lines[ord[0]].lines[cur[ord[0]]], + &lines[ord[i]].lines[cur[ord[i]]]); + if (!t) + t = ord[0] - ord[i]; + if (t < 0) + break; + } + t = ord[0]; + for (j = 1; j < i; ++j) + ord[j - 1] = ord[j]; + ord[i - 1] = t; + } + + if (unique && savedflag) + { + xfwrite (saved.text, 1, saved.length, ofp); + putc ('\n', ofp); + free (saved.text); + } +} + +/* Sort the array LINES with NLINES members, using TEMP for temporary space. 
*/ + +static void +sortlines (struct line *lines, int nlines, struct line *temp) +{ + register struct line *lo, *hi, *t; + register int nlo, nhi; + + if (nlines == 2) + { + if (compare (&lines[0], &lines[1]) > 0) + *temp = lines[0], lines[0] = lines[1], lines[1] = *temp; + return; + } + + nlo = nlines / 2; + lo = lines; + nhi = nlines - nlo; + hi = lines + nlo; + + if (nlo > 1) + sortlines (lo, nlo, temp); + + if (nhi > 1) + sortlines (hi, nhi, temp); + + t = temp; + + while (nlo && nhi) + if (compare (lo, hi) <= 0) + *t++ = *lo++, --nlo; + else + *t++ = *hi++, --nhi; + while (nlo--) + *t++ = *lo++; + + for (lo = lines, nlo = nlines - nhi, t = temp; nlo; --nlo) + *lo++ = *t++; +} + +/* Check that each of the NFILES FILES is ordered. + Return a count of disordered files. */ + +static int +check (char **files, int nfiles) +{ + int i, disorders = 0; + FILE *fp; + + for (i = 0; i < nfiles; ++i) + { + fp = xfopen (files[i], "r"); + if (!checkfp (fp)) + { + fprintf (stderr, _("%s: disorder on %s\n"), program_name, files[i]); + ++disorders; + } + } + return disorders; +} + +/* Merge NFILES FILES onto OFP. 
*/ + +static void +merge (char **files, int nfiles, FILE *ofp) +{ + int i, j, t; + char *temp; + FILE *fps[NMERGE], *tfp; + + while (nfiles > NMERGE) + { + t = 0; + for (i = 0; i < nfiles / NMERGE; ++i) + { + for (j = 0; j < NMERGE; ++j) + fps[j] = xfopen (files[i * NMERGE + j], "r"); + tfp = xtmpfopen (temp = tempname ()); + mergefps (fps, NMERGE, tfp); + xfclose (tfp); + for (j = 0; j < NMERGE; ++j) + zaptemp (files[i * NMERGE + j]); + files[t++] = temp; + } + for (j = 0; j < nfiles % NMERGE; ++j) + fps[j] = xfopen (files[i * NMERGE + j], "r"); + tfp = xtmpfopen (temp = tempname ()); + mergefps (fps, nfiles % NMERGE, tfp); + xfclose (tfp); + for (j = 0; j < nfiles % NMERGE; ++j) + zaptemp (files[i * NMERGE + j]); + files[t++] = temp; + nfiles = t; + } + + for (i = 0; i < nfiles; ++i) + fps[i] = xfopen (files[i], "r"); + mergefps (fps, i, ofp); + for (i = 0; i < nfiles; ++i) + zaptemp (files[i]); +} + +/* Sort NFILES FILES onto OFP. */ + +static void +sort (char **files, int nfiles, FILE *ofp) +{ + struct buffer buf; + struct lines lines; + struct line *tmp; + int i, ntmp; + FILE *fp, *tfp; + struct tempnode *node; + int n_temp_files = 0; + char **tempfiles; + + initbuf (&buf, sortalloc); + initlines (&lines, sortalloc / linelength + 1, + LINEALLOC / sizeof (struct line)); + ntmp = lines.alloc; + tmp = (struct line *) xmalloc (ntmp * sizeof (struct line)); + + while (nfiles--) + { + fp = xfopen (*files++, "r"); + while (fillbuf (&buf, fp)) + { + findlines (&buf, &lines); + if (lines.used > ntmp) + { + while (lines.used > ntmp) + ntmp *= 2; + tmp = (struct line *) + xrealloc ((char *) tmp, ntmp * sizeof (struct line)); + } + sortlines (lines.lines, lines.used, tmp); + if (feof (fp) && !nfiles && !n_temp_files && !buf.left) + tfp = ofp; + else + { + ++n_temp_files; + tfp = xtmpfopen (tempname ()); + } + for (i = 0; i < lines.used; ++i) + if (!unique || i == 0 + || compare (&lines.lines[i], &lines.lines[i - 1])) + { + xfwrite (lines.lines[i].text, 1, 
lines.lines[i].length, tfp); + putc ('\n', tfp); + } + if (tfp != ofp) + xfclose (tfp); + } + xfclose (fp); + } + + free (buf.buf); + free ((char *) lines.lines); + free ((char *) tmp); + + if (n_temp_files) + { + tempfiles = (char **) xmalloc (n_temp_files * sizeof (char *)); + i = n_temp_files; + for (node = temphead.next; i > 0; node = node->next) + tempfiles[--i] = node->name; + merge (tempfiles, n_temp_files, ofp); + free ((char *) tempfiles); + } +} + +/* Insert key KEY at the end of the list (`keyhead'). */ + +static void +insertkey (struct keyfield *key) +{ + struct keyfield *k = &keyhead; + + while (k->next) + k = k->next; + k->next = key; + key->next = NULL; +} + +static void +badfieldspec (const char *s) +{ + error (2, 0, _("invalid field specification `%s'"), s); +} + +/* Handle interrupts and hangups. */ + +static void +sighandler (int sig) +{ +#ifdef SA_INTERRUPT + struct sigaction sigact; + + sigact.sa_handler = SIG_DFL; + sigemptyset (&sigact.sa_mask); + sigact.sa_flags = 0; + sigaction (sig, &sigact, NULL); +#else /* !SA_INTERRUPT */ + signal (sig, SIG_DFL); +#endif /* SA_INTERRUPT */ + cleanup (); + kill (getpid (), sig); +} + +/* Set the ordering options for KEY specified in S. + Return the address of the first character in S that + is not a valid ordering option. + BLANKTYPE is the kind of blanks that 'b' should skip. 
*/ + +static char * +set_ordering (register const char *s, struct keyfield *key, + enum blanktype blanktype) +{ + while (*s) + { + switch (*s) + { + case 'b': + if (blanktype == bl_start || blanktype == bl_both) + key->skipsblanks = 1; + if (blanktype == bl_end || blanktype == bl_both) + key->skipeblanks = 1; + break; + case 'd': + key->ignore = nondictionary; + break; + case 'f': + key->translate = fold_toupper; + break; + case 'g': + key->general_numeric = 1; + break; + case 'i': + key->ignore = nonprinting; + break; + case 'M': + key->month = 1; + break; + case 'n': + key->numeric = 1; + if (blanktype == bl_start || blanktype == bl_both) + key->skipsblanks = 1; + if (blanktype == bl_end || blanktype == bl_both) + key->skipeblanks = 1; + break; + case 'r': + key->reverse = 1; + break; + default: + return (char *) s; + } + ++s; + } + return (char *) s; +} + +void +main (int argc, char **argv) +{ + struct keyfield *key = NULL, gkey; + char *s; + int i, t, t2; + int checkonly = 0, mergeonly = 0, nfiles = 0; + char *minus = "-", *outfile = minus, **files, *tmp; + FILE *ofp; +#ifdef SA_INTERRUPT + struct sigaction oldact, newact; +#endif /* SA_INTERRUPT */ + + program_name = argv[0]; + + parse_long_options (argc, argv, "sort", version_string, usage); + + have_read_stdin = 0; + inittables (); + + temp_file_prefix = getenv ("TMPDIR"); + if (temp_file_prefix == NULL) + temp_file_prefix = DEFAULT_TMPDIR; + +#ifdef SA_INTERRUPT + newact.sa_handler = sighandler; + sigemptyset (&newact.sa_mask); + newact.sa_flags = 0; + + sigaction (SIGINT, NULL, &oldact); + if (oldact.sa_handler != SIG_IGN) + sigaction (SIGINT, &newact, NULL); + sigaction (SIGHUP, NULL, &oldact); + if (oldact.sa_handler != SIG_IGN) + sigaction (SIGHUP, &newact, NULL); + sigaction (SIGPIPE, NULL, &oldact); + if (oldact.sa_handler != SIG_IGN) + sigaction (SIGPIPE, &newact, NULL); + sigaction (SIGTERM, NULL, &oldact); + if (oldact.sa_handler != SIG_IGN) + sigaction (SIGTERM, &newact, NULL); +#else /* 
!SA_INTERRUPT */ + if (signal (SIGINT, SIG_IGN) != SIG_IGN) + signal (SIGINT, sighandler); + if (signal (SIGHUP, SIG_IGN) != SIG_IGN) + signal (SIGHUP, sighandler); + if (signal (SIGPIPE, SIG_IGN) != SIG_IGN) + signal (SIGPIPE, sighandler); + if (signal (SIGTERM, SIG_IGN) != SIG_IGN) + signal (SIGTERM, sighandler); +#endif /* !SA_INTERRUPT */ + + gkey.sword = gkey.eword = -1; + gkey.ignore = NULL; + gkey.translate = NULL; + gkey.numeric = gkey.general_numeric = gkey.month = gkey.reverse = 0; + gkey.skipsblanks = gkey.skipeblanks = 0; + + files = (char **) xmalloc (sizeof (char *) * argc); + + for (i = 1; i < argc; ++i) + { + if (argv[i][0] == '+') + { + if (key) + insertkey (key); + key = (struct keyfield *) xmalloc (sizeof (struct keyfield)); + key->eword = -1; + key->ignore = NULL; + key->translate = NULL; + key->skipsblanks = key->skipeblanks = 0; + key->numeric = key->general_numeric = key->month = key->reverse = 0; + s = argv[i] + 1; + if (! (digits[UCHAR (*s)] || (*s == '.' && digits[UCHAR (s[1])]))) + badfieldspec (argv[i]); + for (t = 0; digits[UCHAR (*s)]; ++s) + t = 10 * t + *s - '0'; + t2 = 0; + if (*s == '.') + for (++s; digits[UCHAR (*s)]; ++s) + t2 = 10 * t2 + *s - '0'; + if (t2 || t) + { + key->sword = t; + key->schar = t2; + } + else + key->sword = -1; + s = set_ordering (s, key, bl_start); + if (*s) + badfieldspec (argv[i]); + } + else if (argv[i][0] == '-' && argv[i][1]) + { + s = argv[i] + 1; + if (digits[UCHAR (*s)] || (*s == '.' 
&& digits[UCHAR (s[1])])) + { + if (!key) + usage (2); + for (t = 0; digits[UCHAR (*s)]; ++s) + t = t * 10 + *s - '0'; + t2 = 0; + if (*s == '.') + for (++s; digits[UCHAR (*s)]; ++s) + t2 = t2 * 10 + *s - '0'; + key->eword = t; + key->echar = t2; + s = set_ordering (s, key, bl_end); + if (*s) + badfieldspec (argv[i]); + insertkey (key); + key = NULL; + } + else + while (*s) + { + s = set_ordering (s, &gkey, bl_both); + switch (*s) + { + case '\0': + break; + case 'c': + checkonly = 1; + break; + case 'k': + if (s[1]) + ++s; + else + { + if (i == argc - 1) + error (2, 0, _("option `-k' requires an argument")); + else + s = argv[++i]; + } + if (key) + insertkey (key); + key = (struct keyfield *) + xmalloc (sizeof (struct keyfield)); + key->eword = -1; + key->ignore = NULL; + key->translate = NULL; + key->skipsblanks = key->skipeblanks = 0; + key->numeric = key->month = key->reverse = 0; + /* Get POS1. */ + if (!digits[UCHAR (*s)]) + badfieldspec (argv[i]); + for (t = 0; digits[UCHAR (*s)]; ++s) + t = 10 * t + *s - '0'; + if (t == 0) + { + /* Provoke with `sort -k0' */ + error (0, 0, _("the starting field number argument \ +to the `-k' option must be positive")); + badfieldspec (argv[i]); + } + --t; + t2 = 0; + if (*s == '.') + { + if (!digits[UCHAR (s[1])]) + { + /* Provoke with `sort -k1.' */ + error (0, 0, _("starting field spec has `.' but \ +lacks following character offset")); + badfieldspec (argv[i]); + } + for (++s; digits[UCHAR (*s)]; ++s) + t2 = 10 * t2 + *s - '0'; + if (t2 == 0) + { + /* Provoke with `sort -k1.0' */ + error (0, 0, _("starting field character offset \ +argument to the `-k' option\nmust be positive")); + badfieldspec (argv[i]); + } + --t2; + } + if (t2 || t) + { + key->sword = t; + key->schar = t2; + } + else + key->sword = -1; + s = set_ordering (s, key, bl_start); + if (*s == 0) + { + key->eword = -1; + key->echar = 0; + } + else if (*s != ',') + badfieldspec (argv[i]); + else if (*s == ',') + { + /* Skip over comma. 
*/ + ++s; + if (*s == 0) + { + /* Provoke with `sort -k1,' */ + error (0, 0, _("field specification has `,' but \ +lacks following field spec")); + badfieldspec (argv[i]); + } + /* Get POS2. */ + for (t = 0; digits[UCHAR (*s)]; ++s) + t = t * 10 + *s - '0'; + if (t == 0) + { + /* Provoke with `sort -k1,0' */ + error (0, 0, _("ending field number argument \ +to the `-k' option must be positive")); + badfieldspec (argv[i]); + } + --t; + t2 = 0; + if (*s == '.') + { + if (!digits[UCHAR (s[1])]) + { + /* Provoke with `sort -k1,1.' */ + error (0, 0, _("ending field spec has `.' \ +but lacks following character offset")); + badfieldspec (argv[i]); + } + for (++s; digits[UCHAR (*s)]; ++s) + t2 = t2 * 10 + *s - '0'; + } + else + { + /* `-k 2,3' is equivalent to `+1 -3'. */ + ++t; + } + key->eword = t; + key->echar = t2; + s = set_ordering (s, key, bl_end); + if (*s) + badfieldspec (argv[i]); + } + insertkey (key); + key = NULL; + goto outer; + case 'm': + mergeonly = 1; + break; + case 'o': + if (s[1]) + outfile = s + 1; + else + { + if (i == argc - 1) + error (2, 0, _("option `-o' requires an argument")); + else + outfile = argv[++i]; + } + goto outer; + case 's': + stable = 1; + break; + case 't': + if (s[1]) + tab = *++s; + else if (i < argc - 1) + { + tab = *argv[++i]; + goto outer; + } + else + error (2, 0, _("option `-t' requires an argument")); + break; + case 'T': + if (s[1]) + temp_file_prefix = ++s; + else + { + if (i < argc - 1) + temp_file_prefix = argv[++i]; + else + error (2, 0, _("option `-T' requires an argument")); + } + goto outer; + /* break; */ + case 'u': + unique = 1; + break; + case 'y': + /* Accept and ignore e.g. -y0 for compatibility with + Solaris 2. */ + goto outer; + default: + fprintf (stderr, _("%s: unrecognized option `-%c'\n"), + argv[0], *s); + usage (2); + } + if (*s) + ++s; + } + } + else /* Not an option. 
*/ + { + files[nfiles++] = argv[i]; + } + outer:; + } + + if (key) + insertkey (key); + + /* Inheritance of global options to individual keys. */ + for (key = keyhead.next; key; key = key->next) + if (!key->ignore && !key->translate && !key->skipsblanks && !key->reverse + && !key->skipeblanks && !key->month && !key->numeric + && !key->general_numeric) + { + key->ignore = gkey.ignore; + key->translate = gkey.translate; + key->skipsblanks = gkey.skipsblanks; + key->skipeblanks = gkey.skipeblanks; + key->month = gkey.month; + key->numeric = gkey.numeric; + key->general_numeric = gkey.general_numeric; + key->reverse = gkey.reverse; + } + + if (!keyhead.next && (gkey.ignore || gkey.translate || gkey.skipsblanks + || gkey.skipeblanks || gkey.month || gkey.numeric + || gkey.general_numeric)) + insertkey (&gkey); + reverse = gkey.reverse; + + if (nfiles == 0) + { + nfiles = 1; + files = − + } + + if (checkonly) + exit (check (files, nfiles) != 0); + + if (strcmp (outfile, "-")) + { + struct stat outstat; + if (stat (outfile, &outstat) == 0) + { + /* The following code prevents a race condition when + people use the brain dead shell programming idiom: + cat file | sort -o file + This feature is provided for historical compatibility, + but we strongly discourage ever relying on this in + new shell programs. */ + + /* Temporarily copy each input file that might be another name + for the output file. When in doubt (e.g. a pipe), copy. */ + for (i = 0; i < nfiles; ++i) + { + char buf[8192]; + FILE *fp; + int cc; + + if (S_ISREG (outstat.st_mode) && strcmp (outfile, files[i])) + { + struct stat instat; + if ((strcmp (files[i], "-") + ? stat (files[i], &instat) + : fstat (fileno (stdin), &instat)) != 0) + { + error (0, errno, "%s", files[i]); + cleanup (); + exit (2); + } + if (S_ISREG (instat.st_mode) + && (instat.st_ino != outstat.st_ino + || instat.st_dev != outstat.st_dev)) + { + /* We know the files are distinct. 
*/ + continue; + } + } + + fp = xfopen (files[i], "r"); + tmp = tempname (); + ofp = xtmpfopen (tmp); + while ((cc = fread (buf, 1, sizeof buf, fp)) > 0) + xfwrite (buf, 1, cc, ofp); + if (ferror (fp)) + { + error (0, errno, "%s", files[i]); + cleanup (); + exit (2); + } + xfclose (ofp); + xfclose (fp); + files[i] = tmp; + } + } + ofp = xfopen (outfile, "w"); + } + else + ofp = stdout; + + if (mergeonly) + merge (files, nfiles, ofp); + else + sort (files, nfiles, ofp); + cleanup (); + + /* If we wait for the implicit flush on exit, and the parent process + has closed stdout (e.g., exec >&- in a shell), then the output file + winds up empty. I don't understand why. This is under SunOS, + Solaris, Ultrix, and Irix. This premature fflush makes the output + reappear. --karl@cs.umb.edu */ + if (fflush (ofp) < 0) + error (1, errno, _("%s: write error"), outfile); + + if (have_read_stdin && fclose (stdin) == EOF) + error (1, errno, outfile); + if (ferror (stdout) || fclose (stdout) == EOF) + error (1, errno, _("%s: write error"), outfile); + + exit (0); +} diff --git a/gnu/usr.bin/sort/system.h b/gnu/usr.bin/sort/system.h new file mode 100644 index 000000000000..bfb19e121c34 --- /dev/null +++ b/gnu/usr.bin/sort/system.h @@ -0,0 +1,205 @@ +/* system-dependent definitions for textutils programs. + Copyright (C) 1989, 1990, 1991 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +/* Include sys/types.h before this file. */ + +#include <sys/stat.h> + +#ifdef STAT_MACROS_BROKEN +#undef S_ISBLK +#undef S_ISCHR +#undef S_ISDIR +#undef S_ISFIFO +#undef S_ISLNK +#undef S_ISMPB +#undef S_ISMPC +#undef S_ISNWK +#undef S_ISREG +#undef S_ISSOCK +#endif /* STAT_MACROS_BROKEN. */ + +#if !defined(S_ISBLK) && defined(S_IFBLK) +#define S_ISBLK(m) (((m) & S_IFMT) == S_IFBLK) +#endif +#if !defined(S_ISCHR) && defined(S_IFCHR) +#define S_ISCHR(m) (((m) & S_IFMT) == S_IFCHR) +#endif +#if !defined(S_ISDIR) && defined(S_IFDIR) +#define S_ISDIR(m) (((m) & S_IFMT) == S_IFDIR) +#endif +#if !defined(S_ISREG) && defined(S_IFREG) +#define S_ISREG(m) (((m) & S_IFMT) == S_IFREG) +#endif +#if !defined(S_ISFIFO) && defined(S_IFIFO) +#define S_ISFIFO(m) (((m) & S_IFMT) == S_IFIFO) +#endif +#if !defined(S_ISLNK) && defined(S_IFLNK) +#define S_ISLNK(m) (((m) & S_IFMT) == S_IFLNK) +#endif +#if !defined(S_ISSOCK) && defined(S_IFSOCK) +#define S_ISSOCK(m) (((m) & S_IFMT) == S_IFSOCK) +#endif +#if !defined(S_ISMPB) && defined(S_IFMPB) /* V7 */ +#define S_ISMPB(m) (((m) & S_IFMT) == S_IFMPB) +#define S_ISMPC(m) (((m) & S_IFMT) == S_IFMPC) +#endif +#if !defined(S_ISNWK) && defined(S_IFNWK) /* HP/UX */ +#define S_ISNWK(m) (((m) & S_IFMT) == S_IFNWK) +#endif +#if !defined(HAVE_MKFIFO) +#define mkfifo(path, mode) (mknod ((path), (mode) | S_IFIFO, 0)) +#endif + +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif + +#ifndef _POSIX_VERSION +off_t lseek (); +#endif + +#ifndef STDIN_FILENO +#define STDIN_FILENO 0 +#endif + +#ifndef STDOUT_FILENO +#define STDOUT_FILENO 1 +#endif + +#ifndef STDERR_FILENO +#define STDERR_FILENO 2 +#endif + +/* Don't use bcopy! Use memmove if source and destination may overlap, + memcpy otherwise. 
*/ + +#ifdef HAVE_STRING_H +# if !STDC_HEADERS && HAVE_MEMORY_H +# include <memory.h> +# endif +# include <string.h> +#else +# include <strings.h> +char *memchr (); +#endif + +#include <errno.h> +#ifndef errno +extern int errno; +#endif + +#ifdef STDC_HEADERS +#include <stdlib.h> +#else +char *getenv (); +#endif + +#ifndef EXIT_FAILURE +# define EXIT_FAILURE 1 +#endif + +#ifndef EXIT_SUCCESS +# define EXIT_SUCCESS 0 +#endif + +#ifdef HAVE_FCNTL_H +#include <fcntl.h> +#else +#include <sys/file.h> +#endif + +#if !defined(SEEK_SET) +#define SEEK_SET 0 +#define SEEK_CUR 1 +#define SEEK_END 2 +#endif + +#ifndef _POSIX_SOURCE +#include <sys/param.h> +#endif + +/* Get or fake the disk device blocksize. + Usually defined by sys/param.h (if at all). */ +#if !defined(DEV_BSIZE) && defined(BSIZE) +#define DEV_BSIZE BSIZE +#endif +#if !defined(DEV_BSIZE) && defined(BBSIZE) /* SGI */ +#define DEV_BSIZE BBSIZE +#endif +#ifndef DEV_BSIZE +#define DEV_BSIZE 4096 +#endif + +/* Extract or fake data from a `struct stat'. + ST_BLKSIZE: Optimal I/O blocksize for the file, in bytes. */ +#ifndef HAVE_ST_BLKSIZE +# define ST_BLKSIZE(statbuf) DEV_BSIZE +#else /* HAVE_ST_BLKSIZE */ +/* Some systems, like Sequents, return st_blksize of 0 on pipes. */ +# define ST_BLKSIZE(statbuf) ((statbuf).st_blksize > 0 \ + ? 
(statbuf).st_blksize : DEV_BSIZE) +#endif /* HAVE_ST_BLKSIZE */ + +#ifndef S_ISLNK +#define lstat stat +#endif + +#ifndef RETSIGTYPE +#define RETSIGTYPE void +#endif + +#include <ctype.h> + +#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII)) +#define ISASCII(c) 1 +#else +#define ISASCII(c) isascii(c) +#endif + +#ifdef isblank +#define ISBLANK(c) (ISASCII (c) && isblank (c)) +#else +#define ISBLANK(c) ((c) == ' ' || (c) == '\t') +#endif +#ifdef isgraph +#define ISGRAPH(c) (ISASCII (c) && isgraph (c)) +#else +#define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) +#endif + +#define ISPRINT(c) (ISASCII (c) && isprint (c)) +#define ISDIGIT(c) (ISASCII (c) && isdigit (c)) +#define ISALNUM(c) (ISASCII (c) && isalnum (c)) +#define ISALPHA(c) (ISASCII (c) && isalpha (c)) +#define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) +#define ISLOWER(c) (ISASCII (c) && islower (c)) +#define ISPUNCT(c) (ISASCII (c) && ispunct (c)) +#define ISSPACE(c) (ISASCII (c) && isspace (c)) +#define ISUPPER(c) (ISASCII (c) && isupper (c)) +#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) + +/* Disable string localization for the time being. 
*/ +#undef _ +#define _(String) String + +#ifndef __P +# if PROTOTYPES +# define __P(Args) Args +# else +# define __P(Args) () +# endif +#endif diff --git a/gnu/usr.bin/sort/version.c b/gnu/usr.bin/sort/version.c new file mode 100644 index 000000000000..0289fcb69842 --- /dev/null +++ b/gnu/usr.bin/sort/version.c @@ -0,0 +1,3 @@ +#include <config.h> +#include "version.h" +const char *version_string = "GNU textutils 1.14"; diff --git a/gnu/usr.bin/sort/version.h b/gnu/usr.bin/sort/version.h new file mode 100644 index 000000000000..63de4fd14719 --- /dev/null +++ b/gnu/usr.bin/sort/version.h @@ -0,0 +1 @@ +extern const char *version_string; diff --git a/gnu/usr.bin/sort/xstrtod.c b/gnu/usr.bin/sort/xstrtod.c new file mode 100644 index 000000000000..838c5c463213 --- /dev/null +++ b/gnu/usr.bin/sort/xstrtod.c @@ -0,0 +1,48 @@ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#ifdef STDC_HEADERS +#include <stdlib.h> +#else +double strtod (); +#endif + +#include <errno.h> +#include <stdio.h> +#include <limits.h> +#include <ctype.h> +#include "xstrtod.h" + +int +xstrtod (str, ptr, result) + const char *str; + const char **ptr; + double *result; +{ + double val; + char *terminator; + int fail; + + fail = 0; + errno = 0; + val = strtod (str, &terminator); + + /* Having a non-zero terminator is an error only when PTR is NULL. */ + if (terminator == str || (ptr == NULL && *terminator != '\0')) + fail = 1; + else + { + /* Allow underflow (in which case strtod returns zero), + but flag overflow as an error. 
*/ + if (val != 0.0 && errno == ERANGE) + fail = 1; + } + + if (ptr != NULL) + *ptr = terminator; + + *result = val; + return fail; +} + diff --git a/gnu/usr.bin/sort/xstrtod.h b/gnu/usr.bin/sort/xstrtod.h new file mode 100644 index 000000000000..15b85f446fa4 --- /dev/null +++ b/gnu/usr.bin/sort/xstrtod.h @@ -0,0 +1,15 @@ +#ifndef XSTRTOD_H +#define XSTRTOD_H 1 + +#ifndef __P +# if defined (__GNUC__) || (defined (__STDC__) && __STDC__) +# define __P(args) args +# else +# define __P(args) () +# endif /* GCC. */ +#endif /* Not __P. */ + +int + xstrtod __P ((const char *str, const char **ptr, double *result)); + +#endif /* XSTRTOD_H */ |