diff options
Diffstat (limited to 'contrib/groff/src/preproc/refer')
-rw-r--r-- | contrib/groff/src/preproc/refer/Makefile.sub | 10 | ||||
-rw-r--r-- | contrib/groff/src/preproc/refer/command.cpp | 809 | ||||
-rw-r--r-- | contrib/groff/src/preproc/refer/label.cpp | 1605 | ||||
-rw-r--r-- | contrib/groff/src/preproc/refer/ref.cpp | 1160 | ||||
-rw-r--r-- | contrib/groff/src/preproc/refer/refer.cpp | 1235 | ||||
-rw-r--r-- | contrib/groff/src/preproc/refer/refer.man | 10 | ||||
-rw-r--r-- | contrib/groff/src/preproc/refer/token.cpp | 378 |
7 files changed, 5199 insertions, 8 deletions
diff --git a/contrib/groff/src/preproc/refer/Makefile.sub b/contrib/groff/src/preproc/refer/Makefile.sub index 9f4a53be93c0..2e22a3a77909 100644 --- a/contrib/groff/src/preproc/refer/Makefile.sub +++ b/contrib/groff/src/preproc/refer/Makefile.sub @@ -9,15 +9,15 @@ OBJS=\ refer.$(OBJEXT) \ token.$(OBJEXT) CCSRCS=\ - $(srcdir)/command.cc \ - $(srcdir)/ref.cc \ - $(srcdir)/refer.cc \ - $(srcdir)/token.cc + $(srcdir)/command.cpp \ + $(srcdir)/ref.cpp \ + $(srcdir)/refer.cpp \ + $(srcdir)/token.cpp HDRS=\ $(srcdir)/refer.h \ $(srcdir)/token.h \ $(srcdir)/command.h \ $(srcdir)/ref.h GRAM=$(srcdir)/label.y -YTABC=label.cc +YTABC=label.cpp NAMEPREFIX=$(g) diff --git a/contrib/groff/src/preproc/refer/command.cpp b/contrib/groff/src/preproc/refer/command.cpp new file mode 100644 index 000000000000..a7c6bfb89381 --- /dev/null +++ b/contrib/groff/src/preproc/refer/command.cpp @@ -0,0 +1,809 @@ +// -*- C++ -*- +/* Copyright (C) 1989, 1990, 1991, 1992, 2001, 2002 + Free Software Foundation, Inc. + Written by James Clark (jjc@jclark.com) + +This file is part of groff. + +groff is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +groff is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with groff; see the file COPYING. If not, write to the Free Software +Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#include "refer.h" +#include "refid.h" +#include "search.h" +#include "command.h" + +cset cs_field_name = csalpha; + +class input_item { + input_item *next; + char *filename; + int first_lineno; + string buffer; + const char *ptr; + const char *end; +public: + input_item(string &, const char *, int = 1); + ~input_item(); + int get_char(); + int peek_char(); + void skip_char(); + int get_location(const char **, int *); + + friend class input_stack; +}; + +input_item::input_item(string &s, const char *fn, int ln) +: filename(strsave(fn)), first_lineno(ln) +{ + buffer.move(s); + ptr = buffer.contents(); + end = ptr + buffer.length(); +} + +input_item::~input_item() +{ + a_delete filename; +} + +inline int input_item::peek_char() +{ + if (ptr >= end) + return EOF; + else + return (unsigned char)*ptr; +} + +inline int input_item::get_char() +{ + if (ptr >= end) + return EOF; + else + return (unsigned char)*ptr++; +} + +inline void input_item::skip_char() +{ + ptr++; +} + +int input_item::get_location(const char **filenamep, int *linenop) +{ + *filenamep = filename; + if (ptr == buffer.contents()) + *linenop = first_lineno; + else { + int ln = first_lineno; + const char *e = ptr - 1; + for (const char *p = buffer.contents(); p < e; p++) + if (*p == '\n') + ln++; + *linenop = ln; + } + return 1; +} + +class input_stack { + static input_item *top; +public: + static void init(); + static int get_char(); + static int peek_char(); + static void skip_char() { top->skip_char(); } + static void push_file(const char *); + static void push_string(string &, const char *, int); + static void error(const char *format, + const errarg &arg1 = empty_errarg, + const errarg &arg2 = empty_errarg, + const errarg &arg3 = empty_errarg); +}; + +input_item *input_stack::top = 0; + +void input_stack::init() +{ + while (top) { + input_item *tem = top; + top = top->next; + delete tem; + } +} + +int input_stack::get_char() +{ + while (top) { + int c = top->get_char(); + if (c >= 0) + return c; + input_item *tem = top; + top = top->next; + delete tem; + } + return -1; +} + +int input_stack::peek_char() +{ + while (top) { + int c = top->peek_char(); + if (c >= 0) + return c; + input_item *tem = top; + top = top->next; + delete tem; + } + return -1; +} + +void input_stack::push_file(const char *fn) +{ + FILE *fp; + if (strcmp(fn, "-") == 0) { + fp = stdin; + fn = "<standard input>"; + } + else { + errno = 0; + fp = fopen(fn, "r"); + if (fp == 0) { + error("can't open `%1': %2", fn, strerror(errno)); + return; + } + } + string buf; + int bol = 1; + int lineno = 1; + for (;;) { + int c = getc(fp); + if (bol && c == '.') { + // replace lines beginning with .R1 or .R2 with a blank line + c = getc(fp); + if (c == 'R') { + c = getc(fp); + if (c == '1' || c == '2') { + int cc = c; + c = getc(fp); + if (compatible_flag || c == ' ' || c == '\n' || c == EOF) { + while (c != '\n' && c != EOF) + c = getc(fp); + } + else { + buf += '.'; + buf += 'R'; + buf += cc; + } + } + else { + buf += '.'; + buf += 'R'; + } + } + else + buf += '.'; + } + if (c == EOF) + break; + if (invalid_input_char(c)) + error_with_file_and_line(fn, lineno, + "invalid input character code %1", int(c)); + else { + buf += c; + if (c == '\n') { + bol = 1; + lineno++; + } + else + bol = 0; + } + } + if (fp != stdin) + fclose(fp); + if (buf.length() > 0 && buf[buf.length() - 1] != '\n') + buf += '\n'; + input_item *it = new input_item(buf, fn); + it->next = top; + top = it; +} + +void input_stack::push_string(string &s, const char *filename, int lineno) +{ + input_item *it = new input_item(s, filename, lineno); + it->next = top; + top = it; +} + +void input_stack::error(const char *format, const errarg &arg1, + const errarg &arg2, const errarg &arg3) +{ + const char *filename; + int lineno; + for (input_item *it = top; it; it = it->next) + if (it->get_location(&filename, &lineno)) { + error_with_file_and_line(filename, lineno, format, arg1, arg2, arg3); + return; + } + ::error(format, arg1, arg2, arg3); +} + +void command_error(const char *format, const errarg &arg1, + const errarg &arg2, const errarg &arg3) +{ + input_stack::error(format, arg1, arg2, arg3); +} + +// # not recognized in "" +// \<newline> is recognized in "" +// # does not conceal newline +// if missing closing quote, word extends to end of line +// no special treatment of \ other than before newline +// \<newline> not recognized after # +// ; allowed as alternative to newline +// ; not recognized in "" +// don't clear word_buffer; just append on +// return -1 for EOF, 0 for newline, 1 for word + +int get_word(string &word_buffer) +{ + int c = input_stack::get_char(); + for (;;) { + if (c == '#') { + do { + c = input_stack::get_char(); + } while (c != '\n' && c != EOF); + break; + } + if (c == '\\' && input_stack::peek_char() == '\n') + input_stack::skip_char(); + else if (c != ' ' && c != '\t') + break; + c = input_stack::get_char(); + } + if (c == EOF) + return -1; + if (c == '\n' || c == ';') + return 0; + if (c == '"') { + for (;;) { + c = input_stack::peek_char(); + if (c == EOF || c == '\n') + break; + input_stack::skip_char(); + if (c == '"') { + int d = input_stack::peek_char(); + if (d == '"') + input_stack::skip_char(); + else + break; + } + else if (c == '\\') { + int d = input_stack::peek_char(); + if (d == '\n') + input_stack::skip_char(); + else + word_buffer += '\\'; + } + else + word_buffer += c; + } + return 1; + } + word_buffer += c; + for (;;) { + c = input_stack::peek_char(); + if (c == ' ' || c == '\t' || c == '\n' || c == '#' || c == ';') + break; + input_stack::skip_char(); + if (c == '\\') { + int d = input_stack::peek_char(); + if (d == '\n') + input_stack::skip_char(); + else + word_buffer += '\\'; + } + else + word_buffer += c; + } + return 1; +} + +union argument { + const char *s; + int n; +}; + +// This is for debugging. + +static void echo_command(int argc, argument *argv) +{ + for (int i = 0; i < argc; i++) + fprintf(stderr, "%s\n", argv[i].s); +} + +static void include_command(int argc, argument *argv) +{ + assert(argc == 1); + input_stack::push_file(argv[0].s); +} + +static void capitalize_command(int argc, argument *argv) +{ + if (argc > 0) + capitalize_fields = argv[0].s; + else + capitalize_fields.clear(); +} + +static void accumulate_command(int, argument *) +{ + accumulate = 1; +} + +static void no_accumulate_command(int, argument *) +{ + accumulate = 0; +} + +static void move_punctuation_command(int, argument *) +{ + move_punctuation = 1; +} + +static void no_move_punctuation_command(int, argument *) +{ + move_punctuation = 0; +} + +static void sort_command(int argc, argument *argv) +{ + if (argc == 0) + sort_fields = "AD"; + else + sort_fields = argv[0].s; + accumulate = 1; +} + +static void no_sort_command(int, argument *) +{ + sort_fields.clear(); +} + +static void articles_command(int argc, argument *argv) +{ + articles.clear(); + int i; + for (i = 0; i < argc; i++) { + articles += argv[i].s; + articles += '\0'; + } + int len = articles.length(); + for (i = 0; i < len; i++) + articles[i] = cmlower(articles[i]); +} + +static void database_command(int argc, argument *argv) +{ + for (int i = 0; i < argc; i++) + database_list.add_file(argv[i].s); +} + +static void default_database_command(int, argument *) +{ + search_default = 1; +} + +static void no_default_database_command(int, argument *) +{ + search_default = 0; +} + +static void bibliography_command(int argc, argument *argv) +{ + const char *saved_filename = current_filename; + int saved_lineno = current_lineno; + int saved_label_in_text = label_in_text; + label_in_text = 0; + if (!accumulate) + fputs(".]<\n", stdout); + for (int i = 0; i < argc; i++) + do_bib(argv[i].s); + if (accumulate) + output_references(); + else + fputs(".]>\n", stdout); + current_filename = saved_filename; + current_lineno = saved_lineno; + label_in_text = saved_label_in_text; +} + +static void annotate_command(int argc, argument *argv) +{ + if (argc > 0) + annotation_field = argv[0].s[0]; + else + annotation_field = 'X'; + if (argc == 2) + annotation_macro = argv[1].s; + else + annotation_macro = "AP"; +} + +static void no_annotate_command(int, argument *) +{ + annotation_macro.clear(); + annotation_field = -1; +} + +static void reverse_command(int, argument *argv) +{ + reverse_fields = argv[0].s; +} + +static void no_reverse_command(int, argument *) +{ + reverse_fields.clear(); +} + +static void abbreviate_command(int argc, argument *argv) +{ + abbreviate_fields = argv[0].s; + period_before_initial = argc > 1 ? argv[1].s : ". "; + period_before_last_name = argc > 2 ? argv[2].s : ". "; + period_before_other = argc > 3 ? argv[3].s : ". "; + period_before_hyphen = argc > 4 ? argv[4].s : "."; +} + +static void no_abbreviate_command(int, argument *) +{ + abbreviate_fields.clear(); +} + +string search_ignore_fields; + +static void search_ignore_command(int argc, argument *argv) +{ + if (argc > 0) + search_ignore_fields = argv[0].s; + else + search_ignore_fields = "XYZ"; + search_ignore_fields += '\0'; + linear_ignore_fields = search_ignore_fields.contents(); +} + +static void no_search_ignore_command(int, argument *) +{ + linear_ignore_fields = ""; +} + +static void search_truncate_command(int argc, argument *argv) +{ + if (argc > 0) + linear_truncate_len = argv[0].n; + else + linear_truncate_len = 6; +} + +static void no_search_truncate_command(int, argument *) +{ + linear_truncate_len = -1; +} + +static void discard_command(int argc, argument *argv) +{ + if (argc == 0) + discard_fields = "XYZ"; + else + discard_fields = argv[0].s; + accumulate = 1; +} + +static void no_discard_command(int, argument *) +{ + discard_fields.clear(); +} + +static void label_command(int, argument *argv) +{ + set_label_spec(argv[0].s); +} + +static void abbreviate_label_ranges_command(int argc, argument *argv) +{ + abbreviate_label_ranges = 1; + label_range_indicator = argc > 0 ? argv[0].s : "-"; +} + +static void no_abbreviate_label_ranges_command(int, argument *) +{ + abbreviate_label_ranges = 0; +} + +static void label_in_reference_command(int, argument *) +{ + label_in_reference = 1; +} + +static void no_label_in_reference_command(int, argument *) +{ + label_in_reference = 0; +} + +static void label_in_text_command(int, argument *) +{ + label_in_text = 1; +} + +static void no_label_in_text_command(int, argument *) +{ + label_in_text = 0; +} + +static void sort_adjacent_labels_command(int, argument *) +{ + sort_adjacent_labels = 1; +} + +static void no_sort_adjacent_labels_command(int, argument *) +{ + sort_adjacent_labels = 0; +} + +static void date_as_label_command(int argc, argument *argv) +{ + if (set_date_label_spec(argc > 0 ? argv[0].s : "D%a*")) + date_as_label = 1; +} + +static void no_date_as_label_command(int, argument *) +{ + date_as_label = 0; +} + +static void short_label_command(int, argument *argv) +{ + if (set_short_label_spec(argv[0].s)) + short_label_flag = 1; +} + +static void no_short_label_command(int, argument *) +{ + short_label_flag = 0; +} + +static void compatible_command(int, argument *) +{ + compatible_flag = 1; +} + +static void no_compatible_command(int, argument *) +{ + compatible_flag = 0; +} + +static void join_authors_command(int argc, argument *argv) +{ + join_authors_exactly_two = argv[0].s; + join_authors_default = argc > 1 ? argv[1].s : argv[0].s; + join_authors_last_two = argc == 3 ? argv[2].s : argv[0].s; +} + +static void bracket_label_command(int, argument *argv) +{ + pre_label = argv[0].s; + post_label = argv[1].s; + sep_label = argv[2].s; +} + +static void separate_label_second_parts_command(int, argument *argv) +{ + separate_label_second_parts = argv[0].s; +} + +static void et_al_command(int argc, argument *argv) +{ + et_al = argv[0].s; + et_al_min_elide = argv[1].n; + if (et_al_min_elide < 1) + et_al_min_elide = 1; + et_al_min_total = argc >= 3 ? argv[2].n : 0; +} + +static void no_et_al_command(int, argument *) +{ + et_al.clear(); + et_al_min_elide = 0; +} + +typedef void (*command_t)(int, argument *); + +/* arg_types is a string describing the numbers and types of arguments. +s means a string, i means an integer, f is a list of fields, F is +a single field, +? means that the previous argument is optional, * means that the +previous argument can occur any number of times. */ + +struct { + const char *name; + command_t func; + const char *arg_types; +} command_table[] = { + { "include", include_command, "s" }, + { "echo", echo_command, "s*" }, + { "capitalize", capitalize_command, "f?" }, + { "accumulate", accumulate_command, "" }, + { "no-accumulate", no_accumulate_command, "" }, + { "move-punctuation", move_punctuation_command, "" }, + { "no-move-punctuation", no_move_punctuation_command, "" }, + { "sort", sort_command, "s?" }, + { "no-sort", no_sort_command, "" }, + { "articles", articles_command, "s*" }, + { "database", database_command, "ss*" }, + { "default-database", default_database_command, "" }, + { "no-default-database", no_default_database_command, "" }, + { "bibliography", bibliography_command, "ss*" }, + { "annotate", annotate_command, "F?s?" }, + { "no-annotate", no_annotate_command, "" }, + { "reverse", reverse_command, "s" }, + { "no-reverse", no_reverse_command, "" }, + { "abbreviate", abbreviate_command, "ss?s?s?s?" }, + { "no-abbreviate", no_abbreviate_command, "" }, + { "search-ignore", search_ignore_command, "f?" }, + { "no-search-ignore", no_search_ignore_command, "" }, + { "search-truncate", search_truncate_command, "i?" }, + { "no-search-truncate", no_search_truncate_command, "" }, + { "discard", discard_command, "f?" }, + { "no-discard", no_discard_command, "" }, + { "label", label_command, "s" }, + { "abbreviate-label-ranges", abbreviate_label_ranges_command, "s?" }, + { "no-abbreviate-label-ranges", no_abbreviate_label_ranges_command, "" }, + { "label-in-reference", label_in_reference_command, "" }, + { "no-label-in-reference", no_label_in_reference_command, "" }, + { "label-in-text", label_in_text_command, "" }, + { "no-label-in-text", no_label_in_text_command, "" }, + { "sort-adjacent-labels", sort_adjacent_labels_command, "" }, + { "no-sort-adjacent-labels", no_sort_adjacent_labels_command, "" }, + { "date-as-label", date_as_label_command, "s?" }, + { "no-date-as-label", no_date_as_label_command, "" }, + { "short-label", short_label_command, "s" }, + { "no-short-label", no_short_label_command, "" }, + { "compatible", compatible_command, "" }, + { "no-compatible", no_compatible_command, "" }, + { "join-authors", join_authors_command, "sss?" }, + { "bracket-label", bracket_label_command, "sss" }, + { "separate-label-second-parts", separate_label_second_parts_command, "s" }, + { "et-al", et_al_command, "sii?" }, + { "no-et-al", no_et_al_command, "" }, +}; + +static int check_args(const char *types, const char *name, + int argc, argument *argv) +{ + int argno = 0; + while (*types) { + if (argc == 0) { + if (types[1] == '?') + break; + else if (types[1] == '*') { + assert(types[2] == '\0'); + break; + } + else { + input_stack::error("missing argument for command `%1'", name); + return 0; + } + } + switch (*types) { + case 's': + break; + case 'i': + { + char *ptr; + long n = strtol(argv->s, &ptr, 10); + if ((n == 0 && ptr == argv->s) + || *ptr != '\0') { + input_stack::error("argument %1 for command `%2' must be an integer", + argno + 1, name); + return 0; + } + argv->n = (int)n; + break; + } + case 'f': + { + for (const char *ptr = argv->s; *ptr != '\0'; ptr++) + if (!cs_field_name(*ptr)) { + input_stack::error("argument %1 for command `%2' must be a list of fields", + argno + 1, name); + return 0; + } + break; + } + case 'F': + if (argv->s[0] == '\0' || argv->s[1] != '\0' + || !cs_field_name(argv->s[0])) { + input_stack::error("argument %1 for command `%2' must be a field name", + argno + 1, name); + return 0; + } + break; + default: + assert(0); + } + if (types[1] == '?') + types += 2; + else if (types[1] != '*') + types += 1; + --argc; + ++argv; + ++argno; + } + if (argc > 0) { + input_stack::error("too many arguments for command `%1'", name); + return 0; + } + return 1; +} + +static void execute_command(const char *name, int argc, argument *argv) +{ + for (unsigned int i = 0; + i < sizeof(command_table)/sizeof(command_table[0]); i++) + if (strcmp(name, command_table[i].name) == 0) { + if (check_args(command_table[i].arg_types, name, argc, argv)) + (*command_table[i].func)(argc, argv); + return; + } + input_stack::error("unknown command `%1'", name); +} + +static void command_loop() +{ + string command; + for (;;) { + command.clear(); + int res = get_word(command); + if (res != 1) { + if (res == 0) + continue; + break; + } + int argc = 0; + command += '\0'; + while ((res = get_word(command)) == 1) { + argc++; + command += '\0'; + } + argument *argv = new argument[argc]; + const char *ptr = command.contents(); + for (int i = 0; i < argc; i++) + argv[i].s = ptr = strchr(ptr, '\0') + 1; + execute_command(command.contents(), argc, argv); + a_delete argv; + if (res == -1) + break; + } +} + +void process_commands(const char *file) +{ + input_stack::init(); + input_stack::push_file(file); + command_loop(); +} + +void process_commands(string &s, const char *file, int lineno) +{ + input_stack::init(); + input_stack::push_string(s, file, lineno); + command_loop(); +} diff --git a/contrib/groff/src/preproc/refer/label.cpp b/contrib/groff/src/preproc/refer/label.cpp new file mode 100644 index 000000000000..8d915a826412 --- /dev/null +++ b/contrib/groff/src/preproc/refer/label.cpp @@ -0,0 +1,1605 @@ +#ifndef lint +static char yysccsid[] = "@(#)yaccpar 1.9 (Berkeley) 02/21/93 (groff)"; +#endif +#define YYBYACC 1 +#define YYMAJOR 1 +#define YYMINOR 9 +#define yyclearin (yychar=(-1)) +#define yyerrok (yyerrflag=0) +#define YYRECOVERING (yyerrflag!=0) +#define YYPREFIX "yy" +#line 22 "label.y" + +#include "refer.h" +#include "refid.h" +#include "ref.h" +#include "token.h" + +int yylex(); +void yyerror(const char *); +int yyparse(); + +static const char *format_serial(char c, int n); + +struct label_info { + int start; + int length; + int count; + int total; + label_info(const string &); +}; + +label_info *lookup_label(const string &label); + +struct expression { + enum { + /* Does the tentative label depend on the reference?*/ + CONTAINS_VARIABLE = 01, + CONTAINS_STAR = 02, + CONTAINS_FORMAT = 04, + CONTAINS_AT = 010 + }; + virtual ~expression() { } + virtual void evaluate(int, const reference &, string &, + substring_position &) = 0; + virtual unsigned analyze() { return 0; } +}; + +class at_expr : public expression { +public: + at_expr() { } + void evaluate(int, const reference &, string &, substring_position &); + unsigned analyze() { return CONTAINS_VARIABLE|CONTAINS_AT; } +}; + +class format_expr : public expression { + char type; + int width; + int first_number; +public: + format_expr(char c, int w = 0, int f = 1) + : type(c), width(w), first_number(f) { } + void evaluate(int, const reference &, string &, substring_position &); + unsigned analyze() { return CONTAINS_FORMAT; } +}; + +class field_expr : public expression { + int number; + char name; +public: + field_expr(char nm, int num) : number(num), name(nm) { } + void evaluate(int, const reference &, string &, substring_position &); + unsigned analyze() { return CONTAINS_VARIABLE; } +}; + +class literal_expr : public expression { + string s; +public: + literal_expr(const char *ptr, int len) : s(ptr, len) { } + void evaluate(int, const reference &, string &, substring_position &); +}; + +class unary_expr : public expression { +protected: + expression *expr; +public: + unary_expr(expression *e) : expr(e) { } + ~unary_expr() { delete expr; } + void evaluate(int, const reference &, string &, substring_position &) = 0; + unsigned analyze() { return expr ? expr->analyze() : 0; } +}; + +/* This caches the analysis of an expression.*/ + +class analyzed_expr : public unary_expr { + unsigned flags; +public: + analyzed_expr(expression *); + void evaluate(int, const reference &, string &, substring_position &); + unsigned analyze() { return flags; } +}; + +class star_expr : public unary_expr { +public: + star_expr(expression *e) : unary_expr(e) { } + void evaluate(int, const reference &, string &, substring_position &); + unsigned analyze() { + return ((expr ? (expr->analyze() & ~CONTAINS_VARIABLE) : 0) + | CONTAINS_STAR); + } +}; + +typedef void map_func(const char *, const char *, string &); + +class map_expr : public unary_expr { + map_func *func; +public: + map_expr(expression *e, map_func *f) : unary_expr(e), func(f) { } + void evaluate(int, const reference &, string &, substring_position &); +}; + +typedef const char *extractor_func(const char *, const char *, const char **); + +class extractor_expr : public unary_expr { + int part; + extractor_func *func; +public: + enum { BEFORE = +1, MATCH = 0, AFTER = -1 }; + extractor_expr(expression *e, extractor_func *f, int pt) + : unary_expr(e), part(pt), func(f) { } + void evaluate(int, const reference &, string &, substring_position &); +}; + +class truncate_expr : public unary_expr { + int n; +public: + truncate_expr(expression *e, int i) : unary_expr(e), n(i) { } + void evaluate(int, const reference &, string &, substring_position &); +}; + +class separator_expr : public unary_expr { +public: + separator_expr(expression *e) : unary_expr(e) { } + void evaluate(int, const reference &, string &, substring_position &); +}; + +class binary_expr : public expression { +protected: + expression *expr1; + expression *expr2; +public: + binary_expr(expression *e1, expression *e2) : expr1(e1), expr2(e2) { } + ~binary_expr() { delete expr1; delete expr2; } + void evaluate(int, const reference &, string &, substring_position &) = 0; + unsigned analyze() { + return (expr1 ? expr1->analyze() : 0) | (expr2 ? expr2->analyze() : 0); + } +}; + +class alternative_expr : public binary_expr { +public: + alternative_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { } + void evaluate(int, const reference &, string &, substring_position &); +}; + +class list_expr : public binary_expr { +public: + list_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { } + void evaluate(int, const reference &, string &, substring_position &); +}; + +class substitute_expr : public binary_expr { +public: + substitute_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { } + void evaluate(int, const reference &, string &, substring_position &); +}; + +class ternary_expr : public expression { +protected: + expression *expr1; + expression *expr2; + expression *expr3; +public: + ternary_expr(expression *e1, expression *e2, expression *e3) + : expr1(e1), expr2(e2), expr3(e3) { } + ~ternary_expr() { delete expr1; delete expr2; delete expr3; } + void evaluate(int, const reference &, string &, substring_position &) = 0; + unsigned analyze() { + return ((expr1 ? expr1->analyze() : 0) + | (expr2 ? expr2->analyze() : 0) + | (expr3 ? expr3->analyze() : 0)); + } +}; + +class conditional_expr : public ternary_expr { +public: + conditional_expr(expression *e1, expression *e2, expression *e3) + : ternary_expr(e1, e2, e3) { } + void evaluate(int, const reference &, string &, substring_position &); +}; + +static expression *parsed_label = 0; +static expression *parsed_date_label = 0; +static expression *parsed_short_label = 0; + +static expression *parse_result; + +string literals; + +#line 221 "label.y" +typedef union { + int num; + expression *expr; + struct { int ndigits; int val; } dig; + struct { int start; int len; } str; +} YYSTYPE; +#line 217 "y.tab.c" +#define TOKEN_LETTER 257 +#define TOKEN_LITERAL 258 +#define TOKEN_DIGIT 259 +#define YYERRCODE 256 +short yylhs[] = { -1, + 0, 1, 1, 6, 6, 2, 2, 2, 3, 3, + 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 9, 9, 7, 7, 8, 8, + 10, 10, 10, +}; +short yylen[] = { 2, + 1, 1, 5, 0, 1, 1, 3, 3, 1, 2, + 1, 3, 1, 1, 1, 2, 2, 2, 5, 3, + 3, 2, 3, 3, 0, 1, 1, 2, 1, 2, + 0, 1, 1, +}; +short yydefred[] = { 0, + 0, 14, 13, 0, 0, 0, 0, 5, 0, 0, + 0, 0, 1, 27, 0, 17, 29, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 22, 0, 28, + 30, 23, 24, 0, 0, 0, 32, 33, 0, 0, + 0, 0, 0, 0, 3, 0, 19, +}; +short yydgoto[] = { 7, + 8, 9, 10, 11, 12, 13, 15, 18, 47, 39, +}; +short yysindex[] = { -32, + -257, 0, 0, -240, -32, -32, 0, 0, -18, -32, + -36, -114, 0, 0, -246, 0, 0, -241, -14, -39, + -32, -32, -32, -114, -21, -257, -257, 0, -32, 0, + 0, 0, 0, -25, -32, -32, 0, 0, -223, -246, + -246, -36, -32, -257, 0, -246, 0, +}; +short yyrindex[] = { 35, + 1, 0, 0, 0, -5, -4, 0, 0, 14, 208, + 159, 224, 0, 0, 11, 0, 0, 40, 0, 0, + 2, 0, 0, 253, -220, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 263, 281, 0, 0, 0, 50, + 105, 214, 0, 115, 0, 149, 0, +}; +short yygindex[] = { 0, + 19, 0, 7, 37, -10, 10, -23, 0, 0, 0, +}; +#define YYTABLESIZE 511 +short yytable[] = { 24, + 15, 14, 40, 41, 4, 28, 26, 5, 27, 25, + 16, 29, 30, 2, 19, 20, 16, 31, 17, 23, + 46, 37, 33, 38, 24, 24, 32, 6, 35, 36, + 34, 3, 43, 44, 4, 4, 31, 15, 15, 18, + 15, 15, 15, 15, 21, 15, 15, 16, 16, 20, + 16, 16, 16, 16, 2, 16, 16, 4, 15, 4, + 15, 45, 15, 15, 15, 42, 0, 0, 16, 0, + 16, 2, 16, 16, 16, 2, 18, 18, 0, 18, + 18, 18, 18, 0, 18, 18, 20, 20, 0, 20, + 20, 20, 20, 0, 20, 20, 0, 18, 0, 18, + 0, 18, 18, 18, 21, 22, 0, 20, 0, 20, + 0, 20, 20, 20, 25, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 15, 0, 15, 0, 0, 0, + 0, 0, 0, 0, 16, 0, 16, 0, 0, 0, + 0, 21, 21, 0, 21, 21, 21, 21, 26, 21, + 21, 25, 25, 0, 25, 25, 25, 25, 11, 25, + 25, 0, 21, 18, 21, 18, 21, 21, 21, 0, + 0, 0, 25, 20, 25, 20, 25, 25, 25, 0, + 0, 0, 0, 0, 0, 26, 26, 0, 26, 26, + 26, 26, 0, 26, 26, 11, 11, 0, 11, 11, + 0, 0, 0, 0, 0, 0, 26, 6, 26, 0, + 26, 26, 26, 12, 0, 0, 11, 0, 11, 0, + 11, 11, 11, 9, 1, 2, 0, 0, 21, 0, + 21, 0, 0, 0, 0, 0, 0, 0, 25, 0, + 25, 0, 0, 0, 0, 6, 0, 0, 6, 0, + 12, 12, 10, 12, 12, 0, 0, 15, 15, 0, + 9, 9, 7, 9, 9, 6, 0, 16, 16, 6, + 6, 12, 26, 12, 26, 12, 12, 12, 0, 0, + 8, 9, 11, 9, 11, 9, 9, 9, 0, 10, + 10, 0, 10, 10, 0, 0, 18, 18, 0, 0, + 7, 0, 0, 7, 0, 0, 20, 20, 0, 0, + 10, 0, 10, 0, 10, 10, 10, 0, 8, 0, + 7, 8, 0, 0, 7, 7, 0, 0, 0, 0, + 0, 6, 0, 0, 0, 0, 0, 12, 8, 12, + 0, 0, 8, 8, 0, 0, 0, 9, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 21, 21, 0, 0, 0, 0, 0, 0, 0, + 0, 25, 25, 0, 0, 0, 10, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 7, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 8, 26, 26, 0, 0, 0, + 0, 0, 0, 0, 0, 11, 11, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 12, 12, 0, 0, 0, 0, 0, 0, 0, 0, + 9, 9, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 10, + 10, +}; +short yycheck[] = { 10, + 0, 259, 26, 27, 37, 42, 43, 40, 45, 46, + 0, 126, 259, 0, 5, 6, 257, 259, 259, 38, + 44, 43, 62, 45, 35, 36, 41, 60, 22, 23, + 21, 64, 58, 257, 0, 41, 257, 37, 38, 0, + 40, 41, 42, 43, 63, 45, 46, 37, 38, 0, + 40, 41, 42, 43, 41, 45, 46, 62, 58, 58, + 60, 43, 62, 63, 64, 29, -1, -1, 58, -1, + 60, 58, 62, 63, 64, 62, 37, 38, -1, 40, + 41, 42, 43, -1, 45, 46, 37, 38, -1, 40, + 41, 42, 43, -1, 45, 46, -1, 58, -1, 60, + -1, 62, 63, 64, 0, 124, -1, 58, -1, 60, + -1, 62, 63, 64, 0, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 124, -1, 126, -1, -1, -1, + -1, -1, -1, -1, 124, -1, 126, -1, -1, -1, + -1, 37, 38, -1, 40, 41, 42, 43, 0, 45, + 46, 37, 38, -1, 40, 41, 42, 43, 0, 45, + 46, -1, 58, 124, 60, 126, 62, 63, 64, -1, + -1, -1, 58, 124, 60, 126, 62, 63, 64, -1, + -1, -1, -1, -1, -1, 37, 38, -1, 40, 41, + 42, 43, -1, 45, 46, 37, 38, -1, 40, 41, + -1, -1, -1, -1, -1, -1, 58, 0, 60, -1, + 62, 63, 64, 0, -1, -1, 58, -1, 60, -1, + 62, 63, 64, 0, 257, 258, -1, -1, 124, -1, + 126, -1, -1, -1, -1, -1, -1, -1, 124, -1, + 126, -1, -1, -1, -1, 38, -1, -1, 41, -1, + 37, 38, 0, 40, 41, -1, -1, 257, 258, -1, + 37, 38, 0, 40, 41, 58, -1, 257, 258, 62, + 63, 58, 124, 60, 126, 62, 63, 64, -1, -1, + 0, 58, 124, 60, 126, 62, 63, 64, -1, 37, + 38, -1, 40, 41, -1, -1, 257, 258, -1, -1, + 38, -1, -1, 41, -1, -1, 257, 258, -1, -1, + 58, -1, 60, -1, 62, 63, 64, -1, 38, -1, + 58, 41, -1, -1, 62, 63, -1, -1, -1, -1, + -1, 124, -1, -1, -1, -1, -1, 124, 58, 126, + -1, -1, 62, 63, -1, -1, -1, 124, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, 257, 258, -1, -1, -1, -1, -1, -1, -1, + -1, 257, 258, -1, -1, -1, 124, -1, -1, -1, + -1, -1, -1, -1, -1, -1, 124, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, 124, 257, 258, -1, -1, -1, + -1, -1, -1, -1, -1, 257, 258, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + 257, 258, -1, -1, -1, -1, -1, -1, -1, -1, + 257, 258, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, 257, + 258, +}; +#define YYFINAL 7 +#ifndef YYDEBUG +#define YYDEBUG 0 +#endif +#define YYMAXTOKEN 259 +#if YYDEBUG +char *yyname[] = { +"end-of-file",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,"'%'","'&'",0,"'('","')'","'*'","'+'",0,"'-'","'.'",0,0,0,0,0,0,0,0,0,0,0, +"':'",0,"'<'",0,"'>'","'?'","'@'",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,"'|'",0, +"'~'",0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +0,0,0,0,0,0,0,0,0,0,0,0,0,"TOKEN_LETTER","TOKEN_LITERAL","TOKEN_DIGIT", +}; +char *yyrule[] = { +"$accept : expr", +"expr : optional_conditional", +"conditional : alternative", +"conditional : alternative '?' optional_conditional ':' conditional", +"optional_conditional :", +"optional_conditional : conditional", +"alternative : list", +"alternative : alternative '|' list", +"alternative : alternative '&' list", +"list : substitute", +"list : list substitute", +"substitute : string", +"substitute : substitute '~' string", +"string : '@'", +"string : TOKEN_LITERAL", +"string : TOKEN_LETTER", +"string : TOKEN_LETTER number", +"string : '%' TOKEN_LETTER", +"string : '%' digits", +"string : string '.' flag TOKEN_LETTER optional_number", +"string : string '+' number", +"string : string '-' number", +"string : string '*'", +"string : '(' optional_conditional ')'", +"string : '<' optional_conditional '>'", +"optional_number :", +"optional_number : number", +"number : TOKEN_DIGIT", +"number : number TOKEN_DIGIT", +"digits : TOKEN_DIGIT", +"digits : digits TOKEN_DIGIT", +"flag :", +"flag : '+'", +"flag : '-'", +}; +#endif +#ifdef YYSTACKSIZE +#undef YYMAXDEPTH +#define YYMAXDEPTH YYSTACKSIZE +#else +#ifdef YYMAXDEPTH +#define YYSTACKSIZE YYMAXDEPTH +#else +#define YYSTACKSIZE 500 +#define YYMAXDEPTH 500 +#endif +#endif +int yydebug; +int yynerrs; +int yyerrflag; +int yychar; +short *yyssp; +YYSTYPE *yyvsp; +YYSTYPE yyval; +YYSTYPE yylval; +short yyss[YYSTACKSIZE]; +YYSTYPE yyvs[YYSTACKSIZE]; +#define yystacksize YYSTACKSIZE +#line 397 "label.y" + +/* bison defines const to be empty unless __STDC__ is defined, which it +isn't under cfront */ + +#ifdef const +#undef const +#endif + +const char *spec_ptr; +const char *spec_end; +const char *spec_cur; + +int yylex() +{ + while (spec_ptr < spec_end && csspace(*spec_ptr)) + spec_ptr++; + spec_cur = spec_ptr; + if (spec_ptr >= spec_end) + return 0; + unsigned char c = *spec_ptr++; + if (csalpha(c)) { + yylval.num = c; + return TOKEN_LETTER; + } + if (csdigit(c)) { + yylval.num = c - '0'; + return TOKEN_DIGIT; + } + if (c == '\'') { + yylval.str.start = literals.length(); + for (; spec_ptr < spec_end; spec_ptr++) { + if (*spec_ptr == '\'') { + if (++spec_ptr < spec_end && *spec_ptr == '\'') + literals += '\''; + else { + yylval.str.len = literals.length() - yylval.str.start; + return TOKEN_LITERAL; + } + } + else + literals += *spec_ptr; + } + yylval.str.len = literals.length() - yylval.str.start; + return TOKEN_LITERAL; + } + return c; +} + +int set_label_spec(const char *label_spec) +{ + spec_cur = spec_ptr = label_spec; + spec_end = strchr(label_spec, '\0'); + literals.clear(); + if (yyparse()) + return 0; + delete parsed_label; + parsed_label = parse_result; + return 1; +} + +int set_date_label_spec(const char *label_spec) +{ + spec_cur = spec_ptr = label_spec; + spec_end = strchr(label_spec, '\0'); + literals.clear(); + if (yyparse()) + return 0; + delete parsed_date_label; + parsed_date_label = parse_result; + return 1; +} + +int set_short_label_spec(const char *label_spec) +{ + spec_cur = spec_ptr = label_spec; + spec_end = strchr(label_spec, '\0'); + literals.clear(); + if (yyparse()) + return 0; + delete parsed_short_label; + parsed_short_label = parse_result; + return 1; +} + +void yyerror(const char *message) +{ + if (spec_cur < spec_end) + command_error("label specification %1 before `%2'", message, spec_cur); + else + command_error("label specification %1 at end of string", + message, spec_cur); +} + +void at_expr::evaluate(int tentative, const reference &ref, + string &result, substring_position &) +{ + if (tentative) + ref.canonicalize_authors(result); + else { + const char *end, *start = ref.get_authors(&end); + if (start) + result.append(start, end - start); + } +} + +void format_expr::evaluate(int tentative, const reference &ref, + string &result, substring_position &) +{ + if (tentative) + return; + const label_info *lp = ref.get_label_ptr(); + int num = lp == 0 ? ref.get_number() : lp->count; + if (type != '0') + result += format_serial(type, num + 1); + else { + const char *ptr = i_to_a(num + first_number); + int pad = width - strlen(ptr); + while (--pad >= 0) + result += '0'; + result += ptr; + } +} + +static const char *format_serial(char c, int n) +{ + assert(n > 0); + static char buf[128]; // more than enough. + switch (c) { + case 'i': + case 'I': + { + char *p = buf; + // troff uses z and w to represent 10000 and 5000 in Roman + // numerals; I can find no historical basis for this usage + const char *s = c == 'i' ? "zwmdclxvi" : "ZWMDCLXVI"; + if (n >= 40000) + return i_to_a(n); + while (n >= 10000) { + *p++ = s[0]; + n -= 10000; + } + for (int i = 1000; i > 0; i /= 10, s += 2) { + int m = n/i; + n -= m*i; + switch (m) { + case 3: + *p++ = s[2]; + /* falls through */ + case 2: + *p++ = s[2]; + /* falls through */ + case 1: + *p++ = s[2]; + break; + case 4: + *p++ = s[2]; + *p++ = s[1]; + break; + case 8: + *p++ = s[1]; + *p++ = s[2]; + *p++ = s[2]; + *p++ = s[2]; + break; + case 7: + *p++ = s[1]; + *p++ = s[2]; + *p++ = s[2]; + break; + case 6: + *p++ = s[1]; + *p++ = s[2]; + break; + case 5: + *p++ = s[1]; + break; + case 9: + *p++ = s[2]; + *p++ = s[0]; + } + } + *p = 0; + break; + } + case 'a': + case 'A': + { + char *p = buf; + // this is derived from troff/reg.c + while (n > 0) { + int d = n % 26; + if (d == 0) + d = 26; + n -= d; + n /= 26; + *p++ = c + d - 1; // ASCII dependent + } + *p-- = 0; + // Reverse it. + char *q = buf; + while (q < p) { + char temp = *q; + *q = *p; + *p = temp; + --p; + ++q; + } + break; + } + default: + assert(0); + } + return buf; +} + +void field_expr::evaluate(int, const reference &ref, + string &result, substring_position &) +{ + const char *end; + const char *start = ref.get_field(name, &end); + if (start) { + start = nth_field(number, start, &end); + if (start) + result.append(start, end - start); + } +} + +void literal_expr::evaluate(int, const reference &, + string &result, substring_position &) +{ + result += s; +} + +analyzed_expr::analyzed_expr(expression *e) +: unary_expr(e), flags(e ? e->analyze() : 0) +{ +} + +void analyzed_expr::evaluate(int tentative, const reference &ref, + string &result, substring_position &pos) +{ + if (expr) + expr->evaluate(tentative, ref, result, pos); +} + +void star_expr::evaluate(int tentative, const reference &ref, + string &result, substring_position &pos) +{ + const label_info *lp = ref.get_label_ptr(); + if (!tentative + && (lp == 0 || lp->total > 1) + && expr) + expr->evaluate(tentative, ref, result, pos); +} + +void separator_expr::evaluate(int tentative, const reference &ref, + string &result, substring_position &pos) +{ + int start_length = result.length(); + int is_first = pos.start < 0; + if (expr) + expr->evaluate(tentative, ref, result, pos); + if (is_first) { + pos.start = start_length; + pos.length = result.length() - start_length; + } +} + +void map_expr::evaluate(int tentative, const reference &ref, + string &result, substring_position &) +{ + if (expr) { + string temp; + substring_position temp_pos; + expr->evaluate(tentative, ref, temp, temp_pos); + (*func)(temp.contents(), temp.contents() + temp.length(), result); + } +} + +void extractor_expr::evaluate(int tentative, const reference &ref, + string &result, substring_position &) +{ + if (expr) { + string temp; + substring_position temp_pos; + expr->evaluate(tentative, ref, temp, temp_pos); + const char *end, *start = (*func)(temp.contents(), + temp.contents() + temp.length(), + &end); + switch (part) { + case BEFORE: + if (start) + result.append(temp.contents(), start - temp.contents()); + else + result += temp; + break; + case MATCH: + if (start) + result.append(start, end - start); + break; + case AFTER: + if (start) + result.append(end, temp.contents() + temp.length() - end); + break; + default: + assert(0); + } + } +} + +static void first_part(int len, const char *ptr, const char *end, + string &result) +{ + for (;;) { + const char *token_start = ptr; + if (!get_token(&ptr, end)) + break; + const token_info *ti = lookup_token(token_start, ptr); + int counts = ti->sortify_non_empty(token_start, ptr); + if (counts && --len < 0) + break; + if (counts || ti->is_accent()) + result.append(token_start, ptr - token_start); + } +} + +static void last_part(int len, const char *ptr, const char *end, + string &result) +{ + const char *start = ptr; + int count = 0; + for (;;) { + const char *token_start = ptr; + if (!get_token(&ptr, end)) + break; + const token_info *ti = lookup_token(token_start, ptr); + if (ti->sortify_non_empty(token_start, ptr)) + count++; + } + ptr = start; + int skip = count - len; + if (skip > 0) { + for (;;) { + const char *token_start = ptr; + if (!get_token(&ptr, end)) + assert(0); + const token_info *ti = lookup_token(token_start, ptr); + if (ti->sortify_non_empty(token_start, ptr) && --skip < 0) { + ptr = token_start; + break; + } + } + } + first_part(len, ptr, end, result); +} + +void truncate_expr::evaluate(int tentative, const reference &ref, + string &result, substring_position &) +{ + if (expr) { + string temp; + substring_position temp_pos; + expr->evaluate(tentative, ref, temp, temp_pos); + const char *start = temp.contents(); + const char *end = start + temp.length(); + if (n > 0) + first_part(n, start, end, result); + else if (n < 0) + last_part(-n, start, end, result); + } +} + +void alternative_expr::evaluate(int tentative, const reference &ref, + string &result, substring_position &pos) +{ + int start_length = result.length(); + if (expr1) + expr1->evaluate(tentative, ref, result, pos); + if (result.length() == start_length && expr2) + expr2->evaluate(tentative, ref, result, pos); +} + +void list_expr::evaluate(int tentative, const reference &ref, + string &result, substring_position &pos) +{ + if (expr1) + expr1->evaluate(tentative, ref, result, pos); + if (expr2) + expr2->evaluate(tentative, ref, result, pos); +} + +void substitute_expr::evaluate(int tentative, const reference &ref, + string &result, substring_position &pos) +{ + int start_length = result.length(); + if (expr1) + expr1->evaluate(tentative, ref, result, pos); + if (result.length() > start_length && result[result.length() - 1] == '-') { + // ought to see if pos covers the - + result.set_length(result.length() - 1); + if (expr2) + expr2->evaluate(tentative, ref, result, pos); + } +} + +void conditional_expr::evaluate(int tentative, const reference &ref, + string &result, substring_position &pos) +{ + string temp; + substring_position temp_pos; + if (expr1) + expr1->evaluate(tentative, ref, temp, temp_pos); + if (temp.length() > 0) { + if (expr2) + expr2->evaluate(tentative, ref, result, pos); + } + else { + if (expr3) + expr3->evaluate(tentative, ref, result, pos); + } +} + +void reference::pre_compute_label() +{ + if (parsed_label != 0 + && (parsed_label->analyze() & expression::CONTAINS_VARIABLE)) { + label.clear(); + substring_position temp_pos; + parsed_label->evaluate(1, *this, label, temp_pos); + label_ptr = lookup_label(label); + } +} + +void reference::compute_label() +{ + label.clear(); + if (parsed_label) + parsed_label->evaluate(0, *this, label, separator_pos); + if (short_label_flag && parsed_short_label) + parsed_short_label->evaluate(0, *this, short_label, short_separator_pos); + if (date_as_label) { + string new_date; + if (parsed_date_label) { + substring_position temp_pos; + parsed_date_label->evaluate(0, *this, new_date, temp_pos); + } + set_date(new_date); + } + if (label_ptr) + label_ptr->count += 1; +} + +void reference::immediate_compute_label() +{ + if (label_ptr) + label_ptr->total = 2; // force use of disambiguator + compute_label(); +} + +int reference::merge_labels(reference **v, int n, label_type type, + string &result) +{ + if (abbreviate_label_ranges) + return merge_labels_by_number(v, n, type, result); + else + return merge_labels_by_parts(v, n, type, result); +} + +int reference::merge_labels_by_number(reference **v, int n, label_type type, + string &result) +{ + if (n <= 1) + return 0; + int num = get_number(); + // Only merge three or more labels. + if (v[0]->get_number() != num + 1 + || v[1]->get_number() != num + 2) + return 0; + int i; + for (i = 2; i < n; i++) + if (v[i]->get_number() != num + i + 1) + break; + result = get_label(type); + result += label_range_indicator; + result += v[i - 1]->get_label(type); + return i; +} + +const substring_position &reference::get_separator_pos(label_type type) const +{ + if (type == SHORT_LABEL && short_label_flag) + return short_separator_pos; + else + return separator_pos; +} + +const string &reference::get_label(label_type type) const +{ + if (type == SHORT_LABEL && short_label_flag) + return short_label; + else + return label; +} + +int reference::merge_labels_by_parts(reference **v, int n, label_type type, + string &result) +{ + if (n <= 0) + return 0; + const string &lb = get_label(type); + const substring_position &sp = get_separator_pos(type); + if (sp.start < 0 + || sp.start != v[0]->get_separator_pos(type).start + || memcmp(lb.contents(), v[0]->get_label(type).contents(), + sp.start) != 0) + return 0; + result = lb; + int i = 0; + do { + result += separate_label_second_parts; + const substring_position &s = v[i]->get_separator_pos(type); + int sep_end_pos = s.start + s.length; + result.append(v[i]->get_label(type).contents() + sep_end_pos, + v[i]->get_label(type).length() - sep_end_pos); + } while (++i < n + && sp.start == v[i]->get_separator_pos(type).start + && memcmp(lb.contents(), v[i]->get_label(type).contents(), + sp.start) == 0); + return i; +} + +string label_pool; + +label_info::label_info(const string &s) +: start(label_pool.length()), length(s.length()), count(0), total(1) +{ + label_pool += s; +} + +static label_info **label_table = 0; +static int label_table_size = 0; +static int label_table_used = 0; + +label_info *lookup_label(const string &label) +{ + if (label_table == 0) { + label_table = new label_info *[17]; + label_table_size = 17; + for (int i = 0; i < 17; i++) + label_table[i] = 0; + } + unsigned h = hash_string(label.contents(), label.length()) % label_table_size; + label_info **ptr; + for (ptr = label_table + h; + *ptr != 0; + (ptr == label_table) + ? (ptr = label_table + label_table_size - 1) + : ptr--) + if ((*ptr)->length == label.length() + && memcmp(label_pool.contents() + (*ptr)->start, label.contents(), + label.length()) == 0) { + (*ptr)->total += 1; + return *ptr; + } + label_info *result = *ptr = new label_info(label); + if (++label_table_used * 2 > label_table_size) { + // Rehash the table. + label_info **old_table = label_table; + int old_size = label_table_size; + label_table_size = next_size(label_table_size); + label_table = new label_info *[label_table_size]; + int i; + for (i = 0; i < label_table_size; i++) + label_table[i] = 0; + for (i = 0; i < old_size; i++) + if (old_table[i]) { + unsigned h = hash_string(label_pool.contents() + old_table[i]->start, + old_table[i]->length); + label_info **p; + for (p = label_table + (h % label_table_size); + *p != 0; + (p == label_table) + ? (p = label_table + label_table_size - 1) + : --p) + ; + *p = old_table[i]; + } + a_delete old_table; + } + return result; +} + +void clear_labels() +{ + for (int i = 0; i < label_table_size; i++) { + delete label_table[i]; + label_table[i] = 0; + } + label_table_used = 0; + label_pool.clear(); +} + +static void consider_authors(reference **start, reference **end, int i); + +void compute_labels(reference **v, int n) +{ + if (parsed_label + && (parsed_label->analyze() & expression::CONTAINS_AT) + && sort_fields.length() >= 2 + && sort_fields[0] == 'A' + && sort_fields[1] == '+') + consider_authors(v, v + n, 0); + for (int i = 0; i < n; i++) + v[i]->compute_label(); +} + + +/* A reference with a list of authors <A0,A1,...,AN> _needs_ author i +where 0 <= i <= N if there exists a reference with a list of authors +<B0,B1,...,BM> such that <A0,A1,...,AN> != <B0,B1,...,BM> and M >= i +and Aj = Bj for 0 <= j < i. In this case if we can't say ``A0, +A1,...,A(i-1) et al'' because this would match both <A0,A1,...,AN> and +<B0,B1,...,BM>. If a reference needs author i we only have to call +need_author(j) for some j >= i such that the reference also needs +author j. */ + +/* This function handles 2 tasks: +determine which authors are needed (cannot be elided with et al.); +determine which authors can have only last names in the labels. + +References >= start and < end have the same first i author names. +Also they're sorted by A+. */ + +static void consider_authors(reference **start, reference **end, int i) +{ + if (start >= end) + return; + reference **p = start; + if (i >= (*p)->get_nauthors()) { + for (++p; p < end && i >= (*p)->get_nauthors(); p++) + ; + if (p < end && i > 0) { + // If we have an author list <A B C> and an author list <A B C D>, + // then both lists need C. + for (reference **q = start; q < end; q++) + (*q)->need_author(i - 1); + } + start = p; + } + while (p < end) { + reference **last_name_start = p; + reference **name_start = p; + for (++p; + p < end && i < (*p)->get_nauthors() + && same_author_last_name(**last_name_start, **p, i); + p++) { + if (!same_author_name(**name_start, **p, i)) { + consider_authors(name_start, p, i + 1); + name_start = p; + } + } + consider_authors(name_start, p, i + 1); + if (last_name_start == name_start) { + for (reference **q = last_name_start; q < p; q++) + (*q)->set_last_name_unambiguous(i); + } + // If we have an author list <A B C D> and <A B C E>, then the lists + // need author D and E respectively. + if (name_start > start || p < end) { + for (reference **q = last_name_start; q < p; q++) + (*q)->need_author(i); + } + } +} + +int same_author_last_name(const reference &r1, const reference &r2, int n) +{ + const char *ae1; + const char *as1 = r1.get_sort_field(0, n, 0, &ae1); + assert(as1 != 0); + const char *ae2; + const char *as2 = r2.get_sort_field(0, n, 0, &ae2); + assert(as2 != 0); + return ae1 - as1 == ae2 - as2 && memcmp(as1, as2, ae1 - as1) == 0; +} + +int same_author_name(const reference &r1, const reference &r2, int n) +{ + const char *ae1; + const char *as1 = r1.get_sort_field(0, n, -1, &ae1); + assert(as1 != 0); + const char *ae2; + const char *as2 = r2.get_sort_field(0, n, -1, &ae2); + assert(as2 != 0); + return ae1 - as1 == ae2 - as2 && memcmp(as1, as2, ae1 - as1) == 0; +} + + +void int_set::set(int i) +{ + assert(i >= 0); + int bytei = i >> 3; + if (bytei >= v.length()) { + int old_length = v.length(); + v.set_length(bytei + 1); + for (int j = old_length; j <= bytei; j++) + v[j] = 0; + } + v[bytei] |= 1 << (i & 7); +} + +int int_set::get(int i) const +{ + assert(i >= 0); + int bytei = i >> 3; + return bytei >= v.length() ? 0 : (v[bytei] & (1 << (i & 7))) != 0; +} + +void reference::set_last_name_unambiguous(int i) +{ + last_name_unambiguous.set(i); +} + +void reference::need_author(int n) +{ + if (n > last_needed_author) + last_needed_author = n; +} + +const char *reference::get_authors(const char **end) const +{ + if (!computed_authors) { + ((reference *)this)->computed_authors = 1; + string &result = ((reference *)this)->authors; + int na = get_nauthors(); + result.clear(); + for (int i = 0; i < na; i++) { + if (last_name_unambiguous.get(i)) { + const char *e, *start = get_author_last_name(i, &e); + assert(start != 0); + result.append(start, e - start); + } + else { + const char *e, *start = get_author(i, &e); + assert(start != 0); + result.append(start, e - start); + } + if (i == last_needed_author + && et_al.length() > 0 + && et_al_min_elide > 0 + && last_needed_author + et_al_min_elide < na + && na >= et_al_min_total) { + result += et_al; + break; + } + if (i < na - 1) { + if (na == 2) + result += join_authors_exactly_two; + else if (i < na - 2) + result += join_authors_default; + else + result += join_authors_last_two; + } + } + } + const char *start = authors.contents(); + *end = start + authors.length(); + return start; +} + +int reference::get_nauthors() const +{ + if (nauthors < 0) { + const char *dummy; + int na; + for (na = 0; get_author(na, &dummy) != 0; na++) + ; + ((reference *)this)->nauthors = na; + } + return nauthors; +} +#line 1227 "y.tab.c" +#define YYABORT goto yyabort +#define YYREJECT goto yyabort +#define YYACCEPT goto yyaccept +#define YYERROR goto yyerrlab +int +#if defined(__STDC__) +yyparse(void) +#else +yyparse() +#endif +{ + register int yym, yyn, yystate; +#if YYDEBUG + register char *yys; + extern char *getenv(); + + if (yys = getenv("YYDEBUG")) + { + yyn = *yys; + if (yyn >= '0' && yyn <= '9') + yydebug = yyn - '0'; + } +#endif + + yynerrs = 0; + yyerrflag = 0; + yychar = (-1); + + yyssp = yyss; + yyvsp = yyvs; + *yyssp = yystate = 0; + +yyloop: + if ((yyn = yydefred[yystate]) != 0) goto yyreduce; + if (yychar < 0) + { + if ((yychar = yylex()) < 0) yychar = 0; +#if YYDEBUG + if (yydebug) + { + yys = 0; + if (yychar <= YYMAXTOKEN) yys = yyname[yychar]; + if (!yys) yys = "illegal-symbol"; + printf("%sdebug: state %d, reading %d (%s)\n", + YYPREFIX, yystate, yychar, yys); + } +#endif + } + if ((yyn = yysindex[yystate]) && (yyn += yychar) >= 0 && + yyn <= YYTABLESIZE && yycheck[yyn] == yychar) + { +#if YYDEBUG + if (yydebug) + printf("%sdebug: state %d, shifting to state %d\n", + YYPREFIX, yystate, yytable[yyn]); +#endif + if (yyssp >= yyss + yystacksize - 1) + { + goto yyoverflow; + } + *++yyssp = yystate = yytable[yyn]; + *++yyvsp = yylval; + yychar = (-1); + if (yyerrflag > 0) --yyerrflag; + goto yyloop; + } + if ((yyn = yyrindex[yystate]) && (yyn += yychar) >= 0 && + yyn <= YYTABLESIZE && yycheck[yyn] == yychar) + { + yyn = yytable[yyn]; + goto yyreduce; + } + if (yyerrflag) goto yyinrecovery; +#ifdef lint + goto yynewerror; +#endif +yynewerror: + yyerror("syntax error"); +#ifdef lint + goto yyerrlab; +#endif +yyerrlab: + ++yynerrs; +yyinrecovery: + if (yyerrflag < 3) + { + yyerrflag = 3; + for (;;) + { + if ((yyn = yysindex[*yyssp]) && (yyn += YYERRCODE) >= 0 && + yyn <= YYTABLESIZE && yycheck[yyn] == YYERRCODE) + { +#if YYDEBUG + if (yydebug) + printf("%sdebug: state %d, error recovery shifting\ + to state %d\n", YYPREFIX, *yyssp, yytable[yyn]); +#endif + if (yyssp >= yyss + yystacksize - 1) + { + goto yyoverflow; + } + *++yyssp = yystate = yytable[yyn]; + *++yyvsp = yylval; + goto yyloop; + } + else + { +#if YYDEBUG + if (yydebug) + printf("%sdebug: error recovery discarding state %d\n", + YYPREFIX, *yyssp); +#endif + if (yyssp <= yyss) goto yyabort; + --yyssp; + --yyvsp; + } + } + } + else + { + if (yychar == 0) goto yyabort; +#if YYDEBUG + if (yydebug) + { + yys = 0; + if (yychar <= YYMAXTOKEN) yys = yyname[yychar]; + if (!yys) yys = "illegal-symbol"; + printf("%sdebug: state %d, error recovery discards token %d (%s)\n", + YYPREFIX, yystate, yychar, yys); + } +#endif + yychar = (-1); + goto yyloop; + } +yyreduce: +#if YYDEBUG + if (yydebug) + printf("%sdebug: state %d, reducing by rule %d (%s)\n", + YYPREFIX, yystate, yyn, yyrule[yyn]); +#endif + yym = yylen[yyn]; + yyval = yyvsp[1-yym]; + switch (yyn) + { +case 1: +#line 250 "label.y" +{ parse_result = (yyvsp[0].expr ? new analyzed_expr(yyvsp[0].expr) : 0); } +break; +case 2: +#line 255 "label.y" +{ yyval.expr = yyvsp[0].expr; } +break; +case 3: +#line 257 "label.y" +{ yyval.expr = new conditional_expr(yyvsp[-4].expr, yyvsp[-2].expr, yyvsp[0].expr); } +break; +case 4: +#line 262 "label.y" +{ yyval.expr = 0; } +break; +case 5: +#line 264 "label.y" +{ yyval.expr = yyvsp[0].expr; } +break; +case 6: +#line 269 "label.y" +{ yyval.expr = yyvsp[0].expr; } +break; +case 7: +#line 271 "label.y" +{ yyval.expr = new alternative_expr(yyvsp[-2].expr, yyvsp[0].expr); } +break; +case 8: +#line 273 "label.y" +{ yyval.expr = new conditional_expr(yyvsp[-2].expr, yyvsp[0].expr, 0); } +break; +case 9: +#line 278 "label.y" +{ yyval.expr = yyvsp[0].expr; } +break; +case 10: +#line 280 "label.y" +{ yyval.expr = new list_expr(yyvsp[-1].expr, yyvsp[0].expr); } +break; +case 11: +#line 285 "label.y" +{ yyval.expr = yyvsp[0].expr; } +break; +case 12: +#line 287 "label.y" +{ yyval.expr = new substitute_expr(yyvsp[-2].expr, yyvsp[0].expr); } +break; +case 13: +#line 292 "label.y" +{ yyval.expr = new at_expr; } +break; +case 14: +#line 294 "label.y" +{ + yyval.expr = new literal_expr(literals.contents() + yyvsp[0].str.start, + yyvsp[0].str.len); + } +break; +case 15: +#line 299 "label.y" +{ yyval.expr = new field_expr(yyvsp[0].num, 0); } +break; +case 16: +#line 301 "label.y" +{ yyval.expr = new field_expr(yyvsp[-1].num, yyvsp[0].num - 1); } +break; +case 17: +#line 303 "label.y" +{ + switch (yyvsp[0].num) { + case 'I': + case 'i': + case 'A': + case 'a': + yyval.expr = new format_expr(yyvsp[0].num); + break; + default: + command_error("unrecognized format `%1'", char(yyvsp[0].num)); + yyval.expr = new format_expr('a'); + break; + } + } +break; +case 18: +#line 319 "label.y" +{ + yyval.expr = new format_expr('0', yyvsp[0].dig.ndigits, yyvsp[0].dig.val); + } +break; +case 19: +#line 323 "label.y" +{ + switch (yyvsp[-1].num) { + case 'l': + yyval.expr = new map_expr(yyvsp[-4].expr, lowercase); + break; + case 'u': + yyval.expr = new map_expr(yyvsp[-4].expr, uppercase); + break; + case 'c': + yyval.expr = new map_expr(yyvsp[-4].expr, capitalize); + break; + case 'r': + yyval.expr = new map_expr(yyvsp[-4].expr, reverse_name); + break; + case 'a': + yyval.expr = new map_expr(yyvsp[-4].expr, abbreviate_name); + break; + case 'y': + yyval.expr = new extractor_expr(yyvsp[-4].expr, find_year, yyvsp[-2].num); + break; + case 'n': + yyval.expr = new extractor_expr(yyvsp[-4].expr, find_last_name, yyvsp[-2].num); + break; + default: + yyval.expr = yyvsp[-4].expr; + command_error("unknown function `%1'", char(yyvsp[-1].num)); + break; + } + } +break; +case 20: +#line 354 "label.y" +{ yyval.expr = new truncate_expr(yyvsp[-2].expr, yyvsp[0].num); } +break; +case 21: +#line 356 "label.y" +{ yyval.expr = new truncate_expr(yyvsp[-2].expr, -yyvsp[0].num); } +break; +case 22: +#line 358 "label.y" +{ yyval.expr = new star_expr(yyvsp[-1].expr); } +break; +case 23: +#line 360 "label.y" +{ yyval.expr = yyvsp[-1].expr; } +break; +case 24: +#line 362 "label.y" +{ yyval.expr = new separator_expr(yyvsp[-1].expr); } +break; +case 25: +#line 367 "label.y" +{ yyval.num = -1; } +break; +case 26: +#line 369 "label.y" +{ yyval.num = yyvsp[0].num; } +break; +case 27: +#line 374 "label.y" +{ yyval.num = yyvsp[0].num; } +break; +case 28: +#line 376 "label.y" +{ yyval.num = yyvsp[-1].num*10 + yyvsp[0].num; } +break; +case 29: +#line 381 "label.y" +{ yyval.dig.ndigits = 1; yyval.dig.val = yyvsp[0].num; } +break; +case 30: +#line 383 "label.y" +{ yyval.dig.ndigits = yyvsp[-1].dig.ndigits + 1; yyval.dig.val = yyvsp[-1].dig.val*10 + yyvsp[0].num; } +break; +case 31: +#line 389 "label.y" +{ yyval.num = 0; } +break; +case 32: +#line 391 "label.y" +{ yyval.num = 1; } +break; +case 33: +#line 393 "label.y" +{ yyval.num = -1; } +break; +#line 1550 "y.tab.c" + } + yyssp -= yym; + yystate = *yyssp; + yyvsp -= yym; + yym = yylhs[yyn]; + if (yystate == 0 && yym == 0) + { +#if YYDEBUG + if (yydebug) + printf("%sdebug: after reduction, shifting from state 0 to\ + state %d\n", YYPREFIX, YYFINAL); +#endif + yystate = YYFINAL; + *++yyssp = YYFINAL; + *++yyvsp = yyval; + if (yychar < 0) + { + if ((yychar = yylex()) < 0) yychar = 0; +#if YYDEBUG + if (yydebug) + { + yys = 0; + if (yychar <= YYMAXTOKEN) yys = yyname[yychar]; + if (!yys) yys = "illegal-symbol"; + printf("%sdebug: state %d, reading %d (%s)\n", + YYPREFIX, YYFINAL, yychar, yys); + } +#endif + } + if (yychar == 0) goto yyaccept; + goto yyloop; + } + if ((yyn = yygindex[yym]) && (yyn += yystate) >= 0 && + yyn <= YYTABLESIZE && yycheck[yyn] == yystate) + yystate = yytable[yyn]; + else + yystate = yydgoto[yym]; +#if YYDEBUG + if (yydebug) + printf("%sdebug: after reduction, shifting from state %d \ +to state %d\n", YYPREFIX, *yyssp, yystate); +#endif + if (yyssp >= yyss + yystacksize - 1) + { + goto yyoverflow; + } + *++yyssp = yystate; + *++yyvsp = yyval; + goto yyloop; +yyoverflow: + yyerror("yacc stack overflow"); +yyabort: + return (1); +yyaccept: + return (0); +} diff --git a/contrib/groff/src/preproc/refer/ref.cpp b/contrib/groff/src/preproc/refer/ref.cpp new file mode 100644 index 000000000000..9c040789a82f --- /dev/null +++ b/contrib/groff/src/preproc/refer/ref.cpp @@ -0,0 +1,1160 @@ +// -*- C++ -*- +/* Copyright (C) 1989, 1990, 1991, 1992, 2001 Free Software Foundation, Inc. +Written by James Clark (jjc@jclark.com) + +This file is part of groff. + +groff is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +groff is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with groff; see the file COPYING. If not, write to the Free Software +Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#include "refer.h" +#include "refid.h" +#include "ref.h" +#include "token.h" + +static const char *find_day(const char *, const char *, const char **); +static int find_month(const char *start, const char *end); +static void abbreviate_names(string &); + +#define DEFAULT_ARTICLES "the\000a\000an" + +string articles(DEFAULT_ARTICLES, sizeof(DEFAULT_ARTICLES)); + +// Multiple occurrences of fields are separated by FIELD_SEPARATOR. +const char FIELD_SEPARATOR = '\0'; + +const char MULTI_FIELD_NAMES[] = "AE"; +const char *AUTHOR_FIELDS = "AQ"; + +enum { OTHER, JOURNAL_ARTICLE, BOOK, ARTICLE_IN_BOOK, TECH_REPORT, BELL_TM }; + +const char *reference_types[] = { + "other", + "journal-article", + "book", + "article-in-book", + "tech-report", + "bell-tm", +}; + +static string temp_fields[256]; + +reference::reference(const char *start, int len, reference_id *ridp) +: h(0), merged(0), no(-1), field(0), nfields(0), label_ptr(0), + computed_authors(0), last_needed_author(-1), nauthors(-1) +{ + int i; + for (i = 0; i < 256; i++) + field_index[i] = NULL_FIELD_INDEX; + if (ridp) + rid = *ridp; + if (start == 0) + return; + if (len <= 0) + return; + const char *end = start + len; + const char *ptr = start; + assert(*ptr == '%'); + while (ptr < end) { + if (ptr + 1 < end && ptr[1] != '\0' + && ((ptr[1] != '%' && ptr[1] == annotation_field) + || (ptr + 2 < end && ptr[1] == '%' && ptr[2] != '\0' + && discard_fields.search(ptr[2]) < 0))) { + if (ptr[1] == '%') + ptr++; + string &f = temp_fields[(unsigned char)ptr[1]]; + ptr += 2; + while (ptr < end && csspace(*ptr)) + ptr++; + for (;;) { + for (;;) { + if (ptr >= end) { + f += '\n'; + break; + } + f += *ptr; + if (*ptr++ == '\n') + break; + } + if (ptr >= end || *ptr == '%') + break; + } + } + else if (ptr + 1 < end && ptr[1] != '\0' && ptr[1] != '%' + && discard_fields.search(ptr[1]) < 0) { + string &f = temp_fields[(unsigned char)ptr[1]]; + if (f.length() > 0) { + if (strchr(MULTI_FIELD_NAMES, ptr[1]) != 0) + f += FIELD_SEPARATOR; + else + f.clear(); + } + ptr += 2; + if (ptr < end) { + if (*ptr == ' ') + ptr++; + for (;;) { + const char *p = ptr; + while (ptr < end && *ptr != '\n') + ptr++; + // strip trailing white space + const char *q = ptr; + while (q > p && q[-1] != '\n' && csspace(q[-1])) + q--; + while (p < q) + f += *p++; + if (ptr >= end) + break; + ptr++; + if (ptr >= end) + break; + if (*ptr == '%') + break; + f += ' '; + } + } + } + else { + // skip this field + for (;;) { + while (ptr < end && *ptr++ != '\n') + ; + if (ptr >= end || *ptr == '%') + break; + } + } + } + for (i = 0; i < 256; i++) + if (temp_fields[i].length() > 0) + nfields++; + field = new string[nfields]; + int j = 0; + for (i = 0; i < 256; i++) + if (temp_fields[i].length() > 0) { + field[j].move(temp_fields[i]); + if (abbreviate_fields.search(i) >= 0) + abbreviate_names(field[j]); + field_index[i] = j; + j++; + } +} + +reference::~reference() +{ + if (nfields > 0) + ad_delete(nfields) field; +} + +// ref is the inline, this is the database ref + +void reference::merge(reference &ref) +{ + int i; + for (i = 0; i < 256; i++) + if (field_index[i] != NULL_FIELD_INDEX) + temp_fields[i].move(field[field_index[i]]); + for (i = 0; i < 256; i++) + if (ref.field_index[i] != NULL_FIELD_INDEX) + temp_fields[i].move(ref.field[ref.field_index[i]]); + for (i = 0; i < 256; i++) + field_index[i] = NULL_FIELD_INDEX; + int old_nfields = nfields; + nfields = 0; + for (i = 0; i < 256; i++) + if (temp_fields[i].length() > 0) + nfields++; + if (nfields != old_nfields) { + if (old_nfields > 0) + ad_delete(old_nfields) field; + field = new string[nfields]; + } + int j = 0; + for (i = 0; i < 256; i++) + if (temp_fields[i].length() > 0) { + field[j].move(temp_fields[i]); + field_index[i] = j; + j++; + } + merged = 1; +} + +void reference::insert_field(unsigned char c, string &s) +{ + assert(s.length() > 0); + if (field_index[c] != NULL_FIELD_INDEX) { + field[field_index[c]].move(s); + return; + } + assert(field_index[c] == NULL_FIELD_INDEX); + string *old_field = field; + field = new string[nfields + 1]; + int pos = 0; + int i; + for (i = 0; i < int(c); i++) + if (field_index[i] != NULL_FIELD_INDEX) + pos++; + for (i = 0; i < pos; i++) + field[i].move(old_field[i]); + field[pos].move(s); + for (i = pos; i < nfields; i++) + field[i + 1].move(old_field[i]); + if (nfields > 0) + ad_delete(nfields) old_field; + nfields++; + field_index[c] = pos; + for (i = c + 1; i < 256; i++) + if (field_index[i] != NULL_FIELD_INDEX) + field_index[i] += 1; +} + +void reference::delete_field(unsigned char c) +{ + if (field_index[c] == NULL_FIELD_INDEX) + return; + string *old_field = field; + field = new string[nfields - 1]; + int i; + for (i = 0; i < int(field_index[c]); i++) + field[i].move(old_field[i]); + for (i = field_index[c]; i < nfields - 1; i++) + field[i].move(old_field[i + 1]); + if (nfields > 0) + ad_delete(nfields) old_field; + nfields--; + field_index[c] = NULL_FIELD_INDEX; + for (i = c + 1; i < 256; i++) + if (field_index[i] != NULL_FIELD_INDEX) + field_index[i] -= 1; +} + +void reference::compute_hash_code() +{ + if (!rid.is_null()) + h = rid.hash(); + else { + h = 0; + for (int i = 0; i < nfields; i++) + if (field[i].length() > 0) { + h <<= 4; + h ^= hash_string(field[i].contents(), field[i].length()); + } + } +} + +void reference::set_number(int n) +{ + no = n; +} + +const char SORT_SEP = '\001'; +const char SORT_SUB_SEP = '\002'; +const char SORT_SUB_SUB_SEP = '\003'; + +// sep specifies additional word separators + +void sortify_words(const char *s, const char *end, const char *sep, + string &result) +{ + int non_empty = 0; + int need_separator = 0; + for (;;) { + const char *token_start = s; + if (!get_token(&s, end)) + break; + if ((s - token_start == 1 + && (*token_start == ' ' + || *token_start == '\n' + || (sep && *token_start != '\0' + && strchr(sep, *token_start) != 0))) + || (s - token_start == 2 + && token_start[0] == '\\' && token_start[1] == ' ')) { + if (non_empty) + need_separator = 1; + } + else { + const token_info *ti = lookup_token(token_start, s); + if (ti->sortify_non_empty(token_start, s)) { + if (need_separator) { + result += ' '; + need_separator = 0; + } + ti->sortify(token_start, s, result); + non_empty = 1; + } + } + } +} + +void sortify_word(const char *s, const char *end, string &result) +{ + for (;;) { + const char *token_start = s; + if (!get_token(&s, end)) + break; + const token_info *ti = lookup_token(token_start, s); + ti->sortify(token_start, s, result); + } +} + +void sortify_other(const char *s, int len, string &key) +{ + sortify_words(s, s + len, 0, key); +} + +void sortify_title(const char *s, int len, string &key) +{ + const char *end = s + len; + for (; s < end && (*s == ' ' || *s == '\n'); s++) + ; + const char *ptr = s; + for (;;) { + const char *token_start = ptr; + if (!get_token(&ptr, end)) + break; + if (ptr - token_start == 1 + && (*token_start == ' ' || *token_start == '\n')) + break; + } + if (ptr < end) { + unsigned int first_word_len = ptr - s - 1; + const char *ae = articles.contents() + articles.length(); + for (const char *a = articles.contents(); + a < ae; + a = strchr(a, '\0') + 1) + if (first_word_len == strlen(a)) { + unsigned int j; + for (j = 0; j < first_word_len; j++) + if (a[j] != cmlower(s[j])) + break; + if (j >= first_word_len) { + s = ptr; + for (; s < end && (*s == ' ' || *s == '\n'); s++) + ; + break; + } + } + } + sortify_words(s, end, 0, key); +} + +void sortify_name(const char *s, int len, string &key) +{ + const char *last_name_end; + const char *last_name = find_last_name(s, s + len, &last_name_end); + sortify_word(last_name, last_name_end, key); + key += SORT_SUB_SUB_SEP; + if (last_name > s) + sortify_words(s, last_name, ".", key); + key += SORT_SUB_SUB_SEP; + if (last_name_end < s + len) + sortify_words(last_name_end, s + len, ".,", key); +} + +void sortify_date(const char *s, int len, string &key) +{ + const char *year_end; + const char *year_start = find_year(s, s + len, &year_end); + if (!year_start) { + // Things without years are often `forthcoming', so it makes sense + // that they sort after things with explicit years. + key += 'A'; + sortify_words(s, s + len, 0, key); + return; + } + int n = year_end - year_start; + while (n < 4) { + key += '0'; + n++; + } + while (year_start < year_end) + key += *year_start++; + int m = find_month(s, s + len); + if (m < 0) + return; + key += 'A' + m; + const char *day_end; + const char *day_start = find_day(s, s + len, &day_end); + if (!day_start) + return; + if (day_end - day_start == 1) + key += '0'; + while (day_start < day_end) + key += *day_start++; +} + +// SORT_{SUB,SUB_SUB}_SEP can creep in from use of @ in label specification. + +void sortify_label(const char *s, int len, string &key) +{ + const char *end = s + len; + for (;;) { + const char *ptr; + for (ptr = s; + ptr < end && *ptr != SORT_SUB_SEP && *ptr != SORT_SUB_SUB_SEP; + ptr++) + ; + if (ptr > s) + sortify_words(s, ptr, 0, key); + s = ptr; + if (s >= end) + break; + key += *s++; + } +} + +void reference::compute_sort_key() +{ + if (sort_fields.length() == 0) + return; + sort_fields += '\0'; + const char *sf = sort_fields.contents(); + while (*sf != '\0') { + if (sf > sort_fields) + sort_key += SORT_SEP; + char f = *sf++; + int n = 1; + if (*sf == '+') { + n = INT_MAX; + sf++; + } + else if (csdigit(*sf)) { + char *ptr; + long l = strtol(sf, &ptr, 10); + if (l == 0 && ptr == sf) + ; + else { + sf = ptr; + if (l < 0) { + n = 1; + } + else { + n = int(l); + } + } + } + if (f == '.') + sortify_label(label.contents(), label.length(), sort_key); + else if (f == AUTHOR_FIELDS[0]) + sortify_authors(n, sort_key); + else + sortify_field(f, n, sort_key); + } + sort_fields.set_length(sort_fields.length() - 1); +} + +void reference::sortify_authors(int n, string &result) const +{ + for (const char *p = AUTHOR_FIELDS; *p != '\0'; p++) + if (contains_field(*p)) { + sortify_field(*p, n, result); + return; + } + sortify_field(AUTHOR_FIELDS[0], n, result); +} + +void reference::canonicalize_authors(string &result) const +{ + int len = result.length(); + sortify_authors(INT_MAX, result); + if (result.length() > len) + result += SORT_SUB_SEP; +} + +void reference::sortify_field(unsigned char f, int n, string &result) const +{ + typedef void (*sortify_t)(const char *, int, string &); + sortify_t sortifier = sortify_other; + switch (f) { + case 'A': + case 'E': + sortifier = sortify_name; + break; + case 'D': + sortifier = sortify_date; + break; + case 'B': + case 'J': + case 'T': + sortifier = sortify_title; + break; + } + int fi = field_index[(unsigned char)f]; + if (fi != NULL_FIELD_INDEX) { + string &str = field[fi]; + const char *start = str.contents(); + const char *end = start + str.length(); + for (int i = 0; i < n && start < end; i++) { + const char *p = start; + while (start < end && *start != FIELD_SEPARATOR) + start++; + if (i > 0) + result += SORT_SUB_SEP; + (*sortifier)(p, start - p, result); + if (start < end) + start++; + } + } +} + +int compare_reference(const reference &r1, const reference &r2) +{ + assert(r1.no >= 0); + assert(r2.no >= 0); + const char *s1 = r1.sort_key.contents(); + int n1 = r1.sort_key.length(); + const char *s2 = r2.sort_key.contents(); + int n2 = r2.sort_key.length(); + for (; n1 > 0 && n2 > 0; --n1, --n2, ++s1, ++s2) + if (*s1 != *s2) + return (int)(unsigned char)*s1 - (int)(unsigned char)*s2; + if (n2 > 0) + return -1; + if (n1 > 0) + return 1; + return r1.no - r2.no; +} + +int same_reference(const reference &r1, const reference &r2) +{ + if (!r1.rid.is_null() && r1.rid == r2.rid) + return 1; + if (r1.h != r2.h) + return 0; + if (r1.nfields != r2.nfields) + return 0; + int i = 0; + for (i = 0; i < 256; i++) + if (r1.field_index != r2.field_index) + return 0; + for (i = 0; i < r1.nfields; i++) + if (r1.field[i] != r2.field[i]) + return 0; + return 1; +} + +const char *find_last_name(const char *start, const char *end, + const char **endp) +{ + const char *ptr = start; + const char *last_word = start; + for (;;) { + const char *token_start = ptr; + if (!get_token(&ptr, end)) + break; + if (ptr - token_start == 1) { + if (*token_start == ',') { + *endp = token_start; + return last_word; + } + else if (*token_start == ' ' || *token_start == '\n') { + if (ptr < end && *ptr != ' ' && *ptr != '\n') + last_word = ptr; + } + } + } + *endp = end; + return last_word; +} + +void abbreviate_name(const char *ptr, const char *end, string &result) +{ + const char *last_name_end; + const char *last_name_start = find_last_name(ptr, end, &last_name_end); + int need_period = 0; + for (;;) { + const char *token_start = ptr; + if (!get_token(&ptr, last_name_start)) + break; + const token_info *ti = lookup_token(token_start, ptr); + if (need_period) { + if ((ptr - token_start == 1 && *token_start == ' ') + || (ptr - token_start == 2 && token_start[0] == '\\' + && token_start[1] == ' ')) + continue; + if (ti->is_upper()) + result += period_before_initial; + else + result += period_before_other; + need_period = 0; + } + result.append(token_start, ptr - token_start); + if (ti->is_upper()) { + const char *lower_ptr = ptr; + int first_token = 1; + for (;;) { + token_start = ptr; + if (!get_token(&ptr, last_name_start)) + break; + if ((ptr - token_start == 1 && *token_start == ' ') + || (ptr - token_start == 2 && token_start[0] == '\\' + && token_start[1] == ' ')) + break; + ti = lookup_token(token_start, ptr); + if (ti->is_hyphen()) { + const char *ptr1 = ptr; + if (get_token(&ptr1, last_name_start)) { + ti = lookup_token(ptr, ptr1); + if (ti->is_upper()) { + result += period_before_hyphen; + result.append(token_start, ptr1 - token_start); + ptr = ptr1; + } + } + } + else if (ti->is_upper()) { + // MacDougal -> MacD. + result.append(lower_ptr, ptr - lower_ptr); + lower_ptr = ptr; + first_token = 1; + } + else if (first_token && ti->is_accent()) { + result.append(token_start, ptr - token_start); + lower_ptr = ptr; + } + first_token = 0; + } + need_period = 1; + } + } + if (need_period) + result += period_before_last_name; + result.append(last_name_start, end - last_name_start); +} + +static void abbreviate_names(string &result) +{ + string str; + str.move(result); + const char *ptr = str.contents(); + const char *end = ptr + str.length(); + while (ptr < end) { + const char *name_end = (char *)memchr(ptr, FIELD_SEPARATOR, end - ptr); + if (name_end == 0) + name_end = end; + abbreviate_name(ptr, name_end, result); + if (name_end >= end) + break; + ptr = name_end + 1; + result += FIELD_SEPARATOR; + } +} + +void reverse_name(const char *ptr, const char *name_end, string &result) +{ + const char *last_name_end; + const char *last_name_start = find_last_name(ptr, name_end, &last_name_end); + result.append(last_name_start, last_name_end - last_name_start); + while (last_name_start > ptr + && (last_name_start[-1] == ' ' || last_name_start[-1] == '\n')) + last_name_start--; + if (last_name_start > ptr) { + result += ", "; + result.append(ptr, last_name_start - ptr); + } + if (last_name_end < name_end) + result.append(last_name_end, name_end - last_name_end); +} + +void reverse_names(string &result, int n) +{ + if (n <= 0) + return; + string str; + str.move(result); + const char *ptr = str.contents(); + const char *end = ptr + str.length(); + while (ptr < end) { + if (--n < 0) { + result.append(ptr, end - ptr); + break; + } + const char *name_end = (char *)memchr(ptr, FIELD_SEPARATOR, end - ptr); + if (name_end == 0) + name_end = end; + reverse_name(ptr, name_end, result); + if (name_end >= end) + break; + ptr = name_end + 1; + result += FIELD_SEPARATOR; + } +} + +// Return number of field separators. + +int join_fields(string &f) +{ + const char *ptr = f.contents(); + int len = f.length(); + int nfield_seps = 0; + int j; + for (j = 0; j < len; j++) + if (ptr[j] == FIELD_SEPARATOR) + nfield_seps++; + if (nfield_seps == 0) + return 0; + string temp; + int field_seps_left = nfield_seps; + for (j = 0; j < len; j++) { + if (ptr[j] == FIELD_SEPARATOR) { + if (nfield_seps == 1) + temp += join_authors_exactly_two; + else if (--field_seps_left == 0) + temp += join_authors_last_two; + else + temp += join_authors_default; + } + else + temp += ptr[j]; + } + f = temp; + return nfield_seps; +} + +void uppercase(const char *start, const char *end, string &result) +{ + for (;;) { + const char *token_start = start; + if (!get_token(&start, end)) + break; + const token_info *ti = lookup_token(token_start, start); + ti->upper_case(token_start, start, result); + } +} + +void lowercase(const char *start, const char *end, string &result) +{ + for (;;) { + const char *token_start = start; + if (!get_token(&start, end)) + break; + const token_info *ti = lookup_token(token_start, start); + ti->lower_case(token_start, start, result); + } +} + +void capitalize(const char *ptr, const char *end, string &result) +{ + int in_small_point_size = 0; + for (;;) { + const char *start = ptr; + if (!get_token(&ptr, end)) + break; + const token_info *ti = lookup_token(start, ptr); + const char *char_end = ptr; + int is_lower = ti->is_lower(); + if ((is_lower || ti->is_upper()) && get_token(&ptr, end)) { + const token_info *ti2 = lookup_token(char_end, ptr); + if (!ti2->is_accent()) + ptr = char_end; + } + if (is_lower) { + if (!in_small_point_size) { + result += "\\s-2"; + in_small_point_size = 1; + } + ti->upper_case(start, char_end, result); + result.append(char_end, ptr - char_end); + } + else { + if (in_small_point_size) { + result += "\\s+2"; + in_small_point_size = 0; + } + result.append(start, ptr - start); + } + } + if (in_small_point_size) + result += "\\s+2"; +} + +void capitalize_field(string &str) +{ + string temp; + capitalize(str.contents(), str.contents() + str.length(), temp); + str.move(temp); +} + +int is_terminated(const char *ptr, const char *end) +{ + const char *last_token = end; + for (;;) { + const char *p = ptr; + if (!get_token(&ptr, end)) + break; + last_token = p; + } + return end - last_token == 1 + && (*last_token == '.' || *last_token == '!' || *last_token == '?'); +} + +void reference::output(FILE *fp) +{ + fputs(".]-\n", fp); + for (int i = 0; i < 256; i++) + if (field_index[i] != NULL_FIELD_INDEX && i != annotation_field) { + string &f = field[field_index[i]]; + if (!csdigit(i)) { + int j = reverse_fields.search(i); + if (j >= 0) { + int n; + int len = reverse_fields.length(); + if (++j < len && csdigit(reverse_fields[j])) { + n = reverse_fields[j] - '0'; + for (++j; j < len && csdigit(reverse_fields[j]); j++) + // should check for overflow + n = n*10 + reverse_fields[j] - '0'; + } + else + n = INT_MAX; + reverse_names(f, n); + } + } + int is_multiple = join_fields(f) > 0; + if (capitalize_fields.search(i) >= 0) + capitalize_field(f); + if (memchr(f.contents(), '\n', f.length()) == 0) { + fprintf(fp, ".ds [%c ", i); + if (f[0] == ' ' || f[0] == '\\' || f[0] == '"') + putc('"', fp); + put_string(f, fp); + putc('\n', fp); + } + else { + fprintf(fp, ".de [%c\n", i); + put_string(f, fp); + fputs("..\n", fp); + } + if (i == 'P') { + int multiple_pages = 0; + const char *s = f.contents(); + const char *end = f.contents() + f.length(); + for (;;) { + const char *token_start = s; + if (!get_token(&s, end)) + break; + const token_info *ti = lookup_token(token_start, s); + if (ti->is_hyphen() || ti->is_range_sep()) { + multiple_pages = 1; + break; + } + } + fprintf(fp, ".nr [P %d\n", multiple_pages); + } + else if (i == 'E') + fprintf(fp, ".nr [E %d\n", is_multiple); + } + for (const char *p = "TAO"; *p; p++) { + int fi = field_index[(unsigned char)*p]; + if (fi != NULL_FIELD_INDEX) { + string &f = field[fi]; + fprintf(fp, ".nr [%c %d\n", *p, + is_terminated(f.contents(), f.contents() + f.length())); + } + } + int t = classify(); + fprintf(fp, ".][ %d %s\n", t, reference_types[t]); + if (annotation_macro.length() > 0 && annotation_field >= 0 + && field_index[annotation_field] != NULL_FIELD_INDEX) { + putc('.', fp); + put_string(annotation_macro, fp); + putc('\n', fp); + put_string(field[field_index[annotation_field]], fp); + } +} + +void reference::print_sort_key_comment(FILE *fp) +{ + fputs(".\\\"", fp); + put_string(sort_key, fp); + putc('\n', fp); +} + +const char *find_year(const char *start, const char *end, const char **endp) +{ + for (;;) { + while (start < end && !csdigit(*start)) + start++; + const char *ptr = start; + if (start == end) + break; + while (ptr < end && csdigit(*ptr)) + ptr++; + if (ptr - start == 4 || ptr - start == 3 + || (ptr - start == 2 + && (start[0] >= '4' || (start[0] == '3' && start[1] >= '2')))) { + *endp = ptr; + return start; + } + start = ptr; + } + return 0; +} + +static const char *find_day(const char *start, const char *end, + const char **endp) +{ + for (;;) { + while (start < end && !csdigit(*start)) + start++; + const char *ptr = start; + if (start == end) + break; + while (ptr < end && csdigit(*ptr)) + ptr++; + if ((ptr - start == 1 && start[0] != '0') + || (ptr - start == 2 && + (start[0] == '1' + || start[0] == '2' + || (start[0] == '3' && start[1] <= '1') + || (start[0] == '0' && start[1] != '0')))) { + *endp = ptr; + return start; + } + start = ptr; + } + return 0; +} + +static int find_month(const char *start, const char *end) +{ + static const char *months[] = { + "january", + "february", + "march", + "april", + "may", + "june", + "july", + "august", + "september", + "october", + "november", + "december", + }; + for (;;) { + while (start < end && !csalpha(*start)) + start++; + const char *ptr = start; + if (start == end) + break; + while (ptr < end && csalpha(*ptr)) + ptr++; + if (ptr - start >= 3) { + for (unsigned int i = 0; i < sizeof(months)/sizeof(months[0]); i++) { + const char *q = months[i]; + const char *p = start; + for (; p < ptr; p++, q++) + if (cmlower(*p) != *q) + break; + if (p >= ptr) + return i; + } + } + start = ptr; + } + return -1; +} + +int reference::contains_field(char c) const +{ + return field_index[(unsigned char)c] != NULL_FIELD_INDEX; +} + +int reference::classify() +{ + if (contains_field('J')) + return JOURNAL_ARTICLE; + if (contains_field('B')) + return ARTICLE_IN_BOOK; + if (contains_field('G')) + return TECH_REPORT; + if (contains_field('R')) + return TECH_REPORT; + if (contains_field('I')) + return BOOK; + if (contains_field('M')) + return BELL_TM; + return OTHER; +} + +const char *reference::get_year(const char **endp) const +{ + if (field_index['D'] != NULL_FIELD_INDEX) { + string &date = field[field_index['D']]; + const char *start = date.contents(); + const char *end = start + date.length(); + return find_year(start, end, endp); + } + else + return 0; +} + +const char *reference::get_field(unsigned char c, const char **endp) const +{ + if (field_index[c] != NULL_FIELD_INDEX) { + string &f = field[field_index[c]]; + const char *start = f.contents(); + *endp = start + f.length(); + return start; + } + else + return 0; +} + +const char *reference::get_date(const char **endp) const +{ + return get_field('D', endp); +} + +const char *nth_field(int i, const char *start, const char **endp) +{ + while (--i >= 0) { + start = (char *)memchr(start, FIELD_SEPARATOR, *endp - start); + if (!start) + return 0; + start++; + } + const char *e = (char *)memchr(start, FIELD_SEPARATOR, *endp - start); + if (e) + *endp = e; + return start; +} + +const char *reference::get_author(int i, const char **endp) const +{ + for (const char *f = AUTHOR_FIELDS; *f != '\0'; f++) { + const char *start = get_field(*f, endp); + if (start) { + if (strchr(MULTI_FIELD_NAMES, *f) != 0) + return nth_field(i, start, endp); + else if (i == 0) + return start; + else + return 0; + } + } + return 0; +} + +const char *reference::get_author_last_name(int i, const char **endp) const +{ + for (const char *f = AUTHOR_FIELDS; *f != '\0'; f++) { + const char *start = get_field(*f, endp); + if (start) { + if (strchr(MULTI_FIELD_NAMES, *f) != 0) { + start = nth_field(i, start, endp); + if (!start) + return 0; + } + if (*f == 'A') + return find_last_name(start, *endp, endp); + else + return start; + } + } + return 0; +} + +void reference::set_date(string &d) +{ + if (d.length() == 0) + delete_field('D'); + else + insert_field('D', d); +} + +int same_year(const reference &r1, const reference &r2) +{ + const char *ye1; + const char *ys1 = r1.get_year(&ye1); + const char *ye2; + const char *ys2 = r2.get_year(&ye2); + if (ys1 == 0) { + if (ys2 == 0) + return same_date(r1, r2); + else + return 0; + } + else if (ys2 == 0) + return 0; + else if (ye1 - ys1 != ye2 - ys2) + return 0; + else + return memcmp(ys1, ys2, ye1 - ys1) == 0; +} + +int same_date(const reference &r1, const reference &r2) +{ + const char *e1; + const char *s1 = r1.get_date(&e1); + const char *e2; + const char *s2 = r2.get_date(&e2); + if (s1 == 0) + return s2 == 0; + else if (s2 == 0) + return 0; + else if (e1 - s1 != e2 - s2) + return 0; + else + return memcmp(s1, s2, e1 - s1) == 0; +} + +const char *reference::get_sort_field(int i, int si, int ssi, + const char **endp) const +{ + const char *start = sort_key.contents(); + const char *end = start + sort_key.length(); + if (i < 0) { + *endp = end; + return start; + } + while (--i >= 0) { + start = (char *)memchr(start, SORT_SEP, end - start); + if (!start) + return 0; + start++; + } + const char *e = (char *)memchr(start, SORT_SEP, end - start); + if (e) + end = e; + if (si < 0) { + *endp = end; + return start; + } + while (--si >= 0) { + start = (char *)memchr(start, SORT_SUB_SEP, end - start); + if (!start) + return 0; + start++; + } + e = (char *)memchr(start, SORT_SUB_SEP, end - start); + if (e) + end = e; + if (ssi < 0) { + *endp = end; + return start; + } + while (--ssi >= 0) { + start = (char *)memchr(start, SORT_SUB_SUB_SEP, end - start); + if (!start) + return 0; + start++; + } + e = (char *)memchr(start, SORT_SUB_SUB_SEP, end - start); + if (e) + end = e; + *endp = end; + return start; +} + diff --git a/contrib/groff/src/preproc/refer/refer.cpp b/contrib/groff/src/preproc/refer/refer.cpp new file mode 100644 index 000000000000..33df35ca44fe --- /dev/null +++ b/contrib/groff/src/preproc/refer/refer.cpp @@ -0,0 +1,1235 @@ +// -*- C++ -*- +/* Copyright (C) 1989-1992, 2000, 2001, 2002 Free Software Foundation, Inc. + Written by James Clark (jjc@jclark.com) + +This file is part of groff. + +groff is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +groff is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with groff; see the file COPYING. If not, write to the Free Software +Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#include "refer.h" +#include "refid.h" +#include "ref.h" +#include "token.h" +#include "search.h" +#include "command.h" + +extern "C" const char *Version_string; + +const char PRE_LABEL_MARKER = '\013'; +const char POST_LABEL_MARKER = '\014'; +const char LABEL_MARKER = '\015'; // label_type is added on + +#define FORCE_LEFT_BRACKET 04 +#define FORCE_RIGHT_BRACKET 010 + +static FILE *outfp = stdout; + +string capitalize_fields; +string reverse_fields; +string abbreviate_fields; +string period_before_last_name = ". "; +string period_before_initial = "."; +string period_before_hyphen = ""; +string period_before_other = ". "; +string sort_fields; +int annotation_field = -1; +string annotation_macro; +string discard_fields = "XYZ"; +string pre_label = "\\*([."; +string post_label = "\\*(.]"; +string sep_label = ", "; +int accumulate = 0; +int move_punctuation = 0; +int abbreviate_label_ranges = 0; +string label_range_indicator; +int label_in_text = 1; +int label_in_reference = 1; +int date_as_label = 0; +int sort_adjacent_labels = 0; +// Join exactly two authors with this. +string join_authors_exactly_two = " and "; +// When there are more than two authors join the last two with this. +string join_authors_last_two = ", and "; +// Otherwise join authors with this. +string join_authors_default = ", "; +string separate_label_second_parts = ", "; +// Use this string to represent that there are other authors. +string et_al = " et al"; +// Use et al only if it can replace at least this many authors. +int et_al_min_elide = 2; +// Use et al only if the total number of authors is at least this. +int et_al_min_total = 3; + + +int compatible_flag = 0; + +int short_label_flag = 0; + +static int recognize_R1_R2 = 1; + +search_list database_list; +int search_default = 1; +static int default_database_loaded = 0; + +static reference **citation = 0; +static int ncitations = 0; +static int citation_max = 0; + +static reference **reference_hash_table = 0; +static int hash_table_size; +static int nreferences = 0; + +static int need_syncing = 0; +string pending_line; +string pending_lf_lines; + +static void output_pending_line(); +static unsigned immediately_handle_reference(const string &); +static void immediately_output_references(); +static unsigned store_reference(const string &); +static void divert_to_temporary_file(); +static reference *make_reference(const string &, unsigned *); +static void usage(FILE *stream); +static void do_file(const char *); +static void split_punct(string &line, string &punct); +static void output_citation_group(reference **v, int n, label_type, FILE *fp); +static void possibly_load_default_database(); + +int main(int argc, char **argv) +{ + program_name = argv[0]; + static char stderr_buf[BUFSIZ]; + setbuf(stderr, stderr_buf); + outfp = stdout; + int finished_options = 0; + int bib_flag = 0; + int done_spec = 0; + + for (--argc, ++argv; + !finished_options && argc > 0 && argv[0][0] == '-' + && argv[0][1] != '\0'; + argv++, argc--) { + const char *opt = argv[0] + 1; + while (opt != 0 && *opt != '\0') { + switch (*opt) { + case 'C': + compatible_flag = 1; + opt++; + break; + case 'B': + bib_flag = 1; + label_in_reference = 0; + label_in_text = 0; + ++opt; + if (*opt == '\0') { + annotation_field = 'X'; + annotation_macro = "AP"; + } + else if (csalnum(opt[0]) && opt[1] == '.' && opt[2] != '\0') { + annotation_field = opt[0]; + annotation_macro = opt + 2; + } + opt = 0; + break; + case 'P': + move_punctuation = 1; + opt++; + break; + case 'R': + recognize_R1_R2 = 0; + opt++; + break; + case 'S': + // Not a very useful spec. + set_label_spec("(A.n|Q)', '(D.y|D)"); + done_spec = 1; + pre_label = " ("; + post_label = ")"; + sep_label = "; "; + opt++; + break; + case 'V': + verify_flag = 1; + opt++; + break; + case 'f': + { + const char *num = 0; + if (*++opt == '\0') { + if (argc > 1) { + num = *++argv; + --argc; + } + else { + error("option `f' requires an argument"); + usage(stderr); + exit(1); + } + } + else { + num = opt; + opt = 0; + } + const char *ptr; + for (ptr = num; *ptr; ptr++) + if (!csdigit(*ptr)) { + error("bad character `%1' in argument to -f option", *ptr); + break; + } + if (*ptr == '\0') { + string spec; + spec = '%'; + spec += num; + spec += '\0'; + set_label_spec(spec.contents()); + done_spec = 1; + } + break; + } + case 'b': + label_in_text = 0; + label_in_reference = 0; + opt++; + break; + case 'e': + accumulate = 1; + opt++; + break; + case 'c': + capitalize_fields = ++opt; + opt = 0; + break; + case 'k': + { + char buf[5]; + if (csalpha(*++opt)) + buf[0] = *opt++; + else { + if (*opt != '\0') + error("bad field name `%1'", *opt++); + buf[0] = 'L'; + } + buf[1] = '~'; + buf[2] = '%'; + buf[3] = 'a'; + buf[4] = '\0'; + set_label_spec(buf); + done_spec = 1; + } + break; + case 'a': + { + const char *ptr; + for (ptr = ++opt; *ptr; ptr++) + if (!csdigit(*ptr)) { + error("argument to `a' option not a number"); + break; + } + if (*ptr == '\0') { + reverse_fields = 'A'; + reverse_fields += opt; + } + opt = 0; + } + break; + case 'i': + linear_ignore_fields = ++opt; + opt = 0; + break; + case 'l': + { + char buf[INT_DIGITS*2 + 11]; // A.n+2D.y-3%a + strcpy(buf, "A.n"); + if (*++opt != '\0' && *opt != ',') { + char *ptr; + long n = strtol(opt, &ptr, 10); + if (n == 0 && ptr == opt) { + error("bad integer `%1' in `l' option", opt); + opt = 0; + break; + } + if (n < 0) + n = 0; + opt = ptr; + sprintf(strchr(buf, '\0'), "+%ld", n); + } + strcat(buf, "D.y"); + if (*opt == ',') + opt++; + if (*opt != '\0') { + char *ptr; + long n = strtol(opt, &ptr, 10); + if (n == 0 && ptr == opt) { + error("bad integer `%1' in `l' option", opt); + opt = 0; + break; + } + if (n < 0) + n = 0; + sprintf(strchr(buf, '\0'), "-%ld", n); + opt = ptr; + if (*opt != '\0') + error("argument to `l' option not of form `m,n'"); + } + strcat(buf, "%a"); + if (!set_label_spec(buf)) + assert(0); + done_spec = 1; + } + break; + case 'n': + search_default = 0; + opt++; + break; + case 'p': + { + const char *filename = 0; + if (*++opt == '\0') { + if (argc > 1) { + filename = *++argv; + argc--; + } + else { + error("option `p' requires an argument"); + usage(stderr); + exit(1); + } + } + else { + filename = opt; + opt = 0; + } + database_list.add_file(filename); + } + break; + case 's': + if (*++opt == '\0') + sort_fields = "AD"; + else { + sort_fields = opt; + opt = 0; + } + accumulate = 1; + break; + case 't': + { + char *ptr; + long n = strtol(opt, &ptr, 10); + if (n == 0 && ptr == opt) { + error("bad integer `%1' in `t' option", opt); + opt = 0; + break; + } + if (n < 1) + n = 1; + linear_truncate_len = int(n); + opt = ptr; + break; + } + case '-': + if (opt[1] == '\0') { + finished_options = 1; + opt++; + break; + } + if (strcmp(opt,"-version")==0) { + case 'v': + printf("GNU refer (groff) version %s\n", Version_string); + exit(0); + break; + } + if (strcmp(opt,"-help")==0) { + usage(stdout); + exit(0); + break; + } + // fall through + default: + error("unrecognized option `%1'", *opt); + usage(stderr); + exit(1); + break; + } + } + } + if (!done_spec) + set_label_spec("%1"); + if (argc <= 0) { + if (bib_flag) + do_bib("-"); + else + do_file("-"); + } + else { + for (int i = 0; i < argc; i++) { + if (bib_flag) + do_bib(argv[i]); + else + do_file(argv[i]); + } + } + if (accumulate) + output_references(); + if (fflush(stdout) < 0) + fatal("output error"); + return 0; +} + +static void usage(FILE *stream) +{ + fprintf(stream, +"usage: %s [-benvCPRS] [-aN] [-cXYZ] [-fN] [-iXYZ] [-kX] [-lM,N] [-p file]\n" +" [-sXYZ] [-tN] [-BL.M] [files ...]\n", + program_name); +} + +static void possibly_load_default_database() +{ + if (search_default && !default_database_loaded) { + char *filename = getenv("REFER"); + if (filename) + database_list.add_file(filename); + else + database_list.add_file(DEFAULT_INDEX, 1); + default_database_loaded = 1; + } +} + +static int is_list(const string &str) +{ + const char *start = str.contents(); + const char *end = start + str.length(); + while (end > start && csspace(end[-1])) + end--; + while (start < end && csspace(*start)) + start++; + return end - start == 6 && memcmp(start, "$LIST$", 6) == 0; +} + +static void do_file(const char *filename) +{ + FILE *fp; + if (strcmp(filename, "-") == 0) { + fp = stdin; + } + else { + errno = 0; + fp = fopen(filename, "r"); + if (fp == 0) { + error("can't open `%1': %2", filename, strerror(errno)); + return; + } + } + current_filename = filename; + fprintf(outfp, ".lf 1 %s\n", filename); + string line; + current_lineno = 0; + for (;;) { + line.clear(); + for (;;) { + int c = getc(fp); + if (c == EOF) { + if (line.length() > 0) + line += '\n'; + break; + } + if (invalid_input_char(c)) + error("invalid input character code %1", c); + else { + line += c; + if (c == '\n') + break; + } + } + int len = line.length(); + if (len == 0) + break; + current_lineno++; + if (len >= 2 && line[0] == '.' && line[1] == '[') { + int start_lineno = current_lineno; + int start_of_line = 1; + string str; + string post; + string pre(line.contents() + 2, line.length() - 3); + for (;;) { + int c = getc(fp); + if (c == EOF) { + error_with_file_and_line(current_filename, start_lineno, + "missing `.]' line"); + break; + } + if (start_of_line) + current_lineno++; + if (start_of_line && c == '.') { + int d = getc(fp); + if (d == ']') { + while ((d = getc(fp)) != '\n' && d != EOF) { + if (invalid_input_char(d)) + error("invalid input character code %1", d); + else + post += d; + } + break; + } + if (d != EOF) + ungetc(d, fp); + } + if (invalid_input_char(c)) + error("invalid input character code %1", c); + else + str += c; + start_of_line = (c == '\n'); + } + if (is_list(str)) { + output_pending_line(); + if (accumulate) + output_references(); + else + error("found `$LIST$' but not accumulating references"); + } + else { + unsigned flags = (accumulate + ? store_reference(str) + : immediately_handle_reference(str)); + if (label_in_text) { + if (accumulate && outfp == stdout) + divert_to_temporary_file(); + if (pending_line.length() == 0) { + warning("can't attach citation to previous line"); + } + else + pending_line.set_length(pending_line.length() - 1); + string punct; + if (move_punctuation) + split_punct(pending_line, punct); + int have_text = pre.length() > 0 || post.length() > 0; + label_type lt = label_type(flags & ~(FORCE_LEFT_BRACKET + |FORCE_RIGHT_BRACKET)); + if ((flags & FORCE_LEFT_BRACKET) || !have_text) + pending_line += PRE_LABEL_MARKER; + pending_line += pre; + char lm = LABEL_MARKER + (int)lt; + pending_line += lm; + pending_line += post; + if ((flags & FORCE_RIGHT_BRACKET) || !have_text) + pending_line += POST_LABEL_MARKER; + pending_line += punct; + pending_line += '\n'; + } + } + need_syncing = 1; + } + else if (len >= 4 + && line[0] == '.' && line[1] == 'l' && line[2] == 'f' + && (compatible_flag || line[3] == '\n' || line[3] == ' ')) { + pending_lf_lines += line; + line += '\0'; + if (interpret_lf_args(line.contents() + 3)) + current_lineno--; + } + else if (recognize_R1_R2 + && len >= 4 + && line[0] == '.' && line[1] == 'R' && line[2] == '1' + && (compatible_flag || line[3] == '\n' || line[3] == ' ')) { + line.clear(); + int start_of_line = 1; + int start_lineno = current_lineno; + for (;;) { + int c = getc(fp); + if (c != EOF && start_of_line) + current_lineno++; + if (start_of_line && c == '.') { + c = getc(fp); + if (c == 'R') { + c = getc(fp); + if (c == '2') { + c = getc(fp); + if (compatible_flag || c == ' ' || c == '\n' || c == EOF) { + while (c != EOF && c != '\n') + c = getc(fp); + break; + } + else { + line += '.'; + line += 'R'; + line += '2'; + } + } + else { + line += '.'; + line += 'R'; + } + } + else + line += '.'; + } + if (c == EOF) { + error_with_file_and_line(current_filename, start_lineno, + "missing `.R2' line"); + break; + } + if (invalid_input_char(c)) + error("invalid input character code %1", int(c)); + else { + line += c; + start_of_line = c == '\n'; + } + } + output_pending_line(); + if (accumulate) + output_references(); + else + nreferences = 0; + process_commands(line, current_filename, start_lineno + 1); + need_syncing = 1; + } + else { + output_pending_line(); + pending_line = line; + } + } + need_syncing = 0; + output_pending_line(); + if (fp != stdin) + fclose(fp); +} + +class label_processing_state { + enum { + NORMAL, + PENDING_LABEL, + PENDING_LABEL_POST, + PENDING_LABEL_POST_PRE, + PENDING_POST + } state; + label_type type; // type of pending labels + int count; // number of pending labels + reference **rptr; // pointer to next reference + int rcount; // number of references left + FILE *fp; + int handle_pending(int c); +public: + label_processing_state(reference **, int, FILE *); + ~label_processing_state(); + void process(int c); +}; + +static void output_pending_line() +{ + if (label_in_text && !accumulate && ncitations > 0) { + label_processing_state state(citation, ncitations, outfp); + int len = pending_line.length(); + for (int i = 0; i < len; i++) + state.process((unsigned char)(pending_line[i])); + } + else + put_string(pending_line, outfp); + pending_line.clear(); + if (pending_lf_lines.length() > 0) { + put_string(pending_lf_lines, outfp); + pending_lf_lines.clear(); + } + if (!accumulate) + immediately_output_references(); + if (need_syncing) { + fprintf(outfp, ".lf %d %s\n", current_lineno, current_filename); + need_syncing = 0; + } +} + +static void split_punct(string &line, string &punct) +{ + const char *start = line.contents(); + const char *end = start + line.length(); + const char *ptr = start; + const char *last_token_start = 0; + for (;;) { + if (ptr >= end) + break; + last_token_start = ptr; + if (*ptr == PRE_LABEL_MARKER || *ptr == POST_LABEL_MARKER + || (*ptr >= LABEL_MARKER && *ptr < LABEL_MARKER + N_LABEL_TYPES)) + ptr++; + else if (!get_token(&ptr, end)) + break; + } + if (last_token_start) { + const token_info *ti = lookup_token(last_token_start, end); + if (ti->is_punct()) { + punct.append(last_token_start, end - last_token_start); + line.set_length(last_token_start - start); + } + } +} + +static void divert_to_temporary_file() +{ + outfp = xtmpfile(); +} + +static void store_citation(reference *ref) +{ + if (ncitations >= citation_max) { + if (citation == 0) + citation = new reference*[citation_max = 100]; + else { + reference **old_citation = citation; + citation_max *= 2; + citation = new reference *[citation_max]; + memcpy(citation, old_citation, ncitations*sizeof(reference *)); + a_delete old_citation; + } + } + citation[ncitations++] = ref; +} + +static unsigned store_reference(const string &str) +{ + if (reference_hash_table == 0) { + reference_hash_table = new reference *[17]; + hash_table_size = 17; + for (int i = 0; i < hash_table_size; i++) + reference_hash_table[i] = 0; + } + unsigned flags; + reference *ref = make_reference(str, &flags); + ref->compute_hash_code(); + unsigned h = ref->hash(); + reference **ptr; + for (ptr = reference_hash_table + (h % hash_table_size); + *ptr != 0; + ((ptr == reference_hash_table) + ? (ptr = reference_hash_table + hash_table_size - 1) + : --ptr)) + if (same_reference(**ptr, *ref)) + break; + if (*ptr != 0) { + if (ref->is_merged()) + warning("fields ignored because reference already used"); + delete ref; + ref = *ptr; + } + else { + *ptr = ref; + ref->set_number(nreferences); + nreferences++; + ref->pre_compute_label(); + ref->compute_sort_key(); + if (nreferences*2 >= hash_table_size) { + // Rehash it. + reference **old_table = reference_hash_table; + int old_size = hash_table_size; + hash_table_size = next_size(hash_table_size); + reference_hash_table = new reference*[hash_table_size]; + int i; + for (i = 0; i < hash_table_size; i++) + reference_hash_table[i] = 0; + for (i = 0; i < old_size; i++) + if (old_table[i]) { + reference **p; + for (p = (reference_hash_table + + (old_table[i]->hash() % hash_table_size)); + *p; + ((p == reference_hash_table) + ? (p = reference_hash_table + hash_table_size - 1) + : --p)) + ; + *p = old_table[i]; + } + a_delete old_table; + } + } + if (label_in_text) + store_citation(ref); + return flags; +} + +unsigned immediately_handle_reference(const string &str) +{ + unsigned flags; + reference *ref = make_reference(str, &flags); + ref->set_number(nreferences); + if (label_in_text || label_in_reference) { + ref->pre_compute_label(); + ref->immediate_compute_label(); + } + nreferences++; + store_citation(ref); + return flags; +} + +static void immediately_output_references() +{ + for (int i = 0; i < ncitations; i++) { + reference *ref = citation[i]; + if (label_in_reference) { + fputs(".ds [F ", outfp); + const string &label = ref->get_label(NORMAL_LABEL); + if (label.length() > 0 + && (label[0] == ' ' || label[0] == '\\' || label[0] == '"')) + putc('"', outfp); + put_string(label, outfp); + putc('\n', outfp); + } + ref->output(outfp); + delete ref; + } + ncitations = 0; +} + +static void output_citation_group(reference **v, int n, label_type type, + FILE *fp) +{ + if (sort_adjacent_labels) { + // Do an insertion sort. Usually n will be very small. + for (int i = 1; i < n; i++) { + int num = v[i]->get_number(); + reference *temp = v[i]; + int j; + for (j = i - 1; j >= 0 && v[j]->get_number() > num; j--) + v[j + 1] = v[j]; + v[j + 1] = temp; + } + } + // This messes up if !accumulate. + if (accumulate && n > 1) { + // remove duplicates + int j = 1; + for (int i = 1; i < n; i++) + if (v[i]->get_label(type) != v[i - 1]->get_label(type)) + v[j++] = v[i]; + n = j; + } + string merged_label; + for (int i = 0; i < n; i++) { + int nmerged = v[i]->merge_labels(v + i + 1, n - i - 1, type, merged_label); + if (nmerged > 0) { + put_string(merged_label, fp); + i += nmerged; + } + else + put_string(v[i]->get_label(type), fp); + if (i < n - 1) + put_string(sep_label, fp); + } +} + + +label_processing_state::label_processing_state(reference **p, int n, FILE *f) +: state(NORMAL), count(0), rptr(p), rcount(n), fp(f) +{ +} + +label_processing_state::~label_processing_state() +{ + int handled = handle_pending(EOF); + assert(!handled); + assert(rcount == 0); +} + +int label_processing_state::handle_pending(int c) +{ + switch (state) { + case NORMAL: + break; + case PENDING_LABEL: + if (c == POST_LABEL_MARKER) { + state = PENDING_LABEL_POST; + return 1; + } + else { + output_citation_group(rptr, count, type, fp); + rptr += count ; + rcount -= count; + state = NORMAL; + } + break; + case PENDING_LABEL_POST: + if (c == PRE_LABEL_MARKER) { + state = PENDING_LABEL_POST_PRE; + return 1; + } + else { + output_citation_group(rptr, count, type, fp); + rptr += count; + rcount -= count; + put_string(post_label, fp); + state = NORMAL; + } + break; + case PENDING_LABEL_POST_PRE: + if (c >= LABEL_MARKER + && c < LABEL_MARKER + N_LABEL_TYPES + && c - LABEL_MARKER == type) { + count += 1; + state = PENDING_LABEL; + return 1; + } + else { + output_citation_group(rptr, count, type, fp); + rptr += count; + rcount -= count; + put_string(sep_label, fp); + state = NORMAL; + } + break; + case PENDING_POST: + if (c == PRE_LABEL_MARKER) { + put_string(sep_label, fp); + state = NORMAL; + return 1; + } + else { + put_string(post_label, fp); + state = NORMAL; + } + break; + } + return 0; +} + +void label_processing_state::process(int c) +{ + if (handle_pending(c)) + return; + assert(state == NORMAL); + switch (c) { + case PRE_LABEL_MARKER: + put_string(pre_label, fp); + state = NORMAL; + break; + case POST_LABEL_MARKER: + state = PENDING_POST; + break; + case LABEL_MARKER: + case LABEL_MARKER + 1: + count = 1; + state = PENDING_LABEL; + type = label_type(c - LABEL_MARKER); + break; + default: + state = NORMAL; + putc(c, fp); + break; + } +} + +extern "C" { + +int rcompare(const void *p1, const void *p2) +{ + return compare_reference(**(reference **)p1, **(reference **)p2); +} + +} + +void output_references() +{ + assert(accumulate); + if (nreferences > 0) { + int j = 0; + int i; + for (i = 0; i < hash_table_size; i++) + if (reference_hash_table[i] != 0) + reference_hash_table[j++] = reference_hash_table[i]; + assert(j == nreferences); + for (; j < hash_table_size; j++) + reference_hash_table[j] = 0; + qsort(reference_hash_table, nreferences, sizeof(reference*), rcompare); + for (i = 0; i < nreferences; i++) + reference_hash_table[i]->set_number(i); + compute_labels(reference_hash_table, nreferences); + } + if (outfp != stdout) { + rewind(outfp); + { + label_processing_state state(citation, ncitations, stdout); + int c; + while ((c = getc(outfp)) != EOF) + state.process(c); + } + ncitations = 0; + fclose(outfp); + outfp = stdout; + } + if (nreferences > 0) { + fputs(".]<\n", outfp); + for (int i = 0; i < nreferences; i++) { + if (sort_fields.length() > 0) + reference_hash_table[i]->print_sort_key_comment(outfp); + if (label_in_reference) { + fputs(".ds [F ", outfp); + const string &label = reference_hash_table[i]->get_label(NORMAL_LABEL); + if (label.length() > 0 + && (label[0] == ' ' || label[0] == '\\' || label[0] == '"')) + putc('"', outfp); + put_string(label, outfp); + putc('\n', outfp); + } + reference_hash_table[i]->output(outfp); + delete reference_hash_table[i]; + reference_hash_table[i] = 0; + } + fputs(".]>\n", outfp); + nreferences = 0; + } + clear_labels(); +} + +static reference *find_reference(const char *query, int query_len) +{ + // This is so that error messages look better. + while (query_len > 0 && csspace(query[query_len - 1])) + query_len--; + string str; + for (int i = 0; i < query_len; i++) + str += query[i] == '\n' ? ' ' : query[i]; + str += '\0'; + possibly_load_default_database(); + search_list_iterator iter(&database_list, str.contents()); + reference_id rid; + const char *start; + int len; + if (!iter.next(&start, &len, &rid)) { + error("no matches for `%1'", str.contents()); + return 0; + } + const char *end = start + len; + while (start < end) { + if (*start == '%') + break; + while (start < end && *start++ != '\n') + ; + } + if (start >= end) { + error("found a reference for `%1' but it didn't contain any fields", + str.contents()); + return 0; + } + reference *result = new reference(start, end - start, &rid); + if (iter.next(&start, &len, &rid)) + warning("multiple matches for `%1'", str.contents()); + return result; +} + +static reference *make_reference(const string &str, unsigned *flagsp) +{ + const char *start = str.contents(); + const char *end = start + str.length(); + const char *ptr = start; + while (ptr < end) { + if (*ptr == '%') + break; + while (ptr < end && *ptr++ != '\n') + ; + } + *flagsp = 0; + for (; start < ptr; start++) { + if (*start == '#') + *flagsp = (SHORT_LABEL | (*flagsp & (FORCE_RIGHT_BRACKET + | FORCE_LEFT_BRACKET))); + else if (*start == '[') + *flagsp |= FORCE_LEFT_BRACKET; + else if (*start == ']') + *flagsp |= FORCE_RIGHT_BRACKET; + else if (!csspace(*start)) + break; + } + if (start >= end) { + error("empty reference"); + return new reference; + } + reference *database_ref = 0; + if (start < ptr) + database_ref = find_reference(start, ptr - start); + reference *inline_ref = 0; + if (ptr < end) + inline_ref = new reference(ptr, end - ptr); + if (inline_ref) { + if (database_ref) { + database_ref->merge(*inline_ref); + delete inline_ref; + return database_ref; + } + else + return inline_ref; + } + else if (database_ref) + return database_ref; + else + return new reference; +} + +static void do_ref(const string &str) +{ + if (accumulate) + (void)store_reference(str); + else { + (void)immediately_handle_reference(str); + immediately_output_references(); + } +} + +static void trim_blanks(string &str) +{ + const char *start = str.contents(); + const char *end = start + str.length(); + while (end > start && end[-1] != '\n' && csspace(end[-1])) + --end; + str.set_length(end - start); +} + +void do_bib(const char *filename) +{ + FILE *fp; + if (strcmp(filename, "-") == 0) + fp = stdin; + else { + errno = 0; + fp = fopen(filename, "r"); + if (fp == 0) { + error("can't open `%1': %2", filename, strerror(errno)); + return; + } + current_filename = filename; + } + enum { + START, MIDDLE, BODY, BODY_START, BODY_BLANK, BODY_DOT + } state = START; + string body; + for (;;) { + int c = getc(fp); + if (c == EOF) + break; + if (invalid_input_char(c)) { + error("invalid input character code %1", c); + continue; + } + switch (state) { + case START: + if (c == '%') { + body = c; + state = BODY; + } + else if (c != '\n') + state = MIDDLE; + break; + case MIDDLE: + if (c == '\n') + state = START; + break; + case BODY: + body += c; + if (c == '\n') + state = BODY_START; + break; + case BODY_START: + if (c == '\n') { + do_ref(body); + state = START; + } + else if (c == '.') + state = BODY_DOT; + else if (csspace(c)) { + state = BODY_BLANK; + body += c; + } + else { + body += c; + state = BODY; + } + break; + case BODY_BLANK: + if (c == '\n') { + trim_blanks(body); + do_ref(body); + state = START; + } + else if (csspace(c)) + body += c; + else { + body += c; + state = BODY; + } + break; + case BODY_DOT: + if (c == ']') { + do_ref(body); + state = MIDDLE; + } + else { + body += '.'; + body += c; + state = c == '\n' ? BODY_START : BODY; + } + break; + default: + assert(0); + } + if (c == '\n') + current_lineno++; + } + switch (state) { + case START: + case MIDDLE: + break; + case BODY: + body += '\n'; + do_ref(body); + break; + case BODY_DOT: + case BODY_START: + do_ref(body); + break; + case BODY_BLANK: + trim_blanks(body); + do_ref(body); + break; + } + fclose(fp); +} + +// from the Dragon Book + +unsigned hash_string(const char *s, int len) +{ + const char *end = s + len; + unsigned h = 0, g; + while (s < end) { + h <<= 4; + h += *s++; + if ((g = h & 0xf0000000) != 0) { + h ^= g >> 24; + h ^= g; + } + } + return h; +} + +int next_size(int n) +{ + static const int table_sizes[] = { + 101, 503, 1009, 2003, 3001, 4001, 5003, 10007, 20011, 40009, + 80021, 160001, 500009, 1000003, 2000003, 4000037, 8000009, + 16000057, 32000011, 64000031, 128000003, 0 + }; + + const int *p; + for (p = table_sizes; *p <= n && *p != 0; p++) + ; + assert(*p != 0); + return *p; +} + diff --git a/contrib/groff/src/preproc/refer/refer.man b/contrib/groff/src/preproc/refer/refer.man index d8468bb9cd5a..d39d85d8598f 100644 --- a/contrib/groff/src/preproc/refer/refer.man +++ b/contrib/groff/src/preproc/refer/refer.man @@ -1,5 +1,5 @@ .ig -Copyright (C) 1989-2000, 2001 Free Software Foundation, Inc. +Copyright (C) 1989-2000, 2001, 2002, 2003 Free Software Foundation, Inc. Permission is granted to make and distribute verbatim copies of this manual provided the copyright notice and this permission notice @@ -67,7 +67,7 @@ This file documents the GNU version of which is part of the groff document formatting system. .B refer copies the contents of -.IR filename \|.\|.\|. +.IR filename \|.\|.\|.\& to the standard output, except that lines between .B .[ @@ -290,7 +290,7 @@ field or a field. .TP .B B -For an article that is part of a book, the title of the book +For an article that is part of a book, the title of the book. .TP .B C The place (city) of publication. @@ -1289,6 +1289,10 @@ Default database. .TP .IB file @INDEX_SUFFIX@ Index files. +.SH ENVIRONMENT +.Tp \w'\fBREFER'u+2n +.B REFER +If set, overrides the default database. .SH "SEE ALSO" .BR @g@indxbib (@MAN1EXT@), .BR @g@lookbib (@MAN1EXT@), diff --git a/contrib/groff/src/preproc/refer/token.cpp b/contrib/groff/src/preproc/refer/token.cpp new file mode 100644 index 000000000000..e9fac5df3fa2 --- /dev/null +++ b/contrib/groff/src/preproc/refer/token.cpp @@ -0,0 +1,378 @@ +// -*- C++ -*- +/* Copyright (C) 1989, 1990, 1991, 1992, 2001 Free Software Foundation, Inc. + Written by James Clark (jjc@jclark.com) + +This file is part of groff. + +groff is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +groff is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License along +with groff; see the file COPYING. If not, write to the Free Software +Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#include "refer.h" +#include "token.h" + +#define TOKEN_TABLE_SIZE 1009 +// I believe in Icelandic thorn sorts after z. +#define THORN_SORT_KEY "{" + +struct token_table_entry { + const char *tok; + token_info ti; + token_table_entry(); +}; + +token_table_entry token_table[TOKEN_TABLE_SIZE]; +int ntokens = 0; + +static void skip_name(const char **ptr, const char *end) +{ + if (*ptr < end) { + switch (*(*ptr)++) { + case '(': + if (*ptr < end) { + *ptr += 1; + if (*ptr < end) + *ptr += 1; + } + break; + case '[': + while (*ptr < end) + if (*(*ptr)++ == ']') + break; + break; + } + } +} + +int get_token(const char **ptr, const char *end) +{ + if (*ptr >= end) + return 0; + char c = *(*ptr)++; + if (c == '\\' && *ptr < end) { + switch (**ptr) { + default: + *ptr += 1; + break; + case '(': + case '[': + skip_name(ptr, end); + break; + case '*': + case 'f': + *ptr += 1; + skip_name(ptr, end); + break; + } + } + return 1; +} + +token_info::token_info() +: type(TOKEN_OTHER), sort_key(0), other_case(0) +{ +} + +void token_info::set(token_type t, const char *sk, const char *oc) +{ + assert(oc == 0 || t == TOKEN_UPPER || t == TOKEN_LOWER); + type = t; + sort_key = sk; + other_case = oc; +} + +void token_info::sortify(const char *start, const char *end, string &result) + const +{ + if (sort_key) + result += sort_key; + else if (type == TOKEN_UPPER || type == TOKEN_LOWER) { + for (; start < end; start++) + if (csalpha(*start)) + result += cmlower(*start); + } +} + +int token_info::sortify_non_empty(const char *start, const char *end) const +{ + if (sort_key) + return *sort_key != '\0'; + if (type != TOKEN_UPPER && type != TOKEN_LOWER) + return 0; + for (; start < end; start++) + if (csalpha(*start)) + return 1; + return 0; +} + + +void token_info::lower_case(const char *start, const char *end, + string &result) const +{ + if (type != TOKEN_UPPER) { + while (start < end) + result += *start++; + } + else if (other_case) + result += other_case; + else { + while (start < end) + result += cmlower(*start++); + } +} + +void token_info::upper_case(const char *start, const char *end, + string &result) const +{ + if (type != TOKEN_LOWER) { + while (start < end) + result += *start++; + } + else if (other_case) + result += other_case; + else { + while (start < end) + result += cmupper(*start++); + } +} + +token_table_entry::token_table_entry() +: tok(0) +{ +} + +static void store_token(const char *tok, token_type typ, + const char *sk = 0, const char *oc = 0) +{ + unsigned n = hash_string(tok, strlen(tok)) % TOKEN_TABLE_SIZE; + for (;;) { + if (token_table[n].tok == 0) { + if (++ntokens == TOKEN_TABLE_SIZE) + assert(0); + token_table[n].tok = tok; + break; + } + if (strcmp(tok, token_table[n].tok) == 0) + break; + if (n == 0) + n = TOKEN_TABLE_SIZE - 1; + else + --n; + } + token_table[n].ti.set(typ, sk, oc); +} + + +token_info default_token_info; + +const token_info *lookup_token(const char *start, const char *end) +{ + unsigned n = hash_string(start, end - start) % TOKEN_TABLE_SIZE; + for (;;) { + if (token_table[n].tok == 0) + break; + if (strlen(token_table[n].tok) == size_t(end - start) + && memcmp(token_table[n].tok, start, end - start) == 0) + return &(token_table[n].ti); + if (n == 0) + n = TOKEN_TABLE_SIZE - 1; + else + --n; + } + return &default_token_info; +} + +static void init_ascii() +{ + const char *p; + for (p = "abcdefghijklmnopqrstuvwxyz"; *p; p++) { + char buf[2]; + buf[0] = *p; + buf[1] = '\0'; + store_token(strsave(buf), TOKEN_LOWER); + buf[0] = cmupper(buf[0]); + store_token(strsave(buf), TOKEN_UPPER); + } + for (p = "0123456789"; *p; p++) { + char buf[2]; + buf[0] = *p; + buf[1] = '\0'; + const char *s = strsave(buf); + store_token(s, TOKEN_OTHER, s); + } + for (p = ".,:;?!"; *p; p++) { + char buf[2]; + buf[0] = *p; + buf[1] = '\0'; + store_token(strsave(buf), TOKEN_PUNCT); + } + store_token("-", TOKEN_HYPHEN); +} + +static void store_letter(const char *lower, const char *upper, + const char *sort_key = 0) +{ + store_token(lower, TOKEN_LOWER, sort_key, upper); + store_token(upper, TOKEN_UPPER, sort_key, lower); +} + +static void init_letter(unsigned char uc_code, unsigned char lc_code, + const char *sort_key) +{ + char lbuf[2]; + lbuf[0] = lc_code; + lbuf[1] = 0; + char ubuf[2]; + ubuf[0] = uc_code; + ubuf[1] = 0; + store_letter(strsave(lbuf), strsave(ubuf), sort_key); +} + +static void init_latin1() +{ + init_letter(0xc0, 0xe0, "a"); + init_letter(0xc1, 0xe1, "a"); + init_letter(0xc2, 0xe2, "a"); + init_letter(0xc3, 0xe3, "a"); + init_letter(0xc4, 0xe4, "a"); + init_letter(0xc5, 0xe5, "a"); + init_letter(0xc6, 0xe6, "ae"); + init_letter(0xc7, 0xe7, "c"); + init_letter(0xc8, 0xe8, "e"); + init_letter(0xc9, 0xe9, "e"); + init_letter(0xca, 0xea, "e"); + init_letter(0xcb, 0xeb, "e"); + init_letter(0xcc, 0xec, "i"); + init_letter(0xcd, 0xed, "i"); + init_letter(0xce, 0xee, "i"); + init_letter(0xcf, 0xef, "i"); + + init_letter(0xd0, 0xf0, "d"); + init_letter(0xd1, 0xf1, "n"); + init_letter(0xd2, 0xf2, "o"); + init_letter(0xd3, 0xf3, "o"); + init_letter(0xd4, 0xf4, "o"); + init_letter(0xd5, 0xf5, "o"); + init_letter(0xd6, 0xf6, "o"); + init_letter(0xd8, 0xf8, "o"); + init_letter(0xd9, 0xf9, "u"); + init_letter(0xda, 0xfa, "u"); + init_letter(0xdb, 0xfb, "u"); + init_letter(0xdc, 0xfc, "u"); + init_letter(0xdd, 0xfd, "y"); + init_letter(0xde, 0xfe, THORN_SORT_KEY); + + store_token("\337", TOKEN_LOWER, "ss", "SS"); + store_token("\377", TOKEN_LOWER, "y", "Y"); +} + +static void init_two_char_letter(char l1, char l2, char u1, char u2, + const char *sk = 0) +{ + char buf[6]; + buf[0] = '\\'; + buf[1] = '('; + buf[2] = l1; + buf[3] = l2; + buf[4] = '\0'; + const char *p = strsave(buf); + buf[2] = u1; + buf[3] = u2; + store_letter(p, strsave(buf), sk); + buf[1] = '['; + buf[4] = ']'; + buf[5] = '\0'; + p = strsave(buf); + buf[2] = l1; + buf[3] = l2; + store_letter(strsave(buf), p, sk); + +} + +static void init_special_chars() +{ + const char *p; + for (p = "':^`~"; *p; p++) + for (const char *q = "aeiouy"; *q; q++) { + // Use a variable to work around bug in gcc 2.0 + char c = cmupper(*q); + init_two_char_letter(*p, *q, *p, c); + } + for (p = "/l/o~n,coeaeij"; *p; p += 2) { + // Use variables to work around bug in gcc 2.0 + char c0 = cmupper(p[0]); + char c1 = cmupper(p[1]); + init_two_char_letter(p[0], p[1], c0, c1); + } + init_two_char_letter('v', 's', 'v', 'S', "s"); + init_two_char_letter('v', 'z', 'v', 'Z', "z"); + init_two_char_letter('o', 'a', 'o', 'A', "a"); + init_two_char_letter('T', 'p', 'T', 'P', THORN_SORT_KEY); + init_two_char_letter('-', 'd', '-', 'D'); + + store_token("\\(ss", TOKEN_LOWER, 0, "SS"); + store_token("\\[ss]", TOKEN_LOWER, 0, "SS"); + + store_token("\\(Sd", TOKEN_LOWER, "d", "\\(-D"); + store_token("\\[Sd]", TOKEN_LOWER, "d", "\\[-D]"); + store_token("\\(hy", TOKEN_HYPHEN); + store_token("\\[hy]", TOKEN_HYPHEN); + store_token("\\(en", TOKEN_RANGE_SEP); + store_token("\\[en]", TOKEN_RANGE_SEP); +} + +static void init_strings() +{ + char buf[6]; + buf[0] = '\\'; + buf[1] = '*'; + for (const char *p = "'`^^,:~v_o./;"; *p; p++) { + buf[2] = *p; + buf[3] = '\0'; + store_token(strsave(buf), TOKEN_ACCENT); + buf[2] = '['; + buf[3] = *p; + buf[4] = ']'; + buf[5] = '\0'; + store_token(strsave(buf), TOKEN_ACCENT); + } + + // -ms special letters + store_letter("\\*(th", "\\*(Th", THORN_SORT_KEY); + store_letter("\\*[th]", "\\*[Th]", THORN_SORT_KEY); + store_letter("\\*(d-", "\\*(D-"); + store_letter("\\*[d-]", "\\*[D-]"); + store_letter("\\*(ae", "\\*(Ae", "ae"); + store_letter("\\*[ae]", "\\*[Ae]", "ae"); + store_letter("\\*(oe", "\\*(Oe", "oe"); + store_letter("\\*[oe]", "\\*[Oe]", "oe"); + + store_token("\\*3", TOKEN_LOWER, "y", "Y"); + store_token("\\*8", TOKEN_LOWER, "ss", "SS"); + store_token("\\*q", TOKEN_LOWER, "o", "O"); +} + +struct token_initer { + token_initer(); +}; + +static token_initer the_token_initer; + +token_initer::token_initer() +{ + init_ascii(); + init_latin1(); + init_special_chars(); + init_strings(); + default_token_info.set(TOKEN_OTHER); +} |