diff options
Diffstat (limited to 'contrib/tzdata/zishrink.awk')
-rw-r--r-- | contrib/tzdata/zishrink.awk | 156 |
1 files changed, 156 insertions, 0 deletions
diff --git a/contrib/tzdata/zishrink.awk b/contrib/tzdata/zishrink.awk new file mode 100644 index 000000000000..02c883a62620 --- /dev/null +++ b/contrib/tzdata/zishrink.awk @@ -0,0 +1,156 @@ +# Convert tzdata source into a smaller version of itself. + +# Contributed by Paul Eggert. This file is in the public domain. + +# This is not a general-purpose converter; it is designed for current tzdata. +# 'zic' should treat this script's output as if it were identical to +# this script's input. + + +# Return a new rule name. +# N_RULE_NAMES keeps track of how many rule names have been generated. + +function gen_rule_name(alphabet, base, rule_name, n, digit) +{ + alphabet = "" + alphabet = alphabet "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + alphabet = alphabet "abcdefghijklmnopqrstuvwxyz" + alphabet = alphabet "!$%&'()*+,./:;<=>?@[\\]^_`{|}~" + base = length(alphabet) + rule_name = "" + n = n_rule_names++ + + do { + n -= rule_name && n <= base + digit = n % base + rule_name = substr(alphabet, digit + 1, 1) rule_name + n = (n - digit) / base + } while (n); + + return rule_name +} + +# Process an input line and save it for later output. + +function process_input_line(line, field, end, i, n, startdef) +{ + # Remove comments, normalize spaces, and append a space to each line. + sub(/#.*/, "", line) + line = line " " + gsub(/[[:space:]]+/, " ", line) + + # Abbreviate keywords. Do not abbreviate "Link" to just "L", + # as pre-2017c zic erroneously diagnoses "Li" as ambiguous. + sub(/^Link /, "Li ", line) + sub(/^Rule /, "R ", line) + sub(/^Zone /, "Z ", line) + + # SystemV rules are not needed. + if (line ~ /^R SystemV /) return + + # Replace FooAsia rules with the same rules without "Asia", as they + # are duplicates. + if (match(line, /[^ ]Asia /)) { + if (line ~ /^R /) return + line = substr(line, 1, RSTART) substr(line, RSTART + 5) + } + + # Abbreviate times. + while (match(line, /[: ]0+[0-9]/)) + line = substr(line, 1, RSTART) substr(line, RSTART + RLENGTH - 1) + while (match(line, /:0[^:]/)) + line = substr(line, 1, RSTART - 1) substr(line, RSTART + 2) + + # Abbreviate weekday names. Do not abbreviate "Sun" and "Sat", as + # pre-2017c zic erroneously diagnoses "Su" and "Sa" as ambiguous. + while (match(line, / (last)?(Mon|Wed|Fri)[ <>]/)) { + end = RSTART + RLENGTH + line = substr(line, 1, end - 4) substr(line, end - 1) + } + while (match(line, / (last)?(Tue|Thu)[ <>]/)) { + end = RSTART + RLENGTH + line = substr(line, 1, end - 3) substr(line, end - 1) + } + + # Abbreviate "max", "only" and month names. + # Do not abbreviate "min", as pre-2017c zic erroneously diagnoses "mi" + # as ambiguous. + gsub(/ max /, " ma ", line) + gsub(/ only /, " o ", line) + gsub(/ Jan /, " Ja ", line) + gsub(/ Feb /, " F ", line) + gsub(/ Apr /, " Ap ", line) + gsub(/ Aug /, " Au ", line) + gsub(/ Sep /, " S ", line) + gsub(/ Oct /, " O ", line) + gsub(/ Nov /, " N ", line) + gsub(/ Dec /, " D ", line) + + # Strip leading and trailing space. + sub(/^ /, "", line) + sub(/ $/, "", line) + + # Remove unnecessary trailing zero fields. + sub(/ 0+$/, "", line) + + # Remove unnecessary trailing days-of-month "1". + if (match(line, /[[:alpha:]] 1$/)) + line = substr(line, 1, RSTART) + + # Remove unnecessary trailing " Ja" (for January). + sub(/ Ja$/, "", line) + + n = split(line, field) + + # Abbreviate rule names. + i = field[1] == "Z" ? 4 : field[1] == "Li" ? 0 : 2 + if (i && field[i] ~ /^[^-+0-9]/) { + if (!rule[field[i]]) + rule[field[i]] = gen_rule_name() + field[i] = rule[field[i]] + } + + # If this zone supersedes an earlier one, delete the earlier one + # from the saved output lines. + startdef = "" + if (field[1] == "Z") + zonename = startdef = field[2] + else if (field[1] == "Li") + zonename = startdef = field[3] + else if (field[1] == "R") + zonename = "" + if (startdef) { + i = zonedef[startdef] + if (i) { + do + output_line[i - 1] = "" + while (output_line[i++] ~ /^[-+0-9]/); + } + } + zonedef[zonename] = nout + 1 + + # Save the line for later output. + line = field[1] + for (i = 2; i <= n; i++) + line = line " " field[i] + output_line[nout++] = line +} + +function output_saved_lines(i) +{ + for (i = 0; i < nout; i++) + if (output_line[i]) + print output_line[i] +} + +BEGIN { + print "# This zic input file is in the public domain." +} + +/^[[:space:]]*[^#[:space:]]/ { + process_input_line($0) +} + +END { + output_saved_lines() +} |