summaryrefslogtreecommitdiff
path: root/contrib/tzdata/zishrink.awk
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/tzdata/zishrink.awk')
-rw-r--r--contrib/tzdata/zishrink.awk156
1 files changed, 156 insertions, 0 deletions
diff --git a/contrib/tzdata/zishrink.awk b/contrib/tzdata/zishrink.awk
new file mode 100644
index 000000000000..02c883a62620
--- /dev/null
+++ b/contrib/tzdata/zishrink.awk
@@ -0,0 +1,156 @@
+# Convert tzdata source into a smaller version of itself.
+
+# Contributed by Paul Eggert. This file is in the public domain.
+
+# This is not a general-purpose converter; it is designed for current tzdata.
+# 'zic' should treat this script's output as if it were identical to
+# this script's input.
+
+
+# Generate and return a fresh, short rule name.
+# The global N_RULE_NAMES counts the names handed out so far; it is
+# incremented on every call, so successive calls yield distinct names.
+# Names are built from letters and punctuation only (no digits or "-"):
+# one-character names come first, then longer ones.
+# All parameters are really local variables; call with no arguments.
+
+function gen_rule_name(alphabet, base, rule_name, n, digit)
+{
+  alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" \
+	     "abcdefghijklmnopqrstuvwxyz" \
+	     "!$%&'()*+,./:;<=>?@[\\]^_`{|}~"
+  base = length(alphabet)
+  n = n_rule_names
+  n_rule_names = n_rule_names + 1
+  rule_name = ""
+
+  do {
+    # Once at least one character has been emitted, a small remaining
+    # value is shifted down by one so that the leading character of a
+    # multi-character name can be the first letter of the alphabet.
+    if (rule_name != "" && n <= base)
+      n = n - 1
+    digit = n % base
+    # Prepend, since the least significant character is computed first.
+    rule_name = substr(alphabet, digit + 1, 1) rule_name
+    n = (n - digit) / base
+  } while (n != 0)
+
+  return rule_name
+}
+
+# Process an input line and save it for later output.
+#
+# LINE is one line of tzdata source that contains something other than
+# a comment.  It is shrunk (comments removed, keywords, times, weekday
+# and month names abbreviated, redundant trailing fields dropped, rule
+# names replaced by generated short names) and appended to OUTPUT_LINE.
+# Nothing is returned.  SystemV rules and "FooAsia" rules are dropped
+# without being saved.
+#
+# FIELD, END, I, N and STARTDEF are local variables, not arguments.
+#
+# Globals:
+#   rule[name]     generated short name for original rule NAME
+#   output_line[]  saved output lines, indexed 0 .. nout-1
+#   nout           number of saved output lines
+#   zonedef[name]  1 + index in output_line of the Zone or Link line
+#                  that starts the current definition of NAME
+
+function process_input_line(line, field, end, i, n, startdef)
+{
+  # Remove comments, normalize spaces, and append a space to each line.
+  # The trailing space lets the " word " patterns below match the last
+  # field too.
+  sub(/#.*/, "", line)
+  line = line " "
+  gsub(/[[:space:]]+/, " ", line)
+
+  # Abbreviate keywords.  Do not abbreviate "Link" to just "L",
+  # as pre-2017c zic erroneously diagnoses "Li" as ambiguous.
+  sub(/^Link /, "Li ", line)
+  sub(/^Rule /, "R ", line)
+  sub(/^Zone /, "Z ", line)
+
+  # SystemV rules are not needed.
+  if (line ~ /^R SystemV /) return
+
+  # Replace FooAsia rules with the same rules without "Asia", as they
+  # are duplicates.  The rule definitions themselves are dropped; any
+  # other line merely has the "Asia" suffix removed from the name.
+  if (match(line, /[^ ]Asia /)) {
+    if (line ~ /^R /) return
+    line = substr(line, 1, RSTART) substr(line, RSTART + 5)
+  }
+
+  # Abbreviate times.
+  # First strip leading zeros from each number that follows a space or
+  # colon (keeping its last digit), then drop any ":0" component that
+  # is not followed by a further ":" component.
+  while (match(line, /[: ]0+[0-9]/))
+    line = substr(line, 1, RSTART) substr(line, RSTART + RLENGTH - 1)
+  while (match(line, /:0[^:]/))
+    line = substr(line, 1, RSTART - 1) substr(line, RSTART + 2)
+
+  # Abbreviate weekday names.  Do not abbreviate "Sun" and "Sat", as
+  # pre-2017c zic erroneously diagnoses "Su" and "Sa" as ambiguous.
+  # END is the index just past the match, whose last character is the
+  # delimiter after the weekday name; "Mon", "Wed" and "Fri" shrink to
+  # one letter, "Tue" and "Thu" to two.
+  while (match(line, / (last)?(Mon|Wed|Fri)[ <>]/)) {
+    end = RSTART + RLENGTH
+    line = substr(line, 1, end - 4) substr(line, end - 1)
+  }
+  while (match(line, / (last)?(Tue|Thu)[ <>]/)) {
+    end = RSTART + RLENGTH
+    line = substr(line, 1, end - 3) substr(line, end - 1)
+  }
+
+  # Abbreviate "max", "only" and month names.
+  # Do not abbreviate "min", as pre-2017c zic erroneously diagnoses "mi"
+  # as ambiguous.
+  gsub(/ max /, " ma ", line)
+  gsub(/ only /, " o ", line)
+  gsub(/ Jan /, " Ja ", line)
+  gsub(/ Feb /, " F ", line)
+  gsub(/ Apr /, " Ap ", line)
+  gsub(/ Aug /, " Au ", line)
+  gsub(/ Sep /, " S ", line)
+  gsub(/ Oct /, " O ", line)
+  gsub(/ Nov /, " N ", line)
+  gsub(/ Dec /, " D ", line)
+
+  # Strip leading and trailing space.
+  sub(/^ /, "", line)
+  sub(/ $/, "", line)
+
+  # Remove unnecessary trailing zero fields.
+  sub(/ 0+$/, "", line)
+
+  # Remove unnecessary trailing days-of-month "1".
+  if (match(line, /[[:alpha:]] 1$/))
+    line = substr(line, 1, RSTART)
+
+  # Remove unnecessary trailing " Ja" (for January).
+  sub(/ Ja$/, "", line)
+
+  n = split(line, field)
+
+  # Abbreviate rule names.  The rule name is field 4 of a Zone line and
+  # field 2 of a Rule or Zone continuation line; Link lines have none.
+  # A field starting with "-", "+" or a digit is not a rule name.
+  i = field[1] == "Z" ? 4 : field[1] == "Li" ? 0 : 2
+  if (i && field[i] ~ /^[^-+0-9]/) {
+    if (!rule[field[i]])
+      rule[field[i]] = gen_rule_name()
+    field[i] = rule[field[i]]
+  }
+
+  # If this zone supersedes an earlier one, delete the earlier one
+  # from the saved output lines.
+  startdef = ""
+  if (field[1] == "Z")
+    startdef = field[2]
+  else if (field[1] == "Li")
+    startdef = field[3]
+  if (startdef) {
+    i = zonedef[startdef]
+    if (i) {
+      # Blank the earlier Zone or Link line, then every continuation
+      # line (one starting with a sign or digit) that follows it.
+      do {
+	output_line[i - 1] = ""
+      } while (output_line[i++] ~ /^[-+0-9]/)
+    }
+    # Remember where this definition starts.  This is recorded only for
+    # the line that starts a definition, never for continuation lines:
+    # otherwise the entry would point at the last line of a multi-line
+    # Zone and a later superseding definition would leave the Zone line
+    # and its earlier continuation lines behind.
+    zonedef[startdef] = nout + 1
+  }
+
+  # Save the line for later output.
+  line = field[1]
+  for (i = 2; i <= n; i++)
+    line = line " " field[i]
+  output_line[nout++] = line
+}
+
+# Print every saved output line, in order, skipping entries that were
+# blanked out after being saved.  I is a local variable, not an argument.
+
+function output_saved_lines(i)
+{
+  i = 0
+  while (i < nout) {
+    if (output_line[i])
+      print output_line[i]
+    i++
+  }
+}
+
+# Emit the public-domain notice before any converted data.
+BEGIN {
+  print "# This zic input file is in the public domain."
+}
+
+# Handle each line whose first non-space character is not "#", i.e.
+# skip blank lines and comment-only lines.
+$0 ~ /^[[:space:]]*[^#[:space:]]/ {
+  process_input_line($0)
+}
+
+# All input has been read: write out whatever lines were kept.
+END {
+  output_saved_lines()
+}