zishrink.awk revision 325160
1# Convert tzdata source into a smaller version of itself.
2
3# Contributed by Paul Eggert.  This file is in the public domain.
4
5# This is not a general-purpose converter; it is designed for current tzdata.
6# 'zic' should treat this script's output as if it were identical to
7# this script's input.
8
9
# Return a rule name that has not been returned before.
# The global N_RULE_NAMES counts the names generated so far; it selects
# the next name and is incremented on every call.
# All parameters are local variables; callers pass no arguments.

function gen_rule_name(alphabet, base, rule_name, n, digit)
{
  # Characters a generated name may contain: upper- and lower-case
  # letters, then punctuation.  Digits, "-" and "+" are absent.
  alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz"
  alphabet = alphabet "!$%&'()*+,./:;<=>?@[\\]^_`{|}~"
  base = length(alphabet)

  n = n_rule_names++
  rule_name = ""

  # Build the name from its last character to its first.
  for (;;) {
    # Once at least one character has been produced, shift a small
    # remaining count down by one so that the leading position also
    # uses the first character of the alphabet ("AA" follows "~").
    if (rule_name != "" && n <= base)
      n--
    digit = n % base
    rule_name = substr(alphabet, digit + 1, 1) rule_name
    n = (n - digit) / base
    if (!n)
      break
  }

  return rule_name
}
32
# Process an input line and save it for later output.
# LINE is one line of zic input that contains something other than
# white space and comments.  The remaining parameters are local
# variables; callers pass only LINE.
#
# Globals read and written:
#   rule[NAME]     abbreviated name already generated for rule NAME
#   zonename       name defined by the most recent Zone or Link line,
#                  or "" after a Rule line
#   zonedef[NAME]  1 + the index in output_line of the Zone or Link line
#                  that starts the latest definition of NAME
#   output_line[]  saved output lines, indexed from 0
#   nout           number of saved output lines

function process_input_line(line, field, end, i, n, startdef)
{
  # Remove comments, normalize spaces, and append a space to each line.
  sub(/#.*/, "", line)
  line = line " "
  gsub(/[[:space:]]+/, " ", line)

  # Abbreviate keywords.  Do not abbreviate "Link" to just "L",
  # as pre-2017c zic erroneously diagnoses "Li" as ambiguous.
  sub(/^Link /, "Li ", line)
  sub(/^Rule /, "R ", line)
  sub(/^Zone /, "Z ", line)

  # SystemV rules are not needed.
  if (line ~ /^R SystemV /) return

  # Replace FooAsia rules with the same rules without "Asia", as they
  # are duplicates.  The FooAsia rule lines themselves are dropped;
  # any other line that mentions FooAsia is rewritten to mention Foo.
  if (match(line, /[^ ]Asia /)) {
    if (line ~ /^R /) return
    line = substr(line, 1, RSTART) substr(line, RSTART + 5)
  }

  # Abbreviate times.
  # First strip leading zeros from each number that follows a space or
  # a colon (" 02:00" becomes " 2:0"), then drop each ":0" that is not
  # followed by another colon (" 2:0" becomes " 2").
  while (match(line, /[: ]0+[0-9]/))
    line = substr(line, 1, RSTART) substr(line, RSTART + RLENGTH - 1)
  while (match(line, /:0[^:]/))
    line = substr(line, 1, RSTART - 1) substr(line, RSTART + 2)

  # Abbreviate weekday names.  Do not abbreviate "Sun" and "Sat", as
  # pre-2017c zic erroneously diagnoses "Su" and "Sa" as ambiguous.
  # END is the index just past the match, so END - 1 is the delimiter
  # that follows the weekday name.
  while (match(line, / (last)?(Mon|Wed|Fri)[ <>]/)) {
    # Keep only the first letter of the weekday name.
    end = RSTART + RLENGTH
    line = substr(line, 1, end - 4) substr(line, end - 1)
  }
  while (match(line, / (last)?(Tue|Thu)[ <>]/)) {
    # Keep the first two letters of the weekday name.
    end = RSTART + RLENGTH
    line = substr(line, 1, end - 3) substr(line, end - 1)
  }

  # Abbreviate "max", "only" and month names.
  # Do not abbreviate "min", as pre-2017c zic erroneously diagnoses "mi"
  # as ambiguous.
  gsub(/ max /, " ma ", line)
  gsub(/ only /, " o ", line)
  gsub(/ Jan /, " Ja ", line)
  gsub(/ Feb /, " F ", line)
  gsub(/ Apr /, " Ap ", line)
  gsub(/ Aug /, " Au ", line)
  gsub(/ Sep /, " S ", line)
  gsub(/ Oct /, " O ", line)
  gsub(/ Nov /, " N ", line)
  gsub(/ Dec /, " D ", line)

  # Strip leading and trailing space.
  sub(/^ /, "", line)
  sub(/ $/, "", line)

  # Remove unnecessary trailing zero fields.
  sub(/ 0+$/, "", line)

  # Remove unnecessary trailing days-of-month "1".
  if (match(line, /[[:alpha:]] 1$/))
    line = substr(line, 1, RSTART)

  # Remove unnecessary trailing " Ja" (for January).
  sub(/ Ja$/, "", line)

  n = split(line, field)

  # Abbreviate rule names.
  # The name is field 4 of a Zone line and field 2 of any other line
  # except a Link line, which has none.  A field that starts with
  # "-", "+" or a digit is not a rule name and is left alone.
  i = field[1] == "Z" ? 4 : field[1] == "Li" ? 0 : 2
  if (i && field[i] ~ /^[^-+0-9]/) {
    if (!rule[field[i]])
      rule[field[i]] = gen_rule_name()
    field[i] = rule[field[i]]
  }

  # If this zone supersedes an earlier one, delete the earlier one
  # from the saved output lines.
  startdef = ""
  if (field[1] == "Z")
    zonename = startdef = field[2]
  else if (field[1] == "Li")
    zonename = startdef = field[3]
  else if (field[1] == "R")
    zonename = ""
  if (startdef) {
    i = zonedef[startdef]
    if (i) {
      # Blank the earlier definition's first line and every
      # continuation line (one starting with "-", "+" or a digit)
      # that follows it.
      do
        output_line[i - 1] = ""
      while (output_line[i++] ~ /^[-+0-9]/);
    }
    # Remember where this definition starts.  This must be recorded
    # only for the Zone or Link line itself: recording it again for
    # each continuation line would leave zonedef pointing at the last
    # continuation line, and a later superseding definition would then
    # fail to delete the lines before it.
    zonedef[startdef] = nout + 1
  }

  # Save the line for later output.
  line = field[1]
  for (i = 2; i <= n; i++)
    line = line " " field[i]
  output_line[nout++] = line
}
138
# Print every saved output line that is still nonempty, in the order
# in which the lines were saved.  Lines blanked because a later
# definition superseded them are skipped.
# I is a local variable; callers pass no arguments.

function output_saved_lines(i)
{
  i = 0
  while (i < nout) {
    if (output_line[i])
      print output_line[i]
    i++
  }
}
145
# Emit the public-domain notice before any converted data.
BEGIN { print "# This zic input file is in the public domain." }

# Hand each line that contains something other than white space and
# comments to the converter; other lines are ignored.
$0 ~ /^[[:space:]]*[^#[:space:]]/ { process_input_line($0) }

# All input has been read: write out the saved lines that survived.
END { output_saved_lines() }
157