# zishrink.awk revision 325322
# Convert tzdata source into a smaller version of itself.

# Contributed by Paul Eggert.  This file is in the public domain.

# This is not a general-purpose converter; it is designed for current tzdata.
# 'zic' should treat this script's output as if it were identical to
# this script's input.
81573Srgrimes
91573Srgrimes
# Return a fresh, short rule name that has not been handed out before.
#
# The global counter N_RULE_NAMES records how many names have been
# generated so far; it is read and then incremented on every call.
# Names are spelled with letters and punctuation only, so a generated
# name never starts with a digit, '-' or '+'.
#
# All parameters are awk-style local variables; callers pass nothing.

function gen_rule_name(alphabet, base, rule_name, n, digit)
{
  # Characters a generated name may contain, in "digit" order.
  alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  alphabet = alphabet "abcdefghijklmnopqrstuvwxyz"
  alphabet = alphabet "!$%&'()*+,./:;<=>?@[\\]^_`{|}~"
  base = length(alphabet)

  n = n_rule_names++
  rule_name = ""

  # Peel off one character per pass, least significant first, and
  # prepend it to the name built so far.
  while (1) {
    # Once at least one character has been emitted, a remaining value
    # that fits in a single "digit" is shifted down by one so that the
    # leading character may also be the first letter of the alphabet.
    if (rule_name != "" && n <= base)
      n = n - 1

    digit = n % base
    rule_name = substr(alphabet, digit + 1, 1) rule_name
    n = (n - digit) / base

    if (n == 0)
      break
  }

  return rule_name
}
321573Srgrimes
# Process an input line and save it for later output.
#
# LINE is one line of tzdata source that contains something other than
# a comment.  It is shortened (keywords, times, weekday and month names,
# redundant trailing fields, rule names) and appended to OUTPUT_LINE.
# Lines for SystemV rules and FooAsia rules are dropped entirely.
#
# Globals used:
#   rule[]        maps an original rule name to its generated short name
#   zonename      name of the Zone or Link most recently started, or ""
#                 after a Rule line
#   zonedef[]     maps a Zone/Link name to 1 + the OUTPUT_LINE index of
#                 the line that starts its definition
#   output_line[] saved output lines, indexed from 0
#   nout          number of saved output lines
#
# The remaining parameters are awk-style local variables.

function process_input_line(line, field, end, i, n, startdef)
{
  # Remove comments, normalize spaces, and append a space to each line.
  sub(/#.*/, "", line)
  line = line " "
  gsub(/[[:space:]]+/, " ", line)

  # Abbreviate keywords.  Do not abbreviate "Link" to just "L",
  # as pre-2017c zic erroneously diagnoses "Li" as ambiguous.
  sub(/^Link /, "Li ", line)
  sub(/^Rule /, "R ", line)
  sub(/^Zone /, "Z ", line)

  # SystemV rules are not needed.
  if (line ~ /^R SystemV /) return

  # Replace FooAsia rules with the same rules without "Asia", as they
  # are duplicates.
  if (match(line, /[^ ]Asia /)) {
    if (line ~ /^R /) return
    line = substr(line, 1, RSTART) substr(line, RSTART + 5)
  }

  # Abbreviate times: first drop leading zeros from each component,
  # then drop trailing ":0" components.
  while (match(line, /[: ]0+[0-9]/))
    line = substr(line, 1, RSTART) substr(line, RSTART + RLENGTH - 1)
  while (match(line, /:0[^:]/))
    line = substr(line, 1, RSTART - 1) substr(line, RSTART + 2)

  # Abbreviate weekday names.  Do not abbreviate "Sun" and "Sat", as
  # pre-2017c zic erroneously diagnoses "Su" and "Sa" as ambiguous.
  # Mon/Wed/Fri shrink to one letter; Tue/Thu need two to stay distinct.
  while (match(line, / (last)?(Mon|Wed|Fri)[ <>]/)) {
    end = RSTART + RLENGTH
    line = substr(line, 1, end - 4) substr(line, end - 1)
  }
  while (match(line, / (last)?(Tue|Thu)[ <>]/)) {
    end = RSTART + RLENGTH
    line = substr(line, 1, end - 3) substr(line, end - 1)
  }

  # Abbreviate "max", "only" and month names.
  # Do not abbreviate "min", as pre-2017c zic erroneously diagnoses "mi"
  # as ambiguous.
  gsub(/ max /, " ma ", line)
  gsub(/ only /, " o ", line)
  gsub(/ Jan /, " Ja ", line)
  gsub(/ Feb /, " F ", line)
  gsub(/ Apr /, " Ap ", line)
  gsub(/ Aug /, " Au ", line)
  gsub(/ Sep /, " S ", line)
  gsub(/ Oct /, " O ", line)
  gsub(/ Nov /, " N ", line)
  gsub(/ Dec /, " D ", line)

  # Strip leading and trailing space.
  sub(/^ /, "", line)
  sub(/ $/, "", line)

  # Remove unnecessary trailing zero fields.
  sub(/ 0+$/, "", line)

  # Remove unnecessary trailing days-of-month "1".
  if (match(line, /[[:alpha:]] 1$/))
    line = substr(line, 1, RSTART)

  # Remove unnecessary trailing " Ja" (for January).
  sub(/ Ja$/, "", line)

  n = split(line, field)

  # Abbreviate rule names.  The rule name is field 4 of a Zone line and
  # field 2 of a Rule or Zone-continuation line; Link lines have none.
  # A field starting with '-', '+' or a digit is not a rule name.
  i = field[1] == "Z" ? 4 : field[1] == "Li" ? 0 : 2
  if (i && field[i] ~ /^[^-+0-9]/) {
    if (!rule[field[i]])
      rule[field[i]] = gen_rule_name()
    field[i] = rule[field[i]]
  }

  # If this zone supersedes an earlier one, delete the earlier one
  # from the saved output lines.
  startdef = ""
  if (field[1] == "Z")
    zonename = startdef = field[2]
  else if (field[1] == "Li")
    zonename = startdef = field[3]
  else if (field[1] == "R")
    zonename = ""
  if (startdef) {
    i = zonedef[startdef]
    if (i) {
      # Blank the earlier definition's first line and every
      # continuation line (one starting with '-', '+' or a digit)
      # that immediately follows it.
      do {
        output_line[i - 1] = ""
      } while (output_line[i++] ~ /^[-+0-9]/);
    }

    # Remember where this definition starts.  This must be recorded
    # only for the line that begins a Zone or Link: recording it for
    # continuation lines too would leave zonedef pointing at the last
    # line of the zone, so a later superseding definition would blank
    # only that last line and leave the rest of the stale zone behind.
    zonedef[startdef] = nout + 1
  }

  # Save the line for later output.
  line = field[1]
  for (i = 2; i <= n; i++)
    line = line " " field[i]
  output_line[nout++] = line
}
1381573Srgrimes
# Print the saved output lines in the order they were saved, skipping
# any entry that has been blanked out.  Reads the globals OUTPUT_LINE
# and NOUT; I is an awk-style local variable.
function output_saved_lines(i)
{
  i = 0
  while (i < nout) {
    if (output_line[i])
      print output_line[i]
    i++
  }
}
1451573Srgrimes
# Emit the public-domain notice ahead of any converted data.
BEGIN { print "# This zic input file is in the public domain." }

# Hand over every input line whose first non-blank character is not '#',
# i.e. every line that is neither empty nor purely a comment.
/^[[:space:]]*[^#[:space:]]/ { process_input_line($0) }

# All input has been read: write out the saved lines.
END { output_saved_lines() }
1571573Srgrimes