# zishrink.awk revision 325322

# Convert tzdata source into a smaller version of itself.

# Contributed by Paul Eggert.  This file is in the public domain.

# This is not a general-purpose converter; it is designed for current tzdata.
# 'zic' should treat this script's output as if it were identical to
# this script's input.


# Return a new rule name.
# N_RULE_NAMES keeps track of how many rule names have been generated.
#
# Callers pass no arguments; every parameter is used as a local variable.
# Names are spelled with characters drawn from ALPHABET, shortest names
# first: "A", "B", ..., "~", then "AA", and so on.

function gen_rule_name(alphabet, base, rule_name, n, digit)
{
  alphabet = ""
  alphabet = alphabet "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  alphabet = alphabet "abcdefghijklmnopqrstuvwxyz"
  alphabet = alphabet "!$%&'()*+,./:;<=>?@[\\]^_`{|}~"
  base = length(alphabet)
  rule_name = ""
  n = n_rule_names++

  do {
    # Once at least one character has been emitted, bias a small remaining
    # count down by one so that the leading character of a longer name can
    # be the first character of the alphabet.
    n -= rule_name && n <= base
    digit = n % base
    rule_name = substr(alphabet, digit + 1, 1) rule_name
    n = (n - digit) / base
  } while (n);

  return rule_name
}

# Process an input line and save it for later output.
#
# LINE is the raw input line.  The remaining parameters are local variables.
# The shrunken line is appended to OUTPUT_LINE (indexed from 0, with NOUT
# counting the saved lines).  RULE maps original rule names to generated
# ones, and ZONEDEF maps a zone or link name to one plus the OUTPUT_LINE
# index of the line that starts its definition.

function process_input_line(line, field, end, i, n, startdef)
{
  # Remove comments, normalize spaces, and append a space to each line.
  sub(/#.*/, "", line)
  line = line " "
  gsub(/[[:space:]]+/, " ", line)

  # Abbreviate keywords.  Do not abbreviate "Link" to just "L",
  # as pre-2017c zic erroneously diagnoses "Li" as ambiguous.
  sub(/^Link /, "Li ", line)
  sub(/^Rule /, "R ", line)
  sub(/^Zone /, "Z ", line)

  # SystemV rules are not needed.
  if (line ~ /^R SystemV /) return

  # Replace FooAsia rules with the same rules without "Asia", as they
  # are duplicates.
  if (match(line, /[^ ]Asia /)) {
    if (line ~ /^R /) return
    line = substr(line, 1, RSTART) substr(line, RSTART + 5)
  }

  # Abbreviate times.
  # First drop leading zeros from each number that follows ":" or " " ...
  while (match(line, /[: ]0+[0-9]/))
    line = substr(line, 1, RSTART) substr(line, RSTART + RLENGTH - 1)
  # ... then drop each ":0" component that is not followed by another ":".
  while (match(line, /:0[^:]/))
    line = substr(line, 1, RSTART - 1) substr(line, RSTART + 2)

  # Abbreviate weekday names.  Do not abbreviate "Sun" and "Sat", as
  # pre-2017c zic erroneously diagnoses "Su" and "Sa" as ambiguous.
  # "Mon", "Wed" and "Fri" shrink to one letter ...
  while (match(line, / (last)?(Mon|Wed|Fri)[ <>]/)) {
    end = RSTART + RLENGTH
    line = substr(line, 1, end - 4) substr(line, end - 1)
  }
  # ... while "Tue" and "Thu" need two letters to stay distinct.
  while (match(line, / (last)?(Tue|Thu)[ <>]/)) {
    end = RSTART + RLENGTH
    line = substr(line, 1, end - 3) substr(line, end - 1)
  }

  # Abbreviate "max", "only" and month names.
  # Do not abbreviate "min", as pre-2017c zic erroneously diagnoses "mi"
  # as ambiguous.
  gsub(/ max /, " ma ", line)
  gsub(/ only /, " o ", line)
  gsub(/ Jan /, " Ja ", line)
  gsub(/ Feb /, " F ", line)
  gsub(/ Apr /, " Ap ", line)
  gsub(/ Aug /, " Au ", line)
  gsub(/ Sep /, " S ", line)
  gsub(/ Oct /, " O ", line)
  gsub(/ Nov /, " N ", line)
  gsub(/ Dec /, " D ", line)

  # Strip leading and trailing space.
  sub(/^ /, "", line)
  sub(/ $/, "", line)

  # Remove unnecessary trailing zero fields.
  sub(/ 0+$/, "", line)

  # Remove unnecessary trailing days-of-month "1".
  if (match(line, /[[:alpha:]] 1$/))
    line = substr(line, 1, RSTART)

  # Remove unnecessary trailing " Ja" (for January).
  sub(/ Ja$/, "", line)

  n = split(line, field)

  # Abbreviate rule names.
  # The rule name is field 4 of a "Z" line and field 2 of any other line
  # except "Li", which has none.  Fields starting with "-", "+" or a digit
  # are left alone.
  i = field[1] == "Z" ? 4 : field[1] == "Li" ? 0 : 2
  if (i && field[i] ~ /^[^-+0-9]/) {
    if (!rule[field[i]])
      rule[field[i]] = gen_rule_name()
    field[i] = rule[field[i]]
  }

  # If this zone supersedes an earlier one, delete the earlier one
  # from the saved output lines.
  startdef = ""
  if (field[1] == "Z")
    startdef = field[2]
  else if (field[1] == "Li")
    startdef = field[3]
  if (startdef) {
    i = zonedef[startdef]
    if (i) {
      # Blank the earlier definition's first line and every continuation
      # line (one starting with "-", "+" or a digit) that follows it.
      do {
        output_line[i - 1] = ""
      } while (output_line[i++] ~ /^[-+0-9]/)
    }
    # Record where this definition starts.  This is done only for the line
    # that opens a definition, never for continuation or rule lines, so that
    # a later superseding definition deletes the whole zone rather than
    # just its last line.
    zonedef[startdef] = nout + 1
  }

  # Save the line for later output.
  line = field[1]
  for (i = 2; i <= n; i++)
    line = line " " field[i]
  output_line[nout++] = line
}

# Print every saved output line that has not been blanked out.
# I is a local variable.

function output_saved_lines(i)
{
  for (i = 0; i < nout; i++)
    if (output_line[i])
      print output_line[i]
}

BEGIN {
  print "# This zic input file is in the public domain."
}

# Process each line that contains something other than white space
# and comments.
/^[[:space:]]*[^#[:space:]]/ {
  process_input_line($0)
}

END {
  output_saved_lines()
}