man2hwnotes.pl revision 257814
1#!/usr/bin/perl -w 2# Emacs should use -*- cperl -*- mode 3# 4# Copyright (c) 2003-2006 Simon L. Nielsen <simon@FreeBSD.org> 5# All rights reserved. 6# 7# Redistribution and use in source and binary forms, with or without 8# modification, are permitted provided that the following conditions 9# are met: 10# 1. Redistributions of source code must retain the above copyright 11# notice, this list of conditions and the following disclaimer. 12# 2. Redistributions in binary form must reproduce the above copyright 13# notice, this list of conditions and the following disclaimer in the 14# documentation and/or other materials provided with the distribution. 15# 16# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26# SUCH DAMAGE. 27# 28# $FreeBSD: stable/10/release/doc/share/misc/man2hwnotes.pl 257814 2013-11-07 21:08:12Z gjb $ 29# 30 31# Parse the list of supported hardware out of section 4 manual pages 32# and output it on stdout as SGML/DocBook entities. 33 34# The script will look for the following line in the manual page: 35# .Sh HARDWARE 36# and make an entity of the content until the line containing: 37# .Sh 38# 39# For Lists only the first line will be printed. If there are 40# arguments to the .It command, only the argument will be printed. 41 42# Usage: 43# man2hwnotes.pl [-cl] [-d 0-6] [-a <archlist file>] [-o <outputfile>] 44# <manualpage> [<manualpage> ...] 45 46use strict; 47use Getopt::Std; 48use Digest::MD5 qw(md5_hex); 49 50# Section from manual page to extract 51my $hwlist_sect = "HARDWARE"; 52 53# Override default archtecture list for some devices: 54my $archlist_file = "dev.archlist.txt"; 55my %archlist; 56 57# Globals 58my $compat_mode = 0; # Enable compat for old Hardware Notes style 59my $debuglevel = 0; 60my $only_list_out = 0; # Should only lists be generated in the output? 61my @out_lines; # Single lines 62my @out_dev; # Device entities 63 64# Getopt 65my %options = (); 66if (!getopts("a:cd:lo:",\%options)) { 67 die("$!: Invalid command line arguments in ", __LINE__, "\n"); 68} 69 70if (defined($options{c})) { 71 $compat_mode = 1; 72} 73if (defined($options{d})) { 74 $debuglevel = $options{d}; 75} 76if (defined($options{a})) { 77 $archlist_file = $options{a}; 78} 79if (defined($options{l})) { 80 $only_list_out = 1; 81} 82 83my $outputfile = $options{o}; 84 85if ($debuglevel > 0) { 86 # Don't do output buffering in debug mode. 87 $| = 1; 88} 89 90load_archlist($archlist_file); 91 92if (defined($outputfile)) { 93 open(OLDOUT, ">&STDOUT") || die("$!: Could not open STDOUT in ", __LINE__, ".\n"); 94 open(STDOUT, ">$outputfile") || die("$!: Could not open $outputfile in ", __LINE__, ".\n"); 95} 96 97print <<EOT; 98<!-- 99 These are automatically generated device lists for FreeBSD hardware notes. 100--> 101EOT 102 103if ($only_list_out) { 104 # Print the default device preamble entities 105 print "<!ENTITY hwlist.preamble.pre 'The'>\n"; 106 print "<!ENTITY hwlist.preamble.post 'driver supports:'>\n"; 107} 108 109foreach my $page (@ARGV) { 110 if ($page !~ m/\.4$/) { 111 dlog(2, "Skipped $page (not *.4)"); 112 next; 113 } 114 dlog(2, "Parsing $page"); 115 parse($page); 116 117 if (@out_lines) { 118 print join("\n", @out_lines), "\n"; 119 } 120 if (@out_dev) { 121 print join("\n", @out_dev), "\n"; 122 } 123 124 @out_lines = (); 125 @out_dev = (); 126} 127 128if (defined($outputfile)) { 129 open(STDOUT, ">&OLDOUT") || die("$!: Could not open STDOUT in ", __LINE__, ".\n"); 130 close(OLDOUT) || die("$!: Could not close OLDOUT in ", __LINE__, ".\n"); 131} 132 133sub normalize (@) { 134 my @lines = @_; 135 136 foreach my $l (@lines) { 137 $l =~ s/\\&//g; 138 $l =~ s:([\x21-\x2f\x5b-\x60\x7b-\x7f]):sprintf("&\#\%d;", ord($1)):eg; 139 # Make sure ampersand is encoded as & since jade seems to 140 # be confused when it is encoded as & inside an entity. 141 $l =~ s/&/&/g; 142 } 143 return (wantarray) ? @lines : join "", @lines; 144} 145 146sub parse { 147 my ($manpage) = @_; 148 149 my $cur_mansection; 150 my $found_hwlist = 0; 151 my %mdocvars; 152 $mdocvars{isin_hwlist} = 0; 153 $mdocvars{isin_list} = 0; 154 $mdocvars{first_para} = 1; 155 $mdocvars{parabuf} = ""; 156 $mdocvars{listtype} = ""; 157 $mdocvars{it_nr} = 0; 158 159 open(MANPAGE, "$manpage") || die("$!: Could not open $manpage in ", __LINE__, ".\n"); 160 while(<MANPAGE>) { 161 chomp; 162 my $line = $_; 163 164 dlog(5, "Read '$line'"); 165 166 # Find commands 167 if (s/^\.(.*)$/$1/) { 168 my $cmd = $1; 169 170 # Detect, and ignore, comment lines 171 if (s/^\\"(.*)$/$1/) { 172 next; 173 } 174 175 $cmd =~ s/^([^ ]+).*$/$1/; 176 177 if (/^Nm "?(\w+)"?/ && !defined($mdocvars{Nm})) { 178 dlog(3, "Setting Nm to $1"); 179 $mdocvars{Nm} = $1; 180 # "_" cannot be used for an entity name. 181 $mdocvars{EntNm} = $1; 182 $mdocvars{EntNm} =~ s,_,.,g; 183 184 } elsif (/^Nm$/) { 185 if (defined($mdocvars{Nm}) && $mdocvars{Nm} ne "") { 186 parabuf_addline(\%mdocvars, "&man.".$mdocvars{EntNm}.".$cur_mansection;"); 187 } else { 188 dlog(2, "Warning: Bad Nm call in $manpage"); 189 } 190 191 } elsif (/^Sh (.+)$/) { 192 dlog(4, "Setting section to $1"); 193 my $cur_section = $1; 194 195 flush_out(\%mdocvars); 196 197 if ($cur_section =~ /^${hwlist_sect}$/) { 198 dlog(2, "Found the device section ${hwlist_sect}"); 199 $mdocvars{isin_hwlist} = 1; 200 $found_hwlist = 1; 201 add_sgmltag(\%mdocvars, "<!ENTITY hwlist.".$mdocvars{cur_manname}." '"); 202 if ($only_list_out) { 203 add_sgmltag("<para xmlns=\"http://docbook.org/ns/docbook\">&hwlist.preamble.pre; " . 204 "&man.".$mdocvars{EntNm}.".$cur_mansection; " . 205 "&hwlist.preamble.post;</para>"); 206 } 207 } elsif ($mdocvars{isin_hwlist}) { 208 dlog(2, "Found a HWLIST STOP key!"); 209 add_sgmltag(\%mdocvars, "'>"); 210 $mdocvars{isin_hwlist} = 0; 211 } 212 if ($mdocvars{isin_list}) { 213 dlog(1, "Warning: Still in list, but just entered new " . 214 "section. This is probably due to missing .El; " . 215 "check manual page for errors."); 216 # If we try to recover from this we will probably 217 # just end with bad SGML output and it really 218 # should be fixed in the manual page so we don't 219 # even try to "fix" this. 220 } 221 222 223 } elsif (/^Dt ([^ ]+) ([^ ]+)/) { 224 dlog(4, "Setting mansection to $2"); 225 $mdocvars{cur_manname} = lc($1); 226 $cur_mansection = $2; 227 228 # "_" cannot be used for an entity name. 229 $mdocvars{cur_manname} =~ s,_,.,g; 230 231 } elsif (/^It ?(.*)$/) { 232 my $txt = $1; 233 234 $mdocvars{it_nr}++; 235 236 # Flush last item 237 if ($mdocvars{parabuf} ne "") { 238 add_listitem(\%mdocvars); 239 } 240 241 # Remove quotes, if any. 242 $txt =~ s/"(.*)"/$1/; 243 244 if ($mdocvars{listtype} eq "column") { 245 # Ignore first item when it is likely to be a 246 # header. 247 if ($mdocvars{it_nr} == 1 && $txt =~ m/^(Em|Sy) /) { 248 dlog(2, "Skipping header line in column list"); 249 next; 250 } 251 # Only extract the first column. 252 $txt =~ s/ Ta /\t/g; 253 $txt =~ s/([^\t]+)\t.*/$1/; 254 } 255 256 # Remove Li commands 257 $txt =~ s/^Li //g; 258 259 parabuf_addline(\%mdocvars, normalize($txt)); 260 } elsif (/^Bl/) { 261 $mdocvars{isin_list} = 1; 262 flush_out(\%mdocvars); 263 add_sgmltag(\%mdocvars, "<itemizedlist xmlns=\"http://docbook.org/ns/docbook\">"); 264 265 if (/-tag/) { 266 $mdocvars{listtype} = "tag"; 267 # YACK! Hack for ata(4) 268 if ($mdocvars{Nm} eq "ata") { 269 $mdocvars{listtype} = "tagHACK"; 270 } 271 } elsif (/-bullet/) { 272 $mdocvars{listtype} = "bullet"; 273 } elsif (/-column/) { 274 $mdocvars{listtype} = "column"; 275 } else { 276 $mdocvars{listtype} = "unknown"; 277 } 278 dlog(2, "Listtype set to $mdocvars{listtype}"); 279 } elsif (/^El/) { 280 if ($mdocvars{parabuf} ne "") { 281 add_listitem(\%mdocvars); 282 } 283 284 add_sgmltag(\%mdocvars, "</itemizedlist>"); 285 $mdocvars{isin_list} = 0; 286 } elsif (/^Tn (.+)$/) { 287 # For now we print TradeName text as regular text. 288 my ($txt, $punct_str) = split_punct_chars($1); 289 290 parabuf_addline(\%mdocvars, normalize($txt . $punct_str)); 291 } elsif (/^Xr ([^ ]+) (.+)$/) { 292 my ($xr_sect, $punct_str) = split_punct_chars($2); 293 my $txt; 294 295 # We need to check if the manual page exist to avoid 296 # breaking the doc build just because of a broken 297 # reference. 298 #$txt = "&man.$1.$xr_sect;$punct_str"; 299 $txt = "$1($xr_sect)$punct_str"; 300 parabuf_addline(\%mdocvars, normalize($txt)); 301 } elsif (/^Dq (.+)$/) { 302 my ($txt, $punct_str) = split_punct_chars($1); 303 304 parabuf_addline(\%mdocvars, 305 normalize("<quote xmlns=\"http://docbook.org/ns/docbook\">$txt</quote>$punct_str")); 306 } elsif (/^Sx (.+)$/) { 307 if ($mdocvars{isin_hwlist}) { 308 dlog(1, "Warning: Reference to another section in the " . 309 "$hwlist_sect section in " . $mdocvars{Nm} . 310 "(${cur_mansection})"); 311 } 312 parabuf_addline(\%mdocvars, normalize($1)); 313 } elsif (/^Pa (.+)$/) { 314 my ($txt, $punct_str) = split_punct_chars($1); 315 316 $txt = make_ulink($txt) . $punct_str; 317 parabuf_addline(\%mdocvars, normalize($txt)); 318 } elsif (/^Pp/) { 319 dlog(3, "Got Pp command - forcing new para"); 320 flush_out(\%mdocvars); 321 } elsif (/^Fx (.+)/) { 322 dlog(3, "Got Fx command"); 323 parabuf_addline(\%mdocvars, "FreeBSD $1"); 324 } elsif (/^Fx/) { 325 dlog(3, "Got Fx command"); 326 parabuf_addline(\%mdocvars, "FreeBSD"); 327 } elsif (/^Em (.+)$/) { 328 my ($txt, $punct_str) = split_punct_chars($1); 329 330 parabuf_addline(\%mdocvars, 331 normalize("<emphasis xmlns=\"http://docbook.org/ns/docbook\">$txt</emphasis>$punct_str")); 332 } else { 333 # Ignore all other commands. 334 dlog(3, "Ignoring unknown command $cmd"); 335 } 336 } else { 337 # This is then regular text 338 parabuf_addline(\%mdocvars, normalize($_)); 339 } 340 } 341 close(MANPAGE) || die("$!: Could not close $manpage in ", __LINE__, ".\n"); 342 if (! $found_hwlist) { 343 dlog(2, "Hardware list not found in $manpage"); 344 } 345} 346 347sub dlog { 348 my ($level, $txt) = @_; 349 350 if ($level <= $debuglevel) { 351 print STDERR "$level: $txt\n"; 352 } 353} 354 355# Output a SGML tag. 356sub add_sgmltag { 357 my ($mdocvars, $txt) = (@_); 358 359 # We only care about the HW list for now. 360 if (${$mdocvars}{isin_hwlist}) { 361 push(@out_dev, $txt); 362 } 363} 364 365# Add a text entity, and return the used entity name. 366sub add_txt_ent { 367 my ($itemtxt) = (@_); 368 my ($entity_name); 369 370 # Convert mdoc(7) minus 371 $itemtxt =~ s/\\-/-/g; 372 373 $itemtxt =~ s/'/‘/g; 374 375 $entity_name = "hwlist." . md5_hex($itemtxt); 376 dlog(4, "Adding '$itemtxt' as entity $entity_name"); 377 push(@out_lines, "<!ENTITY $entity_name '$itemtxt'>"); 378 379 return ($entity_name); 380} 381sub flush_out { 382 my ($mdocvars) = (@_); 383 my ($entity_name, $out); 384 my $para_arch = ""; 385 386 if (!${$mdocvars}{isin_hwlist} || ${$mdocvars}{parabuf} eq "") { 387 return; 388 } 389 390 $entity_name = add_txt_ent(${$mdocvars}{parabuf}); 391 ${$mdocvars}{parabuf} = ""; 392 if(defined($archlist{${$mdocvars}{Nm}})) { 393 if ($compat_mode) { 394 $para_arch = ' arch="' . $archlist{${$mdocvars}{Nm}} . '"'; 395 } else { 396 $para_arch = '[' . $archlist{${$mdocvars}{Nm}} . '] '; 397 } 398 } 399 if ($compat_mode) { 400 $out = "<para xmlns=\"http://docbook.org/ns/docbook\"".$para_arch.">&".$entity_name.";</para>"; 401 } else { 402 if (${$mdocvars}{first_para}) { 403 $out = "<para xmlns=\"http://docbook.org/ns/docbook\">".$para_arch."&".$entity_name.";</para>"; 404 } else { 405 $out = "<para xmlns=\"http://docbook.org/ns/docbook\">&".$entity_name.";</para>"; 406 } 407 ${$mdocvars}{first_para} = 0; 408 } 409 410 dlog(4, "Flushing parabuf"); 411 add_sgmltag($mdocvars, $out); 412} 413 414# Add a new list item from the "parabuf". 415sub add_listitem { 416 my ($mdocvars) = (@_); 417 my ($listitem, $entity_name); 418 my $para_arch = ""; 419 420 $entity_name = add_txt_ent(${$mdocvars}{parabuf}); 421 ${$mdocvars}{parabuf} = ""; 422 423 if ($compat_mode) { 424 if(defined($archlist{${$mdocvars}{Nm}})) { 425 $para_arch = ' arch="' . $archlist{${$mdocvars}{Nm}} . '"'; 426 } 427 } 428 $listitem = "<listitem><para".$para_arch.">&".$entity_name.";</para></listitem>"; 429 dlog(4, "Adding '$listitem' to out_dev"); 430 push(@out_dev, $listitem); 431 432} 433 434# Add a line to the "paragraph buffer" 435sub parabuf_addline { 436 my $mdocvars = shift; 437 my ($txt) = (@_); 438 439 dlog(5, "Now in parabuf_addline for '$txt'"); 440 441 # We only care about the HW list for now. 442 if (!${$mdocvars}{isin_hwlist}) { 443 dlog(6, "Exiting parabuf_addline due to: !\${\$mdocvars}{isin_hwlist}"); 444 return; 445 } 446 if ($txt eq "") { 447 dlog(6, "Exiting parabuf_addline due to: \$txt eq \"\""); 448 return; 449 } 450 451 if ($only_list_out && !${$mdocvars}{isin_list}) { 452 dlog(6, "Exiting parabuf_addline due to: ". 453 "\$only_list_out && !\${\$mdocvars}{isin_list}"); 454 return; 455 } 456 457 # We only add the first line for "tag" lists 458 if (${$mdocvars}{parabuf} ne "" && ${$mdocvars}{isin_list} && 459 ${$mdocvars}{listtype} eq "tag") { 460 dlog(6, "Exiting parabuf_addline due to: ". 461 "\${\$mdocvars}{parabuf} ne \"\" && \${\$mdocvars}{isin_list} && ". 462 "\${\$mdocvars}{listtype} eq \"tag\""); 463 return; 464 } 465 466 if (${$mdocvars}{parabuf} ne "") { 467 ${$mdocvars}{parabuf} .= " "; 468 } 469 470 dlog(4, "Adding '$txt' to parabuf"); 471 472 ${$mdocvars}{parabuf} .= $txt; 473} 474 475sub load_archlist { 476 my ($file) = (@_); 477 478 my $lineno = 0; 479 480 dlog(2, "Parsing archlist $file"); 481 482 open(FILE, "$file") || die("$!: Could not open archlist $file in ", __LINE__, ".\n"); 483 while(<FILE>) { 484 chomp; 485 $lineno++; 486 487 if (/^#/ || $_ eq "") { 488 next; 489 } 490 491 if (/(\w+)\t([\w,]+)/) { 492 dlog(4, "For driver $1 setting arch to $2"); 493 $archlist{$1} = $2; 494 } else { 495 dlog(1, "Warning: Could not parse archlist line $lineno"); 496 } 497 } 498 499 close(FILE); 500} 501 502# Check if a character is a mdoc(7) punctuation character. 503sub is_punct_char { 504 my ($str) = (@_); 505 506 return (length($str) == 1 && $str =~ /[\.,:;()\[\]\?!]/); 507} 508 509# Split out the punctuation characters of a mdoc(7) line. 510sub split_punct_chars { 511 my ($str) = (@_); 512 my (@stritems, $stritem, $punct_str); 513 514 $punct_str = ""; 515 @stritems = split(/ /, $str); 516 517 while (defined($stritem = $stritems[$#stritems]) && 518 is_punct_char($stritem)) { 519 $punct_str = $stritem . $punct_str; 520 pop(@stritems); 521 } 522 523 return (join(' ', @stritems), $punct_str); 524} 525 526# Create a ulink, if the string contains an URL. 527sub make_ulink { 528 my ($str) = (@_); 529 530 $str =~ s,(http://[^ ]+),<link xmlns=\"http://docbook.org/ns/docbook\" xlink:href="$1"></link>,; 531 532 return $str; 533} 534