#!/usr/bin/python
#
# Copyright (C) 2013 Free Software Foundation, Inc.
#
# This script is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option)
# any later version.

# This script adjusts the copyright notices at the top of source files
# so that they have the form:
#
#   Copyright XXXX-YYYY Free Software Foundation, Inc.
#
# It doesn't change code that is known to be maintained elsewhere or
# that carries a non-FSF copyright.
#
# The script also doesn't change testsuite files, except those in
# libstdc++-v3.  This is because libstdc++-v3 has a conformance testsuite,
# while most tests in other directories are just things that failed at some
# point in the past.
#
# Pass --this-year to the script if you want it to add the current year
# to all applicable notices.  Pass --quilt if you are using quilt and
# want files to be added to the quilt before being changed.
#
# By default the script will update all directories for which the
# output has been vetted.  You can instead pass the names of individual
# directories, including those that haven't been approved.  So:
#
#    update-copyright.py --this-year
#
# is the command that would be used at the beginning of a year to update
# all copyright notices (and possibly at other times to check whether
# new files have been added with old years).  On the other hand:
#
#    update-copyright.py --this-year libjava
#
# would run the script on just libjava/.
#
# Note that things like --version output strings must be updated before
# this script is run.  There's already a separate procedure for that.

import os
import re
import stat
import subprocess
import sys
import time


class Errors:
    """Counts reported errors, echoing each one to stderr."""

    def __init__ (self):
        self.num_errors = 0

    def report (self, filename, string):
        """Write STRING to stderr, prefixed with 'FILENAME: ' when FILENAME
        is non-empty, and bump the error count."""
        if filename:
            string = filename + ': ' + string
        sys.stderr.write (string + '\n')
        self.num_errors += 1

    def ok (self):
        """Return True if no error has been reported so far."""
        return self.num_errors == 0


class GenericFilter:
    """Default per-directory policy: which files and subdirectories to
    skip, which files keep their old upper year, and which files are
    taken to belong to the package author regardless of markup."""

    def __init__ (self):
        self.skip_files = set()
        self.skip_dirs = set()
        self.skip_extensions = set()
        self.fossilised_files = set()
        self.own_files = set()

        self.skip_files |= set ([
                # Skip licence files.
                'COPYING',
                'COPYING.LIB',
                'COPYING3',
                'COPYING3.LIB',
                'LICENSE',
                'fdl.texi',
                'gpl_v3.texi',
                'fdl-1.3.xml',
                'gpl-3.0.xml',

                # Skip auto- and libtool-related files
                'aclocal.m4',
                'compile',
                'config.guess',
                'config.sub',
                'depcomp',
                'install-sh',
                'libtool.m4',
                'ltmain.sh',
                'ltoptions.m4',
                'ltsugar.m4',
                'ltversion.m4',
                'lt~obsolete.m4',
                'missing',
                'mkdep',
                'mkinstalldirs',
                'move-if-change',
                'shlibpath.m4',
                'symlink-tree',
                'ylwrap',

                # Skip FSF mission statement, etc.
                'gnu.texi',
                'funding.texi',
                'appendix_free.xml',

                # Skip imported texinfo files.
                'texinfo.tex',
                ])

    def get_line_filter (self, dir, filename):
        """Return a compiled regexp for lines of FILENAME that must be left
        untouched (tested with .match), or None if every line is eligible."""
        if filename.startswith ('ChangeLog'):
            # Ignore references to copyright in changelog entries.
            return re.compile ('\t')

        return None

    def skip_file (self, dir, filename):
        """Return True if DIR/FILENAME should not be processed at all."""
        if filename in self.skip_files:
            return True

        (base, extension) = os.path.splitext (os.path.join (dir, filename))
        if extension in self.skip_extensions:
            return True

        if extension == '.in':
            # Skip .in files produced by automake.
            if os.path.exists (base + '.am'):
                return True

            # Skip files produced by autogen
            if (os.path.exists (base + '.def')
                and os.path.exists (base + '.tpl')):
                return True

        # Skip configure files produced by autoconf
        if filename == 'configure':
            if os.path.exists (base + '.ac'):
                return True
            if os.path.exists (base + '.in'):
                return True

        return False

    def skip_dir (self, dir, subdir):
        """Return True if the walk should not descend into DIR/SUBDIR."""
        return subdir in self.skip_dirs

    def is_fossilised_file (self, dir, filename):
        """Return True if FILENAME's upper year must never be bumped."""
        if filename in self.fossilised_files:
            return True
        # Only touch current ChangeLogs.
        if filename != 'ChangeLog' and filename.find ('ChangeLog') >= 0:
            return True
        return False

    def by_package_author (self, dir, filename):
        """Return True if FILENAME is listed in own_files, in which case the
        holder text found in the notice is not validated."""
        return filename in self.own_files


class Copyright:
    """Finds copyright notices in files and rewrites them into the
    canonical 'Copyright (C) XXXX-YYYY holder' form."""

    def __init__ (self, errors):
        self.errors = errors

        # Characters in a range of years.  Include '.' for typos.
        ranges = r'[0-9](?:[-0-9.,\s]|\s+and\s+)*[0-9]'

        # Non-whitespace characters in a copyright holder's name.
        name = r'[\w.,-]'

        # Matches one year.
        self.year_re = re.compile ('[0-9]+')

        # Matches part of a year or copyright holder.
        self.continuation_re = re.compile (ranges + '|' + name)

        # Matches a full copyright notice:
        self.copyright_re = re.compile (
            # 1: 'Copyright (C)', etc.  The copyright sign is accepted
            # both as the character itself (written as an escape to keep
            # this file ASCII) and as the HTML entity.
            r'([Cc]opyright'
            r'|[Cc]opyright\s+\([Cc]\)'
            r'|[Cc]opyright\s+%s'
            r'|[Cc]opyright\s+(?:\u00a9|&copy;)'
            r'|[Cc]opyright\s+@copyright{}'
            "|copyright = u'"
            r'|@set\s+copyright[\w-]+)'

            # 2: the years.  Include the whitespace in the year, so that
            # we can remove any excess.
            r'(\s*(?:' + ranges + r',?'
            r'|@value\{[^{}]*\})\s*)'

            # 3: 'by ', if used
            r'(by\s+)?'

            # 4: the copyright holder.  Don't allow multiple consecutive
            # spaces, so that right-margin gloss doesn't get caught
            # (e.g. gnat_ugn.texi).
            r'(' + name + r'(?:\s?' + name + r')*)?')

        # A regexp for notices that might have slipped by.  Just matching
        # 'copyright' is too noisy, and 'copyright.*[0-9]' falls foul of
        # HTML header markers, so check for 'copyright' and two digits.
        self.other_copyright_re = re.compile ('copyright.*[0-9][0-9]',
                                              re.IGNORECASE)
        self.comment_re = re.compile ('#+|[*]+|;+|%+|//+|@c |dnl ')
        self.holders = { '@copying': '@copying' }
        self.holder_prefixes = set()

        # True to 'quilt add' files before changing them.
        self.use_quilt = False

        # If set, force all notices to include this year.
        self.max_year = None

        # Goes after the year(s).  Could be ', '.
        self.separator = ' '

    def add_package_author (self, holder, canon_form = None):
        """Register HOLDER as a spelling of the package author, to be
        rewritten as CANON_FORM (HOLDER itself if omitted).  Every proper
        prefix of HOLDER that ends at a space is recorded so that a notice
        split across lines can be recognised as incomplete."""
        if not canon_form:
            canon_form = holder
        self.holders[holder] = canon_form
        index = holder.find (' ')
        while index >= 0:
            self.holder_prefixes.add (holder[:index])
            index = holder.find (' ', index + 1)

    def add_external_author (self, holder):
        """Register HOLDER as a known third party whose notices are left
        alone."""
        self.holders[holder] = None

    class BadYear (Exception):
        """Raised for a year that is neither NNNN nor a two-digit year
        above 70.  Must derive from Exception so that it can be raised
        and caught on Python 3."""

        def __init__ (self, year):
            Exception.__init__ (self, year)
            self.year = year

        def __str__ (self):
            return 'unrecognised year: ' + self.year

    def parse_year (self, string):
        """Convert the digit string STRING to a four-digit year.  Two-digit
        years above 70 are taken to be 19xx; anything else that is not
        exactly four digits raises BadYear."""
        year = int (string)
        if len (string) == 2:
            if year > 70:
                return year + 1900
        elif len (string) == 4:
            return year
        raise self.BadYear (string)

    def year_range (self, years):
        """Return (earliest, latest) over every year mentioned in YEARS."""
        year_list = [self.parse_year (year)
                     for year in self.year_re.findall (years)]
        assert len (year_list) > 0
        return (min (year_list), max (year_list))

    def set_use_quilt (self, use_quilt):
        self.use_quilt = use_quilt

    def include_year (self, year):
        """Force YEAR into every non-fossilised notice.  May only be
        called once."""
        assert not self.max_year
        self.max_year = year

    def canonicalise_years (self, dir, filename, filter, years):
        """Return YEARS collapsed to 'XXXX' or 'XXXX-YYYY'.  May raise
        BadYear."""
        # Leave texinfo variables alone.
        if years.startswith ('@value'):
            return years

        (min_year, max_year) = self.year_range (years)

        # Update the upper bound, if enabled.
        if self.max_year and not filter.is_fossilised_file (dir, filename):
            max_year = max (max_year, self.max_year)

        # Use a range.
        if min_year == max_year:
            return '%d' % min_year
        else:
            return '%d-%d' % (min_year, max_year)

    def strip_continuation (self, line):
        """Return LINE without leading whitespace and without one leading
        comment marker (and the whitespace after it)."""
        line = line.lstrip()
        match = self.comment_re.match (line)
        if match:
            line = line[match.end():].lstrip()
        return line

    def is_complete (self, match):
        """Return a true value if MATCH has a holder that is not merely the
        beginning of a longer registered holder name."""
        holder = match.group (4)
        return (holder
                and (holder not in self.holder_prefixes
                     or holder in self.holders))

    def update_copyright (self, dir, filename, filter, file, line, match):
        """Rewrite the notice that MATCH found in LINE.

        FILE is the open file iterator; further lines are pulled from it
        when the notice continues over several lines.  Return a tuple
        (CHANGED, TEXT, NEXT_LINE): TEXT replaces every line consumed for
        the notice, and NEXT_LINE is a line that was read ahead but turned
        out not to belong to the notice (or None) and still needs to be
        processed by the caller."""
        orig_line = line
        next_line = None
        pathname = os.path.join (dir, filename)

        intro = match.group (1)
        if intro.startswith ('@set'):
            # Texinfo year variables should always be on one line
            after_years = line[match.end (2):].strip()
            if after_years != '':
                self.errors.report (pathname,
                                    'trailing characters in @set: '
                                    + after_years)
                return (False, orig_line, next_line)
        else:
            # If it looks like the copyright is incomplete, add the next line.
            while not self.is_complete (match):
                try:
                    # The builtin works on Python 2.6+ and Python 3, unlike
                    # the Python-2-only file.next() method.
                    next_line = next (file)
                except StopIteration:
                    break

                # If the next line doesn't look like a proper continuation,
                # assume that what we've got is complete.
                continuation = self.strip_continuation (next_line)
                if not self.continuation_re.match (continuation):
                    break

                # Merge the lines for matching purposes.
                orig_line += next_line
                line = line.rstrip() + ' ' + continuation
                next_line = None

                # Rematch with the longer line, at the original position.
                match = self.copyright_re.match (line, match.start())
                assert match

            holder = match.group (4)

            # Use the filter to test cases where markup is getting in the way.
            if filter.by_package_author (dir, filename):
                assert holder not in self.holders

            elif not holder:
                self.errors.report (pathname, 'missing copyright holder')
                return (False, orig_line, next_line)

            elif holder not in self.holders:
                self.errors.report (pathname,
                                    'unrecognised copyright holder: ' + holder)
                return (False, orig_line, next_line)

            else:
                # See whether the copyright is associated with the package
                # author.
                canon_form = self.holders[holder]
                if not canon_form:
                    return (False, orig_line, next_line)

                # The replacements below go from right to left so that
                # the offsets of the earlier groups stay valid.

                # Make sure the author is given in a consistent way.
                line = (line[:match.start (4)]
                        + canon_form
                        + line[match.end (4):])

                # Remove any 'by'.  Group 3 is optional, so only splice
                # when it actually took part in the match.
                if match.group (3):
                    line = line[:match.start (3)] + line[match.end (3):]

        # Update the copyright years.
        years = match.group (2).strip()
        try:
            canon_form = self.canonicalise_years (dir, filename, filter, years)
        except self.BadYear as e:
            self.errors.report (pathname, str (e))
            return (False, orig_line, next_line)

        line = (line[:match.start (2)]
                + ('' if intro.startswith ('copyright = ') else ' ')
                + canon_form + self.separator
                + line[match.end (2):])

        # Use the standard (C) form.
        if intro.endswith ('right'):
            intro += ' (C)'
        elif intro.endswith ('(c)'):
            intro = intro[:-3] + '(C)'
        line = line[:match.start (1)] + intro + line[match.end (1):]

        # Strip trailing whitespace
        line = line.rstrip() + '\n'

        return (line != orig_line, line, next_line)

    def process_file (self, dir, filename, filter):
        """Update every notice in DIR/FILENAME.  The file is rewritten (via
        a '.tmp' sibling and a rename) only if something changed and no
        error has been reported so far.  Stale '.tmp' files are removed."""
        pathname = os.path.join (dir, filename)
        if filename.endswith ('.tmp'):
            # Looks like something we tried to create before.
            try:
                os.remove (pathname)
            except OSError:
                pass
            return

        lines = []
        changed = False
        line_filter = filter.get_line_filter (dir, filename)
        with open (pathname, 'r') as file:
            # Remember the permission bits so that the replacement file
            # keeps them (e.g. executable scripts stay executable).
            mode = stat.S_IMODE (os.fstat (file.fileno()).st_mode)
            for line in file:
                # update_copyright may read ahead and hand back a line
                # that it didn't consume; process that one too.
                while line:
                    next_line = None
                    # Leave filtered-out lines alone.
                    if not (line_filter and line_filter.match (line)):
                        match = self.copyright_re.search (line)
                        if match:
                            res = self.update_copyright (dir, filename, filter,
                                                         file, line, match)
                            (this_changed, line, next_line) = res
                            changed = changed or this_changed

                        # Check for copyright lines that might have slipped by.
                        elif self.other_copyright_re.search (line):
                            self.errors.report (pathname,
                                                'unrecognised copyright: %s'
                                                % line.strip())
                    lines.append (line)
                    line = next_line

        # If something changed, write the new file out.
        if changed and self.errors.ok():
            tmp_pathname = pathname + '.tmp'
            with open (tmp_pathname, 'w') as file:
                file.writelines (lines)
            os.chmod (tmp_pathname, mode)
            if self.use_quilt:
                subprocess.call (['quilt', 'add', pathname])
            os.rename (tmp_pathname, pathname)

    def process_tree (self, tree, filter):
        """Walk TREE, processing every file that FILTER doesn't skip."""
        for (dir, subdirs, filenames) in os.walk (tree):
            # Don't recurse through directories that should be skipped.
            # os.walk only honours in-place changes to the list.
            subdirs[:] = [subdir for subdir in subdirs
                          if not filter.skip_dir (dir, subdir)]

            # Handle the files in this directory.
            for filename in filenames:
                if filter.skip_file (dir, filename):
                    sys.stdout.write ('Skipping %s\n'
                                      % os.path.join (dir, filename))
                else:
                    self.process_file (dir, filename, filter)


class CmdLine:
    """Command-line driver: parses sys.argv, then runs a Copyright
    instance over the registered directories that were selected."""

    def __init__ (self, copyright = Copyright):
        self.errors = Errors()
        self.copyright = copyright (self.errors)
        self.dirs = []
        self.default_dirs = []
        self.chosen_dirs = []
        self.option_handlers = dict()
        self.option_help = []

        self.add_option ('--help', 'Print this help', self.o_help)
        self.add_option ('--quilt', '"quilt add" files before changing them',
                         self.o_quilt)
        self.add_option ('--this-year', 'Add the current year to every notice',
                         self.o_this_year)

    def add_option (self, name, help, handler):
        """Register option NAME with its HELP text; HANDLER is called with
        the option string when it is seen on the command line."""
        self.option_help.append ((name, help))
        self.option_handlers[name] = handler

    def add_dir (self, dir, filter = None):
        """Register DIR as a tree that can be processed using FILTER
        (a fresh GenericFilter if omitted)."""
        # Create the default per call instead of sharing one instance
        # built when the class body was evaluated.
        if filter is None:
            filter = GenericFilter()
        self.dirs.append ((dir, filter))

    def o_help (self, option = None):
        """Print usage, options and known directories, then exit with 0."""
        sys.stdout.write ('Usage: %s [options] dir1 dir2...\n\n'
                          'Options:\n' % sys.argv[0])
        option_format = '%-15s %s\n'
        for (what, help) in self.option_help:
            sys.stdout.write (option_format % (what, help))
        sys.stdout.write ('\nDirectories:\n')

        # Three directories per output line.
        dir_format = '%-25s'
        i = 0
        for (dir, filter) in self.dirs:
            i += 1
            if i % 3 == 0 or i == len (self.dirs):
                sys.stdout.write (dir + '\n')
            else:
                sys.stdout.write (dir_format % dir)
        sys.exit (0)

    def o_quilt (self, option):
        self.copyright.set_use_quilt (True)

    def o_this_year (self, option):
        self.copyright.include_year (time.localtime().tm_year)

    def main (self):
        """Parse sys.argv, process the chosen (or default) directories and
        exit with status 0 if no error was reported, 1 otherwise."""
        for arg in sys.argv[1:]:
            if arg[:1] != '-':
                self.chosen_dirs.append (arg)
            elif arg in self.option_handlers:
                self.option_handlers[arg] (arg)
            else:
                self.errors.report (None, 'unrecognised option: ' + arg)
        if self.errors.ok():
            if len (self.chosen_dirs) == 0:
                self.chosen_dirs = self.default_dirs
            if len (self.chosen_dirs) == 0:
                self.o_help()
            else:
                for chosen_dir in self.chosen_dirs:
                    # A chosen directory selects every registered tree
                    # at or below it.
                    canon_dir = os.path.join (chosen_dir, '')
                    count = 0
                    for (dir, filter) in self.dirs:
                        if (dir + os.sep).startswith (canon_dir):
                            count += 1
                            self.copyright.process_tree (dir, filter)
                    if count == 0:
                        self.errors.report (None, 'unrecognised directory: '
                                            + chosen_dir)
        sys.exit (0 if self.errors.ok() else 1)

#----------------------------------------------------------------------------

class TopLevelFilter (GenericFilter):
    """Filter for '.': handles the files there but no subdirectory."""

    def skip_dir (self, dir, subdir):
        return True


class ConfigFilter (GenericFilter):
    """Filter for config/."""

    def __init__ (self):
        GenericFilter.__init__ (self)

    def skip_file (self, dir, filename):
        if filename.endswith ('.m4'):
            pathname = os.path.join (dir, filename)
            with open (pathname) as file:
                # Skip files imported from gettext.
                if file.readline().find ('gettext-') >= 0:
                    return True
        return GenericFilter.skip_file (self, dir, filename)


class GCCFilter (GenericFilter):
    """Filter for gcc/."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        self.skip_files |= set ([
                # Not part of GCC
                'math-68881.h',
                ])

        self.skip_dirs |= set ([
                # Better not create a merge nightmare for the GNAT folks.
                'ada',

                # Handled separately.
                'testsuite',
                ])

        self.skip_extensions |= set ([
                # Maintained by the translation project.
                '.po',

                # Automatically-generated.
                '.pot',
                ])

        self.fossilised_files |= set ([
                # Old news won't be updated.
                'ONEWS',
                ])


class TestsuiteFilter (GenericFilter):
    """Filter for testsuite directories."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        self.skip_extensions |= set ([
                # Don't change the tests, which could be owned by anyone.
                '.c',
                '.C',
                '.cc',
                '.h',
                '.hs',
                '.f',
                '.f90',
                '.go',
                '.inc',
                '.java',
                ])

    def skip_file (self, dir, filename):
        # g++.niklas/README contains historical copyright information
        # and isn't updated.
        if filename == 'README' and os.path.basename (dir) == 'g++.niklas':
            return True
        return GenericFilter.skip_file (self, dir, filename)


class LibCppFilter (GenericFilter):
    """Filter for libcpp/."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        self.skip_extensions |= set ([
                # Maintained by the translation project.
                '.po',

                # Automatically-generated.
                '.pot',
                ])


class LibGCCFilter (GenericFilter):
    """Filter for libgcc/."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        self.skip_dirs |= set ([
                # Imported from GLIBC.
                'soft-fp',
                ])


class LibJavaFilter (GenericFilter):
    """Filter for libjava/."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        self.skip_dirs |= set ([
                # Handled separately.
                'testsuite',

                # Not really part of the library
                'contrib',

                # Imported from upstream
                'classpath',
                'libltdl',
                ])

    def get_line_filter (self, dir, filename):
        # These lines mention 'copyright' in identifiers, not notices.
        if filename == 'NameDecoder.h':
            return re.compile ('.*NAME_COPYRIGHT')
        if filename == 'ICC_Profile.h':
            return re.compile ('.*icSigCopyrightTag')
        return GenericFilter.get_line_filter (self, dir, filename)


class LibMudflapFilter (GenericFilter):
    """Filter for libmudflap/."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        self.skip_dirs |= set ([
                # Handled separately.
                'testsuite',
                ])


class LibStdCxxFilter (GenericFilter):
    """Filter for libstdc++-v3/."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        self.skip_files |= set ([
                # Contains no copyright of its own, but quotes the GPL.
                'intro.xml',
                ])

        self.skip_dirs |= set ([
                # Contains automatically-generated sources.
                'html',

                # The testsuite data files shouldn't be changed.
                'data',

                # Contains imported images
                'images',
                ])

        self.own_files |= set ([
                # Contains markup around the copyright owner.
                'spine.xml',
                ])

    def get_line_filter (self, dir, filename):
        if filename == 'boost_concept_check.h':
            return re.compile (r'// \(C\) Copyright Jeremy Siek')
        return GenericFilter.get_line_filter (self, dir, filename)


class GCCCopyright (Copyright):
    """Copyright updater that knows the FSF spellings and the external
    copyright holders that occur in the GCC tree."""

    def __init__ (self, errors):
        Copyright.__init__ (self, errors)

        canon_fsf = 'Free Software Foundation, Inc.'
        self.add_package_author ('Free Software Foundation', canon_fsf)
        self.add_package_author ('Free Software Foundation.', canon_fsf)
        self.add_package_author ('Free Software Foundation Inc.', canon_fsf)
        self.add_package_author ('Free Software Foundation, Inc', canon_fsf)
        self.add_package_author ('Free Software Foundation, Inc.', canon_fsf)
        self.add_package_author ('The Free Software Foundation', canon_fsf)
        self.add_package_author ('The Free Software Foundation, Inc.', canon_fsf)
        self.add_package_author ('Software Foundation, Inc.', canon_fsf)

        self.add_external_author ('ARM')
        self.add_external_author ('AdaCore')
        self.add_external_author ('Ami Tavory and Vladimir Dreizin, IBM-HRL.')
        self.add_external_author ('Cavium Networks.')
        self.add_external_author ('Faraday Technology Corp.')
        self.add_external_author ('Florida State University')
        self.add_external_author ('Greg Colvin and Beman Dawes.')
        self.add_external_author ('Hewlett-Packard Company')
        self.add_external_author ('Information Technology Industry Council.')
        self.add_external_author ('James Theiler, Brian Gough')
        self.add_external_author ('Makoto Matsumoto and Takuji Nishimura,')
        self.add_external_author ('National Research Council of Canada.')
        self.add_external_author ('Peter Dimov and Multi Media Ltd.')
        self.add_external_author ('Peter Dimov')
        self.add_external_author ('Pipeline Associates, Inc.')
        self.add_external_author ('Regents of the University of California.')
        self.add_external_author ('Silicon Graphics Computer Systems, Inc.')
        self.add_external_author ('Silicon Graphics')
        self.add_external_author ('Stephen L. Moshier')
        self.add_external_author ('Sun Microsystems, Inc. All rights reserved.')
        # NOTE(review): the next two entries are identical as written;
        # possibly one was meant to be a spacing variant -- confirm.
        self.add_external_author ('The Go Authors. All rights reserved.')
        self.add_external_author ('The Go Authors. All rights reserved.')
        self.add_external_author ('The Go Authors.')
        self.add_external_author ('The Regents of the University of California.')
        self.add_external_author ('Unicode, Inc.')
        self.add_external_author ('University of Toronto.')


class GCCCmdLine (CmdLine):
    """Command line for the GCC tree: registers the known directories
    and the subset that is processed by default."""

    def __init__ (self):
        CmdLine.__init__ (self, GCCCopyright)

        self.add_dir ('.', TopLevelFilter())
        # boehm-gc is imported from upstream.
        self.add_dir ('config', ConfigFilter())
        # contrib isn't really part of GCC.
        self.add_dir ('fixincludes')
        self.add_dir ('gcc', GCCFilter())
        self.add_dir (os.path.join ('gcc', 'testsuite'), TestsuiteFilter())
        self.add_dir ('gnattools')
        self.add_dir ('include')
        self.add_dir ('libada')
        self.add_dir ('libatomic')
        self.add_dir ('libbacktrace')
        self.add_dir ('libcpp', LibCppFilter())
        self.add_dir ('libdecnumber')
        # libffi is imported from upstream.
        self.add_dir ('libgcc', LibGCCFilter())
        self.add_dir ('libgfortran')
        self.add_dir ('libgomp')
        self.add_dir ('libiberty')
        self.add_dir ('libitm')
        self.add_dir ('libjava', LibJavaFilter())
        self.add_dir (os.path.join ('libjava', 'testsuite'), TestsuiteFilter())
        self.add_dir ('libmudflap', LibMudflapFilter())
        self.add_dir (os.path.join ('libmudflap', 'testsuite'),
                      TestsuiteFilter())
        self.add_dir ('libobjc')
        self.add_dir ('libquadmath')
        # libsanitizer is imported from upstream.
        self.add_dir ('libssp')
        self.add_dir ('libstdc++-v3', LibStdCxxFilter())
        self.add_dir ('lto-plugin')
        # zlib is imported from upstream.

        self.default_dirs = [
            'gcc',
            'libada',
            'libatomic',
            'libbacktrace',
            'libcpp',
            'libdecnumber',
            'libgcc',
            'libgfortran',
            'libgomp',
            'libitm',
            'libmudflap',
            'libobjc',
            'libstdc++-v3',
            ]


# Only run when executed as a script, so that importing the module has
# no side effects.
if __name__ == '__main__':
    GCCCmdLine().main()