1#!/usr/bin/python
2#
3# Copyright (C) 2013 Free Software Foundation, Inc.
4#
5# This script is free software; you can redistribute it and/or modify
6# it under the terms of the GNU General Public License as published by
7# the Free Software Foundation; either version 3, or (at your option)
8# any later version.
9
10# This script adjusts the copyright notices at the top of source files
11# so that they have the form:
12#
13#   Copyright XXXX-YYYY Free Software Foundation, Inc.
14#
15# It doesn't change code that is known to be maintained elsewhere or
16# that carries a non-FSF copyright.
17#
18# The script also doesn't change testsuite files, except those in
19# libstdc++-v3.  This is because libstdc++-v3 has a conformance testsuite,
20# while most tests in other directories are just things that failed at some
21# point in the past.
22#
23# Pass --this-year to the script if you want it to add the current year
24# to all applicable notices.  Pass --quilt if you are using quilt and
25# want files to be added to the quilt before being changed.
26#
27# By default the script will update all directories for which the
28# output has been vetted.  You can instead pass the names of individual
29# directories, including those that haven't been approved.  So:
30#
#    update-copyright.py --this-year
32#
33# is the command that would be used at the beginning of a year to update
34# all copyright notices (and possibly at other times to check whether
35# new files have been added with old years).  On the other hand:
36#
#    update-copyright.py --this-year libjava
38#
39# would run the script on just libjava/.
40#
41# Note that things like --version output strings must be updated before
42# this script is run.  There's already a separate procedure for that.
43
44import os
45import re
46import sys
47import time
48import subprocess
49
class Errors:
    """Writes error messages to stderr and counts how many were issued."""

    def __init__ (self):
        # Number of messages passed to report() so far.
        self.num_errors = 0

    def report (self, filename, string):
        """Write STRING to stderr as one line and count it as an error.

        The message is prefixed with 'FILENAME: ' when FILENAME is truthy."""
        message = string if not filename else filename + ': ' + string
        sys.stderr.write (message + '\n')
        self.num_errors += 1

    def ok (self):
        """Return True if report() has not been called yet."""
        return not self.num_errors
62
class GenericFilter:
    """Default policy for which files and directories are left alone.

    The sets created in the constructor are consulted by the methods
    below; subclasses add to the sets or override the methods."""

    def __init__ (self):
        # Basenames of files that are never processed.
        self.skip_files = {
            # Licence texts.
            'COPYING',
            'COPYING.LIB',
            'COPYING3',
            'COPYING3.LIB',
            'LICENSE',
            'fdl.texi',
            'gpl_v3.texi',
            'fdl-1.3.xml',
            'gpl-3.0.xml',

            # Files related to the autotools and libtool.
            'aclocal.m4',
            'compile',
            'config.guess',
            'config.sub',
            'depcomp',
            'install-sh',
            'libtool.m4',
            'ltmain.sh',
            'ltoptions.m4',
            'ltsugar.m4',
            'ltversion.m4',
            'lt~obsolete.m4',
            'missing',
            'mkdep',
            'mkinstalldirs',
            'move-if-change',
            'shlibpath.m4',
            'symlink-tree',
            'ylwrap',

            # The FSF mission statement and similar.
            'gnu.texi',
            'funding.texi',
            'appendix_free.xml',

            # Imported texinfo files.
            'texinfo.tex',
        }

        # Names of subdirectories that skip_dir rejects.
        self.skip_dirs = set()

        # File extensions (including the dot) that skip_file rejects.
        self.skip_extensions = set()

        # Basenames that is_fossilised_file treats as frozen.
        self.fossilised_files = set()

        # Basenames for which by_package_author returns True.
        self.own_files = set()

    def get_line_filter (self, dir, filename):
        """Return a compiled regexp for lines that must be left untouched,
        or None if every line of FILENAME should be examined."""
        if not filename.startswith ('ChangeLog'):
            return None

        # Changelog entries start with a tab; ignore any references to
        # copyright that they contain.
        return re.compile ('\t')

    def skip_file (self, dir, filename):
        """Return True if DIR/FILENAME should not be processed at all."""
        if filename in self.skip_files:
            return True

        stem, suffix = os.path.splitext (os.path.join (dir, filename))
        if suffix in self.skip_extensions:
            return True

        # A .in file is generated if it has an automake source (.am) or
        # a pair of autogen sources (.def and .tpl) next to it.
        if suffix == '.in' and (os.path.exists (stem + '.am')
                                or (os.path.exists (stem + '.def')
                                    and os.path.exists (stem + '.tpl'))):
            return True

        # A configure script is generated by autoconf if its .ac or .in
        # source is present.
        if filename == 'configure':
            return any (os.path.exists (stem + source)
                        for source in ('.ac', '.in'))

        return False

    def skip_dir (self, dir, subdir):
        """Return True if the walk should not descend into DIR/SUBDIR."""
        return subdir in self.skip_dirs

    def is_fossilised_file (self, dir, filename):
        """Return True if FILENAME is frozen, i.e. listed in
        fossilised_files or a ChangeLog other than the current one."""
        if filename in self.fossilised_files:
            return True

        # Only the file called exactly 'ChangeLog' is current; any other
        # name that contains 'ChangeLog' is treated as an archive.
        return filename != 'ChangeLog' and 'ChangeLog' in filename

    def by_package_author (self, dir, filename):
        """Return True if FILENAME is listed in own_files."""
        return filename in self.own_files
161
class Copyright:
    """Finds copyright notices in files and rewrites them canonically.

    A notice is located with copyright_re.  If its holder was registered
    with add_package_author, the holder is rewritten to its canonical
    spelling, any 'by' is dropped, the years are collapsed to 'XXXX' or
    'XXXX-YYYY' and the introduction is normalised to use '(C)'.
    Notices whose holder was registered with add_external_author are
    left alone.  Problems are reported through the ERRORS object passed
    to the constructor, which must provide report() and ok()."""

    def __init__ (self, errors):
        self.errors = errors

        # Characters in a range of years.  Include '.' for typos.
        ranges = r'[0-9](?:[-0-9.,\s]|\s+and\s+)*[0-9]'

        # Non-whitespace characters in a copyright holder's name.
        name = r'[\w.,-]'

        # Matches one year.
        self.year_re = re.compile ('[0-9]+')

        # Matches part of a year or copyright holder.
        self.continuation_re = re.compile (ranges + '|' + name)

        # Matches a full copyright notice:
        self.copyright_re = re.compile (
            # 1: 'Copyright (C)', etc.
            r'([Cc]opyright'
            r'|[Cc]opyright\s+\([Cc]\)'
            r'|[Cc]opyright\s+%s'
            r'|[Cc]opyright\s+©'
            r'|[Cc]opyright\s+@copyright{}'
            "|copyright = u'"
            r'|@set\s+copyright[\w-]+)'

            # 2: the years.  Include the whitespace in the year, so that
            # we can remove any excess.
            r'(\s*(?:' + ranges + ',?'
            r'|@value\{[^{}]*\})\s*)'

            # 3: 'by ', if used
            r'(by\s+)?'

            # 4: the copyright holder.  Don't allow multiple consecutive
            # spaces, so that right-margin gloss doesn't get caught
            # (e.g. gnat_ugn.texi).
            '(' + name + r'(?:\s?' + name + ')*)?')

        # A regexp for notices that might have slipped by.  Just matching
        # 'copyright' is too noisy, and 'copyright.*[0-9]' falls foul of
        # HTML header markers, so check for 'copyright' and two digits.
        self.other_copyright_re = re.compile ('copyright.*[0-9][0-9]',
                                              re.IGNORECASE)

        # Matches the comment leader at the start of a continuation line.
        self.comment_re = re.compile('#+|[*]+|;+|%+|//+|@c |dnl ')

        # Maps each known holder spelling to its canonical form, or to
        # None for holders whose notices must not be changed.
        self.holders = { '@copying': '@copying' }

        # Leading word sequences of the package authors' names; a holder
        # that is only a prefix probably continues on the next line.
        self.holder_prefixes = set()

        # True to 'quilt add' files before changing them.
        self.use_quilt = False

        # If set, force all notices to include this year.
        self.max_year = None

        # Goes after the year(s).  Could be ', '.
        self.separator = ' '

    def add_package_author (self, holder, canon_form = None):
        """Register HOLDER as a spelling of the package author.

        Notices that use it are rewritten to CANON_FORM, which defaults
        to HOLDER itself.  Every proper prefix of HOLDER that ends at a
        space is recorded so that is_complete can detect names that have
        been split across lines."""
        if not canon_form:
            canon_form = holder
        self.holders[holder] = canon_form
        index = holder.find (' ')
        while index >= 0:
            self.holder_prefixes.add (holder[:index])
            index = holder.find (' ', index + 1)

    def add_external_author (self, holder):
        """Register HOLDER as a known holder whose notices are left alone."""
        self.holders[holder] = None

    class BadYear (Exception):
        """Raised by parse_year for a year string it does not understand.

        This must derive from Exception: Python 3 refuses to raise or
        catch classes that do not derive from BaseException."""

        def __init__ (self, year):
            Exception.__init__ (self, year)
            self.year = year

        def __str__ (self):
            return 'unrecognised year: ' + self.year

    def parse_year (self, string):
        """Convert the digit string STRING to a four-digit year.

        Two-digit years greater than 70 are taken to be 19xx.  Raise
        BadYear for any other two-digit year and for strings that are
        not two or four digits long."""
        year = int (string)
        if len (string) == 2:
            if year > 70:
                return year + 1900
        elif len (string) == 4:
            return year
        raise self.BadYear (string)

    def year_range (self, years):
        """Return (earliest, latest) for the years mentioned in YEARS.

        YEARS must contain at least one year.  Raise BadYear if any of
        them cannot be parsed."""
        year_list = [self.parse_year (year)
                     for year in self.year_re.findall (years)]
        assert len (year_list) > 0
        return (min (year_list), max (year_list))

    def set_use_quilt (self, use_quilt):
        """Set whether files are 'quilt add'ed before being changed."""
        self.use_quilt = use_quilt

    def include_year (self, year):
        """Force updated notices to extend to YEAR.  May only be called
        once."""
        assert not self.max_year
        self.max_year = year

    def canonicalise_years (self, dir, filename, filter, years):
        """Return the canonical form of the year list YEARS.

        The result is 'XXXX' or 'XXXX-YYYY'.  The upper bound is raised
        to max_year if that is set and FILTER does not consider the file
        fossilised.  Texinfo '@value' references are returned unchanged.
        Raise BadYear if a year cannot be parsed."""
        # Leave texinfo variables alone.
        if years.startswith ('@value'):
            return years

        (min_year, max_year) = self.year_range (years)

        # Update the upper bound, if enabled.
        if self.max_year and not filter.is_fossilised_file (dir, filename):
            max_year = max (max_year, self.max_year)

        # Use a range.
        if min_year == max_year:
            return '%d' % min_year
        else:
            return '%d-%d' % (min_year, max_year)

    def strip_continuation (self, line):
        """Return LINE without leading whitespace and without a leading
        comment marker and the whitespace that follows it."""
        line = line.lstrip()
        match = self.comment_re.match (line)
        if match:
            line = line[match.end():].lstrip()
        return line

    def is_complete (self, match):
        """Return a true value if MATCH appears to hold a whole notice.

        A notice is incomplete if it has no holder, or if the holder is
        only a prefix of a package author's name and not itself a known
        holder."""
        holder = match.group (4)
        return (holder
                and (holder not in self.holder_prefixes
                     or holder in self.holders))

    def update_copyright (self, dir, filename, filter, file, line, match):
        """Canonicalise the notice that MATCH found in LINE.

        FILE is the iterator that LINE was read from; further lines are
        pulled from it while the notice looks incomplete and the next
        line looks like a continuation.

        Return (CHANGED, NEW_LINE, NEXT_LINE).  NEW_LINE replaces LINE
        and every continuation line merged into it.  NEXT_LINE is a line
        that was read from FILE but turned out not to belong to the
        notice, or None; the caller must process it next.  When the
        notice is rejected or left alone, CHANGED is False and NEW_LINE
        is the original text."""
        orig_line = line
        next_line = None
        pathname = os.path.join (dir, filename)

        intro = match.group (1)
        if intro.startswith ('@set'):
            # Texinfo year variables should always be on one line
            after_years = line[match.end (2):].strip()
            if after_years != '':
                self.errors.report (pathname,
                                    'trailing characters in @set: '
                                    + after_years)
                return (False, orig_line, next_line)
        else:
            # If it looks like the copyright is incomplete, add the next line.
            while not self.is_complete (match):
                try:
                    # The next() builtin works with any iterator, unlike
                    # the Python 2-only .next() method.
                    next_line = next (file)
                except StopIteration:
                    break

                # If the next line doesn't look like a proper continuation,
                # assume that what we've got is complete.
                continuation = self.strip_continuation (next_line)
                if not self.continuation_re.match (continuation):
                    break

                # Merge the lines for matching purposes.
                orig_line += next_line
                line = line.rstrip() + ' ' + continuation
                next_line = None

                # Rematch with the longer line, at the original position.
                match = self.copyright_re.match (line, match.start())
                assert match

            holder = match.group (4)

            # Use the filter to test cases where markup is getting in the way.
            if filter.by_package_author (dir, filename):
                assert holder not in self.holders

            elif not holder:
                self.errors.report (pathname, 'missing copyright holder')
                return (False, orig_line, next_line)

            elif holder not in self.holders:
                self.errors.report (pathname,
                                    'unrecognised copyright holder: ' + holder)
                return (False, orig_line, next_line)

            else:
                # See whether the copyright is associated with the package
                # author.
                canon_form = self.holders[holder]
                if not canon_form:
                    return (False, orig_line, next_line)

                # Make sure the author is given in a consistent way.
                # Edits are applied from right to left so that the match
                # offsets of the earlier groups stay valid.
                line = (line[:match.start (4)]
                        + canon_form
                        + line[match.end (4):])

                # Remove any 'by'
                if match.group (3):
                    line = line[:match.start (3)] + line[match.end (3):]

        # Update the copyright years.
        years = match.group (2).strip()
        try:
            canon_form = self.canonicalise_years (dir, filename, filter, years)
        except self.BadYear as e:
            self.errors.report (pathname, str (e))
            return (False, orig_line, next_line)

        line = (line[:match.start (2)]
                + ('' if intro.startswith ('copyright = ') else ' ')
                + canon_form + self.separator
                + line[match.end (2):])

        # Use the standard (C) form.
        if intro.endswith ('right'):
            intro += ' (C)'
        elif intro.endswith ('(c)'):
            intro = intro[:-3] + '(C)'
        line = line[:match.start (1)] + intro + line[match.end (1):]

        # Strip trailing whitespace
        line = line.rstrip() + '\n'

        return (line != orig_line, line, next_line)

    def process_file (self, dir, filename, filter):
        """Update the copyright notices in DIR/FILENAME.

        Leftover '.tmp' files from an earlier run are deleted instead of
        being processed.  The file is only rewritten if a notice changed
        and no error has been reported so far; the replacement keeps the
        permission bits of the original."""
        pathname = os.path.join (dir, filename)
        if filename.endswith ('.tmp'):
            # Looks like something we tried to create before.  Removal is
            # best-effort, so a failure is deliberately ignored.
            try:
                os.remove (pathname)
            except OSError:
                pass
            return

        lines = []
        changed = False
        line_filter = filter.get_line_filter (dir, filename)
        with open (pathname, 'r') as file:
            for line in file:
                # update_copyright may read ahead and hand back a line
                # that it did not use; process that line as well.
                while line:
                    next_line = None
                    # Leave filtered-out lines alone.
                    if not (line_filter and line_filter.match (line)):
                        match = self.copyright_re.search (line)
                        if match:
                            res = self.update_copyright (dir, filename, filter,
                                                         file, line, match)
                            (this_changed, line, next_line) = res
                            changed = changed or this_changed

                        # Check for copyright lines that might have slipped by.
                        elif self.other_copyright_re.search (line):
                            self.errors.report (pathname,
                                                'unrecognised copyright: %s'
                                                % line.strip())
                    lines.append (line)
                    line = next_line

        # If something changed, write the new file out.
        if changed and self.errors.ok():
            tmp_pathname = pathname + '.tmp'
            with open (tmp_pathname, 'w') as file:
                file.writelines (lines)

            # The temporary file is created with default permissions.
            # Copy the original's so that, for example, scripts remain
            # executable after the rename.
            os.chmod (tmp_pathname, os.stat (pathname).st_mode & 0o7777)

            if self.use_quilt:
                subprocess.call (['quilt', 'add', pathname])
            os.rename (tmp_pathname, pathname)

    def process_tree (self, tree, filter):
        """Process every file under TREE that FILTER does not skip."""
        for (dir, subdirs, filenames) in os.walk (tree):
            # Don't recurse through directories that should be skipped.
            # The list must be changed in place for os.walk to notice.
            subdirs[:] = [subdir for subdir in subdirs
                          if not filter.skip_dir (dir, subdir)]

            # Handle the files in this directory.
            for filename in filenames:
                if filter.skip_file (dir, filename):
                    sys.stdout.write ('Skipping %s\n'
                                      % os.path.join (dir, filename))
                else:
                    self.process_file (dir, filename, filter)
442
class CmdLine:
    """Command-line driver for a Copyright object.

    Directories are registered with add_dir, each with the filter that
    applies to it.  main() parses the arguments and then processes every
    registered directory selected by the chosen directory names."""

    def __init__ (self, copyright = None):
        """COPYRIGHT is called with an Errors object and must return the
        object that does the rewriting.  It defaults to the Copyright
        class; None is used as the default value so that the lookup
        happens at call time."""
        if copyright is None:
            copyright = Copyright
        self.errors = Errors()
        self.copyright = copyright (self.errors)

        # (directory, filter) pairs, in registration order.
        self.dirs = []

        # Directories to process when none are given on the command line.
        self.default_dirs = []

        # Directories named on the command line.
        self.chosen_dirs = []

        # Maps each option name to its handler.
        self.option_handlers = dict()

        # (option name, help text) pairs, in registration order.
        self.option_help = []

        self.add_option ('--help', 'Print this help', self.o_help)
        self.add_option ('--quilt', '"quilt add" files before changing them',
                         self.o_quilt)
        self.add_option ('--this-year', 'Add the current year to every notice',
                         self.o_this_year)

    def add_option (self, name, help, handler):
        """Register option NAME with help text HELP.  HANDLER is called
        with the option name when the option is seen."""
        self.option_help.append ((name, help))
        self.option_handlers[name] = handler

    def add_dir (self, dir, filter = None):
        """Register DIR as a known directory that is handled by FILTER.

        A fresh GenericFilter is created when FILTER is omitted, rather
        than sharing one default instance between all callers."""
        if filter is None:
            filter = GenericFilter()
        self.dirs.append ((dir, filter))

    def o_help (self, option = None):
        """Print the usage message, the options and the registered
        directories (three per row), then exit with status 0."""
        sys.stdout.write ('Usage: %s [options] dir1 dir2...\n\n'
                          'Options:\n' % sys.argv[0])
        format = '%-15s %s\n'
        for (what, help) in self.option_help:
            sys.stdout.write (format % (what, help))
        sys.stdout.write ('\nDirectories:\n')

        format = '%-25s'
        i = 0
        for (dir, filter) in self.dirs:
            i += 1
            # End the row after every third entry and after the last one.
            if i % 3 == 0 or i == len (self.dirs):
                sys.stdout.write (dir + '\n')
            else:
                sys.stdout.write (format % dir)
        sys.exit (0)

    def o_quilt (self, option):
        """Handler for --quilt."""
        self.copyright.set_use_quilt (True)

    def o_this_year (self, option):
        """Handler for --this-year."""
        self.copyright.include_year (time.localtime().tm_year)

    def main (self, argv = None):
        """Parse ARGV (by default sys.argv[1:]) and process the chosen
        directories.

        Arguments that start with '-' are options; all others name
        directories.  Always finish by calling sys.exit: status 0 if no
        error was reported, otherwise 1."""
        if argv is None:
            argv = sys.argv[1:]
        for arg in argv:
            if arg[:1] != '-':
                self.chosen_dirs.append (arg)
            elif arg in self.option_handlers:
                self.option_handlers[arg] (arg)
            else:
                self.errors.report (None, 'unrecognised option: ' + arg)
        if self.errors.ok():
            if not self.chosen_dirs:
                self.chosen_dirs = self.default_dirs
            if not self.chosen_dirs:
                self.o_help()
            else:
                for chosen_dir in self.chosen_dirs:
                    # Add a trailing separator so that 'gcc' selects 'gcc'
                    # and 'gcc/testsuite' but not, say, 'gccfoo'.
                    canon_dir = os.path.join (chosen_dir, '')
                    count = 0
                    for (dir, filter) in self.dirs:
                        if (dir + os.sep).startswith (canon_dir):
                            count += 1
                            self.copyright.process_tree (dir, filter)
                    if count == 0:
                        self.errors.report (None, 'unrecognised directory: '
                                            + chosen_dir)
        sys.exit (0 if self.errors.ok() else 1)
515
516#----------------------------------------------------------------------------
517
class TopLevelFilter (GenericFilter):
    """Filter that rejects every subdirectory, so that only the files
    directly inside the walked directory are considered."""

    def skip_dir (self, dir, subdir):
        """Return True for every SUBDIR."""
        return True
521
class ConfigFilter (GenericFilter):
    """Generic filter that also skips m4 files imported from gettext."""

    def __init__ (self):
        GenericFilter.__init__ (self)

    def skip_file (self, dir, filename):
        """Return True if DIR/FILENAME should not be processed."""
        if filename.endswith ('.m4'):
            with open (os.path.join (dir, filename)) as m4_file:
                first_line = m4_file.readline()

            # Files imported from gettext mention 'gettext-' on their
            # first line.
            if 'gettext-' in first_line:
                return True

        return GenericFilter.skip_file (self, dir, filename)
534
class GCCFilter (GenericFilter):
    """Filter used for the gcc directory."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        # math-68881.h is not part of GCC.
        self.skip_files.add ('math-68881.h')

        self.skip_dirs.update ([
                # Avoid creating a merge nightmare for the GNAT folks.
                'ada',

                # Registered as a directory of its own.
                'testsuite',
                ])

        self.skip_extensions.update ([
                # Maintained by the translation project.
                '.po',

                # Generated automatically.
                '.pot',
                ])

        # Old news is never updated.
        self.fossilised_files.add ('ONEWS')
564
class TestsuiteFilter (GenericFilter):
    """Filter for testsuite directories: leaves the tests themselves
    alone and only considers the remaining files."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        # Don't change the tests, which could be owned by anyone.
        for extension in ('.c', '.C', '.cc', '.h', '.hs',
                          '.f', '.f90', '.go', '.inc', '.java'):
            self.skip_extensions.add (extension)

    def skip_file (self, dir, filename):
        """Return True if DIR/FILENAME should not be processed."""
        # g++.niklas/README contains historical copyright information
        # and isn't updated.
        if (filename, os.path.basename (dir)) == ('README', 'g++.niklas'):
            return True

        return GenericFilter.skip_file (self, dir, filename)
589
class LibCppFilter (GenericFilter):
    """Filter used for the libcpp directory."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        # .po files are maintained by the translation project.
        self.skip_extensions.add ('.po')

        # .pot files are generated automatically.
        self.skip_extensions.add ('.pot')
601
class LibGCCFilter (GenericFilter):
    """Filter used for the libgcc directory."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        # soft-fp is imported from GLIBC.
        self.skip_dirs.add ('soft-fp')
610
class LibJavaFilter (GenericFilter):
    """Filter used for the libjava directory."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        self.skip_dirs.update ([
                # Registered as a directory of its own.
                'testsuite',

                # Not really part of the library.
                'contrib',

                # Imported from upstream.
                'classpath',
                'libltdl',
                ])

    def get_line_filter (self, dir, filename):
        """Return a regexp for the lines of FILENAME to leave untouched,
        or defer to GenericFilter for files with no special handling."""
        # Patterns for lines in these headers that are skipped rather
        # than examined for a copyright notice.
        special_cases = {
            'NameDecoder.h': '.*NAME_COPYRIGHT',
            'ICC_Profile.h': '.*icSigCopyrightTag',
        }
        if filename in special_cases:
            return re.compile (special_cases[filename])

        return GenericFilter.get_line_filter (self, dir, filename)
633
class LibMudflapFilter (GenericFilter):
    """Filter used for the libmudflap directory."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        # The testsuite is registered as a directory of its own.
        self.skip_dirs.add ('testsuite')
642
class LibStdCxxFilter (GenericFilter):
    """Filter used for the libstdc++-v3 directory."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        # intro.xml has no copyright of its own, but quotes the GPL.
        self.skip_files.add ('intro.xml')

        self.skip_dirs.update ([
                # Holds automatically-generated sources.
                'html',

                # The testsuite data files shouldn't be changed.
                'data',

                # Holds imported images.
                'images',
                ])

        # spine.xml has markup around the copyright owner.
        self.own_files.add ('spine.xml')

    def get_line_filter (self, dir, filename):
        """Return a regexp for the lines of FILENAME to leave untouched,
        or defer to GenericFilter for files with no special handling."""
        if filename != 'boost_concept_check.h':
            return GenericFilter.get_line_filter (self, dir, filename)

        return re.compile ('// \(C\) Copyright Jeremy Siek')
672
class GCCCopyright (Copyright):
    """Copyright updater that knows the holders found in the GCC tree."""

    def __init__ (self, errors):
        Copyright.__init__ (self, errors)

        # Every spelling of the FSF is rewritten to this form.
        canon_fsf = 'Free Software Foundation, Inc.'
        fsf_spellings = [
            'Free Software Foundation',
            'Free Software Foundation.',
            'Free Software Foundation Inc.',
            'Free Software Foundation, Inc',
            'Free Software Foundation, Inc.',
            'The Free Software Foundation',
            'The Free Software Foundation, Inc.',
            'Software Foundation, Inc.',
            ]
        for spelling in fsf_spellings:
            self.add_package_author (spelling, canon_fsf)

        # Notices that belong to these holders are left unchanged.
        external_holders = [
            'ARM',
            'AdaCore',
            'Ami Tavory and Vladimir Dreizin, IBM-HRL.',
            'Cavium Networks.',
            'Faraday Technology Corp.',
            'Florida State University',
            'Greg Colvin and Beman Dawes.',
            'Hewlett-Packard Company',
            'Information Technology Industry Council.',
            'James Theiler, Brian Gough',
            'Makoto Matsumoto and Takuji Nishimura,',
            'National Research Council of Canada.',
            'Peter Dimov and Multi Media Ltd.',
            'Peter Dimov',
            'Pipeline Associates, Inc.',
            'Regents of the University of California.',
            'Silicon Graphics Computer Systems, Inc.',
            'Silicon Graphics',
            'Stephen L. Moshier',
            'Sun Microsystems, Inc. All rights reserved.',
            'The Go Authors.  All rights reserved.',
            'The Go Authors. All rights reserved.',
            'The Go Authors.',
            'The Regents of the University of California.',
            'Unicode, Inc.',
            'University of Toronto.',
            ]
        for holder in external_holders:
            self.add_external_author (holder)
713
class GCCCmdLine (CmdLine):
    """Command line for GCC: registers the known directories, each with
    its filter, and the directories that are processed by default."""

    def __init__ (self):
        CmdLine.__init__ (self, GCCCopyright)

        # Each entry is (directory, filter class).  None means that the
        # directory uses add_dir's default filter.
        known_dirs = [
            ('.', TopLevelFilter),
            # boehm-gc is imported from upstream.
            ('config', ConfigFilter),
            # contrib isn't really part of GCC.
            ('fixincludes', None),
            ('gcc', GCCFilter),
            (os.path.join ('gcc', 'testsuite'), TestsuiteFilter),
            ('gnattools', None),
            ('include', None),
            ('libada', None),
            ('libatomic', None),
            ('libbacktrace', None),
            ('libcpp', LibCppFilter),
            ('libdecnumber', None),
            # libffi is imported from upstream.
            ('libgcc', LibGCCFilter),
            ('libgfortran', None),
            ('libgomp', None),
            ('libiberty', None),
            ('libitm', None),
            ('libjava', LibJavaFilter),
            (os.path.join ('libjava', 'testsuite'), TestsuiteFilter),
            ('libmudflap', LibMudflapFilter),
            (os.path.join ('libmudflap', 'testsuite'), TestsuiteFilter),
            ('libobjc', None),
            ('libquadmath', None),
            # libsanitizer is imported from upstream.
            ('libssp', None),
            ('libstdc++-v3', LibStdCxxFilter),
            ('lto-plugin', None),
            # zlib is imported from upstream.
            ]
        for (path, filter_class) in known_dirs:
            if filter_class is None:
                self.add_dir (path)
            else:
                self.add_dir (path, filter_class())

        # The directories processed when none is named on the command line.
        self.default_dirs = ('gcc libada libatomic libbacktrace libcpp '
                             'libdecnumber libgcc libgfortran libgomp '
                             'libitm libmudflap libobjc libstdc++-v3').split()
766
# Only run the updater when this file is executed as a script, so that
# importing it (for example to reuse the filter classes) has no side
# effects and does not call sys.exit.
if __name__ == '__main__':
    GCCCmdLine().main()
768