make license check more strict

The license text is now expected to match almost exactly (not
accounting for formatting differences between file types, e.g. rst vs.
bash script vs. python).
This commit is contained in:
Peter Josef Scheibel 2019-05-06 16:35:09 -07:00 committed by Peter Scheibel
parent 7e94774214
commit 53ec16c9e5
2 changed files with 91 additions and 37 deletions

View file

@ -7,6 +7,7 @@
import os import os
import re import re
from collections import defaultdict
import llnl.util.tty as tty import llnl.util.tty as tty
@ -89,50 +90,103 @@ def list_files(args):
print(os.path.join(spack.paths.spack_root, relpath)) print(os.path.join(spack.paths.spack_root, relpath))
# Result codes for license verification. Each code is a nonzero integer,
# so bool(code) is True for every one of them.
OLD_LICENSE = 1
SPDX_MISMATCH = 2
GENERAL_MISMATCH = 3
class LicenseError(object):
    """Tally of license-verification failures, keyed by error code."""

    def __init__(self):
        # A defaultdict so that codes never recorded read back as zero.
        self.error_counts = defaultdict(int)

    def add_error(self, error):
        """Record one more occurrence of the error code ``error``."""
        self.error_counts[error] = self.error_counts[error] + 1

    def has_errors(self):
        """Return True when the recorded counts add up to more than zero."""
        grand_total = sum(self.error_counts.values())
        return grand_total > 0

    def error_messages(self):
        """Return a tuple of four summary strings.

        The strings are, in order: the overall total, the SPDX mismatch
        count, the old-license count, and the count of files that do not
        contain the expected license.
        """
        counts = self.error_counts
        grand_total = sum(counts.values())
        n_general = counts[GENERAL_MISMATCH]
        n_spdx = counts[SPDX_MISMATCH]
        n_old = counts[OLD_LICENSE]

        summary = [
            '%d improperly licensed files' % (grand_total),
            'files with wrong SPDX-License-Identifier: %d' % n_spdx,
            'files with old license header: %d' % n_old,
            'files not containing expected license: %d' % n_general,
        ]
        return tuple(summary)
def _check_license(lines, path):
license_lines = [
r'Copyright 2013-(?:201[789]|202\d) Lawrence Livermore National Security, LLC and other', # noqa: E501
r'Spack Project Developers\. See the top-level COPYRIGHT file for details.', # noqa: E501
r'SPDX-License-Identifier: \(Apache-2\.0 OR MIT\)'
]
strict_date = r'Copyright 2013-2019'
found = []
for line in lines:
line = re.sub(r'^[\s#\.]*', '', line)
line = line.rstrip()
for i, license_line in enumerate(license_lines):
if re.match(license_line, line):
# The first line of the license contains the copyright date.
# We allow it to be out of date but print a warning if it is
# out of date.
if i == 0:
if not re.search(strict_date, line):
tty.debug('{0}: copyright date mismatch'.format(path))
found.append(i)
if len(found) == len(license_lines) and found == list(sorted(found)):
return
def old_license(line, path):
if re.search('This program is free software', line):
print('{0}: has old LGPL license header'.format(path))
return OLD_LICENSE
# If the SPDX identifier is present, then there is a mismatch (since it
# did not match the above regex)
def wrong_spdx_identifier(line, path):
m = re.search(r'SPDX-License-Identifier: ([^\n]*)', line)
if m and m.group(1) != apache2_mit_spdx:
print('{0}: SPDX license identifier mismatch'
'(expecting {1}, found {2})'
.format(path, apache2_mit_spdx, m.group(1)))
return SPDX_MISMATCH
checks = [old_license, wrong_spdx_identifier]
for line in lines:
for check in checks:
error = check(line, path)
if error:
return error
print('{0}: the license does not match the expected format'.format(path))
return GENERAL_MISMATCH
def verify(args): def verify(args):
"""verify that files in spack have the right license header""" """verify that files in spack have the right license header"""
errors = 0
missing = 0 license_errors = LicenseError()
old_license = 0
for relpath in _licensed_files(args.root): for relpath in _licensed_files(args.root):
path = os.path.join(args.root, relpath) path = os.path.join(args.root, relpath)
with open(path) as f: with open(path) as f:
lines = [line for line in f] lines = [line for line in f][:license_lines]
if not any(re.match(regex, relpath) for regex in lgpl_exceptions): error = _check_license(lines, path)
if any(re.match(r'^# This program is free software', line) if error:
for line in lines): license_errors.add_error(error)
print('%s: has old LGPL license header' % path)
old_license += 1
continue
# how we'll find licenses in files if license_errors.has_errors():
spdx_expr = r'SPDX-License-Identifier: ([^\n]*)' tty.die(*license_errors.error_messages())
# check first <license_lines> lines for required header
first_n_lines = ''.join(lines[:license_lines])
match = re.search(spdx_expr, first_n_lines)
if not match:
print('%s: no license header' % path)
missing += 1
continue
correct = apache2_mit_spdx
actual = match.group(1)
if actual != correct:
print("%s: labeled as '%s', but should be '%s'"
% (path, actual, correct))
errors += 1
continue
if any([errors, missing, old_license]):
tty.die(
'%d improperly licensed files' % (errors + missing + old_license),
'files with no SPDX-License-Identifier: %d' % missing,
'files with wrong SPDX-License-Identifier: %d' % errors,
'files with old license header: %d' % old_license)
else: else:
tty.msg('No license issues found.') tty.msg('No license issues found.')

View file

@ -61,7 +61,7 @@ def test_verify(tmpdir):
assert str(old_lgpl_header) in out assert str(old_lgpl_header) in out
assert str(correct_header) not in out assert str(correct_header) not in out
assert '3 improperly licensed files' in out assert '3 improperly licensed files' in out
assert re.search(r'files with no SPDX-License-Identifier:\s*1', out) assert re.search(r'files not containing expected license:\s*1', out)
assert re.search(r'files with wrong SPDX-License-Identifier:\s*1', out) assert re.search(r'files with wrong SPDX-License-Identifier:\s*1', out)
assert re.search(r'files with old license header:\s*1', out) assert re.search(r'files with old license header:\s*1', out)