make license check more-strict
The license text is now expected to match almost exactly (not accounting for formatting in different file types (e.g. rst vs. bash script vs. python)
This commit is contained in:
parent
7e94774214
commit
53ec16c9e5
2 changed files with 91 additions and 37 deletions
|
@ -7,6 +7,7 @@
|
|||
|
||||
import os
|
||||
import re
|
||||
from collections import defaultdict
|
||||
|
||||
import llnl.util.tty as tty
|
||||
|
||||
|
@ -89,50 +90,103 @@ def list_files(args):
|
|||
print(os.path.join(spack.paths.spack_root, relpath))
|
||||
|
||||
|
||||
# Error codes for license verification. All values are chosen such that
|
||||
# bool(value) evaluates to True
|
||||
OLD_LICENSE, SPDX_MISMATCH, GENERAL_MISMATCH = range(1, 4)
|
||||
|
||||
|
||||
class LicenseError(object):
|
||||
def __init__(self):
|
||||
self.error_counts = defaultdict(int)
|
||||
|
||||
def add_error(self, error):
|
||||
self.error_counts[error] += 1
|
||||
|
||||
def has_errors(self):
|
||||
return sum(self.error_counts.values()) > 0
|
||||
|
||||
def error_messages(self):
|
||||
total = sum(self.error_counts.values())
|
||||
missing = self.error_counts[GENERAL_MISMATCH]
|
||||
spdx_mismatch = self.error_counts[SPDX_MISMATCH]
|
||||
old_license = self.error_counts[OLD_LICENSE]
|
||||
return (
|
||||
'%d improperly licensed files' % (total),
|
||||
'files with wrong SPDX-License-Identifier: %d' % spdx_mismatch,
|
||||
'files with old license header: %d' % old_license,
|
||||
'files not containing expected license: %d' % missing)
|
||||
|
||||
|
||||
def _check_license(lines, path):
|
||||
license_lines = [
|
||||
r'Copyright 2013-(?:201[789]|202\d) Lawrence Livermore National Security, LLC and other', # noqa: E501
|
||||
r'Spack Project Developers\. See the top-level COPYRIGHT file for details.', # noqa: E501
|
||||
r'SPDX-License-Identifier: \(Apache-2\.0 OR MIT\)'
|
||||
]
|
||||
|
||||
strict_date = r'Copyright 2013-2019'
|
||||
|
||||
found = []
|
||||
|
||||
for line in lines:
|
||||
line = re.sub(r'^[\s#\.]*', '', line)
|
||||
line = line.rstrip()
|
||||
for i, license_line in enumerate(license_lines):
|
||||
if re.match(license_line, line):
|
||||
# The first line of the license contains the copyright date.
|
||||
# We allow it to be out of date but print a warning if it is
|
||||
# out of date.
|
||||
if i == 0:
|
||||
if not re.search(strict_date, line):
|
||||
tty.debug('{0}: copyright date mismatch'.format(path))
|
||||
found.append(i)
|
||||
|
||||
if len(found) == len(license_lines) and found == list(sorted(found)):
|
||||
return
|
||||
|
||||
def old_license(line, path):
|
||||
if re.search('This program is free software', line):
|
||||
print('{0}: has old LGPL license header'.format(path))
|
||||
return OLD_LICENSE
|
||||
|
||||
# If the SPDX identifier is present, then there is a mismatch (since it
|
||||
# did not match the above regex)
|
||||
def wrong_spdx_identifier(line, path):
|
||||
m = re.search(r'SPDX-License-Identifier: ([^\n]*)', line)
|
||||
if m and m.group(1) != apache2_mit_spdx:
|
||||
print('{0}: SPDX license identifier mismatch'
|
||||
'(expecting {1}, found {2})'
|
||||
.format(path, apache2_mit_spdx, m.group(1)))
|
||||
return SPDX_MISMATCH
|
||||
|
||||
checks = [old_license, wrong_spdx_identifier]
|
||||
|
||||
for line in lines:
|
||||
for check in checks:
|
||||
error = check(line, path)
|
||||
if error:
|
||||
return error
|
||||
|
||||
print('{0}: the license does not match the expected format'.format(path))
|
||||
return GENERAL_MISMATCH
|
||||
|
||||
|
||||
def verify(args):
|
||||
"""verify that files in spack have the right license header"""
|
||||
errors = 0
|
||||
missing = 0
|
||||
old_license = 0
|
||||
|
||||
license_errors = LicenseError()
|
||||
|
||||
for relpath in _licensed_files(args.root):
|
||||
path = os.path.join(args.root, relpath)
|
||||
with open(path) as f:
|
||||
lines = [line for line in f]
|
||||
lines = [line for line in f][:license_lines]
|
||||
|
||||
if not any(re.match(regex, relpath) for regex in lgpl_exceptions):
|
||||
if any(re.match(r'^# This program is free software', line)
|
||||
for line in lines):
|
||||
print('%s: has old LGPL license header' % path)
|
||||
old_license += 1
|
||||
continue
|
||||
error = _check_license(lines, path)
|
||||
if error:
|
||||
license_errors.add_error(error)
|
||||
|
||||
# how we'll find licenses in files
|
||||
spdx_expr = r'SPDX-License-Identifier: ([^\n]*)'
|
||||
|
||||
# check first <license_lines> lines for required header
|
||||
first_n_lines = ''.join(lines[:license_lines])
|
||||
match = re.search(spdx_expr, first_n_lines)
|
||||
|
||||
if not match:
|
||||
print('%s: no license header' % path)
|
||||
missing += 1
|
||||
continue
|
||||
|
||||
correct = apache2_mit_spdx
|
||||
actual = match.group(1)
|
||||
if actual != correct:
|
||||
print("%s: labeled as '%s', but should be '%s'"
|
||||
% (path, actual, correct))
|
||||
errors += 1
|
||||
continue
|
||||
|
||||
if any([errors, missing, old_license]):
|
||||
tty.die(
|
||||
'%d improperly licensed files' % (errors + missing + old_license),
|
||||
'files with no SPDX-License-Identifier: %d' % missing,
|
||||
'files with wrong SPDX-License-Identifier: %d' % errors,
|
||||
'files with old license header: %d' % old_license)
|
||||
if license_errors.has_errors():
|
||||
tty.die(*license_errors.error_messages())
|
||||
else:
|
||||
tty.msg('No license issues found.')
|
||||
|
||||
|
|
|
@ -61,7 +61,7 @@ def test_verify(tmpdir):
|
|||
assert str(old_lgpl_header) in out
|
||||
assert str(correct_header) not in out
|
||||
assert '3 improperly licensed files' in out
|
||||
assert re.search(r'files with no SPDX-License-Identifier:\s*1', out)
|
||||
assert re.search(r'files not containing expected license:\s*1', out)
|
||||
assert re.search(r'files with wrong SPDX-License-Identifier:\s*1', out)
|
||||
assert re.search(r'files with old license header:\s*1', out)
|
||||
|
||||
|
|
Loading…
Reference in a new issue