Check copyright statements and SPDX license identifiers

Enforce a specific copyright statement and a specific SPDX license
identifier where they are present.

Binary files, third-party modules and a few other exceptions are not
checked.

There is currently no check that copyright statements and license
identifiers are present.

Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
This commit is contained in:
Gilles Peskine 2023-11-03 14:13:55 +01:00
parent 990030bce0
commit f2fb9f667c

View file

@ -12,6 +12,7 @@ Note: requires python 3, must be run from Mbed TLS root.
import argparse import argparse
import codecs import codecs
import inspect
import logging import logging
import os import os
import re import re
@ -345,6 +346,84 @@ class MergeArtifactIssueTracker(LineIssueTracker):
return False return False
# Base name of the file that contains this code, obtained by inspecting the
# currently executing frame. LicenseIssueTracker.issue_with_line() compares
# the paths it is given against this name to recognize this script.
THIS_FILE_BASE_NAME = \
os.path.basename(inspect.getframeinfo(inspect.currentframe()).filename)
# Line number reported by the frame while this assignment executes. It is
# recorded just before the LicenseIssueTracker class so that the class can
# skip the lines of this file that follow it. Comments located above this
# point are not skipped, so keep them free of text the tracker would flag.
LINE_NUMBER_BEFORE_LICENSE_ISSUE_TRACKER = \
inspect.getframeinfo(inspect.currentframe()).lineno
class LicenseIssueTracker(LineIssueTracker):
    """Validate the copyright lines and SPDX license headers that are present.

    A line is reported when it carries a copyright statement whose text
    differs from COPYRIGHT_HOLDER, or an SPDX header whose key is not spelled
    exactly like SPDX_HEADER_KEY, whose value is missing, or whose value
    differs from LICENSE_IDENTIFIER.

    Lines that carry neither kind of statement are never reported, so a file
    without any such statement passes: presence is not enforced.
    """

    heading = "License issue:"

    # Path patterns exempted from the license checks. They are combined with
    # BINARY_FILE_PATH_RE_LIST into path_exemptions below.
    LICENSE_EXEMPTION_RE_LIST = [
        # Third-party code may use another license, except for the
        # whitelisted third-party modules, which are still checked.
        r'3rdparty/(?!(p256-m)/.*)',
        # Documents that discuss licensing can trigger accidental
        # false positives.
        r'(ChangeLog|LICENSE|[-0-9A-Z_a-z]+\.md)\Z',
        # Files imported from TF-M and only used in test builds may use
        # another license.
        r'configs/crypto_config_profile_medium\.h\Z',
        r'configs/tfm_mbedcrypto_config_profile_medium\.h\Z',
        # Third-party file.
        r'dco\.txt\Z',
    ]
    # NOTE(review): presumably consumed by the base class to decide which
    # files to skip; the consumer is not visible here - confirm.
    path_exemptions = re.compile(
        '|'.join(BINARY_FILE_PATH_RE_LIST + LICENSE_EXEMPTION_RE_LIST)
    )

    # The only text accepted after the word "copyright".
    COPYRIGHT_HOLDER = rb'The Mbed TLS Contributors'
    # Case-insensitively matches "Copyright foo", "Copyright (C) foo",
    # "Copyright © foo", etc. Group 1 is the text that follows the keyword,
    # up to its last word character.
    COPYRIGHT_RE = re.compile(
        rb'.*\bcopyright\s+((?:\w|\s|[()]|[^ -~])*\w)',
        re.I,
    )

    SPDX_HEADER_KEY = b'SPDX-License-Identifier'
    LICENSE_IDENTIFIER = b'Apache-2.0 OR GPL-2.0-or-later'
    # Group 1 is the header key as written in the line (matched without
    # regard to case). Group 3 is the value after the colon with trailing
    # non-word characters removed; it is None when no colon follows the key.
    SPDX_RE = re.compile(
        br'.*?(' + re.escape(SPDX_HEADER_KEY) + br')(:\s*(.*?)\W*\Z|.*)',
        re.I,
    )

    def __init__(self):
        super().__init__()
        # Description of the most recently detected problem. The structure
        # of the script makes it hard to report it. To be fixed after
        # https://github.com/Mbed-TLS/mbedtls/pull/2506
        self.problem = None

    def issue_with_line(self, line, filepath, line_number):
        """Return True if line holds an incorrect license statement.

        line is matched against bytes patterns. filepath and line_number
        are only used to recognize the code of this tracker in its own file.
        When True is returned, self.problem describes what was wrong.
        """
        # endswith() is used instead of the more correct os.path.basename()
        # because experimentally, it makes a significant difference to the
        # running time.
        if filepath.endswith(THIS_FILE_BASE_NAME) and \
           line_number > LINE_NUMBER_BEFORE_LICENSE_ISSUE_TRACKER:
            # The code of this class would otherwise cause false positives.
            # The remainder of this file is skipped as well: it is highly
            # unlikely to contain problematic statements, since those are
            # placed near the top of files.
            return False

        problem = None
        copyright_match = self.COPYRIGHT_RE.match(line)
        if copyright_match and \
           copyright_match.group(1) != self.COPYRIGHT_HOLDER:
            problem = 'Invalid copyright line'
        else:
            spdx_match = self.SPDX_RE.match(line)
            if spdx_match:
                found_key, _, found_identifier = spdx_match.groups()
                if found_key != self.SPDX_HEADER_KEY:
                    problem = 'Misspelled ' + self.SPDX_HEADER_KEY.decode()
                elif not found_identifier:
                    problem = 'Improperly formatted SPDX license identifier'
                elif found_identifier != self.LICENSE_IDENTIFIER:
                    problem = 'Wrong SPDX license identifier'

        if problem is None:
            return False
        self.problem = problem
        return True
class IntegrityChecker: class IntegrityChecker:
"""Sanity-check files under the current directory.""" """Sanity-check files under the current directory."""
@ -365,6 +444,7 @@ class IntegrityChecker:
TrailingWhitespaceIssueTracker(), TrailingWhitespaceIssueTracker(),
TabIssueTracker(), TabIssueTracker(),
MergeArtifactIssueTracker(), MergeArtifactIssueTracker(),
LicenseIssueTracker(),
] ]
def setup_logger(self, log_file, level=logging.INFO): def setup_logger(self, log_file, level=logging.INFO):