Improve ease of specifying which files to look in (check_names)

- Instead of os.path.join, use glob patterns (supports Windows too)
- Instead of creating the lists beforehand (which adds messiness), pass glob
  expressions to functions and let them memoise the results.
- Add support for excluding based on glob patterns, which isn't used now but
  could come in handy.

Signed-off-by: Yuto Takano <yuto.takano@arm.com>
This commit is contained in:
Yuto Takano 2021-08-09 14:48:53 +01:00
parent f005c3369a
commit 8e9a219310
2 changed files with 84 additions and 66 deletions

View file

@ -179,8 +179,11 @@ class NameCheck():
self.return_code = 0 self.return_code = 0
self.setup_logger(verbose) self.setup_logger(verbose)
# Memo for storing "glob expression": set(filepaths)
self.files = {}
# Globally excluded filenames # Globally excluded filenames
self.excluded_files = ["bn_mul", "compat-2.x.h"] self.excluded_files = ["**/bn_mul", "**/compat-2.x.h"]
# Will contain the parse result after a comprehensive parse # Will contain the parse result after a comprehensive parse
self.parse_result = {} self.parse_result = {}
@ -212,23 +215,46 @@ class NameCheck():
self.log.setLevel(logging.INFO) self.log.setLevel(logging.INFO)
self.log.addHandler(logging.StreamHandler()) self.log.addHandler(logging.StreamHandler())
def get_files(self, wildcard): def get_files(self, include_wildcards, exclude_wildcards):
""" """
Get all files that match a UNIX-style wildcard recursively. While the Get all files that match any of the UNIX-style wildcards. While the
script is designed only for use on UNIX/macOS (due to nm), this function check_names script is designed only for use on UNIX/macOS (due to nm),
would work fine on Windows even with forward slashes in the wildcard. this function alone would work fine on Windows even with forward slashes
in the wildcard.
Args: Args:
* wildcard: shell-style wildcards to match filepaths against. * include_wildcards: a List of shell-style wildcards to match filepaths.
* exclude_wildcards: a List of shell-style wildcards to exclude.
Returns a List of relative filepaths. Returns a List of relative filepaths.
""" """
accumulator = [] accumulator = set()
for filepath in glob.iglob(wildcard, recursive=True): # exclude_wildcards may be None. Also, consider the global exclusions.
if os.path.basename(filepath) not in self.excluded_files: exclude_wildcards = (exclude_wildcards or []) + self.excluded_files
accumulator.append(filepath)
return accumulator # Perform set union on the glob results. Memoise individual sets.
for include_wildcard in include_wildcards:
if include_wildcard not in self.files:
self.files[include_wildcard] = set(glob.glob(
include_wildcard,
recursive=True
))
accumulator = accumulator.union(self.files[include_wildcard])
# Perform set difference to exclude. Also use the same memo since their
# behaviour is pretty much identical and it can benefit from the cache.
for exclude_wildcard in exclude_wildcards:
if exclude_wildcard not in self.files:
self.files[exclude_wildcard] = set(glob.glob(
exclude_wildcard,
recursive=True
))
accumulator = accumulator.difference(self.files[exclude_wildcard])
return list(accumulator)
def parse_names_in_source(self): def parse_names_in_source(self):
""" """
@ -243,31 +269,37 @@ class NameCheck():
.format(str(self.excluded_files)) .format(str(self.excluded_files))
) )
m_headers = self.get_files("include/mbedtls/*.h") all_macros = self.parse_macros([
p_headers = self.get_files("include/psa/*.h") "include/mbedtls/*.h",
t_headers = [ "include/psa/*.h",
"library/*.h",
"tests/include/test/drivers/*.h",
"3rdparty/everest/include/everest/everest.h", "3rdparty/everest/include/everest/everest.h",
"3rdparty/everest/include/everest/x25519.h" "3rdparty/everest/include/everest/x25519.h"
] ])
d_headers = self.get_files("tests/include/test/drivers/*.h") enum_consts = self.parse_enum_consts([
l_headers = self.get_files("library/*.h") "include/mbedtls/*.h",
libraries = self.get_files("library/*.c") + [ "library/*.h",
"3rdparty/everest/include/everest/everest.h",
"3rdparty/everest/include/everest/x25519.h"
])
identifiers = self.parse_identifiers([
"include/mbedtls/*.h",
"include/psa/*.h",
"library/*.h",
"3rdparty/everest/include/everest/everest.h",
"3rdparty/everest/include/everest/x25519.h"
])
mbed_words = self.parse_mbed_words([
"include/mbedtls/*.h",
"include/psa/*.h",
"library/*.h",
"3rdparty/everest/include/everest/everest.h",
"3rdparty/everest/include/everest/x25519.h",
"library/*.c",
"3rdparty/everest/library/everest.c", "3rdparty/everest/library/everest.c",
"3rdparty/everest/library/x25519.c" "3rdparty/everest/library/x25519.c"
] ])
all_macros = self.parse_macros(
m_headers + p_headers + t_headers + l_headers + d_headers
)
enum_consts = self.parse_enum_consts(
m_headers + l_headers + t_headers
)
identifiers = self.parse_identifiers(
m_headers + p_headers + t_headers + l_headers
)
mbed_words = self.parse_mbed_words(
m_headers + p_headers + t_headers + l_headers + libraries
)
symbols = self.parse_symbols() symbols = self.parse_symbols()
# Remove identifier macros like mbedtls_printf or mbedtls_calloc # Remove identifier macros like mbedtls_printf or mbedtls_calloc
@ -284,7 +316,6 @@ class NameCheck():
self.log.debug(" {} Identifiers".format(len(identifiers))) self.log.debug(" {} Identifiers".format(len(identifiers)))
self.log.debug(" {} Exported Symbols".format(len(symbols))) self.log.debug(" {} Exported Symbols".format(len(symbols)))
self.log.info("Analysing...") self.log.info("Analysing...")
self.parse_result = { self.parse_result = {
"macros": actual_macros, "macros": actual_macros,
"enum_consts": enum_consts, "enum_consts": enum_consts,
@ -293,12 +324,13 @@ class NameCheck():
"mbed_words": mbed_words "mbed_words": mbed_words
} }
def parse_macros(self, files): def parse_macros(self, include, exclude=None):
""" """
Parse all macros defined by #define preprocessor directives. Parse all macros defined by #define preprocessor directives.
Args: Args:
* files: A List of filepaths to look through. * include: A List of glob expressions to look for files through.
* exclude: A List of glob expressions for excluding files.
Returns a List of Match objects for the found macros. Returns a List of Match objects for the found macros.
""" """
@ -307,11 +339,9 @@ class NameCheck():
"asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_" "asm", "inline", "EMIT", "_CRT_SECURE_NO_DEPRECATE", "MULADDC_"
) )
self.log.debug("Looking for macros in {} files".format(len(files)))
macros = [] macros = []
for header_file in files: for header_file in self.get_files(include, exclude):
with open(header_file, "r", encoding="utf-8") as header: with open(header_file, "r", encoding="utf-8") as header:
for line_no, line in enumerate(header): for line_no, line in enumerate(header):
for macro in macro_regex.finditer(line): for macro in macro_regex.finditer(line):
@ -326,13 +356,14 @@ class NameCheck():
return macros return macros
def parse_mbed_words(self, files): def parse_mbed_words(self, include, exclude=None):
""" """
Parse all words in the file that begin with MBED, in and out of macros, Parse all words in the file that begin with MBED, in and out of macros,
comments, anything. comments, anything.
Args: Args:
* files: a List of filepaths to look through. * include: A List of glob expressions to look for files through.
* exclude: A List of glob expressions for excluding files.
Returns a List of Match objects for words beginning with MBED. Returns a List of Match objects for words beginning with MBED.
""" """
@ -340,11 +371,9 @@ class NameCheck():
mbed_regex = re.compile(r"\bMBED.+?_[A-Z0-9_]*") mbed_regex = re.compile(r"\bMBED.+?_[A-Z0-9_]*")
exclusions = re.compile(r"// *no-check-names|#error") exclusions = re.compile(r"// *no-check-names|#error")
self.log.debug("Looking for MBED names in {} files".format(len(files)))
mbed_words = [] mbed_words = []
for filename in files: for filename in self.get_files(include, exclude):
with open(filename, "r", encoding="utf-8") as fp: with open(filename, "r", encoding="utf-8") as fp:
for line_no, line in enumerate(fp): for line_no, line in enumerate(fp):
if exclusions.search(line): if exclusions.search(line):
@ -360,23 +389,19 @@ class NameCheck():
return mbed_words return mbed_words
def parse_enum_consts(self, files): def parse_enum_consts(self, include, exclude=None):
""" """
Parse all enum value constants that are declared. Parse all enum value constants that are declared.
Args: Args:
* files: A List of filepaths to look through. * include: A List of glob expressions to look for files through.
* exclude: A List of glob expressions for excluding files.
Returns a List of Match objects for the findings. Returns a List of Match objects for the findings.
""" """
self.log.debug(
"Looking for enum consts in {} files"
.format(len(files))
)
enum_consts = [] enum_consts = []
for header_file in files: for header_file in self.get_files(include, exclude):
# Emulate a finite state machine to parse enum declarations. # Emulate a finite state machine to parse enum declarations.
# 0 = not in enum # 0 = not in enum
# 1 = inside enum # 1 = inside enum
@ -408,7 +433,7 @@ class NameCheck():
return enum_consts return enum_consts
def parse_identifiers(self, files): def parse_identifiers(self, include, exclude=None):
""" """
Parse all lines of a header where a function identifier is declared, Parse all lines of a header where a function identifier is declared,
based on some heuristics. Highly dependent on formatting style. based on some heuristics. Highly dependent on formatting style.
@ -416,7 +441,8 @@ class NameCheck():
.search() checks throughout. .search() checks throughout.
Args: Args:
* files: A List of filepaths to look through. * include: A List of glob expressions to look for files through.
* exclude: A List of glob expressions for excluding files.
Returns a List of Match objects with identifiers. Returns a List of Match objects with identifiers.
""" """
@ -445,15 +471,9 @@ class NameCheck():
r"#" r"#"
r")" r")"
) )
self.log.debug(
"Looking for identifiers in {} files"
.format(len(files))
)
identifiers = [] identifiers = []
for header_file in files: for header_file in self.get_files(include, exclude):
with open(header_file, "r", encoding="utf-8") as header: with open(header_file, "r", encoding="utf-8") as header:
in_block_comment = False in_block_comment = False
# The previous line variable is used for concatenating lines # The previous line variable is used for concatenating lines

View file

@ -45,12 +45,10 @@ def main():
try: try:
name_check = NameCheck() name_check = NameCheck()
internal_headers = ( result = name_check.parse_identifiers([
name_check.get_files("include/mbedtls/*_internal.h") + "include/mbedtls/*_internal.h",
name_check.get_files("library/*.h") "library/*.h"
) ])
result = name_check.parse_identifiers(internal_headers)
identifiers = ["{}\n".format(match.name) for match in result] identifiers = ["{}\n".format(match.name) for match in result]
with open("_identifiers", "w", encoding="utf-8") as f: with open("_identifiers", "w", encoding="utf-8") as f: