From 7f6933a227699b7d809036b3f6cc8544c68d054f Mon Sep 17 00:00:00 2001
From: Pengyu Lv <pengyu.lv@arm.com>
Date: Tue, 4 Apr 2023 16:05:54 +0800
Subject: [PATCH 01/20] cert_audit: Initial script for auditing expiry date

Introduce a script to audit the expiry dates of X.509 files
(i.e. crt/crl/csr files) in the tests/data_files/ folder.

This commit adds the basic classes and the framework for auditing,
and an "-a" option to list all valid crt/crl/csr files it finds.

Signed-off-by: Pengyu Lv <pengyu.lv@arm.com>
---
 tests/scripts/audit-validity-dates.py | 282 ++++++++++++++++++++++++++
 1 file changed, 282 insertions(+)
 create mode 100755 tests/scripts/audit-validity-dates.py
diff --git a/tests/scripts/audit-validity-dates.py b/tests/scripts/audit-validity-dates.py
new file mode 100755
index 000000000..67d409665
--- /dev/null
+++ b/tests/scripts/audit-validity-dates.py
@@ -0,0 +1,282 @@
+#!/usr/bin/env python3
+#
+# copyright the mbed tls contributors
+# spdx-license-identifier: apache-2.0
+#
+# licensed under the apache license, version 2.0 (the "license"); you may
+# not use this file except in compliance with the license.
+# you may obtain a copy of the license at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Audit validity date of X509 crt/crl/csr
+
+This script is used to audit the validity date of crt/crl/csr used for testing.
+The files are in tests/data_files/ while some data are in test suites data in
+tests/suites/*.data files.
+"""
+
+import os
+import sys
+import re
+import typing
+import types
+import argparse
+import datetime
+from enum import Enum
+
+from cryptography import x509
+
+class DataType(Enum):
+    CRT = 1 # Certificate
+    CRL = 2 # Certificate Revocation List
+    CSR = 3 # Certificate Signing Request
+
+class DataFormat(Enum):
+    PEM = 1 # Privacy-Enhanced Mail
+    DER = 2 # Distinguished Encoding Rules
+
+class AuditData:
+    """Store file, type and expiration date for audit."""
+    #pylint: disable=too-few-public-methods
+    def __init__(self, data_type: DataType):
+        self.data_type = data_type
+        self.filename = ""
+        self.not_valid_after: datetime.datetime
+        self.not_valid_before: datetime.datetime
+
+    def fill_validity_duration(self, x509_obj):
+        """Fill expiration_date field from a x509 object"""
+        # Certificate expires after "not_valid_after"
+        # Certificate is invalid before "not_valid_before"
+        if self.data_type == DataType.CRT:
+            self.not_valid_after = x509_obj.not_valid_after
+            self.not_valid_before = x509_obj.not_valid_before
+        # CertificateRevocationList expires after "next_update"
+        # CertificateRevocationList is invalid before "last_update"
+        elif self.data_type == DataType.CRL:
+            self.not_valid_after = x509_obj.next_update
+            self.not_valid_before = x509_obj.last_update
+        # CertificateSigningRequest is always valid.
+        elif self.data_type == DataType.CSR:
+            self.not_valid_after = datetime.datetime.max
+            self.not_valid_before = datetime.datetime.min
+        else:
+            raise ValueError("Unsupported file_type: {}".format(self.data_type))
+
+class X509Parser():
+    """A parser class to parse crt/crl/csr file or data in PEM/DER format."""
+    PEM_REGEX = br'-{5}BEGIN (?P<type>.*?)-{5}\n(?P<data>.*?)-{5}END (?P=type)-{5}\n'
+    PEM_TAG_REGEX = br'-{5}BEGIN (?P<type>.*?)-{5}\n'
+    PEM_TAGS = {
+        DataType.CRT: 'CERTIFICATE',
+        DataType.CRL: 'X509 CRL',
+        DataType.CSR: 'CERTIFICATE REQUEST'
+    }
+
+    def __init__(self, backends: dict):
+        self.backends = backends
+        self.__generate_parsers()
+
+    def __generate_parser(self, data_type: DataType):
+        """Parser generator for a specific DataType"""
+        tag = self.PEM_TAGS[data_type]
+        pem_loader = self.backends[data_type][DataFormat.PEM]
+        der_loader = self.backends[data_type][DataFormat.DER]
+        def wrapper(data: bytes):
+            pem_type = X509Parser.pem_data_type(data)
+            # It is in PEM format with target tag
+            if pem_type == tag:
+                return pem_loader(data)
+            # It is in PEM format without target tag
+            if pem_type:
+                return None
+            # It might be in DER format
+            try:
+                result = der_loader(data)
+            except ValueError:
+                result = None
+            return result
+        wrapper.__name__ = "{}.parser[{}]".format(type(self).__name__, tag)
+        return wrapper
+
+    def __generate_parsers(self):
+        """Generate parsers for all support DataType"""
+        self.parsers = {}
+        for data_type, _ in self.PEM_TAGS.items():
+            self.parsers[data_type] = self.__generate_parser(data_type)
+
+    def __getitem__(self, item):
+        return self.parsers[item]
+
+    @staticmethod
+    def pem_data_type(data: bytes) -> str:
+        """Get the tag from the data in PEM format
+
+        :param data: data to be checked in binary mode.
+        :return: PEM tag or "" when no tag detected.
+        """
+        m = re.search(X509Parser.PEM_TAG_REGEX, data)
+        if m is not None:
+            return m.group('type').decode('UTF-8')
+        else:
+            return ""
+
+class Auditor:
+    """A base class for audit."""
+    def __init__(self, verbose):
+        self.verbose = verbose
+        self.default_files = []
+        self.audit_data = []
+        self.parser = X509Parser({
+            DataType.CRT: {
+                DataFormat.PEM: x509.load_pem_x509_certificate,
+                DataFormat.DER: x509.load_der_x509_certificate
+            },
+            DataType.CRL: {
+                DataFormat.PEM: x509.load_pem_x509_crl,
+                DataFormat.DER: x509.load_der_x509_crl
+            },
+            DataType.CSR: {
+                DataFormat.PEM: x509.load_pem_x509_csr,
+                DataFormat.DER: x509.load_der_x509_csr
+            },
+        })
+
+    def error(self, *args):
+        #pylint: disable=no-self-use
+        print("Error: ", *args, file=sys.stderr)
+
+    def warn(self, *args):
+        if self.verbose:
+            print("Warn: ", *args, file=sys.stderr)
+
+    def parse_file(self, filename: str) -> typing.List[AuditData]:
+        """
+        Parse a list of AuditData from file.
+
+        :param filename: name of the file to parse.
+        :return list of AuditData parsed from the file.
+        """
+        with open(filename, 'rb') as f:
+            data = f.read()
+        result_list = []
+        result = self.parse_bytes(data)
+        if result is not None:
+            result.filename = filename
+            result_list.append(result)
+        return result_list
+
+    def parse_bytes(self, data: bytes):
+        """Parse AuditData from bytes."""
+        for data_type in list(DataType):
+            try:
+                result = self.parser[data_type](data)
+            except ValueError as val_error:
+                result = None
+                self.warn(val_error)
+            if result is not None:
+                audit_data = AuditData(data_type)
+                audit_data.fill_validity_duration(result)
+                return audit_data
+        return None
+
+    def walk_all(self, file_list):
+        """
+        Iterate over all the files in the list and get audit data.
+        """
+        if not file_list:
+            file_list = self.default_files
+        for filename in file_list:
+            data_list = self.parse_file(filename)
+            self.audit_data.extend(data_list)
+
+    def for_each(self, do, *args, **kwargs):
+        """
+        Sort the audit data and iterate over them.
+        """
+        if not isinstance(do, types.FunctionType):
+            return
+        for d in self.audit_data:
+            do(d, *args, **kwargs)
+
+    @staticmethod
+    def find_test_dir():
+        """Get the relative path for the MbedTLS test directory."""
+        if os.path.isdir('tests'):
+            tests_dir = 'tests'
+        elif os.path.isdir('suites'):
+            tests_dir = '.'
+        elif os.path.isdir('../suites'):
+            tests_dir = '..'
+        else:
+            raise Exception("Mbed TLS source tree not found")
+        return tests_dir
+
+class TestDataAuditor(Auditor):
+    """Class for auditing files in tests/data_files/"""
+    def __init__(self, verbose):
+        super().__init__(verbose)
+        self.default_files = self.collect_default_files()
+
+    def collect_default_files(self):
+        """collect all files in tests/data_files/"""
+        test_dir = self.find_test_dir()
+        test_data_folder = os.path.join(test_dir, 'data_files')
+        data_files = []
+        for (dir_path, _, file_names) in os.walk(test_data_folder):
+            data_files.extend(os.path.join(dir_path, file_name)
+                              for file_name in file_names)
+        return data_files
+
+
+def list_all(audit_data: AuditData):
+    print("{}\t{}\t{}\t{}".format(
+        audit_data.not_valid_before.isoformat(timespec='seconds'),
+        audit_data.not_valid_after.isoformat(timespec='seconds'),
+        audit_data.data_type.name,
+        audit_data.filename))
+
+def main():
+    """
+    Perform argument parsing.
+    """
+    parser = argparse.ArgumentParser(
+        description='Audit script for X509 crt/crl/csr files.'
+    )
+
+    parser.add_argument('-a', '--all',
+                        action='store_true',
+                        help='list the information of all files')
+    parser.add_argument('-v', '--verbose',
+                        action='store_true', dest='verbose',
+                        help='Show warnings')
+    parser.add_argument('-f', '--file', dest='file',
+                        help='file to audit (Debug only)',
+                        metavar='FILE')
+
+    args = parser.parse_args()
+
+    # start main routine
+    td_auditor = TestDataAuditor(args.verbose)
+
+    if args.file:
+        data_files = [args.file]
+    else:
+        data_files = td_auditor.default_files
+
+    td_auditor.walk_all(data_files)
+
+    if args.all:
+        td_auditor.for_each(list_all)
+
+    print("\nDone!\n")
+
+if __name__ == "__main__":
+    main()

From 45e32033db767f3213fa6099fd035a45a6eef237 Mon Sep 17 00:00:00 2001
From: Pengyu Lv <pengyu.lv@arm.com>
Date: Thu, 6 Apr 2023 14:33:41 +0800
Subject: [PATCH 02/20] cert_audit: Support audit on test suite data files

Signed-off-by: Pengyu Lv <pengyu.lv@arm.com>
---
 tests/scripts/audit-validity-dates.py | 43 ++++++++++++++++++++++++++-
 1 file changed, 42 insertions(+), 1 deletion(-)

diff --git a/tests/scripts/audit-validity-dates.py b/tests/scripts/audit-validity-dates.py
index 67d409665..0d1425b28 100755
--- a/tests/scripts/audit-validity-dates.py
+++ b/tests/scripts/audit-validity-dates.py
@@ -29,6 +29,7 @@ import typing
 import types
 import argparse
 import datetime
+import glob
 from enum import Enum
 
 from cryptography import x509
@@ -226,7 +227,7 @@ class TestDataAuditor(Auditor):
         self.default_files = self.collect_default_files()
 
     def collect_default_files(self):
-        """collect all files in tests/data_files/"""
+        """Collect all files in tests/data_files/"""
         test_dir = self.find_test_dir()
         test_data_folder = os.path.join(test_dir, 'data_files')
         data_files = []
@@ -235,6 +236,38 @@ class TestDataAuditor(Auditor):
                               for file_name in file_names)
         return data_files
 
+class SuiteDataAuditor(Auditor):
+    """Class for auditing files in tests/suites/*.data"""
+    def __init__(self, options):
+        super().__init__(options)
+        self.default_files = self.collect_default_files()
+
+    def collect_default_files(self):
+        """Collect all files in tests/suites/*.data"""
+        test_dir = self.find_test_dir()
+        suites_data_folder = os.path.join(test_dir, 'suites')
+        # collect all data files in tests/suites (114 in total)
+        data_files = glob.glob(os.path.join(suites_data_folder, '*.data'))
+        return data_files
+
+    def parse_file(self, filename: str):
+        """Parse AuditData from file."""
+        with open(filename, 'r') as f:
+            data = f.read()
+        audit_data_list = []
+        # extract hex strings from the data file.
+        hex_strings = re.findall(r'"(?P<data>[0-9a-fA-F]+)"', data)
+        for hex_str in hex_strings:
+            # We regard hex string with odd number length as invaild data.
+            if len(hex_str) & 1:
+                continue
+            bytes_data = bytes.fromhex(hex_str)
+            audit_data = self.parse_bytes(bytes_data)
+            if audit_data is None:
+                continue
+            audit_data.filename = filename
+            audit_data_list.append(audit_data)
+        return audit_data_list
 
 def list_all(audit_data: AuditData):
     print("{}\t{}\t{}\t{}".format(
@@ -265,16 +298,24 @@ def main():
 
     # start main routine
     td_auditor = TestDataAuditor(args.verbose)
+    sd_auditor = SuiteDataAuditor(args.verbose)
 
     if args.file:
         data_files = [args.file]
+        suite_data_files = [args.file]
     else:
         data_files = td_auditor.default_files
+        suite_data_files = sd_auditor.default_files
 
     td_auditor.walk_all(data_files)
+    # TODO: Improve the method for auditing test suite data files
+    #       It takes 6 times longer than td_auditor.walk_all(),
+    #       typically 0.827 s VS 0.147 s.
+    sd_auditor.walk_all(suite_data_files)
 
     if args.all:
         td_auditor.for_each(list_all)
+        sd_auditor.for_each(list_all)
 
     print("\nDone!\n")
 

From 30f2683d18606bc0501d2a9e4b29f1ee2e2741ac Mon Sep 17 00:00:00 2001
From: Pengyu Lv <pengyu.lv@arm.com>
Date: Fri, 7 Apr 2023 18:04:07 +0800
Subject: [PATCH 03/20] cert_audit: Parse more information from test suite data
 file

Signed-off-by: Pengyu Lv <pengyu.lv@arm.com>
---
 tests/scripts/audit-validity-dates.py | 126 ++++++++++++++++++++++----
 1 file changed, 107 insertions(+), 19 deletions(-)

diff --git a/tests/scripts/audit-validity-dates.py b/tests/scripts/audit-validity-dates.py
index 0d1425b28..5e22bfca9 100755
--- a/tests/scripts/audit-validity-dates.py
+++ b/tests/scripts/audit-validity-dates.py
@@ -34,6 +34,9 @@ from enum import Enum
 
 from cryptography import x509
 
+# reuse the function to parse *.data file in tests/suites/
+from generate_test_code import parse_test_data as parse_suite_data
+
 class DataType(Enum):
     CRT = 1 # Certificate
     CRL = 2 # Certificate Revocation List
@@ -129,6 +132,32 @@ class X509Parser():
         else:
             return ""
 
+    @staticmethod
+    def check_hex_string(hex_str: str) -> bool:
+        """Check if the hex string is possibly DER data."""
+        hex_len = len(hex_str)
+        # At least 6 hex char for 3 bytes: Type + Length + Content
+        if hex_len < 6:
+            return False
+        # Check if Type (1 byte) is SEQUENCE.
+        if hex_str[0:2] != '30':
+            return False
+        # Check LENGTH (1 byte) value
+        content_len = int(hex_str[2:4], base=16)
+        consumed = 4
+        if content_len in (128, 255):
+            # Indefinite or Reserved
+            return False
+        elif content_len > 127:
+            # Definite, Long
+            length_len = (content_len - 128) * 2
+            content_len = int(hex_str[consumed:consumed+length_len], base=16)
+            consumed += length_len
+        # Check LENGTH
+        if hex_len != content_len * 2 + consumed:
+            return False
+        return True
+
 class Auditor:
     """A base class for audit."""
     def __init__(self, verbose):
@@ -236,6 +265,64 @@ class TestDataAuditor(Auditor):
                               for file_name in file_names)
         return data_files
 
+class FileWrapper():
+    """
+    This a stub class of generate_test_code.FileWrapper.
+
+    This class reads the whole file to memory before iterating
+    over the lines.
+    """
+
+    def __init__(self, file_name):
+        """
+        Read the file and initialize the line number to 0.
+
+        :param file_name: File path to open.
+        """
+        with open(file_name, 'rb') as f:
+            self.buf = f.read()
+        self.buf_len = len(self.buf)
+        self._line_no = 0
+        self._line_start = 0
+
+    def __iter__(self):
+        """Make the class iterable."""
+        return self
+
+    def __next__(self):
+        """
+        This method for returning a line of the file per iteration.
+
+        :return: Line read from file.
+        """
+        # If we reach the end of the file.
+        if not self._line_start < self.buf_len:
+            raise StopIteration
+
+        line_end = self.buf.find(b'\n', self._line_start) + 1
+        if line_end > 0:
+            # Find the first LF as the end of the new line.
+            line = self.buf[self._line_start:line_end]
+            self._line_start = line_end
+            self._line_no += 1
+        else:
+            # No LF found. We are at the last line without LF.
+            line = self.buf[self._line_start:]
+            self._line_start = self.buf_len
+            self._line_no += 1
+
+        # Convert byte array to string with correct encoding and
+        # strip any whitespaces added in the decoding process.
+        return line.decode(sys.getdefaultencoding()).rstrip() + '\n'
+
+    def get_line_no(self):
+        """
+        Gives current line number.
+        """
+        return self._line_no
+
+    line_no = property(get_line_no)
+
 class SuiteDataAuditor(Auditor):
     """Class for auditing files in tests/suites/*.data"""
     def __init__(self, options):
@@ -246,27 +333,31 @@ class SuiteDataAuditor(Auditor):
         """Collect all files in tests/suites/*.data"""
         test_dir = self.find_test_dir()
         suites_data_folder = os.path.join(test_dir, 'suites')
-        # collect all data files in tests/suites (114 in total)
         data_files = glob.glob(os.path.join(suites_data_folder, '*.data'))
         return data_files
 
     def parse_file(self, filename: str):
-        """Parse AuditData from file."""
-        with open(filename, 'r') as f:
-            data = f.read()
+        """
+        Parse a list of AuditData from file.
+
+        :param filename: name of the file to parse.
+        :return list of AuditData parsed from the file.
+        """
         audit_data_list = []
-        # extract hex strings from the data file.
-        hex_strings = re.findall(r'"(?P<data>[0-9a-fA-F]+)"', data)
-        for hex_str in hex_strings:
-            # We regard hex string with odd number length as invaild data.
-            if len(hex_str) & 1:
-                continue
-            bytes_data = bytes.fromhex(hex_str)
-            audit_data = self.parse_bytes(bytes_data)
-            if audit_data is None:
-                continue
-            audit_data.filename = filename
-            audit_data_list.append(audit_data)
+        data_f = FileWrapper(filename)
+        for _, _, _, test_args in parse_suite_data(data_f):
+            for test_arg in test_args:
+                match = re.match(r'"(?P<data>[0-9a-fA-F]+)"', test_arg)
+                if not match:
+                    continue
+                if not X509Parser.check_hex_string(match.group('data')):
+                    continue
+                audit_data = self.parse_bytes(bytes.fromhex(match.group('data')))
+                if audit_data is None:
+                    continue
+                audit_data.filename = filename
+                audit_data_list.append(audit_data)
+
         return audit_data_list
 
 def list_all(audit_data: AuditData):
@@ -308,9 +399,6 @@ def main():
         suite_data_files = sd_auditor.default_files
 
     td_auditor.walk_all(data_files)
-    # TODO: Improve the method for auditing test suite data files
-    #       It takes 6 times longer than td_auditor.walk_all(),
-    #       typically 0.827 s VS 0.147 s.
     sd_auditor.walk_all(suite_data_files)
 
     if args.all:

From ebf011f43eefb4fba2f9ece9c3859e8474d5f484 Mon Sep 17 00:00:00 2001
From: Pengyu Lv <pengyu.lv@arm.com>
Date: Tue, 11 Apr 2023 13:39:31 +0800
Subject: [PATCH 04/20] cert_audit: Introduce not-[before|after] option

Signed-off-by: Pengyu Lv <pengyu.lv@arm.com>
---
 tests/scripts/audit-validity-dates.py | 37 ++++++++++++++++++---------
 1 file changed, 25 insertions(+), 12 deletions(-)

diff --git a/tests/scripts/audit-validity-dates.py b/tests/scripts/audit-validity-dates.py
index 5e22bfca9..85c0bd9dd 100755
--- a/tests/scripts/audit-validity-dates.py
+++ b/tests/scripts/audit-validity-dates.py
@@ -26,7 +26,6 @@ import os
 import sys
 import re
 import typing
-import types
 import argparse
 import datetime
 import glob
@@ -227,15 +226,6 @@ class Auditor:
             data_list = self.parse_file(filename)
             self.audit_data.extend(data_list)
 
-    def for_each(self, do, *args, **kwargs):
-        """
-        Sort the audit data and iterate over them.
-        """
-        if not isinstance(do, types.FunctionType):
-            return
-        for d in self.audit_data:
-            do(d, *args, **kwargs)
-
     @staticmethod
     def find_test_dir():
         """Get the relative path for the MbedTLS test directory."""
@@ -381,6 +371,12 @@ def main():
     parser.add_argument('-v', '--verbose',
                         action='store_true', dest='verbose',
                         help='Show warnings')
+    parser.add_argument('--not-before', dest='not_before',
+                        help='not valid before this date(UTC), YYYY-MM-DD',
+                        metavar='DATE')
+    parser.add_argument('--not-after', dest='not_after',
+                        help='not valid after this date(UTC), YYYY-MM-DD',
+                        metavar='DATE')
     parser.add_argument('-f', '--file', dest='file',
                         help='file to audit (Debug only)',
                         metavar='FILE')
@@ -398,12 +394,29 @@ def main():
         data_files = td_auditor.default_files
         suite_data_files = sd_auditor.default_files
 
+    if args.not_before:
+        not_before_date = datetime.datetime.fromisoformat(args.not_before)
+    else:
+        not_before_date = datetime.datetime.today()
+    if args.not_after:
+        not_after_date = datetime.datetime.fromisoformat(args.not_after)
+    else:
+        not_after_date = not_before_date
+
     td_auditor.walk_all(data_files)
     sd_auditor.walk_all(suite_data_files)
+    audit_results = td_auditor.audit_data + sd_auditor.audit_data
+
+    # we filter out the files whose validity duration covers the provide
+    # duration.
+    filter_func = lambda d: (not_before_date < d.not_valid_before) or \
+                            (d.not_valid_after < not_after_date)
 
     if args.all:
-        td_auditor.for_each(list_all)
-        sd_auditor.for_each(list_all)
+        filter_func = None
+
+    for d in filter(filter_func, audit_results):
+        list_all(d)
 
     print("\nDone!\n")
 

From cb8fc3275a96985373db400821185de139b20f93 Mon Sep 17 00:00:00 2001
From: Pengyu Lv <pengyu.lv@arm.com>
Date: Tue, 11 Apr 2023 15:05:29 +0800
Subject: [PATCH 05/20] cert_audit: Fill validity dates in AuditData
 constructor

Signed-off-by: Pengyu Lv <pengyu.lv@arm.com>
---
 tests/scripts/audit-validity-dates.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/tests/scripts/audit-validity-dates.py b/tests/scripts/audit-validity-dates.py
index 85c0bd9dd..472041e16 100755
--- a/tests/scripts/audit-validity-dates.py
+++ b/tests/scripts/audit-validity-dates.py
@@ -48,11 +48,10 @@ class DataFormat(Enum):
 class AuditData:
     """Store file, type and expiration date for audit."""
     #pylint: disable=too-few-public-methods
-    def __init__(self, data_type: DataType):
+    def __init__(self, data_type: DataType, x509_obj):
         self.data_type = data_type
         self.filename = ""
-        self.not_valid_after: datetime.datetime
-        self.not_valid_before: datetime.datetime
+        self.fill_validity_duration(x509_obj)
 
     def fill_validity_duration(self, x509_obj):
         """Fill expiration_date field from a x509 object"""
@@ -211,8 +210,7 @@ class Auditor:
                 result = None
                 self.warn(val_error)
             if result is not None:
-                audit_data = AuditData(data_type)
-                audit_data.fill_validity_duration(result)
+                audit_data = AuditData(data_type, result)
                 return audit_data
         return None
 

From 3179232211a39b27e341c21b1d1165773168234a Mon Sep 17 00:00:00 2001
From: Pengyu Lv <pengyu.lv@arm.com>
Date: Tue, 11 Apr 2023 16:30:54 +0800
Subject: [PATCH 06/20] cert_audit: Disable pylint error for importing
 cryptography

This is to make CI happy. The script requires cryptography
>= 35.0.0, which is only available for Python >= 3.6. But
both ubuntu-16.04 and Travis CI are using Python 3.5.x.

Signed-off-by: Pengyu Lv <pengyu.lv@arm.com>
---
 tests/scripts/audit-validity-dates.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/scripts/audit-validity-dates.py b/tests/scripts/audit-validity-dates.py
index 472041e16..3f1987030 100755
--- a/tests/scripts/audit-validity-dates.py
+++ b/tests/scripts/audit-validity-dates.py
@@ -31,7 +31,10 @@ import datetime
 import glob
 from enum import Enum
 
-from cryptography import x509
+# The script requires cryptography >= 35.0.0 which is only available
+# for Python >= 3.6. Disable the pylint error here until we were
+# using modern system on our CI.
+from cryptography import x509 #pylint: disable=import-error
 
 # reuse the function to parse *.data file in tests/suites/
 from generate_test_code import parse_test_data as parse_suite_data

From 57240958ed3f915421145bc7454598715577cefb Mon Sep 17 00:00:00 2001
From: Pengyu Lv <pengyu.lv@arm.com>
Date: Thu, 13 Apr 2023 14:42:37 +0800
Subject: [PATCH 07/20] cert_audit: Make FILE as positional argument

Make FILE a positional argument so that multiple files can be
passed to the script. This commit also contains some help
message improvements.

Signed-off-by: Pengyu Lv <pengyu.lv@arm.com>
---
 tests/scripts/audit-validity-dates.py | 32 +++++++++++++--------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/tests/scripts/audit-validity-dates.py b/tests/scripts/audit-validity-dates.py
index 3f1987030..577179d0b 100755
--- a/tests/scripts/audit-validity-dates.py
+++ b/tests/scripts/audit-validity-dates.py
@@ -15,11 +15,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""Audit validity date of X509 crt/crl/csr
+"""Audit validity date of X509 crt/crl/csr.
 
 This script is used to audit the validity date of crt/crl/csr used for testing.
-The files are in tests/data_files/ while some data are in test suites data in
-tests/suites/*.data files.
+It prints the information of X509 data whose validity duration does not cover
+the provided validity duration. The data are collected from tests/data_files/
+and tests/suites/*.data files by default.
 """
 
 import os
@@ -362,24 +363,23 @@ def main():
     """
     Perform argument parsing.
     """
-    parser = argparse.ArgumentParser(
-        description='Audit script for X509 crt/crl/csr files.'
-    )
+    parser = argparse.ArgumentParser(description=__doc__)
 
     parser.add_argument('-a', '--all',
                         action='store_true',
-                        help='list the information of all files')
+                        help='list the information of all the files')
     parser.add_argument('-v', '--verbose',
                         action='store_true', dest='verbose',
-                        help='Show warnings')
+                        help='show warnings')
     parser.add_argument('--not-before', dest='not_before',
-                        help='not valid before this date(UTC), YYYY-MM-DD',
+                        help=('not valid before this date (UTC, YYYY-MM-DD). '
+                              'Default: today'),
                         metavar='DATE')
     parser.add_argument('--not-after', dest='not_after',
-                        help='not valid after this date(UTC), YYYY-MM-DD',
+                        help=('not valid after this date (UTC, YYYY-MM-DD). '
+                              'Default: not-before'),
                         metavar='DATE')
-    parser.add_argument('-f', '--file', dest='file',
-                        help='file to audit (Debug only)',
+    parser.add_argument('files', nargs='*', help='files to audit',
                         metavar='FILE')
 
     args = parser.parse_args()
@@ -388,9 +388,9 @@ def main():
     td_auditor = TestDataAuditor(args.verbose)
     sd_auditor = SuiteDataAuditor(args.verbose)
 
-    if args.file:
-        data_files = [args.file]
-        suite_data_files = [args.file]
+    if args.files:
+        data_files = args.files
+        suite_data_files = args.files
     else:
         data_files = td_auditor.default_files
         suite_data_files = sd_auditor.default_files
@@ -408,7 +408,7 @@ def main():
     sd_auditor.walk_all(suite_data_files)
     audit_results = td_auditor.audit_data + sd_auditor.audit_data
 
-    # we filter out the files whose validity duration covers the provide
+    # we filter out the files whose validity duration covers the provided
     # duration.
     filter_func = lambda d: (not_before_date < d.not_valid_before) or \
                             (d.not_valid_after < not_after_date)

From 7725c1d2a9fe334926a83d3f81b444338f36628e Mon Sep 17 00:00:00 2001
From: Pengyu Lv <pengyu.lv@arm.com>
Date: Thu, 13 Apr 2023 15:55:30 +0800
Subject: [PATCH 08/20] cert_audit: Output line/argument number for *.data
 files

Signed-off-by: Pengyu Lv <pengyu.lv@arm.com>
---
 tests/scripts/audit-validity-dates.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/scripts/audit-validity-dates.py b/tests/scripts/audit-validity-dates.py
index 577179d0b..537cf40f0 100755
--- a/tests/scripts/audit-validity-dates.py
+++ b/tests/scripts/audit-validity-dates.py
@@ -338,7 +338,7 @@ class SuiteDataAuditor(Auditor):
         audit_data_list = []
         data_f = FileWrapper(filename)
         for _, _, _, test_args in parse_suite_data(data_f):
-            for test_arg in test_args:
+            for idx, test_arg in enumerate(test_args):
                 match = re.match(r'"(?P<data>[0-9a-fA-F]+)"', test_arg)
                 if not match:
                     continue
@@ -347,7 +347,9 @@ class SuiteDataAuditor(Auditor):
                 audit_data = self.parse_bytes(bytes.fromhex(match.group('data')))
                 if audit_data is None:
                     continue
-                audit_data.filename = filename
+                audit_data.filename = "{}:{}:{}".format(filename,
+                                                        data_f.line_no,
+                                                        idx + 1)
                 audit_data_list.append(audit_data)
 
         return audit_data_list

From f8e5e059c53b85d7eff848c382bec38453acd53e Mon Sep 17 00:00:00 2001
From: Pengyu Lv <pengyu.lv@arm.com>
Date: Tue, 18 Apr 2023 15:43:25 +0800
Subject: [PATCH 09/20] cert_audit: Improve documentation

This commit is a collection of documentation improvements in the
script:

  * Restore uppercase in the license header.
  * Reword the script description.
  * Reword the docstring of AuditData.fill_validity_duration
  * Rename AuditData.filename to *.location

Signed-off-by: Pengyu Lv <pengyu.lv@arm.com>
---
 tests/scripts/audit-validity-dates.py | 34 +++++++++++++--------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/tests/scripts/audit-validity-dates.py b/tests/scripts/audit-validity-dates.py
index 537cf40f0..9ab8806d6 100755
--- a/tests/scripts/audit-validity-dates.py
+++ b/tests/scripts/audit-validity-dates.py
@@ -1,11 +1,11 @@
 #!/usr/bin/env python3
 #
-# copyright the mbed tls contributors
-# spdx-license-identifier: apache-2.0
+# Copyright The Mbed TLS Contributors
+# SPDX-License-Identifier: Apache-2.0
 #
-# licensed under the apache license, version 2.0 (the "license"); you may
-# not use this file except in compliance with the license.
-# you may obtain a copy of the license at
+# Licensed under the Apache License, Version 2.0 (the "License"); you may
+# not use this file except in compliance with the License.
+# You may obtain a copy of the License at
 #
 # http://www.apache.org/licenses/LICENSE-2.0
 #
@@ -18,9 +18,9 @@
 """Audit validity date of X509 crt/crl/csr.
 
 This script is used to audit the validity date of crt/crl/csr used for testing.
-It prints the information of X509 data whose validity duration does not cover
-the provided validity duration. The data are collected from tests/data_files/
-and tests/suites/*.data files by default.
+It would print the information about X.509 data if the validity period of the
+X.509 data didn't cover the provided validity period. The data are collected
+from tests/data_files/ and tests/suites/*.data files by default.
 """
 
 import os
@@ -50,15 +50,15 @@ class DataFormat(Enum):
     DER = 2 # Distinguished Encoding Rules
 
 class AuditData:
-    """Store file, type and expiration date for audit."""
+    """Store data location, type and validity period of X.509 objects."""
     #pylint: disable=too-few-public-methods
     def __init__(self, data_type: DataType, x509_obj):
         self.data_type = data_type
-        self.filename = ""
+        self.location = ""
         self.fill_validity_duration(x509_obj)
 
     def fill_validity_duration(self, x509_obj):
-        """Fill expiration_date field from a x509 object"""
+        """Read validity period from an X.509 object."""
         # Certificate expires after "not_valid_after"
         # Certificate is invalid before "not_valid_before"
         if self.data_type == DataType.CRT:
@@ -76,7 +76,7 @@ class AuditData:
         else:
             raise ValueError("Unsupported file_type: {}".format(self.data_type))
 
-class X509Parser():
+class X509Parser:
     """A parser class to parse crt/crl/csr file or data in PEM/DER format."""
     PEM_REGEX = br'-{5}BEGIN (?P<type>.*?)-{5}\n(?P<data>.*?)-{5}END (?P=type)-{5}\n'
     PEM_TAG_REGEX = br'-{5}BEGIN (?P<type>.*?)-{5}\n'
@@ -201,7 +201,7 @@ class Auditor:
         result_list = []
         result = self.parse_bytes(data)
         if result is not None:
-            result.filename = filename
+            result.location = filename
             result_list.append(result)
         return result_list
 
@@ -347,9 +347,9 @@ class SuiteDataAuditor(Auditor):
                 audit_data = self.parse_bytes(bytes.fromhex(match.group('data')))
                 if audit_data is None:
                     continue
-                audit_data.filename = "{}:{}:{}".format(filename,
-                                                        data_f.line_no,
-                                                        idx + 1)
+                audit_data.location = "{}:{}:#{}".format(filename,
+                                                         data_f.line_no,
+                                                         idx + 1)
                 audit_data_list.append(audit_data)
 
         return audit_data_list
@@ -359,7 +359,7 @@ def list_all(audit_data: AuditData):
         audit_data.not_valid_before.isoformat(timespec='seconds'),
         audit_data.not_valid_after.isoformat(timespec='seconds'),
         audit_data.data_type.name,
-        audit_data.filename))
+        audit_data.location))
 
 def main():
     """

From 8e6794ad56066cfcbd4168ca3d2b92a1cedf2367 Mon Sep 17 00:00:00 2001
From: Pengyu Lv <pengyu.lv@arm.com>
Date: Tue, 18 Apr 2023 17:00:47 +0800
Subject: [PATCH 10/20] cert_audit: Code refinement

This commit is a collection of code refinements
from review comments.

Signed-off-by: Pengyu Lv <pengyu.lv@arm.com>
---
 tests/scripts/audit-validity-dates.py | 30 +++++++++++++++------------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/tests/scripts/audit-validity-dates.py b/tests/scripts/audit-validity-dates.py
index 9ab8806d6..575da12d0 100755
--- a/tests/scripts/audit-validity-dates.py
+++ b/tests/scripts/audit-validity-dates.py
@@ -86,7 +86,12 @@ class X509Parser:
         DataType.CSR: 'CERTIFICATE REQUEST'
     }
 
-    def __init__(self, backends: dict):
+    def __init__(self,
+                 backends:
+                 typing.Dict[DataType,
+                             typing.Dict[DataFormat,
+                                         typing.Callable[[bytes], object]]]) \
+    -> None:
         self.backends = backends
         self.__generate_parsers()
 
@@ -122,7 +127,7 @@ class X509Parser:
         return self.parsers[item]
 
     @staticmethod
-    def pem_data_type(data: bytes) -> str:
+    def pem_data_type(data: bytes) -> typing.Optional[str]:
         """Get the tag from the data in PEM format
 
         :param data: data to be checked in binary mode.
@@ -132,7 +137,7 @@ class X509Parser:
         if m is not None:
             return m.group('type').decode('UTF-8')
         else:
-            return ""
+            return None
 
     @staticmethod
     def check_hex_string(hex_str: str) -> bool:
@@ -165,6 +170,7 @@ class Auditor:
     def __init__(self, verbose):
         self.verbose = verbose
         self.default_files = []
+        # A list to store the parsed audit_data.
         self.audit_data = []
         self.parser = X509Parser({
             DataType.CRT: {
@@ -198,12 +204,12 @@ class Auditor:
         """
         with open(filename, 'rb') as f:
             data = f.read()
-        result_list = []
         result = self.parse_bytes(data)
         if result is not None:
             result.location = filename
-            result_list.append(result)
-        return result_list
+            return [result]
+        else:
+            return []
 
     def parse_bytes(self, data: bytes):
         """Parse AuditData from bytes."""
@@ -218,11 +224,11 @@ class Auditor:
                 return audit_data
         return None
 
-    def walk_all(self, file_list):
+    def walk_all(self, file_list: typing.Optional[typing.List[str]] = None):
         """
         Iterate over all the files in the list and get audit data.
         """
-        if not file_list:
+        if file_list is None:
             file_list = self.default_files
         for filename in file_list:
             data_list = self.parse_file(filename)
@@ -250,11 +256,9 @@ class TestDataAuditor(Auditor):
     def collect_default_files(self):
         """Collect all files in tests/data_files/"""
         test_dir = self.find_test_dir()
-        test_data_folder = os.path.join(test_dir, 'data_files')
-        data_files = []
-        for (dir_path, _, file_names) in os.walk(test_data_folder):
-            data_files.extend(os.path.join(dir_path, file_name)
-                              for file_name in file_names)
+        test_data_glob = os.path.join(test_dir, 'data_files/**')
+        data_files = [f for f in glob.glob(test_data_glob, recursive=True)
+                      if os.path.isfile(f)]
         return data_files
 
 class FileWrapper():

From 7a344dde0f514f06053e6fdffd6ee589d6f498e3 Mon Sep 17 00:00:00 2001
From: Pengyu Lv <pengyu.lv@arm.com>
Date: Wed, 19 Apr 2023 15:03:20 +0800
Subject: [PATCH 11/20] New implementation for generate_test_code.FileWrapper

FileWrapper now wraps a buffered binary file object instead of
subclassing io.FileIO, so reading lines benefits from buffered I/O.

Signed-off-by: Pengyu Lv <pengyu.lv@arm.com>
---
 tests/scripts/generate_test_code.py | 49 ++++++++++++++++++-----------
 1 file changed, 31 insertions(+), 18 deletions(-)

diff --git a/tests/scripts/generate_test_code.py b/tests/scripts/generate_test_code.py
index f19d30b61..347100dbe 100755
--- a/tests/scripts/generate_test_code.py
+++ b/tests/scripts/generate_test_code.py
@@ -163,7 +163,6 @@ __MBEDTLS_TEST_TEMPLATE__PLATFORM_CODE
 """
 
 
-import io
 import os
 import re
 import sys
@@ -208,43 +207,57 @@ class GeneratorInputError(Exception):
     pass
 
 
-class FileWrapper(io.FileIO):
+class FileWrapper:
     """
-    This class extends built-in io.FileIO class with attribute line_no,
+    This class extends the file object with attribute line_no,
     that indicates line number for the line that is read.
     """
 
-    def __init__(self, file_name):
+    def __init__(self, file_name) -> None:
         """
-        Instantiate the base class and initialize the line number to 0.
+        Instantiate the file object and initialize the line number to 0.
 
         :param file_name: File path to open.
         """
-        super().__init__(file_name, 'r')
+        # private mix-in file object
+        self._f = open(file_name, 'rb')
         self._line_no = 0
 
+    def __iter__(self):
+        return self
+
     def __next__(self):
         """
-        This method overrides base class's __next__ method and extends it
-        method to count the line numbers as each line is read.
+        This method makes FileWrapper iterable.
+        It counts the line numbers as each line is read.
 
         :return: Line read from file.
         """
-        line = super().__next__()
-        if line is not None:
-            self._line_no += 1
-            # Convert byte array to string with correct encoding and
-            # strip any whitespaces added in the decoding process.
-            return line.decode(sys.getdefaultencoding()).rstrip() + '\n'
-        return None
+        line = self._f.__next__()
+        self._line_no += 1
+        # Convert byte array to string with correct encoding and
+        # strip any whitespaces added in the decoding process.
+        return line.decode(sys.getdefaultencoding()).rstrip()+ '\n'
 
-    def get_line_no(self):
+    def __enter__(self):
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self._f.__exit__(exc_type, exc_val, exc_tb)
+
+    @property
+    def line_no(self):
         """
-        Gives current line number.
+        Property that indicates line number for the line that is read.
         """
         return self._line_no
 
-    line_no = property(get_line_no)
+    @property
+    def name(self):
+        """
+        Property that indicates name of the file that is read.
+        """
+        return self._f.name
 
 
 def split_dep(dep):

From ad30679d9eef4c2c9b761bb12144f158ec120309 Mon Sep 17 00:00:00 2001
From: Pengyu Lv <pengyu.lv@arm.com>
Date: Wed, 19 Apr 2023 15:07:03 +0800
Subject: [PATCH 12/20] cert_audit: Reuse generate_test_code.FileWrapper

Signed-off-by: Pengyu Lv <pengyu.lv@arm.com>
---
 tests/scripts/audit-validity-dates.py | 59 +--------------------------
 1 file changed, 1 insertion(+), 58 deletions(-)

diff --git a/tests/scripts/audit-validity-dates.py b/tests/scripts/audit-validity-dates.py
index 575da12d0..89a6dd4f5 100755
--- a/tests/scripts/audit-validity-dates.py
+++ b/tests/scripts/audit-validity-dates.py
@@ -39,6 +39,7 @@ from cryptography import x509 #pylint: disable=import-error
 
 # reuse the function to parse *.data file in tests/suites/
 from generate_test_code import parse_test_data as parse_suite_data
+from generate_test_code import FileWrapper
 
 class DataType(Enum):
     CRT = 1 # Certificate
@@ -261,64 +262,6 @@ class TestDataAuditor(Auditor):
                       if os.path.isfile(f)]
         return data_files
 
-class FileWrapper():
-    """
-    This a stub class of generate_test_code.FileWrapper.
-
-    This class reads the whole file to memory before iterating
-    over the lines.
-    """
-
-    def __init__(self, file_name):
-        """
-        Read the file and initialize the line number to 0.
-
-        :param file_name: File path to open.
-        """
-        with open(file_name, 'rb') as f:
-            self.buf = f.read()
-        self.buf_len = len(self.buf)
-        self._line_no = 0
-        self._line_start = 0
-
-    def __iter__(self):
-        """Make the class iterable."""
-        return self
-
-    def __next__(self):
-        """
-        This method for returning a line of the file per iteration.
-
-        :return: Line read from file.
-        """
-        # If we reach the end of the file.
-        if not self._line_start < self.buf_len:
-            raise StopIteration
-
-        line_end = self.buf.find(b'\n', self._line_start) + 1
-        if line_end > 0:
-            # Find the first LF as the end of the new line.
-            line = self.buf[self._line_start:line_end]
-            self._line_start = line_end
-            self._line_no += 1
-        else:
-            # No LF found. We are at the last line without LF.
-            line = self.buf[self._line_start:]
-            self._line_start = self.buf_len
-            self._line_no += 1
-
-        # Convert byte array to string with correct encoding and
-        # strip any whitespaces added in the decoding process.
-        return line.decode(sys.getdefaultencoding()).rstrip() + '\n'
-
-    def get_line_no(self):
-        """
-        Gives current line number.
-        """
-        return self._line_no
-
-    line_no = property(get_line_no)
-
 class SuiteDataAuditor(Auditor):
     """Class for auditing files in tests/suites/*.data"""
     def __init__(self, options):

From fcda6d4f51cc2c67c8567d427a8c52bafda7b3a7 Mon Sep 17 00:00:00 2001
From: Pengyu Lv <pengyu.lv@arm.com>
Date: Fri, 21 Apr 2023 11:04:07 +0800
Subject: [PATCH 13/20] cert_audit: Enable logging module

Signed-off-by: Pengyu Lv <pengyu.lv@arm.com>
---
 tests/scripts/audit-validity-dates.py | 61 ++++++++++++++++++++-------
 1 file changed, 46 insertions(+), 15 deletions(-)

diff --git a/tests/scripts/audit-validity-dates.py b/tests/scripts/audit-validity-dates.py
index 89a6dd4f5..400066840 100755
--- a/tests/scripts/audit-validity-dates.py
+++ b/tests/scripts/audit-validity-dates.py
@@ -30,6 +30,7 @@ import typing
 import argparse
 import datetime
 import glob
+import logging
 from enum import Enum
 
 # The script requires cryptography >= 35.0.0 which is only available
@@ -168,8 +169,8 @@ class X509Parser:
 
 class Auditor:
     """A base class for audit."""
-    def __init__(self, verbose):
-        self.verbose = verbose
+    def __init__(self, logger):
+        self.logger = logger
         self.default_files = []
         # A list to store the parsed audit_data.
         self.audit_data = []
@@ -188,14 +189,6 @@ class Auditor:
             },
         })
 
-    def error(self, *args):
-        #pylint: disable=no-self-use
-        print("Error: ", *args, file=sys.stderr)
-
-    def warn(self, *args):
-        if self.verbose:
-            print("Warn: ", *args, file=sys.stderr)
-
     def parse_file(self, filename: str) -> typing.List[AuditData]:
         """
         Parse a list of AuditData from file.
@@ -219,7 +212,7 @@ class Auditor:
                 result = self.parser[data_type](data)
             except ValueError as val_error:
                 result = None
-                self.warn(val_error)
+                self.logger.warning(val_error)
             if result is not None:
                 audit_data = AuditData(data_type, result)
                 return audit_data
@@ -308,6 +301,39 @@ def list_all(audit_data: AuditData):
         audit_data.data_type.name,
         audit_data.location))
 
+
+def configure_logger(logger: logging.Logger) -> None:
+    """
+    Configure the logging.Logger instance so that:
+        - Format is set to "[%(levelname)s]: %(message)s".
+        - loglevel >= WARNING are printed to stderr.
+        - loglevel <  WARNING are printed to stdout.
+    """
+    class MaxLevelFilter(logging.Filter):
+        # pylint: disable=too-few-public-methods
+        def __init__(self, max_level, name=''):
+            super().__init__(name)
+            self.max_level = max_level
+
+        def filter(self, record: logging.LogRecord) -> bool:
+            return record.levelno <= self.max_level
+
+    log_formatter = logging.Formatter("[%(levelname)s]: %(message)s")
+
+    # set loglevel >= WARNING to be printed to stderr
+    stderr_hdlr = logging.StreamHandler(sys.stderr)
+    stderr_hdlr.setLevel(logging.WARNING)
+    stderr_hdlr.setFormatter(log_formatter)
+
+    # set loglevel <= INFO to be printed to stdout
+    stdout_hdlr = logging.StreamHandler(sys.stdout)
+    stdout_hdlr.addFilter(MaxLevelFilter(logging.INFO))
+    stdout_hdlr.setFormatter(log_formatter)
+
+    logger.addHandler(stderr_hdlr)
+    logger.addHandler(stdout_hdlr)
+
+
 def main():
     """
     Perform argument parsing.
@@ -319,7 +345,7 @@ def main():
                         help='list the information of all the files')
     parser.add_argument('-v', '--verbose',
                         action='store_true', dest='verbose',
-                        help='show warnings')
+                        help='show logs')
     parser.add_argument('--not-before', dest='not_before',
                         help=('not valid before this date (UTC, YYYY-MM-DD). '
                               'Default: today'),
@@ -334,8 +360,13 @@ def main():
     args = parser.parse_args()
 
     # start main routine
-    td_auditor = TestDataAuditor(args.verbose)
-    sd_auditor = SuiteDataAuditor(args.verbose)
+    # setup logger
+    logger = logging.getLogger()
+    configure_logger(logger)
+    logger.setLevel(logging.DEBUG if args.verbose else logging.ERROR)
+
+    td_auditor = TestDataAuditor(logger)
+    sd_auditor = SuiteDataAuditor(logger)
 
     if args.files:
         data_files = args.files
@@ -368,7 +399,7 @@ def main():
     for d in filter(filter_func, audit_results):
         list_all(d)
 
-    print("\nDone!\n")
+    logger.debug("Done!")
 
 if __name__ == "__main__":
     main()

From a228cbceccf35ccb6de1b4946b5e211936b3b98d Mon Sep 17 00:00:00 2001
From: Pengyu Lv <pengyu.lv@arm.com>
Date: Fri, 21 Apr 2023 11:59:25 +0800
Subject: [PATCH 14/20] cert_audit: Add data-files and suite-data-files options

This commit adds the '--data-files' and '--suite-data-files'
options, replacing the positional 'files' argument, so that
the names of the two types of files can be passed separately.
Additionally, the commit improves the documentation in the script.

Signed-off-by: Pengyu Lv <pengyu.lv@arm.com>
---
 tests/scripts/audit-validity-dates.py | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/tests/scripts/audit-validity-dates.py b/tests/scripts/audit-validity-dates.py
index 400066840..d74c6f826 100755
--- a/tests/scripts/audit-validity-dates.py
+++ b/tests/scripts/audit-validity-dates.py
@@ -171,9 +171,9 @@ class Auditor:
     """A base class for audit."""
     def __init__(self, logger):
         self.logger = logger
-        self.default_files = []
+        self.default_files = [] # type: typing.List[str]
         # A list to store the parsed audit_data.
-        self.audit_data = []
+        self.audit_data = [] # type: typing.List[AuditData]
         self.parser = X509Parser({
             DataType.CRT: {
                 DataFormat.PEM: x509.load_pem_x509_certificate,
@@ -354,7 +354,11 @@ def main():
                         help=('not valid after this date (UTC, YYYY-MM-DD). '
                               'Default: not-before'),
                         metavar='DATE')
-    parser.add_argument('files', nargs='*', help='files to audit',
+    parser.add_argument('--data-files', action='append', nargs='*',
+                        help='data files to audit',
+                        metavar='FILE')
+    parser.add_argument('--suite-data-files', action='append', nargs='*',
+                        help='suite data files to audit',
                         metavar='FILE')
 
     args = parser.parse_args()
@@ -368,22 +372,29 @@ def main():
     td_auditor = TestDataAuditor(logger)
     sd_auditor = SuiteDataAuditor(logger)
 
-    if args.files:
-        data_files = args.files
-        suite_data_files = args.files
-    else:
+    data_files = []
+    suite_data_files = []
+    if args.data_files is None and args.suite_data_files is None:
         data_files = td_auditor.default_files
         suite_data_files = sd_auditor.default_files
+    else:
+        if args.data_files is not None:
+            data_files = [x for l in args.data_files for x in l]
+        if args.suite_data_files is not None:
+            suite_data_files = [x for l in args.suite_data_files for x in l]
 
+    # validity period start date
     if args.not_before:
         not_before_date = datetime.datetime.fromisoformat(args.not_before)
     else:
         not_before_date = datetime.datetime.today()
+    # validity period end date
     if args.not_after:
         not_after_date = datetime.datetime.fromisoformat(args.not_after)
     else:
         not_after_date = not_before_date
 
+    # go through all the files
     td_auditor.walk_all(data_files)
     sd_auditor.walk_all(suite_data_files)
     audit_results = td_auditor.audit_data + sd_auditor.audit_data
@@ -396,6 +407,7 @@ def main():
     if args.all:
         filter_func = None
 
+    # filter and output the results
     for d in filter(filter_func, audit_results):
         list_all(d)
 

From 2d487217cd380c202ad002d5548d5d57391fb3ae Mon Sep 17 00:00:00 2001
From: Pengyu Lv <pengyu.lv@arm.com>
Date: Fri, 21 Apr 2023 12:41:24 +0800
Subject: [PATCH 15/20] cert_audit: Improve the method to find tests folder

Signed-off-by: Pengyu Lv <pengyu.lv@arm.com>
---
 tests/scripts/audit-validity-dates.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/tests/scripts/audit-validity-dates.py b/tests/scripts/audit-validity-dates.py
index d74c6f826..09559dc98 100755
--- a/tests/scripts/audit-validity-dates.py
+++ b/tests/scripts/audit-validity-dates.py
@@ -42,15 +42,20 @@ from cryptography import x509 #pylint: disable=import-error
 from generate_test_code import parse_test_data as parse_suite_data
 from generate_test_code import FileWrapper
 
+import scripts_path # pylint: disable=unused-import
+from mbedtls_dev import build_tree
+
 class DataType(Enum):
     CRT = 1 # Certificate
     CRL = 2 # Certificate Revocation List
     CSR = 3 # Certificate Signing Request
 
+
 class DataFormat(Enum):
     PEM = 1 # Privacy-Enhanced Mail
     DER = 2 # Distinguished Encoding Rules
 
+
 class AuditData:
     """Store data location, type and validity period of X.509 objects."""
     #pylint: disable=too-few-public-methods
@@ -78,6 +83,7 @@ class AuditData:
         else:
             raise ValueError("Unsupported file_type: {}".format(self.data_type))
 
+
 class X509Parser:
     """A parser class to parse crt/crl/csr file or data in PEM/DER format."""
     PEM_REGEX = br'-{5}BEGIN (?P<type>.*?)-{5}\n(?P<data>.*?)-{5}END (?P=type)-{5}\n'
@@ -167,6 +173,7 @@ class X509Parser:
             return False
         return True
 
+
 class Auditor:
     """A base class for audit."""
     def __init__(self, logger):
@@ -231,15 +238,8 @@ class Auditor:
     @staticmethod
     def find_test_dir():
         """Get the relative path for the MbedTLS test directory."""
-        if os.path.isdir('tests'):
-            tests_dir = 'tests'
-        elif os.path.isdir('suites'):
-            tests_dir = '.'
-        elif os.path.isdir('../suites'):
-            tests_dir = '..'
-        else:
-            raise Exception("Mbed TLS source tree not found")
-        return tests_dir
+        return os.path.relpath(build_tree.guess_mbedtls_root() + '/tests')
+
 
 class TestDataAuditor(Auditor):
     """Class for auditing files in tests/data_files/"""
@@ -255,6 +255,7 @@ class TestDataAuditor(Auditor):
                       if os.path.isfile(f)]
         return data_files
 
+
 class SuiteDataAuditor(Auditor):
     """Class for auditing files in tests/suites/*.data"""
     def __init__(self, options):
@@ -294,6 +295,7 @@ class SuiteDataAuditor(Auditor):
 
         return audit_data_list
 
+
 def list_all(audit_data: AuditData):
     print("{}\t{}\t{}\t{}".format(
         audit_data.not_valid_before.isoformat(timespec='seconds'),

From 28fe957239904aae39d6680f1d8db054d7e31ae6 Mon Sep 17 00:00:00 2001
From: Pengyu Lv <pengyu.lv@arm.com>
Date: Sun, 23 Apr 2023 13:56:25 +0800
Subject: [PATCH 16/20] cert_audit: Add simple parser of suite data file

Signed-off-by: Pengyu Lv <pengyu.lv@arm.com>
---
 tests/scripts/audit-validity-dates.py | 29 ++++++++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/tests/scripts/audit-validity-dates.py b/tests/scripts/audit-validity-dates.py
index 09559dc98..ea6795904 100755
--- a/tests/scripts/audit-validity-dates.py
+++ b/tests/scripts/audit-validity-dates.py
@@ -38,8 +38,6 @@ from enum import Enum
 # using modern system on our CI.
 from cryptography import x509 #pylint: disable=import-error
 
-# reuse the function to parse *.data file in tests/suites/
-from generate_test_code import parse_test_data as parse_suite_data
 from generate_test_code import FileWrapper
 
 import scripts_path # pylint: disable=unused-import
@@ -256,6 +254,31 @@ class TestDataAuditor(Auditor):
         return data_files
 
 
+def parse_suite_data(data_f):
+    """
+    Parses .data file for test arguments that possibly contain
+    valid X.509 data. If you need a more precise parser, please
+    use generate_test_code.parse_test_data instead.
+
+    :param data_f: file object of the data file.
+    :return: Generator that yields test function argument list.
+    """
+    for line in data_f:
+        line = line.strip()
+        # Skip comments
+        if line.startswith('#'):
+            continue
+
+        # Only lines starting with a word and holding a quoted argument match
+        match = re.search(r'\A\w+(.*:)?\"', line)
+        if match:
+            # Split on unescaped ':', drop empty parts and the leading field
+            parts = re.split(r'(?<!\\):', line)
+            parts = [x for x in parts if x]
+            args = parts[1:]
+            yield args
+
+
 class SuiteDataAuditor(Auditor):
     """Class for auditing files in tests/suites/*.data"""
     def __init__(self, options):
@@ -278,7 +301,7 @@ class SuiteDataAuditor(Auditor):
         """
         audit_data_list = []
         data_f = FileWrapper(filename)
-        for _, _, _, test_args in parse_suite_data(data_f):
+        for test_args in parse_suite_data(data_f):
             for idx, test_arg in enumerate(test_args):
                 match = re.match(r'"(?P<data>[0-9a-fA-F]+)"', test_arg)
                 if not match:

From c34b9ac18cdfa8088e34a7be69dfd4b6a57322b8 Mon Sep 17 00:00:00 2001
From: Pengyu Lv <pengyu.lv@arm.com>
Date: Sun, 23 Apr 2023 14:51:18 +0800
Subject: [PATCH 17/20] cert_audit: Clarify the abstraction of Auditor

Signed-off-by: Pengyu Lv <pengyu.lv@arm.com>
---
 tests/scripts/audit-validity-dates.py | 64 ++++++++++++++++++---------
 1 file changed, 43 insertions(+), 21 deletions(-)

diff --git a/tests/scripts/audit-validity-dates.py b/tests/scripts/audit-validity-dates.py
index ea6795904..1517babb8 100755
--- a/tests/scripts/audit-validity-dates.py
+++ b/tests/scripts/audit-validity-dates.py
@@ -173,10 +173,25 @@ class X509Parser:
 
 
 class Auditor:
-    """A base class for audit."""
+    """
+    A base class that uses X509Parser to parse files to a list of AuditData.
+
+    A subclass must implement the following methods:
+      - collect_default_files: Return a list of file names that are used by
+        default for parsing (auditing). The list will be stored in
+        Auditor.default_files.
+      - parse_file: Method that parses a single file to a list of AuditData.
+
+    A subclass may override the following methods:
+      - parse_bytes: By default, it parses `bytes` that contains only one valid
+        X.509 data (DER/PEM format) to an AuditData object.
+      - walk_all: By default, it iterates over all the files in the provided
+        file name list, calls `parse_file` for each file and stores the results
+        by extending Auditor.audit_data.
+    """
     def __init__(self, logger):
         self.logger = logger
-        self.default_files = [] # type: typing.List[str]
+        self.default_files = self.collect_default_files()
         # A list to store the parsed audit_data.
         self.audit_data = [] # type: typing.List[AuditData]
         self.parser = X509Parser({
@@ -194,6 +209,10 @@ class Auditor:
             },
         })
 
+    def collect_default_files(self) -> typing.List[str]:
+        """Collect the default files for parsing."""
+        raise NotImplementedError
+
     def parse_file(self, filename: str) -> typing.List[AuditData]:
         """
         Parse a list of AuditData from file.
@@ -201,14 +220,7 @@ class Auditor:
         :param filename: name of the file to parse.
         :return list of AuditData parsed from the file.
         """
-        with open(filename, 'rb') as f:
-            data = f.read()
-        result = self.parse_bytes(data)
-        if result is not None:
-            result.location = filename
-            return [result]
-        else:
-            return []
+        raise NotImplementedError
 
     def parse_bytes(self, data: bytes):
         """Parse AuditData from bytes."""
@@ -240,19 +252,32 @@ class Auditor:
 
 
 class TestDataAuditor(Auditor):
-    """Class for auditing files in tests/data_files/"""
-    def __init__(self, verbose):
-        super().__init__(verbose)
-        self.default_files = self.collect_default_files()
+    """Class for auditing files in `tests/data_files/`"""
 
     def collect_default_files(self):
-        """Collect all files in tests/data_files/"""
+        """Collect all files in `tests/data_files/`"""
         test_dir = self.find_test_dir()
         test_data_glob = os.path.join(test_dir, 'data_files/**')
         data_files = [f for f in glob.glob(test_data_glob, recursive=True)
                       if os.path.isfile(f)]
         return data_files
 
+    def parse_file(self, filename: str) -> typing.List[AuditData]:
+        """
+        Parse a list of AuditData from data file.
+
+        :param filename: name of the file to parse.
+        :return list of AuditData parsed from the file.
+        """
+        with open(filename, 'rb') as f:
+            data = f.read()
+        result = self.parse_bytes(data)
+        if result is not None:
+            result.location = filename
+            return [result]
+        else:
+            return []
+
 
 def parse_suite_data(data_f):
     """
@@ -280,13 +305,10 @@ def parse_suite_data(data_f):
 
 
 class SuiteDataAuditor(Auditor):
-    """Class for auditing files in tests/suites/*.data"""
-    def __init__(self, options):
-        super().__init__(options)
-        self.default_files = self.collect_default_files()
+    """Class for auditing files in `tests/suites/*.data`"""
 
     def collect_default_files(self):
-        """Collect all files in tests/suites/*.data"""
+        """Collect all files in `tests/suites/*.data`"""
         test_dir = self.find_test_dir()
         suites_data_folder = os.path.join(test_dir, 'suites')
         data_files = glob.glob(os.path.join(suites_data_folder, '*.data'))
@@ -294,7 +316,7 @@ class SuiteDataAuditor(Auditor):
 
     def parse_file(self, filename: str):
         """
-        Parse a list of AuditData from file.
+        Parse a list of AuditData from test suite data file.
 
         :param filename: name of the file to parse.
         :return list of AuditData parsed from the file.

From 1381598aa3c0471cff3ef183dad66a561028e8ad Mon Sep 17 00:00:00 2001
From: Pengyu Lv <pengyu.lv@arm.com>
Date: Tue, 25 Apr 2023 14:55:38 +0800
Subject: [PATCH 18/20] cert_audit: Check the version of cryptography

The script requires cryptography >= 35.0.0, so check the
version when the script is loaded and raise an error with a
meaningful message when the installed package is too
old.

Signed-off-by: Pengyu Lv <pengyu.lv@arm.com>
---
 scripts/ci.requirements.txt           |  5 +++++
 tests/scripts/audit-validity-dates.py | 13 ++++++++++---
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/scripts/ci.requirements.txt b/scripts/ci.requirements.txt
index 1ad983fa9..ac9c25acf 100644
--- a/scripts/ci.requirements.txt
+++ b/scripts/ci.requirements.txt
@@ -10,3 +10,8 @@ pylint == 2.4.4
 # Use the earliest version of mypy that works with our code base.
 # See https://github.com/Mbed-TLS/mbedtls/pull/3953 .
 mypy >= 0.780
+
+# Install cryptography to avoid import-error reported by pylint.
+# What we really need is cryptography >= 35.0.0, which is only
+# available for Python >= 3.6.
+cryptography # >= 35.0.0
diff --git a/tests/scripts/audit-validity-dates.py b/tests/scripts/audit-validity-dates.py
index 1517babb8..594777408 100755
--- a/tests/scripts/audit-validity-dates.py
+++ b/tests/scripts/audit-validity-dates.py
@@ -34,15 +34,21 @@ import logging
 from enum import Enum
 
 # The script requires cryptography >= 35.0.0 which is only available
-# for Python >= 3.6. Disable the pylint error here until we were
-# using modern system on our CI.
-from cryptography import x509 #pylint: disable=import-error
+# for Python >= 3.6.
+import cryptography
+from cryptography import x509
 
 from generate_test_code import FileWrapper
 
 import scripts_path # pylint: disable=unused-import
 from mbedtls_dev import build_tree
 
+def check_cryptography_version():
+    match = re.match(r'^[0-9]+', cryptography.__version__)
+    if match is None or int(match[0]) < 35:
+        raise Exception("audit-validity-dates requires cryptography >= 35.0.0"
+                        + " ({} is too old)".format(cryptography.__version__))
+
 class DataType(Enum):
     CRT = 1 # Certificate
     CRL = 2 # Certificate Revocation List
@@ -460,5 +466,6 @@ def main():
 
     logger.debug("Done!")
 
+check_cryptography_version()
 if __name__ == "__main__":
     main()

From 1d4cc917cea1abc710e96465e4e6aa7f6296c738 Mon Sep 17 00:00:00 2001
From: Pengyu Lv <pengyu.lv@arm.com>
Date: Tue, 25 Apr 2023 15:17:19 +0800
Subject: [PATCH 19/20] cert_audit: Reword the options and their descriptions

Signed-off-by: Pengyu Lv <pengyu.lv@arm.com>
---
 tests/scripts/audit-validity-dates.py | 30 +++++++++++++--------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/tests/scripts/audit-validity-dates.py b/tests/scripts/audit-validity-dates.py
index 594777408..1ccfc2188 100755
--- a/tests/scripts/audit-validity-dates.py
+++ b/tests/scripts/audit-validity-dates.py
@@ -18,8 +18,8 @@
 """Audit validity date of X509 crt/crl/csr.
 
 This script is used to audit the validity date of crt/crl/csr used for testing.
-It would print the information about X.509 data if the validity period of the
-X.509 data didn't cover the provided validity period. The data are collected
+It prints the information about X.509 objects excluding the objects that
+are valid throughout the desired validity period. The data are collected
 from tests/data_files/ and tests/suites/*.data files by default.
 """
 
@@ -399,13 +399,13 @@ def main():
     parser.add_argument('-v', '--verbose',
                         action='store_true', dest='verbose',
                         help='show logs')
-    parser.add_argument('--not-before', dest='not_before',
-                        help=('not valid before this date (UTC, YYYY-MM-DD). '
+    parser.add_argument('--from', dest='start_date',
+                        help=('Start of desired validity period (UTC, YYYY-MM-DD). '
                               'Default: today'),
                         metavar='DATE')
-    parser.add_argument('--not-after', dest='not_after',
-                        help=('not valid after this date (UTC, YYYY-MM-DD). '
-                              'Default: not-before'),
+    parser.add_argument('--to', dest='end_date',
+                        help=('End of desired validity period (UTC, YYYY-MM-DD). '
+                              'Default: --from'),
                         metavar='DATE')
     parser.add_argument('--data-files', action='append', nargs='*',
                         help='data files to audit',
@@ -437,15 +437,15 @@ def main():
             suite_data_files = [x for l in args.suite_data_files for x in l]
 
     # validity period start date
-    if args.not_before:
-        not_before_date = datetime.datetime.fromisoformat(args.not_before)
+    if args.start_date:
+        start_date = datetime.datetime.fromisoformat(args.start_date)
     else:
-        not_before_date = datetime.datetime.today()
+        start_date = datetime.datetime.today()
     # validity period end date
-    if args.not_after:
-        not_after_date = datetime.datetime.fromisoformat(args.not_after)
+    if args.end_date:
+        end_date = datetime.datetime.fromisoformat(args.end_date)
     else:
-        not_after_date = not_before_date
+        end_date = start_date
 
     # go through all the files
     td_auditor.walk_all(data_files)
@@ -454,8 +454,8 @@ def main():
 
     # we filter out the files whose validity duration covers the provided
     # duration.
-    filter_func = lambda d: (not_before_date < d.not_valid_before) or \
-                            (d.not_valid_after < not_after_date)
+    filter_func = lambda d: (start_date < d.not_valid_before) or \
+                            (d.not_valid_after < end_date)
 
     if args.all:
         filter_func = None

From eb2c39ed2bc6a126ae7bdb6eab0457ebd6a32cfc Mon Sep 17 00:00:00 2001
From: Pengyu Lv <pengyu.lv@arm.com>
Date: Tue, 9 May 2023 02:15:58 +0000
Subject: [PATCH 20/20] Install cryptography only on linux platform

Signed-off-by: Pengyu Lv <pengyu.lv@arm.com>
---
 scripts/ci.requirements.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/ci.requirements.txt b/scripts/ci.requirements.txt
index ac9c25acf..3ddc41705 100644
--- a/scripts/ci.requirements.txt
+++ b/scripts/ci.requirements.txt
@@ -14,4 +14,5 @@ mypy >= 0.780
 # Install cryptography to avoid import-error reported by pylint.
 # What we really need is cryptography >= 35.0.0, which is only
 # available for Python >= 3.6.
-cryptography # >= 35.0.0
+cryptography >= 35.0.0; sys_platform == 'linux' and python_version >= '3.6'
+cryptography;           sys_platform == 'linux' and python_version <  '3.6'