From d0b0a76c3d00c4bac8ea1467b7794b554774d45d Mon Sep 17 00:00:00 2001 From: John Andersen Date: Mon, 17 Jun 2024 15:50:49 +0000 Subject: [PATCH 1/2] feat: enable out of tree parsers Asciinema: https://asciinema.org/a/664036 Signed-off-by: John Andersen --- cve_bin_tool/egg_updater.py | 42 +++++--------------- cve_bin_tool/parsers/__init__.py | 1 + cve_bin_tool/parsers/dart.py | 4 ++ cve_bin_tool/parsers/go.py | 4 ++ cve_bin_tool/parsers/java.py | 4 ++ cve_bin_tool/parsers/javascript.py | 4 ++ cve_bin_tool/parsers/parse.py | 63 ++++++++++++++++-------------- cve_bin_tool/parsers/perl.py | 4 ++ cve_bin_tool/parsers/php.py | 4 ++ cve_bin_tool/parsers/python.py | 9 +++++ cve_bin_tool/parsers/r.py | 4 ++ cve_bin_tool/parsers/ruby.py | 4 ++ cve_bin_tool/parsers/rust.py | 4 ++ cve_bin_tool/parsers/swift.py | 4 ++ cve_bin_tool/version_scanner.py | 18 ++------- setup.py | 41 ++++++++++++++++++- test/test_parsers.py | 42 ++++++++++++++++++++ 17 files changed, 179 insertions(+), 77 deletions(-) create mode 100644 test/test_parsers.py diff --git a/cve_bin_tool/egg_updater.py b/cve_bin_tool/egg_updater.py index 3b6421a3db..70e6b6b932 100644 --- a/cve_bin_tool/egg_updater.py +++ b/cve_bin_tool/egg_updater.py @@ -2,11 +2,12 @@ # SPDX-License-Identifier: GPL-3.0-or-later import ast +import importlib.util import os import sys from io import StringIO -from setuptools import Distribution, find_packages +from setuptools import Distribution try: from cve_bin_tool.version import VERSION @@ -59,44 +60,21 @@ def update_egg() -> None: with StringIO() as f: cwd = os.getcwd() os.chdir(os.path.join(os.path.dirname(__file__), "..")) + setup_spec = importlib.util.spec_from_file_location( + "setup", os.path.join(os.path.dirname(__file__), "..", "setup.py") + ) + setup_module = importlib.util.module_from_spec(setup_spec) + setup_spec.loader.exec_module(setup_module) + setup_kwargs = setup_module.setup_kwargs sys.stdout = f sys.stderr = f - dist = Distribution( + setup_kwargs.update( dict( 
script_name="setup.py", script_args=["egg_info"], - name="cve-bin-tool", - version=VERSION, - packages=find_packages( - exclude=["locales", "presentation"], - ), - entry_points={ - "console_scripts": [ - "cve-bin-tool = cve_bin_tool.cli:main", - "csv2cve = cve_bin_tool.csv2cve:main", - ], - "cve_bin_tool.checker": [ - "{} = cve_bin_tool.checkers.{}:{}".format( - filename.replace(".py", ""), - filename.replace(".py", ""), - "".join( - (filename.replace(".py", "") + " checker") - .replace("_", " ") - .title() - .split() - ), - ) - for filename in os.listdir( - os.path.join( - os.path.abspath(os.path.dirname(__file__)), - "checkers", - ) - ) - if filename.endswith(".py") and "__init__" not in filename - ], - }, ) ) + dist = Distribution(setup_kwargs) dist.parse_command_line() dist.run_commands() sys.stdout = sys.__stdout__ diff --git a/cve_bin_tool/parsers/__init__.py b/cve_bin_tool/parsers/__init__.py index 4823e52448..29ba3948c9 100644 --- a/cve_bin_tool/parsers/__init__.py +++ b/cve_bin_tool/parsers/__init__.py @@ -13,6 +13,7 @@ from cve_bin_tool.util import ProductInfo, ScanInfo __all__ = [ + "parse", "Parser", "java", "javascript", diff --git a/cve_bin_tool/parsers/dart.py b/cve_bin_tool/parsers/dart.py index 41e7b0b077..745942aa9f 100644 --- a/cve_bin_tool/parsers/dart.py +++ b/cve_bin_tool/parsers/dart.py @@ -15,6 +15,10 @@ class DartParser(Parser): https://dart.dev/overview """ + PARSER_MATCH_FILENAMES = [ + "pubspec.lock", + ] + def __init__(self, cve_db, logger): super().__init__(cve_db, logger) self.purl_pkg_type = "pub" diff --git a/cve_bin_tool/parsers/go.py b/cve_bin_tool/parsers/go.py index 6dbf3d4542..39b278e84f 100644 --- a/cve_bin_tool/parsers/go.py +++ b/cve_bin_tool/parsers/go.py @@ -25,6 +25,10 @@ class GoParser(Parser): """ + PARSER_MATCH_FILENAMES = [ + "go.mod", + ] + def __init__(self, cve_db, logger): super().__init__(cve_db, logger) self.purl_pkg_type = "golang" diff --git a/cve_bin_tool/parsers/java.py b/cve_bin_tool/parsers/java.py index 
0b792fcce1..e1e8a5f304 100644 --- a/cve_bin_tool/parsers/java.py +++ b/cve_bin_tool/parsers/java.py @@ -14,6 +14,10 @@ class JavaParser(Parser): """Class to handle parsing Java-based Packages.""" + PARSER_MATCH_FILENAMES = [ + "pom.xml", + ] + def __init__(self, cve_db, logger, validate=True): super().__init__(cve_db, logger) self.validate = validate diff --git a/cve_bin_tool/parsers/javascript.py b/cve_bin_tool/parsers/javascript.py index 6f58230315..f633654486 100644 --- a/cve_bin_tool/parsers/javascript.py +++ b/cve_bin_tool/parsers/javascript.py @@ -11,6 +11,10 @@ class JavascriptParser(Parser): """Parser for javascript's package-lock.json files""" + PARSER_MATCH_FILENAMES = [ + "package-lock.json", + ] + def __init__(self, cve_db, logger): super().__init__(cve_db, logger) self.purl_pkg_type = "npm" diff --git a/cve_bin_tool/parsers/parse.py b/cve_bin_tool/parsers/parse.py index acb8fc328a..fccca7f834 100644 --- a/cve_bin_tool/parsers/parse.py +++ b/cve_bin_tool/parsers/parse.py @@ -1,40 +1,45 @@ # Copyright (C) 2022 Intel Corporation # SPDX-License-Identifier: GPL-3.0-or-later +from __future__ import annotations -from cve_bin_tool.parsers.dart import DartParser -from cve_bin_tool.parsers.go import GoParser -from cve_bin_tool.parsers.java import JavaParser -from cve_bin_tool.parsers.javascript import JavascriptParser -from cve_bin_tool.parsers.perl import PerlParser -from cve_bin_tool.parsers.php import PhpParser -from cve_bin_tool.parsers.python import PythonParser, PythonRequirementsParser -from cve_bin_tool.parsers.r import RParser -from cve_bin_tool.parsers.ruby import RubyParser -from cve_bin_tool.parsers.rust import RustParser -from cve_bin_tool.parsers.swift import SwiftParser - -valid_files = { - "pom.xml": JavaParser, - "package-lock.json": JavascriptParser, - "Cargo.lock": RustParser, - "renv.lock": RParser, - "requirements.txt": PythonRequirementsParser, - "go.mod": GoParser, - "PKG-INFO: ": PythonParser, - "METADATA: ": PythonParser, - 
"Gemfile.lock": RubyParser, - "Package.resolved": SwiftParser, - "composer.lock": PhpParser, - "cpanfile": PerlParser, - "pubspec.lock": DartParser, -} +import sys + +if sys.version_info >= (3, 10): + from importlib import metadata as importlib_metadata +else: + import importlib_metadata + +from cve_bin_tool.parsers import Parser + +PARSERS_ENTRYPOINT = "cve_bin_tool.parsers" + + +def load_valid_files() -> dict[str, list[type[Parser]]]: + """Loads file parsers""" + valid_files: dict[str, list[type[Parser]]] = {} + for entrypoint in importlib_metadata.entry_points().select( + group=PARSERS_ENTRYPOINT + ): + parser_cls = entrypoint.load() + for match_filename in getattr(parser_cls, "PARSER_MATCH_FILENAMES", []): + valid_files.setdefault(match_filename, []) + valid_files[match_filename].append(parser_cls) + for match_filename in valid_files: + valid_files[match_filename] = list(set(valid_files[match_filename])) + return valid_files + + +valid_files = load_valid_files() def parse(filename, output, cve_db, logger): """ Parses the given filename using the appropriate parser. 
""" + parsers = [] for file in list(valid_files.keys()): if file in output: - parser = valid_files[file](cve_db, logger) - yield from parser.run_checker(filename) + for valid_file_parser in valid_files[file]: + parsers.append(valid_file_parser(cve_db, logger)) + for parser in parsers: + yield from parser.run_checker(filename) diff --git a/cve_bin_tool/parsers/perl.py b/cve_bin_tool/parsers/perl.py index 566e0fcdc9..5bff60f023 100644 --- a/cve_bin_tool/parsers/perl.py +++ b/cve_bin_tool/parsers/perl.py @@ -9,6 +9,10 @@ class PerlParser(Parser): """Parser for perl's cpan files""" + PARSER_MATCH_FILENAMES = [ + "cpanfile", + ] + def __init__(self, cve_db, logger): super().__init__(cve_db, logger) self.purl_pkg_type = "cpan" diff --git a/cve_bin_tool/parsers/php.py b/cve_bin_tool/parsers/php.py index c39f8ff644..0fca9bfae3 100644 --- a/cve_bin_tool/parsers/php.py +++ b/cve_bin_tool/parsers/php.py @@ -17,6 +17,10 @@ class PhpParser(Parser): generate PURLs (Package URLs) for the listed packages. """ + PARSER_MATCH_FILENAMES = [ + "composer.lock", + ] + def __init__(self, cve_db, logger): """Initialize the PhpParser.""" super().__init__(cve_db, logger) diff --git a/cve_bin_tool/parsers/python.py b/cve_bin_tool/parsers/python.py index eec04e18be..6d1201993f 100644 --- a/cve_bin_tool/parsers/python.py +++ b/cve_bin_tool/parsers/python.py @@ -20,6 +20,10 @@ class PythonRequirementsParser(Parser): requirements.txt) and generate PURLs (Package URLs) for the listed packages. """ + PARSER_MATCH_FILENAMES = [ + "requirements.txt", + ] + def __init__(self, cve_db, logger): """Initialize the python requirements file parser.""" super().__init__(cve_db, logger) @@ -114,6 +118,11 @@ class PythonParser(Parser): PKG-INFO or METADATA) and generate PURLs (Package URLs) for the package. 
""" + PARSER_MATCH_FILENAMES = [ + "PKG-INFO: ", + "METADATA: ", + ] + def __init__(self, cve_db, logger): """Initialize the python package metadata parser.""" super().__init__(cve_db, logger) diff --git a/cve_bin_tool/parsers/r.py b/cve_bin_tool/parsers/r.py index a3e5da6d9a..a907b28fa1 100644 --- a/cve_bin_tool/parsers/r.py +++ b/cve_bin_tool/parsers/r.py @@ -26,6 +26,10 @@ class RParser(Parser): """ + PARSER_MATCH_FILENAMES = [ + "renv.lock", + ] + def __init__(self, cve_db, logger): super().__init__(cve_db, logger) self.purl_pkg_type = "cran" diff --git a/cve_bin_tool/parsers/ruby.py b/cve_bin_tool/parsers/ruby.py index e904b9a638..0bc65db22c 100644 --- a/cve_bin_tool/parsers/ruby.py +++ b/cve_bin_tool/parsers/ruby.py @@ -27,6 +27,10 @@ class RubyParser(Parser): """ + PARSER_MATCH_FILENAMES = [ + "Gemfile.lock", + ] + def __init__(self, cve_db, logger): super().__init__(cve_db, logger) self.purl_pkg_type = "gem" diff --git a/cve_bin_tool/parsers/rust.py b/cve_bin_tool/parsers/rust.py index f7b7e25a97..6f574f41f0 100644 --- a/cve_bin_tool/parsers/rust.py +++ b/cve_bin_tool/parsers/rust.py @@ -24,6 +24,10 @@ class RustParser(Parser): Parse the Rust dependency file and yield valid PURLs for the packages listed in the file. 
""" + PARSER_MATCH_FILENAMES = [ + "Cargo.lock", + ] + def __init__(self, cve_db, logger): super().__init__(cve_db, logger) self.purl_pkg_type = "cargo" diff --git a/cve_bin_tool/parsers/swift.py b/cve_bin_tool/parsers/swift.py index c983826a4a..64ba63ff9d 100644 --- a/cve_bin_tool/parsers/swift.py +++ b/cve_bin_tool/parsers/swift.py @@ -28,6 +28,10 @@ class SwiftParser(Parser): """ + PARSER_MATCH_FILENAMES = [ + "Package.resolved", + ] + def __init__(self, cve_db, logger): super().__init__(cve_db, logger) self.purl_pkg_type = "swift" diff --git a/cve_bin_tool/version_scanner.py b/cve_bin_tool/version_scanner.py index 3e617ff07d..68f71bf4a0 100644 --- a/cve_bin_tool/version_scanner.py +++ b/cve_bin_tool/version_scanner.py @@ -2,10 +2,11 @@ # SPDX-License-Identifier: GPL-3.0-or-later from __future__ import annotations +import itertools import subprocess import sys from logging import Logger -from pathlib import Path, PurePath +from pathlib import Path from typing import Iterator from cve_bin_tool.checkers import Checker @@ -30,10 +31,6 @@ from importlib import metadata as importlib_metadata else: import importlib_metadata -if sys.version_info >= (3, 9): - import importlib.resources as resources -else: - import importlib_resources as resources class InvalidFileError(Exception): @@ -129,16 +126,7 @@ def number_of_checkers(self) -> int: @classmethod def available_language_checkers(cls) -> list[str]: """Find Language checkers""" - language_directory = resources.files(cls.LANGUAGE_CHECKER_ENTRYPOINT) - parsers = language_directory.iterdir() - language_checkers = [] - for parser in parsers: - if str(parser).endswith(".py"): - language = PurePath(parser).name.replace(".py", "").capitalize() - if language not in ["__init__", "Parse"]: - language_checkers.append(language) - - return sorted(language_checkers) + return list(sorted(map(str, set(itertools.chain(*valid_files.values()))))) def print_language_checkers(self) -> None: """Logs the message that lists the names of the 
language checkers""" diff --git a/setup.py b/setup.py index 5f0ba969c6..979618656c 100644 --- a/setup.py +++ b/setup.py @@ -3,9 +3,13 @@ import ast import os +import pathlib +import re from setuptools import find_packages, setup +PACKAGE_ROOT_PATH = pathlib.Path(__file__).parent.resolve() + with open("README.md", encoding="utf-8") as f: readme = f.read() @@ -18,6 +22,30 @@ VERSION = ast.literal_eval(line.strip().split("=")[-1].strip()) break + +def enumerate_entry_points_parsers(): + """Reads the files in cve_bin_tool/parsers/to auto determine list""" + parsers = {} + for path in PACKAGE_ROOT_PATH.joinpath( + "cve_bin_tool", + "parsers", + ).glob("*.py"): + if "__init__" == path.stem: + continue + contents = path.read_text() + for re_match in re.finditer(r"^class (\w+)", contents, re.MULTILINE): + parser_cls_name = re_match[1] + parsers[".".join([path.stem, parser_cls_name])] = ":".join( + [ + str(path.relative_to(PACKAGE_ROOT_PATH).with_suffix("")).replace( + os.path.sep, "." + ), + parser_cls_name, + ], + ) + return parsers + + setup_kwargs = dict( name="cve-bin-tool", version=VERSION, @@ -89,7 +117,18 @@ ) if filename.endswith(".py") and "__init__" not in filename ], + "cve_bin_tool.parsers": [ + "{} = {}".format( + parser_entry_point_name, + entry_point_path, + ) + for ( + parser_entry_point_name, + entry_point_path, + ) in enumerate_entry_points_parsers().items() + ], }, ) -setup(**setup_kwargs) +if __name__ == "__main__": + setup(**setup_kwargs) diff --git a/test/test_parsers.py b/test/test_parsers.py new file mode 100644 index 0000000000..09e6e88c98 --- /dev/null +++ b/test/test_parsers.py @@ -0,0 +1,42 @@ +import pytest +import unittest + +from cve_bin_tool.parsers.parse import valid_files as actual_valid_files +from cve_bin_tool.parsers.dart import DartParser +from cve_bin_tool.parsers.go import GoParser +from cve_bin_tool.parsers.java import JavaParser +from cve_bin_tool.parsers.javascript import JavascriptParser +from cve_bin_tool.parsers.perl import 
PerlParser +from cve_bin_tool.parsers.php import PhpParser +from cve_bin_tool.parsers.python import PythonParser, PythonRequirementsParser +from cve_bin_tool.parsers.r import RParser +from cve_bin_tool.parsers.ruby import RubyParser +from cve_bin_tool.parsers.rust import RustParser +from cve_bin_tool.parsers.swift import SwiftParser + + +EXPECTED_VALID_FILES = { + "pom.xml": [JavaParser], + "package-lock.json": [JavascriptParser], + "Cargo.lock": [RustParser], + "renv.lock": [RParser], + "requirements.txt": [PythonRequirementsParser], + "go.mod": [GoParser], + "PKG-INFO: ": [PythonParser], + "METADATA: ": [PythonParser], + "Gemfile.lock": [RubyParser], + "Package.resolved": [SwiftParser], + "composer.lock": [PhpParser], + "cpanfile": [PerlParser], + "pubspec.lock": [DartParser], +} + + +class TestParsers: + @pytest.mark.asyncio + async def test_parser_match_filenames_results_in_correct_valid_files(self): + unittest.TestCase().assertDictEqual( + EXPECTED_VALID_FILES, + actual_valid_files, + "Expected registered file types not the same as loaded file types, second dict is actual file types loaded, first is expected", + ) From f29b25187cf4a8e4bac941709c4cb2f69775eb5f Mon Sep 17 00:00:00 2001 From: John Andersen Date: Tue, 18 Jun 2024 15:03:04 +0000 Subject: [PATCH 2/2] feat: documentation on out of tree parsers Signed-off-by: John Andersen --- cve_bin_tool/cvedb.py | 9 + cve_bin_tool/parsers/__init__.py | 1 + cve_bin_tool/parsers/env.py | 134 +++++++++ doc/PARSERS.rst | 261 ++++++++++++++++++ doc/index.rst | 1 + .../static_analysis_bandit.py | 203 ++++++++++++++ example/oot-parser/entry_points.txt | 2 + example/oot-parser/entry_points_env.txt | 2 + example/oot-parser/setup.cfg | 10 + example/oot-parser/setup.py | 9 + example/oot-parser/setup_env.cfg | 10 + test/parser_env_test_0001.env | 4 + test/test_parsers.py | 80 +++++- 13 files changed, 717 insertions(+), 9 deletions(-) create mode 100644 cve_bin_tool/parsers/env.py create mode 100644 doc/PARSERS.rst create mode 
100644 example/oot-parser/cve_bin_tool_parser_static_analysis_bandit/static_analysis_bandit.py create mode 100644 example/oot-parser/entry_points.txt create mode 100644 example/oot-parser/entry_points_env.txt create mode 100644 example/oot-parser/setup.cfg create mode 100644 example/oot-parser/setup.py create mode 100644 example/oot-parser/setup_env.cfg create mode 100644 test/parser_env_test_0001.env diff --git a/cve_bin_tool/cvedb.py b/cve_bin_tool/cvedb.py index 1451eaa996..b32fa7f3b9 100644 --- a/cve_bin_tool/cvedb.py +++ b/cve_bin_tool/cvedb.py @@ -7,6 +7,7 @@ from __future__ import annotations import asyncio +import contextlib import datetime import json import logging @@ -1193,3 +1194,11 @@ def fetch_from_mirror(self, mirror, pubkey, ignore_signature, log_signature_erro else: self.clear_cached_data() return -1 + + @contextlib.contextmanager + def with_cursor(self): + cursor = self.db_open_and_get_cursor() + try: + yield cursor + finally: + self.db_close() diff --git a/cve_bin_tool/parsers/__init__.py b/cve_bin_tool/parsers/__init__.py index 29ba3948c9..632dd7d6c7 100644 --- a/cve_bin_tool/parsers/__init__.py +++ b/cve_bin_tool/parsers/__init__.py @@ -26,6 +26,7 @@ "php", "perl", "dart", + "env", ] diff --git a/cve_bin_tool/parsers/env.py b/cve_bin_tool/parsers/env.py new file mode 100644 index 0000000000..536f681752 --- /dev/null +++ b/cve_bin_tool/parsers/env.py @@ -0,0 +1,134 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: GPL-3.0-or-later + +from __future__ import annotations + +import dataclasses +import pathlib +import re + +from packageurl import PackageURL + +from cve_bin_tool.parsers import Parser +from cve_bin_tool.util import ProductInfo, ScanInfo + + +@dataclasses.dataclass +class EnvNamespaceConfig: + ad_hoc_cve_id: str + vendor: str + product: str + version: str + location: str = "/usr/local/bin/product" + + +@dataclasses.dataclass +class EnvConfig: + namespaces: dict[str, EnvNamespaceConfig] + + +class EnvParser(Parser): 
+ """ + Parser for .env files. + This parser reads the CVE_BIN_TOOL_-prefixed variables of a .env file and + yields a ScanInfo (with an ad-hoc Package URL) for each namespace they define. + """ + + PARSER_MATCH_FILENAMES = [ + ".env", + ] + + @staticmethod + def parse_file_contents(contents): + lines = list( + [ + line + for line in contents.replace("\r\n", "\n").split("\n") + if line.strip() and line.startswith("CVE_BIN_TOOL_") + ] + ) + namespaces = {} + for i, line in enumerate(lines): + key, value = line.split("=", maxsplit=1) + namespace, key = key[len("CVE_BIN_TOOL_") :].split("_", maxsplit=1) + if value.startswith('"'): + value = value[1:] + if value.endswith('"'): + value = value[:-1] + namespaces.setdefault(namespace, {}) + namespaces[namespace][key.lower()] = value + for namespace, config in namespaces.items(): + namespaces[namespace] = EnvNamespaceConfig(**config) + return EnvConfig(namespaces=namespaces) + + def run_checker(self, filename): + """ + Parse the .env file and yield ScanInfo objects for the listed packages. + Args: + filename (str): The path to the .env file. + Yields: + ScanInfo: ScanInfo objects for the packages listed in the file. + """ + self.filename = filename + contents = pathlib.Path(self.filename).read_text() + + env_config = self.parse_file_contents(contents) + + data_source = "environment" + affected_data = [ + { + "cve_id": cve.ad_hoc_cve_id, + "vendor": cve.vendor, + "product": cve.product, + # TODO Version MUST be unique to this bug!
+ "version": cve.version, + "versionStartIncluding": "", + # "versionStartIncluding": cve.version, + "versionStartExcluding": "", + "versionEndIncluding": "", + # "versionEndIncluding": cve.version, + "versionEndExcluding": "", + } + for _namespace, cve in env_config.namespaces.items() + ] + severity_data = [ + { + "ID": cve.ad_hoc_cve_id, + # TODO severity + "severity": "LOW", + # TODO description + "description": "TODO", + # TODO score + "score": 0, + # TODO CVSS_version + "CVSS_version": 3, + # TODO CVSS_vector + "CVSS_vector": "", + "last_modified": "", + } + for _namespace, cve in env_config.namespaces.items() + ] + + with self.cve_db.with_cursor() as cursor: + self.cve_db.populate_cve_metrics(severity_data, cursor) + self.cve_db.populate_severity(severity_data, cursor, data_source) + self.cve_db.populate_affected(affected_data, cursor, data_source) + + for _namespace, cve in env_config.namespaces.items(): + yield ScanInfo( + ProductInfo( + cve.vendor, + cve.product, + cve.version, + cve.location, + PackageURL( + type="ad-hoc", + namespace=cve.vendor, + name=re.sub(r"[^a-zA-Z0-9._-]", "", cve.product).lower(), + version=cve.version, + qualifiers={}, + subpath=None, + ), + ), + pathlib.Path(filename).resolve(), + ) diff --git a/doc/PARSERS.rst b/doc/PARSERS.rst new file mode 100644 index 0000000000..8857c0e770 --- /dev/null +++ b/doc/PARSERS.rst @@ -0,0 +1,261 @@ +Adding a new parser to cve-bin-tool +=================================== + +Overview +-------- + +Parsers enhance ``cve-bin-tool`` by helping it discover vulnerabilities for +different file types and manifest formats. 
+ +Parsers +------- + +The following parsers have been added to the project: + +- **DartParser** +- **GoParser** +- **JavaParser** +- **JavascriptParser** +- **PerlParser** +- **PhpParser** +- **PythonParser** +- **PythonRequirementsParser** +- **RParser** +- **RubyParser** +- **RustParser** +- **SwiftParser** +- **EnvParser** + +Usage +----- + +To utilize these parsers, ensure that your project includes the following imports: + +.. code-block:: python + + from cve_bin_tool.parsers.dart import DartParser + from cve_bin_tool.parsers.go import GoParser + from cve_bin_tool.parsers.java import JavaParser + from cve_bin_tool.parsers.javascript import JavascriptParser + from cve_bin_tool.parsers.perl import PerlParser + from cve_bin_tool.parsers.php import PhpParser + from cve_bin_tool.parsers.python import PythonParser, PythonRequirementsParser + from cve_bin_tool.parsers.r import RParser + from cve_bin_tool.parsers.ruby import RubyParser + from cve_bin_tool.parsers.rust import RustParser + from cve_bin_tool.parsers.swift import SwiftParser + from cve_bin_tool.parsers.env import EnvParser + +Setting Up a New Package and Entry Point +---------------------------------------- + +To implement a new parser plugin, follow these steps: + +1. Create the Parser Class +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +First, create the parser class. This class should be located in the appropriate directory within your project. For example, you might place it in ``cve_bin_tool_parser_env/env.py``. + +.. literalinclude:: /../cve_bin_tool/parsers/env.py + +2. Set Up ``setup.py`` +^^^^^^^^^^^^^^^^^^^^^^ + +Next, configure the ``setup.py`` file boilerplate. + +.. literalinclude:: /../example/oot-parser/setup.py + +3. Set Up ``setup.cfg`` +^^^^^^^^^^^^^^^^^^^^^^^ + +Next, configure the ``setup.cfg`` file to include your new parser as an entry point. This allows the parser to be dynamically discovered and used by the project. + +.. literalinclude:: /../example/oot-parser/setup_env.cfg + +4. 
Create ``entry_points.txt`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +You may also need to configure an ``entry_points.txt`` file if your project uses it to manage entry points. + +.. literalinclude:: /../example/oot-parser/entry_points_env.txt + +5. Install your plugin +^^^^^^^^^^^^^^^^^^^^^^ + +You need to activate your virtualenv before installing if you set one up. + +.. code-block:: console + + $ touch cve_bin_tool_parser_env/__init__.py + $ git init + $ python -m pip install -e . + +6. Populate the to-be-parsed file +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +In this example we implemented the ``EnvParser``, which parses the standard + ``/etc/environment`` style format. Let's save the following as ``.env``. + +.. literalinclude:: /../test/parser_env_test_0001.env + +7. Run ``cve-bin-tool`` and see your plugin's findings +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Let's test that our defined CVE comes up by scanning a ``.env`` file. + +.. code-block:: console + + $ cve-bin-tool --log debug .env + +Advanced Example: Ad-Hoc CVEs +----------------------------- + +For more information see: https://github.com/ossf/wg-vulnerability-disclosures/issues/94 + +1. Create the Parser Class +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +First, create the parser class. This class should be located in the appropriate directory within your project. For example, you might place it in ``cve_bin_tool_parser_static_analysis_bandit/static_analysis_bandit.py``. + +.. literalinclude:: /../example/oot-parser/cve_bin_tool_parser_static_analysis_bandit/static_analysis_bandit.py + +2. Set Up ``setup.py`` +^^^^^^^^^^^^^^^^^^^^^^ + +Next, configure the ``setup.py`` file boilerplate. + +.. literalinclude:: /../example/oot-parser/setup.py + +3. Set Up ``setup.cfg`` +^^^^^^^^^^^^^^^^^^^^^^^ + +Next, configure the ``setup.cfg`` file to include your new parser as an entry point. This allows the parser to be dynamically discovered and used by the project. + +.. 
literalinclude:: /../example/oot-parser/setup.cfg + +4. Create ``entry_points.txt`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +You may also need to configure an ``entry_points.txt`` file if your project uses it to manage entry points. + +.. literalinclude:: /../example/oot-parser/entry_points.txt + +5. Install your plugin +^^^^^^^^^^^^^^^^^^^^^^ + +You need to activate your virtualenv before installing if you set one up. + +.. code-block:: console + + $ touch cve_bin_tool_parser_static_analysis_bandit/__init__.py + $ git init + $ python -m pip install -e . + +6. Run ``cve-bin-tool`` +^^^^^^^^^^^^^^^^^^^^^^^ + +In this example we implemented the ``BanditParser``, which runs the Bandit static + analysis tool on Python files. We'll test that it loads by scanning + a ``.py`` file. + +.. code-block:: console + + $ cve-bin-tool --format json --detail -- cve_bin_tool_parser_static_analysis_bandit/static_analysis_bandit.py + +7. View Findings +^^^^^^^^^^^^^^^^ + +Let's view our two findings. We need to decode the JSON stored in the + description, which will be an object describing the bug. + +.. code-block:: console + + $ cat output.cve-bin-tool.*.json | jq '.[] | .description = (.description | fromjson)' + +.. 
code-block:: json + + { + "vendor": "username:alice:platform:example.com", + "product": "filepath:example/oot-parser/cve_bin_tool_parser_static_analysis_bandit/static_analysis_bandit.py", + "version": "v0.0.0.dev-SomeShaValue-N-Other-Branches-Workload-ID-Scan-Number-2d8852cf-ebfd-4495-97e2-2ce23e4e557d", + "location": 11, + "cve_number": "CVE-0001-urn:ietf:params:scitt:statement:sha-256:base64url:5i6UeRzg1...0...qnGmr1o", + "severity": "LOW", + "score": "unknown", + "source": "SCITT_URN_FOR_MANIFEST_OF_EXECUTED_WORKFLOW_WITH_SARIF_OUTPUTS_DEREFERENCEABLE", + "cvss_version": "3", + "cvss_vector": "unknown", + "paths": "example/oot-parser/cve_bin_tool_parser_static_analysis_bandit/static_analysis_bandit.py", + "remarks": "NewFound", + "comments": "", + "description": { + "code": "10 import re\n11 import subprocess\n12 import sys\n", + "col_offset": 0, + "end_col_offset": 17, + "filename": "/home/alice/Documents/python/cve-bin-tool/example/oot-parser/cve_bin_tool_parser_static_analysis_bandit/static_analysis_bandit.py", + "issue_confidence": "HIGH", + "issue_cwe": { + "id": 78, + "link": "https://cwe.mitre.org/data/definitions/78.html" + }, + "issue_severity": "LOW", + "issue_text": "Consider possible security implications associated with the subprocess module.", + "line_number": 11, + "line_range": [ + 11 + ], + "more_info": "https://bandit.readthedocs.io/en/1.7.8/blacklists/blacklist_imports.html#b404-import-subprocess", + "test_id": "B404", + "test_name": "blacklist" + } + } + +.. 
code-block:: json + + { + "vendor": "username:alice:platform:example.com", + "product": "filepath:example/oot-parser/cve_bin_tool_parser_static_analysis_bandit/static_analysis_bandit.py", + "version": "v0.0.0.dev-SomeShaValue-N-Other-Branches-Workload-ID-Scan-Number-2d8852cf-ebfd-4495-97e2-2ce23e4e557d", + "location": 11, + "cve_number": "CVE-0001-urn:ietf:params:scitt:statement:sha-256:base64url:5i6UeRzg1...1...qnGmr1o", + "severity": "LOW", + "score": "unknown", + "source": "SCITT_URN_FOR_MANIFEST_OF_EXECUTED_WORKFLOW_WITH_SARIF_OUTPUTS_DEREFERENCEABLE", + "cvss_version": "3", + "cvss_vector": "unknown", + "paths": "example/oot-parser/cve_bin_tool_parser_static_analysis_bandit/static_analysis_bandit.py", + "remarks": "NewFound", + "comments": "", + "description": { + "code": "118 try:\n119 stdout = subprocess.check_output(\n120 cmd,\n121 )\n122 except subprocess.CalledProcessError as error:\n", + "col_offset": 21, + "end_col_offset": 13, + "filename": "/home/alice/Documents/python/cve-bin-tool/example/oot-parser/cve_bin_tool_parser_static_analysis_bandit/static_analysis_bandit.py", + "issue_confidence": "HIGH", + "issue_cwe": { + "id": 78, + "link": "https://cwe.mitre.org/data/definitions/78.html" + }, + "issue_severity": "LOW", + "issue_text": "subprocess call - check for execution of untrusted input.", + "line_number": 119, + "line_range": [ + 119, + 120, + 121 + ], + "more_info": "https://bandit.readthedocs.io/en/1.7.8/plugins/b603_subprocess_without_shell_equals_true.html", + "test_id": "B603", + "test_name": "subprocess_without_shell_equals_true" + } + } + +Test Implementation +------------------- + +A new test class `TestParsers` has been introduced to verify that the expected file types are correctly mapped to their respective parsers. The test ensures that the actual valid files match the expected valid files. 
+ +Test Method +^^^^^^^^^^^ + +- `test_parser_match_filenames_results_in_correct_valid_files`: This test compares the `EXPECTED_VALID_FILES` dictionary with the `actual_valid_files` dictionary imported from `cve_bin_tool.parsers.parse`. If there is any discrepancy between the two, the test will fail, indicating that the loaded file types do not match the expected registered file types. diff --git a/doc/index.rst b/doc/index.rst index a0de036e1a..798c77d831 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -19,6 +19,7 @@ The CVE Binary Tool helps you determine if your system includes known vulnerabil RELEASE.md CONTRIBUTING.md CHECKERS.md + PARSERS.rst sboms_for_humans/README.md new-contributor-tips.md pypi_downloads.md diff --git a/example/oot-parser/cve_bin_tool_parser_static_analysis_bandit/static_analysis_bandit.py b/example/oot-parser/cve_bin_tool_parser_static_analysis_bandit/static_analysis_bandit.py new file mode 100644 index 0000000000..b49eb04ae1 --- /dev/null +++ b/example/oot-parser/cve_bin_tool_parser_static_analysis_bandit/static_analysis_bandit.py @@ -0,0 +1,203 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: GPL-3.0-or-later + +from __future__ import annotations + +import dataclasses +import json +import os +import pathlib +import re +import subprocess +import sys +import uuid + +import yaml +from packageurl import PackageURL + +from cve_bin_tool.parsers import Parser +from cve_bin_tool.util import ProductInfo, ScanInfo + + +@dataclasses.dataclass +class BanditNamespaceConfig: + ad_hoc_cve_id: str + vendor: str + product: str + version: str + location: str + description: str + severity: str + score: float + + +@dataclasses.dataclass +class BanditConfig: + namespaces: dict[str, BanditNamespaceConfig] + + +class BanditParser(Parser): + """ + Parser for Python requirements files. 
+ This parser is designed to parse Python requirements files (usually named + requirements.txt) and generate PURLs (Package URLs) for the listed packages. + """ + + PARSER_MATCH_FILENAMES = [ + ".py", + ] + + @staticmethod + def parse_bandit_output(filename, contents): + username = os.environ.get("USER", "unknown-user") + config_gh_hosts_yaml_path = pathlib.Path( + "~", ".config", "gh", "hosts.yml" + ).expanduser() + if config_gh_hosts_yaml_path.exists(): + # GitHub username if gh CLI installed + config_gh_hosts_yaml = yaml.safe_load(config_gh_hosts_yaml_path.read_text()) + platform = "github.com" + username = config_gh_hosts_yaml[platform]["user"] + vendor = f"username:{username}:platform:{platform}" + product = f"filepath:{filename}" + version = f"v0.0.0.dev-SomeShaValue-N-Other-Branches-Workload-ID-Scan-Number-{uuid.uuid4()}" + + contents = json.loads(contents) + + errors = contents.get("errors", []) + if errors: + raise Exception(json.dumps(contents)) + + namespaces = {} + for i, result in enumerate(contents.get("results", [])): + # Version is the same when code at location matches code from output + result["issue_text"] + result["code"] + + # TODO Replace UUID with with SCITT URN + # SCITT A.4.2 + ad_hoc_cve_id = f"CVE-0001-urn:ietf:params:scitt:statement:sha-256:base64url:5i6UeRzg1...{i}...qnGmr1o" + + # TODO Sort by something, line? Int of content address? + namespace = f"bandit-{i}" + + # TODO Take vendor product and version automatically from git repo + # or installed pypi package meta-info. + namespaces[namespace] = BanditNamespaceConfig( + ad_hoc_cve_id=ad_hoc_cve_id, + vendor=vendor, + product=product, + version=version, + severity="LOW", + score=0.0, + location=result["line_number"], + description=json.dumps(result), + ) + return BanditConfig(namespaces=namespaces) + + def run_checker(self, filename): + """ + Parse the .bandit file and yield ScanInfo objects for the listed packages. + Args: + filename (str): The path to the .bandit file. 
+ Yields: + str: ScanInfo objects for the packages listed in the file. + """ + file_path = pathlib.Path(filename).resolve() + cmd = [ + sys.executable, + "-um", + "bandit", + "-f", + "json", + "--exit-zero", + "--", + # TODO Relative paths? Need top level directory being scanned + str(file_path), + ] + try: + stdout = subprocess.check_output( + cmd, + ) + except subprocess.CalledProcessError as error: + raise Exception(error.stderr) from error + + bandit_config = self.parse_bandit_output(filename, stdout) + + # TODO Create SCITT_URN_FOR_MANIFEST_OF_EXECUTED_WORKFLOW_WITH_SARIF_OUTPUTS_DEREFERENCEABLE + # by making a request to the poligy engine and getting it's workflow + # manifest as output and deriving from that or extend it to return that. + data_source = "SCITT_URN_FOR_MANIFEST_OF_EXECUTED_WORKFLOW_WITH_SARIF_OUTPUTS_DEREFERENCEABLE" + + affected_data = [] + severity_data = [] + + for _namespace, cve in bandit_config.namespaces.items(): + affected_data.append( + { + "cve_id": cve.ad_hoc_cve_id, + "vendor": cve.vendor, + "product": cve.product, + # TODO Version MUST be unique to this bug! 
+ "version": cve.version, + "versionStartIncluding": "", + # "versionStartIncluding": cve.version, + "versionStartExcluding": "", + "versionEndIncluding": "", + # "versionEndIncluding": cve.version, + "versionEndExcluding": "", + } + ) + severity_data.append( + { + "ID": cve.ad_hoc_cve_id, + # TODO severity + "severity": cve.severity, + # TODO description + "description": cve.description, + # TODO score + "score": 0, + # TODO CVSS_version + "CVSS_version": 3, + # TODO CVSS_vector + "CVSS_vector": "", + # TODO Ideally this comes from bisecting and pinpointing the + # bug's introduction to the codebase + "last_modified": "", + } + ) + + with self.cve_db.with_cursor() as cursor: + self.cve_db.populate_cve_metrics(severity_data, cursor) + self.cve_db.populate_severity(severity_data, cursor, data_source) + self.cve_db.populate_affected(affected_data, cursor, data_source) + + product_infos = {} + for _namespace, cve in bandit_config.namespaces.items(): + product_infos_key = ( + cve.vendor, + cve.product, + cve.version, + ) + product_infos.setdefault( + product_infos_key, + ProductInfo( + cve.vendor, + cve.product, + cve.version, + cve.location, + PackageURL( + type="ad-hoc", + namespace=cve.vendor, + name=re.sub(r"[^a-zA-Z0-9._-]", "", cve.product).lower(), + version=cve.version, + qualifiers={}, + subpath=None, + ), + ), + ) + product_info = product_infos[product_infos_key] + for _namespace, cve in bandit_config.namespaces.items(): + yield ScanInfo(product_info, pathlib.Path(filename).resolve()) + + # TODO VEX attached via linked data to ad-hoc CVE-ID diff --git a/example/oot-parser/entry_points.txt b/example/oot-parser/entry_points.txt new file mode 100644 index 0000000000..a1275e8972 --- /dev/null +++ b/example/oot-parser/entry_points.txt @@ -0,0 +1,2 @@ +[cve_bin_tool.parsers] +static_analysis_bandit = cve_bin_tool_parser_static_analysis_bandit.static_analysis_bandit:BanditParser diff --git a/example/oot-parser/entry_points_env.txt 
b/example/oot-parser/entry_points_env.txt new file mode 100644 index 0000000000..5efda540bf --- /dev/null +++ b/example/oot-parser/entry_points_env.txt @@ -0,0 +1,2 @@ +[cve_bin_tool.parsers] +env = cve_bin_tool_parser_env.env:EnvParser diff --git a/example/oot-parser/setup.cfg b/example/oot-parser/setup.cfg new file mode 100644 index 0000000000..77ded2c4b5 --- /dev/null +++ b/example/oot-parser/setup.cfg @@ -0,0 +1,10 @@ +[metadata] +name = cve-bin-tool-parser-static-analysis-bandit +version = 1.0.0 +description = CVE Binary Tool: Parser: Static Analysis: Bandit + +[options] +packages = find: +entry_points = file: entry_points.txt +setup_requires = + setuptools_scm[toml]>=3.4.3 diff --git a/example/oot-parser/setup.py b/example/oot-parser/setup.py new file mode 100644 index 0000000000..d102c58907 --- /dev/null +++ b/example/oot-parser/setup.py @@ -0,0 +1,9 @@ +import site +import sys + +import setuptools + +# See https://github.com/pypa/pip/issues/7953 +site.ENABLE_USER_SITE = "--user" in sys.argv[1:] + +setuptools.setup(use_scm_version=True) diff --git a/example/oot-parser/setup_env.cfg b/example/oot-parser/setup_env.cfg new file mode 100644 index 0000000000..1ddcc812c7 --- /dev/null +++ b/example/oot-parser/setup_env.cfg @@ -0,0 +1,10 @@ +[metadata] +name = cve-bin-tool-parser-env +version = 1.0.0 +description = CVE Binary Tool: Parser: .env + +[options] +packages = find: +entry_points = file: entry_points.txt +setup_requires = + setuptools_scm[toml]>=3.4.3 diff --git a/test/parser_env_test_0001.env b/test/parser_env_test_0001.env new file mode 100644 index 0000000000..5935efa108 --- /dev/null +++ b/test/parser_env_test_0001.env @@ -0,0 +1,4 @@ +CVE_BIN_TOOL_0_PRODUCT="myproduct" +CVE_BIN_TOOL_0_VENDOR="myvendor" +CVE_BIN_TOOL_0_VERSION="v0.0.0.dev-15abff2d529396937e18c657ecee1ed224842000" +CVE_BIN_TOOL_0_AD_HOC_CVE_ID="CVE-0001-15004435-aa84-43ff-9c26-f703a26069f8" diff --git a/test/test_parsers.py b/test/test_parsers.py index 09e6e88c98..7e102e59f2 100644 --- 
a/test/test_parsers.py +++ b/test/test_parsers.py @@ -1,11 +1,22 @@ -import pytest +from __future__ import annotations + +import atexit +import contextlib +import pathlib +import re +import tempfile import unittest -from cve_bin_tool.parsers.parse import valid_files as actual_valid_files +from packageurl import PackageURL + +from cve_bin_tool.cvedb import CVEDB +from cve_bin_tool.log import LOGGER from cve_bin_tool.parsers.dart import DartParser +from cve_bin_tool.parsers.env import EnvParser from cve_bin_tool.parsers.go import GoParser from cve_bin_tool.parsers.java import JavaParser from cve_bin_tool.parsers.javascript import JavascriptParser +from cve_bin_tool.parsers.parse import valid_files as actual_valid_files from cve_bin_tool.parsers.perl import PerlParser from cve_bin_tool.parsers.php import PhpParser from cve_bin_tool.parsers.python import PythonParser, PythonRequirementsParser @@ -13,7 +24,16 @@ from cve_bin_tool.parsers.ruby import RubyParser from cve_bin_tool.parsers.rust import RustParser from cve_bin_tool.parsers.swift import SwiftParser +from cve_bin_tool.util import ProductInfo, ScanInfo +cve_db = CVEDB() +logger = LOGGER.getChild(__name__) + +stack = contextlib.ExitStack().__enter__() +tmpdir = stack.enter_context( + tempfile.TemporaryDirectory(prefix="cve-bin-tool-TEST_ENV") +) +atexit.register(lambda: stack.__exit__(None, None, None)) EXPECTED_VALID_FILES = { "pom.xml": [JavaParser], @@ -31,12 +51,54 @@ "pubspec.lock": [DartParser], } +PARSER_ENV_TEST_0001_ENV_CONTENTS = ( + pathlib.Path(__file__).parent.joinpath("parser_env_test_0001.env").read_text() +) + + +class TestParsers(unittest.TestCase): + maxDiff = None + + def test_parser_match_filenames_results_in_correct_valid_files(self): + for detection in EXPECTED_VALID_FILES.keys(): + self.assertIn( + detection, + actual_valid_files, + "Expected registered file type {detection!r} not found in loaded file type list", + ) + for plugin in EXPECTED_VALID_FILES[detection]: + self.assertIn( + 
plugin, + actual_valid_files[detection], + "Expected registered file type {detection!r} is missing Parser class {plugin!r}", + ) -class TestParsers: - @pytest.mark.asyncio - async def test_parser_match_filenames_results_in_correct_valid_files(self): - unittest.TestCase().assertDictEqual( - EXPECTED_VALID_FILES, - actual_valid_files, - "Expected registered file types not the same as loaded file types, second dict is actual file types loaded, first is expected", + def test_parser_env_test_0001(self): + file_path = pathlib.Path(tmpdir, ".env").resolve() + file_path.write_text(PARSER_ENV_TEST_0001_ENV_CONTENTS) + env_parser = EnvParser(cve_db, logger) + results = list(env_parser.run_checker(file_path)) + self.assertListEqual( + results, + [ + ScanInfo( + product_info=ProductInfo( + vendor="myvendor", + product="myproduct", + version="v0.0.0.dev-15abff2d529396937e18c657ecee1ed224842000", + # TODO location? + location="/usr/local/bin/product", + # TODO purl + purl=PackageURL( + type="ad-hoc", + namespace="myvendor", + name=re.sub(r"[^a-zA-Z0-9._-]", "", "myproduct").lower(), + version="v0.0.0.dev-15abff2d529396937e18c657ecee1ed224842000", + qualifiers={}, + subpath=None, + ), + ), + file_path=file_path, + ) + ], )