Skip to content

Commit b95b8be

Browse files
authored
feat: added deduplication database table (#4206)
* feat: added deduplication database table * feat: removed a test for 'FAIL-PKG-INFO' * feat: changed logic of handling products with 'UNKNOWN' vendors Signed-off-by: Meet Soni <[email protected]>
1 parent a8210b9 commit b95b8be

File tree

5 files changed

+76
-31
lines changed

5 files changed

+76
-31
lines changed

cve_bin_tool/cvedb.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,13 @@ class CVEDB:
116116
PRIMARY KEY(metrics_id)
117117
)
118118
""",
119+
"deduplication": """
120+
CREATE TABLE IF NOT EXISTS deduplication (
121+
purl TEXT,
122+
vendor TEXT,
123+
PRIMARY KEY (purl, vendor)
124+
)
125+
""",
119126
}
120127

121128
EMPTY_SELECT_QUERIES = {
@@ -392,20 +399,25 @@ def init_database(self) -> None:
392399
exploit_table_create,
393400
cve_metrics_table_create,
394401
metrics_table_create,
402+
deduplication,
395403
) = (
396404
self.TABLE_SCHEMAS["cve_severity"],
397405
self.TABLE_SCHEMAS["cve_range"],
398406
self.TABLE_SCHEMAS["cve_exploited"],
399407
self.TABLE_SCHEMAS["cve_metrics"],
400408
self.TABLE_SCHEMAS["metrics"],
409+
self.TABLE_SCHEMAS["deduplication"],
401410
)
402411
index_range = "CREATE INDEX IF NOT EXISTS product_index ON cve_range (cve_number, vendor, product)"
412+
index_purl = "CREATE INDEX IF NOT EXISTS purl_index ON deduplication (purl)"
403413
cursor.execute(cve_data_create)
404414
cursor.execute(version_range_create)
405415
cursor.execute(exploit_table_create)
406416
cursor.execute(cve_metrics_table_create)
407417
cursor.execute(metrics_table_create)
418+
cursor.execute(deduplication)
408419
cursor.execute(index_range)
420+
cursor.execute(index_purl)
409421

410422
(
411423
severity_schema,

cve_bin_tool/parsers/__init__.py

Lines changed: 60 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ def __init__(self, cve_db, logger):
4747
self.logger = logger
4848
self.filename = ""
4949
self.purl_pkg_type = "default"
50+
self.connections = {}
5051

5152
def run_checker(self, filename):
5253
"""
@@ -117,7 +118,7 @@ def find_vendor_from_purl(self, purl, ver) -> tuple[list[ScanInfo], bool]:
117118
UNION
118119
SELECT cpe from purl2cpe WHERE purl LIKE ?
119120
"""
120-
cursor = self.db_open_and_get_cursor()
121+
cursor = self.db_open_and_get_cursor("purl2cpe/purl2cpe.db")
121122
cursor.execute(query, (param1, param2))
122123
cpeList = cursor.fetchall()
123124
vendorlist: list[ScanInfo] = []
@@ -147,22 +148,69 @@ def find_vendor_from_purl(self, purl, ver) -> tuple[list[ScanInfo], bool]:
147148

148149
return vendorlist, True
149150
except Exception as e:
150-
self.logger.error(f"Error occurred: {e}")
151+
self.logger.debug(f"Error occurred: {e}")
152+
self.logger.error("Unable to access purl2cpe database.")
151153
return [], False
152154

153-
def db_open_and_get_cursor(self) -> sqlite3.Cursor:
154-
"""Opens connection to sqlite database, returns cursor object."""
155+
def deduplication(self, purl, vendorlist) -> list[ScanInfo]:
    """
    Drop vendors that the deduplication table lists for a given PURL.

    The 'deduplication' table in 'cve.db' is queried for the vendors recorded against
    'pkg:<type>/<name>'. Every vendor returned by that query is treated as invalid for
    the package:

    - Entries of 'vendorlist' whose vendor is not returned by the query are kept as-is.
    - If every entry is rejected, a single ScanInfo with the vendor set to 'UNKNOWN' is
      returned instead, built from the product, version, file path and purl of the last
      entry in 'vendorlist'.
    - An empty (or None) 'vendorlist' is returned unchanged, without touching the database.
    - If the lookup fails for any reason, the failure is logged and the original
      'vendorlist' is returned unchanged (best effort).
    """
    # Nothing to filter, and no entry from which an 'UNKNOWN' placeholder could be built.
    # Previously this case fell into the except branch via a NameError on the loop
    # variable and was misreported as a database access failure.
    if not vendorlist:
        return vendorlist

    try:
        purl = purl.to_dict()
        param = f"pkg:{purl['type']}/{purl['name']}"
        query = """
        SELECT vendor FROM deduplication WHERE purl LIKE ?
        """
        cursor = self.db_open_and_get_cursor("cve.db")
        cursor.execute(query, (param,))

        # A set gives O(1) membership tests while filtering.
        invalid_vendors = {row[0] for row in cursor.fetchall()}

        vendorlist_filtered: list[ScanInfo] = [
            item
            for item in vendorlist
            if item.product_info.vendor not in invalid_vendors
        ]

        if not vendorlist_filtered:
            # Every candidate vendor was rejected: keep the product visible under an
            # 'UNKNOWN' vendor, using the last candidate as the template.
            template = vendorlist[-1]
            vendorlist_filtered.append(
                ScanInfo(
                    ProductInfo(
                        "UNKNOWN",
                        template.product_info.product,
                        template.product_info.version,
                        template.file_path,
                        template.product_info.purl,
                    ),
                    template.file_path,
                )
            )
        return vendorlist_filtered
    except Exception as e:
        # Deliberately broad: deduplication is best effort and must never abort a scan.
        self.logger.debug(f"error: {e}")
        self.logger.error("Unable to access deduplication database.")
        return vendorlist
202+
203+
def db_open_and_get_cursor(self, dbname) -> sqlite3.Cursor:
    """
    Return a cursor for a sqlite database in the cve-bin-tool cache directory.

    'dbname' is a path relative to '~/.cache/cve-bin-tool' (for example 'cve.db' or
    'purl2cpe/purl2cpe.db'). One connection per 'dbname' is opened on first use and
    kept in 'self.connections', so repeated calls reuse it instead of reconnecting.

    Raises CVEDBError if the connection or the cursor cannot be created.
    """
    dbpath = Path("~").expanduser() / ".cache" / "cve-bin-tool" / dbname
    try:
        if dbname not in self.connections:
            self.connections[dbname] = sqlite3.connect(dbpath)
        # Create the cursor exactly once. sqlite3 signals failure by raising, never by
        # returning None, so the error is translated here rather than tested for.
        return self.connections[dbname].cursor()
    except sqlite3.Error as err:
        self.logger.error("Database cursor does not exist")
        raise CVEDBError from err
166214

167215
def decode_cpe23(self, cpe23) -> tuple[str, str, str]:
168216
"""

cve_bin_tool/parsers/python.py

Lines changed: 4 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010

1111
from cve_bin_tool.parsers import Parser
1212
from cve_bin_tool.strings import parse_strings
13-
from cve_bin_tool.util import ProductInfo, ScanInfo
1413

1514

1615
class PythonRequirementsParser(Parser):
@@ -106,6 +105,7 @@ def run_checker(self, filename):
106105
if not result:
107106
vendor = self.find_vendor(product, version)
108107

108+
vendor = self.deduplication(purl, vendor)
109109
if vendor is not None:
110110
yield from vendor
111111
self.logger.debug(f"Done scanning file: {self.filename}")
@@ -159,23 +159,12 @@ def run_checker(self, filename):
159159
purl = self.generate_purl(product)
160160
vendor, result = self.find_vendor_from_purl(purl, version)
161161

162+
if not result:
163+
vendor = self.find_vendor(product, version)
164+
162165
if vendor is not None:
163166
yield from vendor
164167

165-
if not result:
166-
vendor_package_pair = self.cve_db.get_vendor_product_pairs(product)
167-
if vendor_package_pair != []:
168-
for pair in vendor_package_pair:
169-
vendor = pair["vendor"]
170-
location = pair.get("location", self.filename)
171-
file_path = self.filename
172-
self.logger.debug(
173-
f"{file_path} is {vendor}.{product} {version}"
174-
)
175-
yield ScanInfo(
176-
ProductInfo(vendor, product, version, location), file_path
177-
)
178-
179168
# There are packages with a METADATA file in them containing different data from what the tool expects
180169
except AttributeError:
181170
self.logger.debug(f"{filename} is an invalid METADATA/PKG-INFO")

test/language_data/FAIL-PKG-INFO

Lines changed: 0 additions & 3 deletions
This file was deleted.

test/test_language_scanner.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,6 @@ def test_javascript_package_none_found(self, filename: str) -> None:
203203
@pytest.mark.parametrize(
204204
"filename",
205205
[
206-
(str(TEST_FILE_PATH / "FAIL-PKG-INFO")),
207206
(str(TEST_FILE_PATH / "fail_pom.xml")),
208207
],
209208
)

0 commit comments

Comments
 (0)