Skip to content

Commit 58e6a86

Browse files
authored
refactor: Add util function for checking kwarg count (#1423)
### Description - Add a simple util function to limit the number of kwargs and reuse it in the code. ### Testing - Added unit tests.
1 parent a2fe509 commit 58e6a86

12 files changed

+115
-32
lines changed
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
from typing import Any
2+
3+
4+
def raise_if_too_many_kwargs(max_kwargs: int = 1, **kwargs: Any) -> None:
5+
"""Raise ValueError if there are more non-None kwargs then max_kwargs."""
6+
none_kwargs_names = [f'"{kwarg_name}"' for kwarg_name, value in kwargs.items() if value is not None]
7+
if len(none_kwargs_names) > max_kwargs:
8+
all_kwargs_names = [f'"{kwarg_name}"' for kwarg_name in kwargs]
9+
raise ValueError(
10+
f'Only one of {", ".join(all_kwargs_names)} can be specified, but following arguments were '
11+
f'specified: {", ".join(none_kwargs_names)}.'
12+
)

src/crawlee/storage_clients/_file_system/_dataset_client.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from crawlee._consts import METADATA_FILENAME
1515
from crawlee._utils.crypto import crypto_random_object_id
1616
from crawlee._utils.file import atomic_write, json_dumps
17+
from crawlee._utils.raise_if_too_many_kwargs import raise_if_too_many_kwargs
1718
from crawlee.storage_clients._base import DatasetClient
1819
from crawlee.storage_clients.models import DatasetItemsListPage, DatasetMetadata
1920

@@ -114,9 +115,7 @@ async def open(
114115
or if both name and alias are provided.
115116
"""
116117
# Validate input parameters.
117-
specified_params = sum(1 for param in [id, name, alias] if param is not None)
118-
if specified_params > 1:
119-
raise ValueError('Only one of "id", "name", or "alias" can be specified, not multiple.')
118+
raise_if_too_many_kwargs(id=id, name=name, alias=alias)
120119

121120
dataset_base_path = Path(configuration.storage_dir) / cls._STORAGE_SUBDIR
122121

src/crawlee/storage_clients/_file_system/_key_value_store_client.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from crawlee._consts import METADATA_FILENAME
1616
from crawlee._utils.crypto import crypto_random_object_id
1717
from crawlee._utils.file import atomic_write, infer_mime_type, json_dumps
18+
from crawlee._utils.raise_if_too_many_kwargs import raise_if_too_many_kwargs
1819
from crawlee.storage_clients._base import KeyValueStoreClient
1920
from crawlee.storage_clients.models import KeyValueStoreMetadata, KeyValueStoreRecord, KeyValueStoreRecordMetadata
2021

@@ -113,9 +114,7 @@ async def open(
113114
or if both name and alias are provided.
114115
"""
115116
# Validate input parameters.
116-
specified_params = sum(1 for param in [id, name, alias] if param is not None)
117-
if specified_params > 1:
118-
raise ValueError('Only one of "id", "name", or "alias" can be specified, not multiple.')
117+
raise_if_too_many_kwargs(id=id, name=name, alias=alias)
119118

120119
kvs_base_path = Path(configuration.storage_dir) / cls._STORAGE_SUBDIR
121120

src/crawlee/storage_clients/_file_system/_request_queue_client.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from crawlee._consts import METADATA_FILENAME
1818
from crawlee._utils.crypto import crypto_random_object_id
1919
from crawlee._utils.file import atomic_write, json_dumps
20+
from crawlee._utils.raise_if_too_many_kwargs import raise_if_too_many_kwargs
2021
from crawlee._utils.recoverable_state import RecoverableState
2122
from crawlee.storage_clients._base import RequestQueueClient
2223
from crawlee.storage_clients.models import (
@@ -164,9 +165,7 @@ async def open(
164165
or if both name and alias are provided.
165166
"""
166167
# Validate input parameters.
167-
specified_params = sum(1 for param in [id, name, alias] if param is not None)
168-
if specified_params > 1:
169-
raise ValueError('Only one of "id", "name", or "alias" can be specified, not multiple.')
168+
raise_if_too_many_kwargs(id=id, name=name, alias=alias)
170169

171170
rq_base_path = Path(configuration.storage_dir) / cls._STORAGE_SUBDIR
172171

src/crawlee/storage_clients/_memory/_dataset_client.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from typing_extensions import override
88

99
from crawlee._utils.crypto import crypto_random_object_id
10+
from crawlee._utils.raise_if_too_many_kwargs import raise_if_too_many_kwargs
1011
from crawlee.storage_clients._base import DatasetClient
1112
from crawlee.storage_clients.models import DatasetItemsListPage, DatasetMetadata
1213

@@ -76,9 +77,7 @@ async def open(
7677
ValueError: If both name and alias are provided, or if neither id, name, nor alias is provided.
7778
"""
7879
# Validate input parameters.
79-
specified_params = sum(1 for param in [id, name, alias] if param is not None)
80-
if specified_params > 1:
81-
raise ValueError('Only one of "id", "name", or "alias" can be specified, not multiple.')
80+
raise_if_too_many_kwargs(id=id, name=name, alias=alias)
8281

8382
# Create a new dataset
8483
dataset_id = id or crypto_random_object_id()

src/crawlee/storage_clients/_memory/_key_value_store_client.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
from crawlee._utils.crypto import crypto_random_object_id
1010
from crawlee._utils.file import infer_mime_type
11+
from crawlee._utils.raise_if_too_many_kwargs import raise_if_too_many_kwargs
1112
from crawlee.storage_clients._base import KeyValueStoreClient
1213
from crawlee.storage_clients.models import KeyValueStoreMetadata, KeyValueStoreRecord, KeyValueStoreRecordMetadata
1314

@@ -74,9 +75,7 @@ async def open(
7475
ValueError: If both name and alias are provided.
7576
"""
7677
# Validate input parameters.
77-
specified_params = sum(1 for param in [id, name, alias] if param is not None)
78-
if specified_params > 1:
79-
raise ValueError('Only one of "id", "name", or "alias" can be specified, not multiple.')
78+
raise_if_too_many_kwargs(id=id, name=name, alias=alias)
8079

8180
# Create a new key-value store
8281
store_id = id or crypto_random_object_id()

src/crawlee/storage_clients/_memory/_request_queue_client.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010

1111
from crawlee import Request
1212
from crawlee._utils.crypto import crypto_random_object_id
13+
from crawlee._utils.raise_if_too_many_kwargs import raise_if_too_many_kwargs
1314
from crawlee.storage_clients._base import RequestQueueClient
1415
from crawlee.storage_clients.models import AddRequestsResponse, ProcessedRequest, RequestQueueMetadata
1516

@@ -86,9 +87,7 @@ async def open(
8687
ValueError: If both name and alias are provided.
8788
"""
8889
# Validate input parameters.
89-
specified_params = sum(1 for param in [id, name, alias] if param is not None)
90-
if specified_params > 1:
91-
raise ValueError('Only one of "id", "name", or "alias" can be specified, not multiple.')
90+
raise_if_too_many_kwargs(id=id, name=name, alias=alias)
9291

9392
# Create a new queue
9493
queue_id = id or crypto_random_object_id()

src/crawlee/storages/_storage_instance_manager.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from dataclasses import dataclass, field
66
from typing import TYPE_CHECKING, TypeVar
77

8+
from crawlee._utils.raise_if_too_many_kwargs import raise_if_too_many_kwargs
89
from crawlee.storage_clients._base import DatasetClient, KeyValueStoreClient, RequestQueueClient
910

1011
if TYPE_CHECKING:
@@ -107,13 +108,11 @@ async def open_storage_instance(
107108
)
108109

109110
# Validate input parameters.
110-
specified_params = sum(1 for param in [id, name, alias] if param is not None)
111-
if specified_params > 1:
112-
raise ValueError('Only one of "id", "name", or "alias" can be specified, not multiple.')
111+
raise_if_too_many_kwargs(id=id, name=name, alias=alias)
113112

114113
# Auto-set alias='default' when no parameters are specified.
115114
# Default unnamed storage is equal to alias=default unnamed storage.
116-
if specified_params == 0:
115+
if not any([name, alias, id]):
117116
alias = self._DEFAULT_STORAGE_ALIAS
118117

119118
# Check cache
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
from contextlib import nullcontext
2+
from typing import Any
3+
4+
import pytest
5+
6+
from crawlee._utils.raise_if_too_many_kwargs import raise_if_too_many_kwargs
7+
8+
9+
@pytest.mark.parametrize(
10+
('kwargs', 'should_raise'),
11+
[
12+
({'alias': 'alias', 'name': None, 'id': None}, False),
13+
({'alias': None, 'name': 'name', 'id': None}, False),
14+
({'alias': None, 'name': None, 'id': 'id'}, False),
15+
({'alias': 'alias', 'name': 'name', 'id': None}, True),
16+
({'alias': 'alias', 'name': None, 'id': 'id'}, True),
17+
({'alias': None, 'name': 'name', 'id': 'id'}, True),
18+
({'alias': 'alias', 'name': 'name', 'id': 'id'}, True),
19+
({'alias': None, 'name': None, 'id': None}, False),
20+
],
21+
)
22+
def test_limit_kwargs_default(kwargs: dict[str, Any], *, should_raise: bool) -> None:
23+
context = pytest.raises(ValueError, match=r'^Only one of .*') if should_raise else nullcontext()
24+
with context:
25+
raise_if_too_many_kwargs(**kwargs)
26+
27+
28+
@pytest.mark.parametrize(
29+
('kwargs', 'should_raise'),
30+
[
31+
({'alias': 'alias', 'name': 'name', 'id': 'id'}, True),
32+
({'alias': 'alias', 'name': 'name', 'id': None}, False),
33+
],
34+
)
35+
def test_limit_kwargs(kwargs: dict[str, Any], *, should_raise: bool) -> None:
36+
context = pytest.raises(ValueError, match=r'^Only one of .*') if should_raise else nullcontext()
37+
with context:
38+
raise_if_too_many_kwargs(max_kwargs=2, **kwargs)

tests/unit/storages/test_dataset.py

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,11 @@ async def test_open_with_id_and_name(
150150
storage_client: StorageClient,
151151
) -> None:
152152
"""Test that open() raises an error when both id and name are provided."""
153-
with pytest.raises(ValueError, match=r'Only one of "id", "name", or "alias" can be specified, not multiple.'):
153+
with pytest.raises(
154+
ValueError,
155+
match=r'Only one of "id", "name", "alias" can be specified, but following arguments '
156+
r'were specified: "id", "name".',
157+
):
154158
await Dataset.open(
155159
id='some-id',
156160
name='some-name',
@@ -651,7 +655,11 @@ async def test_alias_with_id_error(
651655
storage_client: StorageClient,
652656
) -> None:
653657
"""Test that providing both alias and id raises error."""
654-
with pytest.raises(ValueError, match=r'Only one of "id", "name", or "alias" can be specified, not multiple.'):
658+
with pytest.raises(
659+
ValueError,
660+
match=r'Only one of "id", "name", "alias" can be specified, but following arguments '
661+
r'were specified: "id", "alias".',
662+
):
655663
await Dataset.open(
656664
id='some-id',
657665
alias='some-alias',
@@ -663,7 +671,11 @@ async def test_alias_with_name_error(
663671
storage_client: StorageClient,
664672
) -> None:
665673
"""Test that providing both alias and name raises error."""
666-
with pytest.raises(ValueError, match=r'Only one of "id", "name", or "alias" can be specified, not multiple.'):
674+
with pytest.raises(
675+
ValueError,
676+
match=r'Only one of "id", "name", "alias" can be specified, but following arguments '
677+
r'were specified: "name", "alias".',
678+
):
667679
await Dataset.open(
668680
name='some-name',
669681
alias='some-alias',
@@ -675,7 +687,11 @@ async def test_alias_with_all_parameters_error(
675687
storage_client: StorageClient,
676688
) -> None:
677689
"""Test that providing id, name, and alias raises error."""
678-
with pytest.raises(ValueError, match=r'Only one of "id", "name", or "alias" can be specified, not multiple.'):
690+
with pytest.raises(
691+
ValueError,
692+
match=r'Only one of "id", "name", "alias" can be specified, but following arguments '
693+
r'were specified: "id", "name", "alias".',
694+
):
679695
await Dataset.open(
680696
id='some-id',
681697
name='some-name',

0 commit comments

Comments
 (0)