From a83ee49eacb43e2e41b59a66a79f7dbf4f5f9519 Mon Sep 17 00:00:00 2001 From: LouisTsai Date: Fri, 8 Aug 2025 18:06:06 +0800 Subject: [PATCH 01/19] feat: wrap blockchain test for benchmark --- src/ethereum_test_specs/__init__.py | 4 + src/ethereum_test_specs/benchmark.py | 164 +++++++++++++++++++++++++++ src/ethereum_test_tools/__init__.py | 4 + tests/benchmark/test_worst_blocks.py | 9 +- 4 files changed, 178 insertions(+), 3 deletions(-) create mode 100644 src/ethereum_test_specs/benchmark.py diff --git a/src/ethereum_test_specs/__init__.py b/src/ethereum_test_specs/__init__.py index 790e2b4351f..e0baf8c5188 100644 --- a/src/ethereum_test_specs/__init__.py +++ b/src/ethereum_test_specs/__init__.py @@ -2,6 +2,7 @@ from .base import BaseTest, TestSpec from .base_static import BaseStaticTest +from .benchmark import BenchmarkTest, BenchmarkTestFiller, BenchmarkTestSpec from .blobs import BlobsTest, BlobsTestFiller, BlobsTestSpec from .blockchain import ( BlockchainTest, @@ -23,6 +24,9 @@ __all__ = ( "BaseStaticTest", "BaseTest", + "BenchmarkTest", + "BenchmarkTestFiller", + "BenchmarkTestSpec", "BlobsTest", "BlobsTestFiller", "BlobsTestSpec", diff --git a/src/ethereum_test_specs/benchmark.py b/src/ethereum_test_specs/benchmark.py new file mode 100644 index 00000000000..fb0eb8b68f1 --- /dev/null +++ b/src/ethereum_test_specs/benchmark.py @@ -0,0 +1,164 @@ +"""Ethereum benchmark test spec definition and filler.""" + +from typing import Callable, ClassVar, Dict, Generator, List, Optional, Sequence, Type + +import pytest +from pydantic import Field + +from ethereum_clis import TransitionTool +from ethereum_test_base_types import HexNumber +from ethereum_test_exceptions import BlockException, TransactionException +from ethereum_test_execution import ( + BaseExecute, + ExecuteFormat, + LabeledExecuteFormat, + TransactionPost, +) +from ethereum_test_fixtures import ( + BaseFixture, + BlockchainEngineFixture, + BlockchainEngineXFixture, + BlockchainFixture, + FixtureFormat, + 
LabeledFixtureFormat, +) +from ethereum_test_forks import Fork +from ethereum_test_types import Alloc, Environment, Transaction + +from .base import BaseTest +from .blockchain import Block, BlockchainTest + + +class BenchmarkTest(BaseTest): + """Test type designed specifically for benchmark test cases.""" + + pre: Alloc + post: Alloc + tx: Optional[Transaction] = None + blocks: Optional[List[Block]] = None + block_exception: ( + List[TransactionException | BlockException] | TransactionException | BlockException | None + ) = None + env: Environment = Field(default_factory=Environment) + expected_benchmark_gas_used: int | None = None + + supported_fixture_formats: ClassVar[Sequence[FixtureFormat | LabeledFixtureFormat]] = [ + BlockchainFixture, + BlockchainEngineFixture, + BlockchainEngineXFixture, + ] + + supported_execute_formats: ClassVar[Sequence[LabeledExecuteFormat]] = [ + LabeledExecuteFormat( + TransactionPost, + "benchmark_test", + "An execute test derived from a benchmark test", + ), + ] + + supported_markers: ClassVar[Dict[str, str]] = { + "blockchain_test_engine_only": "Only generate a blockchain test engine fixture", + "blockchain_test_only": "Only generate a blockchain test fixture", + } + + @classmethod + def pytest_parameter_name(cls) -> str: + """Return the parameter name used in pytest to select this spec type.""" + return "benchmark_test" + + @classmethod + def discard_fixture_format_by_marks( + cls, + fixture_format: FixtureFormat, + fork: Fork, + markers: List[pytest.Mark], + ) -> bool: + """Discard a fixture format from filling if the appropriate marker is used.""" + if "blockchain_test_only" in [m.name for m in markers]: + return fixture_format != BlockchainFixture + if "blockchain_test_engine_only" in [m.name for m in markers]: + return fixture_format != BlockchainEngineFixture + return False + + def get_genesis_environment(self, fork: Fork) -> Environment: + """Get the genesis environment for this benchmark test.""" + return self.env + + def 
split_transaction(self, tx: Transaction, gas_limit_cap: int | None) -> List[Transaction]: + """Split a transaction that exceeds the gas limit cap into multiple transactions.""" + if (gas_limit_cap is None) or (tx.gas_limit <= gas_limit_cap): + return [tx] + + total_gas = int(self.expected_benchmark_gas_used or self.env.gas_limit) + print(f"total_gas: {total_gas}") + num_splits = total_gas // gas_limit_cap + + split_transactions = [] + for i in range(num_splits): + split_tx = tx.model_copy() + total_gas -= gas_limit_cap + split_tx.gas_limit = HexNumber(total_gas if i == num_splits - 1 else gas_limit_cap) + split_tx.nonce = HexNumber(tx.nonce + i) + split_transactions.append(split_tx) + + return split_transactions + + def generate_blockchain_test(self, fork: Fork) -> BlockchainTest: + """Create a BlockchainTest from this BenchmarkTest.""" + if self.blocks is not None: + return BlockchainTest.from_test( + base_test=self, + genesis_environment=self.env, + pre=self.pre, + post=self.post, + blocks=self.blocks, + ) + elif self.tx is not None: + gas_limit_cap = fork.transaction_gas_limit_cap() + + transactions = self.split_transaction(self.tx, gas_limit_cap) + + blocks = [Block(txs=transactions)] + + return BlockchainTest.from_test( + base_test=self, + pre=self.pre, + post=self.post, + blocks=blocks, + genesis_environment=self.env, + ) + else: + raise ValueError("Cannot create BlockchainTest without transactions or blocks") + + def generate( + self, + t8n: TransitionTool, + fork: Fork, + fixture_format: FixtureFormat, + ) -> BaseFixture: + """Generate the blockchain test fixture.""" + self.check_exception_test(exception=self.tx.error is not None if self.tx else False) + if fixture_format in BlockchainTest.supported_fixture_formats: + return self.generate_blockchain_test(fork=fork).generate( + t8n=t8n, fork=fork, fixture_format=fixture_format + ) + else: + raise Exception(f"Unsupported fixture format: {fixture_format}") + + def execute( + self, + *, + fork: Fork, + 
execute_format: ExecuteFormat, + ) -> BaseExecute: + """Execute the benchmark test by sending it to the live network.""" + if execute_format == TransactionPost: + return TransactionPost( + blocks=[[self.tx]], + post=self.post, + ) + raise Exception(f"Unsupported execute format: {execute_format}") + + +BenchmarkTestSpec = Callable[[str], Generator[BenchmarkTest, None, None]] +BenchmarkTestFiller = Type[BenchmarkTest] diff --git a/src/ethereum_test_tools/__init__.py b/src/ethereum_test_tools/__init__.py index bb0b026ef9e..6a822305f94 100644 --- a/src/ethereum_test_tools/__init__.py +++ b/src/ethereum_test_tools/__init__.py @@ -25,6 +25,8 @@ from ethereum_test_fixtures import BaseFixture, FixtureCollector from ethereum_test_specs import ( BaseTest, + BenchmarkTest, + BenchmarkTestFiller, BlobsTest, BlobsTestFiller, BlockchainTest, @@ -112,6 +114,8 @@ "BalStorageSlot", "BaseFixture", "BaseTest", + "BenchmarkTest", + "BenchmarkTestFiller", "Blob", "BlockAccessList", "BlobsTest", diff --git a/tests/benchmark/test_worst_blocks.py b/tests/benchmark/test_worst_blocks.py index 38e6d5f71e6..df007629349 100644 --- a/tests/benchmark/test_worst_blocks.py +++ b/tests/benchmark/test_worst_blocks.py @@ -15,8 +15,9 @@ Account, Address, Alloc, + BenchmarkTestFiller, Block, - BlockchainTestFiller, + Environment, Hash, StateTestFiller, Transaction, @@ -110,8 +111,9 @@ def ether_transfer_case( ["a_to_a", "a_to_b", "diff_acc_to_b", "a_to_diff_acc", "diff_acc_to_diff_acc"], ) def test_block_full_of_ether_transfers( - blockchain_test: BlockchainTestFiller, + benchmark_test: BenchmarkTestFiller, pre: Alloc, + env: Environment, case_id: str, ether_transfer_case, iteration_count: int, @@ -152,7 +154,8 @@ def test_block_full_of_ether_transfers( else {receiver: Account(balance=balance) for receiver, balance in balances.items()} ) - blockchain_test( + benchmark_test( + genesis_environment=env, pre=pre, post=post_state, blocks=[Block(txs=txs)], From 09c09cbb9715d8b15535eb72553b1f3a2fa195b4 Mon 
Sep 17 00:00:00 2001 From: LouisTsai Date: Fri, 8 Aug 2025 18:06:30 +0800 Subject: [PATCH 02/19] feat: wrap state test for benchmark --- src/ethereum_test_specs/__init__.py | 4 + src/ethereum_test_specs/benchmark_state.py | 229 +++++++++++++++++++++ src/ethereum_test_tools/__init__.py | 4 + tests/benchmark/test_worst_compute.py | 8 +- 4 files changed, 243 insertions(+), 2 deletions(-) create mode 100644 src/ethereum_test_specs/benchmark_state.py diff --git a/src/ethereum_test_specs/__init__.py b/src/ethereum_test_specs/__init__.py index e0baf8c5188..9a714640746 100644 --- a/src/ethereum_test_specs/__init__.py +++ b/src/ethereum_test_specs/__init__.py @@ -3,6 +3,7 @@ from .base import BaseTest, TestSpec from .base_static import BaseStaticTest from .benchmark import BenchmarkTest, BenchmarkTestFiller, BenchmarkTestSpec +from .benchmark_state import BenchmarkStateTest, BenchmarkStateTestFiller, BenchmarkStateTestSpec from .blobs import BlobsTest, BlobsTestFiller, BlobsTestSpec from .blockchain import ( BlockchainTest, @@ -27,6 +28,9 @@ "BenchmarkTest", "BenchmarkTestFiller", "BenchmarkTestSpec", + "BenchmarkStateTest", + "BenchmarkStateTestFiller", + "BenchmarkStateTestSpec", "BlobsTest", "BlobsTestFiller", "BlobsTestSpec", diff --git a/src/ethereum_test_specs/benchmark_state.py b/src/ethereum_test_specs/benchmark_state.py new file mode 100644 index 00000000000..e9e959f0615 --- /dev/null +++ b/src/ethereum_test_specs/benchmark_state.py @@ -0,0 +1,229 @@ +"""Ethereum benchmark state test spec definition and filler.""" + +import math +from pprint import pprint +from typing import Callable, ClassVar, Generator, List, Sequence, Type + +from pydantic import ConfigDict + +from ethereum_clis import TransitionTool +from ethereum_test_base_types import HexNumber +from ethereum_test_execution import ( + BaseExecute, + ExecuteFormat, + LabeledExecuteFormat, + TransactionPost, +) +from ethereum_test_fixtures import ( + BaseFixture, + FixtureFormat, + LabeledFixtureFormat, + 
StateFixture, +) +from ethereum_test_fixtures.common import FixtureBlobSchedule +from ethereum_test_fixtures.state import ( + FixtureConfig, + FixtureEnvironment, + FixtureForkPost, + FixtureTransaction, +) +from ethereum_test_forks import Fork +from ethereum_test_types import Alloc, Environment, Transaction +from ethereum_test_vm import Bytecode + +from .base import BaseTest, OpMode +from .blockchain import Block, BlockchainTest +from .debugging import print_traces +from .helpers import verify_transactions + + +class BenchmarkStateTest(BaseTest): + """Test type designed specifically for benchmark state test cases with full verification.""" + + pre: Alloc + post: Alloc + tx: Transaction + gas_benchmark_value: int + setup_bytecode: Bytecode | None = None + attack_bytecode: Bytecode | None = None + env: Environment + chain_id: int = 1 + + model_config = ConfigDict(arbitrary_types_allowed=True) + + supported_fixture_formats: ClassVar[Sequence[FixtureFormat | LabeledFixtureFormat]] = [ + StateFixture, + ] + [ + LabeledFixtureFormat( + fixture_format, + f"{fixture_format.format_name}_from_benchmark_state_test", + f"A {fixture_format.format_name} generated from a benchmark_state_test", + ) + for fixture_format in BlockchainTest.supported_fixture_formats + ] + + supported_execute_formats: ClassVar[Sequence[LabeledExecuteFormat]] = [ + LabeledExecuteFormat( + TransactionPost, + "benchmark_state_test_with_verification", + "An execute test derived from a benchmark state test with verification", + ), + ] + + def split_transaction(self, tx: Transaction, gas_limit_cap: int | None) -> List[Transaction]: + """Split a transaction that exceeds the gas limit cap into multiple transactions.""" + if (gas_limit_cap is None) or (tx.gas_limit <= gas_limit_cap): + return [tx] + + total_gas = int(tx.gas_limit) + num_splits = math.ceil(total_gas / gas_limit_cap) + + split_transactions = [] + remaining_gas = total_gas + for i in range(num_splits): + split_tx = tx.model_copy() + 
split_tx.gas_limit = HexNumber(min(gas_limit_cap, remaining_gas)) + split_tx.nonce = HexNumber(tx.nonce + i) + split_transactions.append(split_tx) + remaining_gas -= gas_limit_cap + + return split_transactions + + def make_benchmark_state_test_fixture( + self, + t8n: TransitionTool, + fork: Fork, + ) -> StateFixture: + """Create a fixture from the benchmark state test definition with full verification.""" + # We can't generate a state test fixture that names a transition fork, + # so we get the fork at the block number and timestamp of the state test + fork = fork.fork_at(self.env.number, self.env.timestamp) + + env = self.env.set_fork_requirements(fork) + tx = self.tx.with_signature_and_sender(keep_secret_key=True) + pre_alloc = Alloc.merge( + Alloc.model_validate(fork.pre_allocation()), + self.pre, + ) + + # Verification 1: Check for empty accounts + if empty_accounts := pre_alloc.empty_accounts(): + raise Exception(f"Empty accounts in pre state: {empty_accounts}") + + transition_tool_output = t8n.evaluate( + transition_tool_data=TransitionTool.TransitionToolData( + alloc=pre_alloc, + txs=[tx], + env=env, + fork=fork, + chain_id=self.chain_id, + reward=0, # Reward on state tests is always zero + blob_schedule=fork.blob_schedule(), + state_test=True, + ), + debug_output_path=self.get_next_transition_tool_output_path(), + slow_request=self.is_tx_gas_heavy_test(), + ) + + # Verification 2: Post-allocation verification + try: + self.post.verify_post_alloc(transition_tool_output.alloc) + except Exception as e: + print_traces(t8n.get_traces()) + raise e + + # Verification 3: Transaction verification + try: + verify_transactions( + txs=[tx], + result=transition_tool_output.result, + transition_tool_exceptions_reliable=t8n.exception_mapper.reliable, + ) + except Exception as e: + print_traces(t8n.get_traces()) + pprint(transition_tool_output.result) + pprint(transition_tool_output.alloc) + raise e + + # Verification 4: Benchmark gas validation + if self._operation_mode 
== OpMode.BENCHMARKING: + expected_benchmark_gas_used = self.gas_benchmark_value + gas_used = int(transition_tool_output.result.gas_used) + assert expected_benchmark_gas_used is not None, "gas_benchmark_value is not set" + assert gas_used == expected_benchmark_gas_used, ( + f"gas_used ({gas_used}) does not match gas_benchmark_value " + f"({expected_benchmark_gas_used})" + f", difference: {gas_used - expected_benchmark_gas_used}" + ) + + return StateFixture( + env=FixtureEnvironment(**env.model_dump(exclude_none=True)), + pre=pre_alloc, + post={ + fork: [ + FixtureForkPost( + state_root=transition_tool_output.result.state_root, + logs_hash=transition_tool_output.result.logs_hash, + tx_bytes=tx.rlp(), + expect_exception=tx.error, + state=transition_tool_output.alloc, + ) + ] + }, + transaction=FixtureTransaction.from_transaction(tx), + config=FixtureConfig( + blob_schedule=FixtureBlobSchedule.from_blob_schedule(fork.blob_schedule()), + chain_id=self.chain_id, + ), + ) + + def generate_blockchain_test(self, fork: Fork) -> BlockchainTest: + """Create a BlockchainTest from this BenchmarkStateTestWithVerification.""" + gas_limit_cap = fork.transaction_gas_limit_cap() + + transactions = self.split_transaction(self.tx, gas_limit_cap) + + blocks = [Block(txs=transactions)] + + return BlockchainTest.from_test( + base_test=self, + pre=self.pre, + post=self.post, + blocks=blocks, + genesis_environment=self.env, + ) + + def generate( + self, + t8n: TransitionTool, + fork: Fork, + fixture_format: FixtureFormat, + ) -> BaseFixture: + """Generate the test fixture.""" + self.check_exception_test(exception=self.tx.error is not None) + if fixture_format in BlockchainTest.supported_fixture_formats: + return self.generate_blockchain_test(fork=fork).generate( + t8n=t8n, fork=fork, fixture_format=fixture_format + ) + elif fixture_format == StateFixture: + return self.make_benchmark_state_test_fixture(t8n, fork) + + raise Exception(f"Unknown fixture format: {fixture_format}") + + def 
execute( + self, + *, + fork: Fork, + execute_format: ExecuteFormat, + ) -> BaseExecute: + """Execute the benchmark state test by sending it to the live network.""" + if execute_format == TransactionPost: + return TransactionPost( + blocks=[[self.tx]], + post=self.post, + ) + raise Exception(f"Unsupported execute format: {execute_format}") + + +BenchmarkStateTestFiller = Type[BenchmarkStateTest] +BenchmarkStateTestSpec = Callable[[str], Generator[BenchmarkStateTest, None, None]] diff --git a/src/ethereum_test_tools/__init__.py b/src/ethereum_test_tools/__init__.py index 6a822305f94..3dd5e0439ba 100644 --- a/src/ethereum_test_tools/__init__.py +++ b/src/ethereum_test_tools/__init__.py @@ -25,6 +25,8 @@ from ethereum_test_fixtures import BaseFixture, FixtureCollector from ethereum_test_specs import ( BaseTest, + BenchmarkStateTest, + BenchmarkStateTestFiller, BenchmarkTest, BenchmarkTestFiller, BlobsTest, @@ -116,6 +118,8 @@ "BaseTest", "BenchmarkTest", "BenchmarkTestFiller", + "BenchmarkStateTest", + "BenchmarkStateTestFiller", "Blob", "BlockAccessList", "BlobsTest", diff --git a/tests/benchmark/test_worst_compute.py b/tests/benchmark/test_worst_compute.py index 9bfdee16482..410e08820c5 100644 --- a/tests/benchmark/test_worst_compute.py +++ b/tests/benchmark/test_worst_compute.py @@ -19,6 +19,7 @@ from ethereum_test_tools import ( Address, Alloc, + BenchmarkStateTestFiller, Block, BlockchainTestFiller, Bytecode, @@ -2764,8 +2765,9 @@ def test_worst_calldataload( ], ) def test_worst_swap( - state_test: StateTestFiller, + benchmark_state_test: BenchmarkStateTestFiller, pre: Alloc, + env: Environment, fork: Fork, opcode: Opcode, gas_benchmark_value: int, @@ -2785,8 +2787,10 @@ def test_worst_swap( sender=pre.fund_eoa(), ) - state_test( + benchmark_state_test( + env=env, pre=pre, + gas_benchmark_value=gas_benchmark_value, post={}, tx=tx, ) From 87bd45d34d2b4ad91e9bb77c406beccba79f7689 Mon Sep 17 00:00:00 2001 From: LouisTsai Date: Thu, 14 Aug 2025 20:48:59 +0800 
Subject: [PATCH 03/19] feat(benchmark): add code generator to generate transaction --- src/ethereum_test_specs/benchmark.py | 4 +- src/ethereum_test_specs/benchmark_state.py | 3 - src/ethereum_test_tools/__init__.py | 8 ++ .../benchmark_code_generator.py | 96 +++++++++++++++++++ tests/benchmark/test_worst_compute.py | 24 ++--- 5 files changed, 115 insertions(+), 20 deletions(-) create mode 100644 src/ethereum_test_tools/benchmark_code_generator.py diff --git a/src/ethereum_test_specs/benchmark.py b/src/ethereum_test_specs/benchmark.py index fb0eb8b68f1..d1ffdb306db 100644 --- a/src/ethereum_test_specs/benchmark.py +++ b/src/ethereum_test_specs/benchmark.py @@ -128,7 +128,9 @@ def generate_blockchain_test(self, fork: Fork) -> BlockchainTest: genesis_environment=self.env, ) else: - raise ValueError("Cannot create BlockchainTest without transactions or blocks") + raise ValueError( + "Cannot create BlockchainTest without transactions, blocks, or code_generator" + ) def generate( self, diff --git a/src/ethereum_test_specs/benchmark_state.py b/src/ethereum_test_specs/benchmark_state.py index e9e959f0615..454af1a3844 100644 --- a/src/ethereum_test_specs/benchmark_state.py +++ b/src/ethereum_test_specs/benchmark_state.py @@ -29,7 +29,6 @@ ) from ethereum_test_forks import Fork from ethereum_test_types import Alloc, Environment, Transaction -from ethereum_test_vm import Bytecode from .base import BaseTest, OpMode from .blockchain import Block, BlockchainTest @@ -44,8 +43,6 @@ class BenchmarkStateTest(BaseTest): post: Alloc tx: Transaction gas_benchmark_value: int - setup_bytecode: Bytecode | None = None - attack_bytecode: Bytecode | None = None env: Environment chain_id: int = 1 diff --git a/src/ethereum_test_tools/__init__.py b/src/ethereum_test_tools/__init__.py index 3dd5e0439ba..4c6d7980166 100644 --- a/src/ethereum_test_tools/__init__.py +++ b/src/ethereum_test_tools/__init__.py @@ -86,6 +86,11 @@ call_return_code, ) +from .benchmark_code_generator import ( + 
BenchmarkCodeGenerator, + ExtCallGenerator, + JumpLoopGenerator, +) from .tools_code import ( CalldataCase, Case, @@ -116,6 +121,7 @@ "BalStorageSlot", "BaseFixture", "BaseTest", + "BenchmarkCodeGenerator", "BenchmarkTest", "BenchmarkTestFiller", "BenchmarkStateTest", @@ -136,6 +142,7 @@ "CodeGasMeasure", "Conditional", "ConsolidationRequest", + "ExtCallGenerator", "DeploymentTestType", "DepositRequest", "EngineAPIError", @@ -151,6 +158,7 @@ "Hash", "Header", "Initcode", + "JumpLoopGenerator", "Macro", "Macros", "NetworkWrappedTransaction", diff --git a/src/ethereum_test_tools/benchmark_code_generator.py b/src/ethereum_test_tools/benchmark_code_generator.py new file mode 100644 index 00000000000..57e7b0e1e4c --- /dev/null +++ b/src/ethereum_test_tools/benchmark_code_generator.py @@ -0,0 +1,96 @@ +"""Benchmark code generator classes for creating optimized bytecode patterns.""" + +from abc import ABC, abstractmethod +from typing import Optional + +from ethereum_test_forks import Fork +from ethereum_test_tools import Alloc, Bytecode, Transaction +from ethereum_test_tools.vm.opcode import Opcodes as Op + + +class BenchmarkCodeGenerator(ABC): + """Abstract base class for generating benchmark bytecode.""" + + def __init__( + self, + fork: Fork, + attack_block: Bytecode, + setup: Optional[Bytecode] = None, + ): + """Initialize with fork, attack block, and optional setup bytecode.""" + self.fork = fork + self.setup = setup or Bytecode() + self.attack_block = attack_block + + @abstractmethod + def generate_transaction(self, pre: Alloc, gas_limit: int) -> Transaction: + """Generate a transaction with the specified gas limit.""" + pass + + def generate_repeated_code(self, repeated_code: Bytecode, setup: Bytecode) -> Bytecode: + """Calculate the maximum number of iterations that can fit in the code size limit.""" + max_code_size = self.fork.max_code_size() + + overhead = len(Op.JUMPDEST) + len(Op.JUMP(len(setup))) + available_space = max_code_size - overhead + max_iterations = 
available_space // len(repeated_code) if len(repeated_code) > 0 else 0 + + code = setup + Op.JUMPDEST + repeated_code * max_iterations + Op.JUMP(len(setup)) + + self._validate_code_size(code) + + return code + + def _validate_code_size(self, code: Bytecode) -> None: + """Validate that the generated code fits within size limits.""" + if len(code) > self.fork.max_code_size(): + raise ValueError( + f"Generated code size {len(code)} exceeds maximum allowed size " + f"{self.fork.max_code_size()}" + ) + + +class JumpLoopGenerator(BenchmarkCodeGenerator): + """Generates bytecode that loops execution using JUMP operations.""" + + def generate_transaction(self, pre: Alloc, gas_limit: int) -> Transaction: + """Generate transaction with looping bytecode pattern.""" + # Benchmark Test Structure: + # setup + JUMPDEST + attack + attack + ... + attack + JUMP(setup_length) + + code = self.generate_repeated_code(self.attack_block, self.setup) + + return Transaction( + to=pre.deploy_contract(code=code), + gas_limit=self.fork.transaction_gas_limit_cap() or 30_000_000, + sender=pre.fund_eoa(), + ) + + +class ExtCallGenerator(BenchmarkCodeGenerator): + """Generates bytecode that fills the contract to maximum allowed code size.""" + + def generate_transaction(self, pre: Alloc, gas_limit: int) -> Transaction: + """Generate transaction with maximal code size coverage.""" + # Benchmark Test Structure: + # There are two contracts: + # 1. The target contract that executes certain operation but not loop (e.g. PUSH) + # 2. The loop contract that calls the target contract in a loop + # + # attack = POP(STATICCALL(GAS, target_contract_address, 0, 0, 0, 0)) + # setup + JUMPDEST + attack + attack + ... + attack + JUMP(setup_lengt) + # This could optimize the gas consumption and increase the cycle count. 
+ + max_stack_height = self.fork.max_stack_height() + + target_contract_address = pre.deploy_contract(code=self.attack_block * max_stack_height) + + code_sequence = Op.POP(Op.STATICCALL(Op.GAS, target_contract_address, 0, 0, 0, 0)) + + code = self.generate_repeated_code(code_sequence, Bytecode()) + + return Transaction( + to=pre.deploy_contract(code=code), + gas_limit=self.fork.transaction_gas_limit_cap() or 30_000_000, + sender=pre.fund_eoa(), + ) diff --git a/tests/benchmark/test_worst_compute.py b/tests/benchmark/test_worst_compute.py index 410e08820c5..fa6e8f63432 100644 --- a/tests/benchmark/test_worst_compute.py +++ b/tests/benchmark/test_worst_compute.py @@ -28,6 +28,7 @@ Transaction, add_kzg_version, ) +from ethereum_test_tools.benchmark_code_generator import JumpLoopGenerator from ethereum_test_types import TransactionType from ethereum_test_vm import Opcode from ethereum_test_vm import Opcodes as Op @@ -1843,27 +1844,19 @@ def test_worst_jumpis( @pytest.mark.slow def test_worst_jumpdests( - state_test: StateTestFiller, + benchmark_state_test: BenchmarkStateTestFiller, pre: Alloc, + env: Environment, fork: Fork, gas_benchmark_value: int, ): """Test running a JUMPDEST-intensive contract.""" - max_code_size = fork.max_code_size() + generator = JumpLoopGenerator(fork, Op.JUMPDEST) + tx = generator.generate_transaction(pre, gas_benchmark_value) - # Create and deploy a contract with many JUMPDESTs - code_suffix = Op.JUMP(Op.PUSH0) - code_body = Op.JUMPDEST * (max_code_size - len(code_suffix)) - code = code_body + code_suffix - jumpdests_address = pre.deploy_contract(code=code) - - tx = Transaction( - to=jumpdests_address, - gas_limit=gas_benchmark_value, - sender=pre.fund_eoa(), - ) - - state_test( + benchmark_state_test( + env=env, + gas_benchmark_value=gas_benchmark_value, pre=pre, post={}, tx=tx, @@ -2783,7 +2776,6 @@ def test_worst_swap( tx = Transaction( to=pre.deploy_contract(code=code), - gas_limit=gas_benchmark_value, sender=pre.fund_eoa(), ) From 
8ca027fa10ac8db37c2e93bac39f93798b594d1f Mon Sep 17 00:00:00 2001 From: LouisTsai Date: Tue, 9 Sep 2025 21:31:31 +0800 Subject: [PATCH 04/19] fix: resolve typing issue --- tests/benchmark/test_worst_blocks.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/benchmark/test_worst_blocks.py b/tests/benchmark/test_worst_blocks.py index df007629349..d50ad80b91c 100644 --- a/tests/benchmark/test_worst_blocks.py +++ b/tests/benchmark/test_worst_blocks.py @@ -155,11 +155,10 @@ def test_block_full_of_ether_transfers( ) benchmark_test( - genesis_environment=env, + env=env, pre=pre, post=post_state, blocks=[Block(txs=txs)], - exclude_full_post_state_in_output=True, expected_benchmark_gas_used=iteration_count * intrinsic_cost, ) From d76104fe037d99378a6afd3ca801835b1e463c63 Mon Sep 17 00:00:00 2001 From: LouisTsai Date: Thu, 11 Sep 2025 20:46:56 +0800 Subject: [PATCH 05/19] refactor: update benchmark code generator and test wrapper --- src/ethereum_test_benchmark/__init__.py | 13 ++ .../benchmark_code_generator.py | 79 +++++---- src/ethereum_test_specs/benchmark.py | 155 ++++++++++++++++-- src/ethereum_test_tools/__init__.py | 5 + tests/benchmark/conftest.py | 15 ++ tests/benchmark/test_worst_blocks.py | 2 + tests/benchmark/test_worst_compute.py | 36 ++-- .../benchmark/test_worst_stateful_opcodes.py | 49 ++++-- 8 files changed, 266 insertions(+), 88 deletions(-) create mode 100644 src/ethereum_test_benchmark/__init__.py rename src/{ethereum_test_tools => ethereum_test_benchmark}/benchmark_code_generator.py (54%) diff --git a/src/ethereum_test_benchmark/__init__.py b/src/ethereum_test_benchmark/__init__.py new file mode 100644 index 00000000000..60f0e66a5fb --- /dev/null +++ b/src/ethereum_test_benchmark/__init__.py @@ -0,0 +1,13 @@ +"""Benchmark code generator classes for creating optimized bytecode patterns.""" + +from .benchmark_code_generator import ( + BenchmarkCodeGenerator, + ExtCallGenerator, + JumpLoopGenerator, +) + +__all__ = ( + 
"BenchmarkCodeGenerator", + "ExtCallGenerator", + "JumpLoopGenerator", +) diff --git a/src/ethereum_test_tools/benchmark_code_generator.py b/src/ethereum_test_benchmark/benchmark_code_generator.py similarity index 54% rename from src/ethereum_test_tools/benchmark_code_generator.py rename to src/ethereum_test_benchmark/benchmark_code_generator.py index 57e7b0e1e4c..32b73637c5f 100644 --- a/src/ethereum_test_tools/benchmark_code_generator.py +++ b/src/ethereum_test_benchmark/benchmark_code_generator.py @@ -1,26 +1,26 @@ """Benchmark code generator classes for creating optimized bytecode patterns.""" from abc import ABC, abstractmethod -from typing import Optional +from dataclasses import dataclass, field from ethereum_test_forks import Fork -from ethereum_test_tools import Alloc, Bytecode, Transaction -from ethereum_test_tools.vm.opcode import Opcodes as Op +from ethereum_test_types import Alloc, Transaction +from ethereum_test_vm import Bytecode +from ethereum_test_vm.opcode import Opcodes as Op +@dataclass class BenchmarkCodeGenerator(ABC): """Abstract base class for generating benchmark bytecode.""" - def __init__( - self, - fork: Fork, - attack_block: Bytecode, - setup: Optional[Bytecode] = None, - ): - """Initialize with fork, attack block, and optional setup bytecode.""" - self.fork = fork - self.setup = setup or Bytecode() - self.attack_block = attack_block + fork: Fork + attack_block: Bytecode + setup: Bytecode = field(default_factory=Bytecode) + + @abstractmethod + def deploy_contracts(self, pre: Alloc) -> None: + """Deploy any contracts needed for the benchmark.""" + pass @abstractmethod def generate_transaction(self, pre: Alloc, gas_limit: int) -> Transaction: @@ -29,14 +29,14 @@ def generate_transaction(self, pre: Alloc, gas_limit: int) -> Transaction: def generate_repeated_code(self, repeated_code: Bytecode, setup: Bytecode) -> Bytecode: """Calculate the maximum number of iterations that can fit in the code size limit.""" + assert len(repeated_code) > 0, 
"repeated_code cannot be empty" max_code_size = self.fork.max_code_size() - overhead = len(Op.JUMPDEST) + len(Op.JUMP(len(setup))) + overhead = len(setup) + len(Op.JUMPDEST) + len(Op.JUMP(len(setup))) available_space = max_code_size - overhead - max_iterations = available_space // len(repeated_code) if len(repeated_code) > 0 else 0 + max_iterations = available_space // len(repeated_code) code = setup + Op.JUMPDEST + repeated_code * max_iterations + Op.JUMP(len(setup)) - self._validate_code_size(code) return code @@ -50,47 +50,62 @@ def _validate_code_size(self, code: Bytecode) -> None: ) +@dataclass class JumpLoopGenerator(BenchmarkCodeGenerator): """Generates bytecode that loops execution using JUMP operations.""" - def generate_transaction(self, pre: Alloc, gas_limit: int) -> Transaction: - """Generate transaction with looping bytecode pattern.""" + def deploy_contracts(self, pre: Alloc) -> None: + """Deploy the looping contract.""" # Benchmark Test Structure: # setup + JUMPDEST + attack + attack + ... 
+ attack + JUMP(setup_length) - code = self.generate_repeated_code(self.attack_block, self.setup) + self._contract_address = pre.deploy_contract(code=code) + + def generate_transaction(self, pre: Alloc, gas_limit: int) -> Transaction: + """Generate transaction that executes the looping contract.""" + if not hasattr(self, "_contract_address"): + raise ValueError("deploy_contracts must be called before generate_transaction") return Transaction( - to=pre.deploy_contract(code=code), - gas_limit=self.fork.transaction_gas_limit_cap() or 30_000_000, + to=self._contract_address, + gas_limit=gas_limit, sender=pre.fund_eoa(), ) +@dataclass class ExtCallGenerator(BenchmarkCodeGenerator): """Generates bytecode that fills the contract to maximum allowed code size.""" - def generate_transaction(self, pre: Alloc, gas_limit: int) -> Transaction: - """Generate transaction with maximal code size coverage.""" + def deploy_contracts(self, pre: Alloc) -> None: + """Deploy both target and caller contracts.""" # Benchmark Test Structure: # There are two contracts: # 1. The target contract that executes certain operation but not loop (e.g. PUSH) # 2. The loop contract that calls the target contract in a loop - # - # attack = POP(STATICCALL(GAS, target_contract_address, 0, 0, 0, 0)) - # setup + JUMPDEST + attack + attack + ... + attack + JUMP(setup_lengt) - # This could optimize the gas consumption and increase the cycle count. 
max_stack_height = self.fork.max_stack_height() - target_contract_address = pre.deploy_contract(code=self.attack_block * max_stack_height) + # Deploy target contract that contains the actual attack block + self._target_contract_address = pre.deploy_contract( + code=self.attack_block * max_stack_height + ) - code_sequence = Op.POP(Op.STATICCALL(Op.GAS, target_contract_address, 0, 0, 0, 0)) + # Create caller contract that repeatedly calls the target contract + # attack = POP(STATICCALL(GAS, target_contract_address, 0, 0, 0, 0)) + # setup + JUMPDEST + attack + attack + ... + attack + JUMP(setup_length) + code_sequence = Op.POP(Op.STATICCALL(Op.GAS, self._target_contract_address, 0, 0, 0, 0)) + + caller_code = self.generate_repeated_code(code_sequence, Bytecode()) + self._contract_address = pre.deploy_contract(code=caller_code) - code = self.generate_repeated_code(code_sequence, Bytecode()) + def generate_transaction(self, pre: Alloc, gas_limit: int) -> Transaction: + """Generate transaction that executes the caller contract.""" + if not hasattr(self, "_contract_address"): + raise ValueError("deploy_contracts must be called before generate_transaction") return Transaction( - to=pre.deploy_contract(code=code), - gas_limit=self.fork.transaction_gas_limit_cap() or 30_000_000, + to=self._contract_address, + gas_limit=gas_limit, sender=pre.fund_eoa(), ) diff --git a/src/ethereum_test_specs/benchmark.py b/src/ethereum_test_specs/benchmark.py index d1ffdb306db..5bf670e0cce 100644 --- a/src/ethereum_test_specs/benchmark.py +++ b/src/ethereum_test_specs/benchmark.py @@ -1,9 +1,12 @@ """Ethereum benchmark test spec definition and filler.""" -from typing import Callable, ClassVar, Dict, Generator, List, Optional, Sequence, Type +from contextlib import contextmanager +from contextvars import ContextVar +from enum import Enum +from typing import Any, Callable, ClassVar, Dict, Generator, List, Optional, Sequence, Type import pytest -from pydantic import Field +from pydantic import 
ConfigDict, Field from ethereum_clis import TransitionTool from ethereum_test_base_types import HexNumber @@ -29,9 +32,74 @@ from .blockchain import Block, BlockchainTest +class BenchmarkPhase(Enum): + """Phases of a benchmark test.""" + + SETUP = "setup" + EXECUTION = "execution" + + +_current_phase: ContextVar[Optional[BenchmarkPhase]] = ContextVar("benchmark_phase", default=None) + + +class BenchmarkManager: + """Context manager for managing benchmark test phases.""" + + def __init__(self): + """Initialize the BenchmarkManager with empty transaction and block lists.""" + self.setup_transactions: List[Transaction] = [] + self.setup_blocks: List[Block] = [] + self.execution_transactions: List[Transaction] = [] + self.execution_blocks: List[Block] = [] + + @contextmanager + def setup(self): + """Context manager for the setup phase of a benchmark test.""" + token = _current_phase.set(BenchmarkPhase.SETUP) + try: + yield self + finally: + _current_phase.reset(token) + + @contextmanager + def execution(self): + """Context manager for the execution phase of a benchmark test.""" + token = _current_phase.set(BenchmarkPhase.EXECUTION) + try: + yield self + finally: + _current_phase.reset(token) + + def add_transaction(self, tx: Transaction): + """Add a transaction to the current phase.""" + current_phase = _current_phase.get() + if current_phase == BenchmarkPhase.SETUP: + self.setup_transactions.append(tx) + elif current_phase == BenchmarkPhase.EXECUTION: + self.execution_transactions.append(tx) + else: + self.setup_transactions.append(tx) + + def add_block(self, block: Block): + """Add a block to the current phase.""" + current_phase = _current_phase.get() + if current_phase == BenchmarkPhase.SETUP: + self.setup_blocks.append(block) + elif current_phase == BenchmarkPhase.EXECUTION: + self.execution_blocks.append(block) + else: + self.setup_blocks.append(block) + + def get_current_phase(self) -> Optional[BenchmarkPhase]: + """Get the current benchmark phase.""" + return 
_current_phase.get() + + class BenchmarkTest(BaseTest): """Test type designed specifically for benchmark test cases.""" + model_config = ConfigDict(extra="forbid") + pre: Alloc post: Alloc tx: Optional[Transaction] = None @@ -41,6 +109,9 @@ class BenchmarkTest(BaseTest): ) = None env: Environment = Field(default_factory=Environment) expected_benchmark_gas_used: int | None = None + gas_benchmark_value: int + benchmark_manager: Optional[Any] = Field(default=None, exclude=True) + code_generator: Optional[Any] = Field(default=None, exclude=True) supported_fixture_formats: ClassVar[Sequence[FixtureFormat | LabeledFixtureFormat]] = [ BlockchainFixture, @@ -86,26 +157,81 @@ def get_genesis_environment(self, fork: Fork) -> Environment: def split_transaction(self, tx: Transaction, gas_limit_cap: int | None) -> List[Transaction]: """Split a transaction that exceeds the gas limit cap into multiple transactions.""" - if (gas_limit_cap is None) or (tx.gas_limit <= gas_limit_cap): + if gas_limit_cap is None: + tx.gas_limit = HexNumber(self.gas_benchmark_value) + return [tx] + + if gas_limit_cap >= self.gas_benchmark_value: + tx.gas_limit = HexNumber(min(tx.gas_limit, self.gas_benchmark_value)) return [tx] - total_gas = int(self.expected_benchmark_gas_used or self.env.gas_limit) - print(f"total_gas: {total_gas}") - num_splits = total_gas // gas_limit_cap + remaining_gas = self.gas_benchmark_value + num_splits = remaining_gas // gas_limit_cap + int(remaining_gas % gas_limit_cap) split_transactions = [] for i in range(num_splits): split_tx = tx.model_copy() - total_gas -= gas_limit_cap - split_tx.gas_limit = HexNumber(total_gas if i == num_splits - 1 else gas_limit_cap) + split_tx.gas_limit = HexNumber(remaining_gas if i == num_splits - 1 else gas_limit_cap) + remaining_gas -= gas_limit_cap split_tx.nonce = HexNumber(tx.nonce + i) split_transactions.append(split_tx) return split_transactions + def generate_blocks_from_code_generator(self, fork: Fork) -> List[Block]: + """Generate 
blocks using the code generator.""" + if self.code_generator is None: + return [] + + self.code_generator.deploy_contracts(self.pre) + gas_limit = fork.transaction_gas_limit_cap() or self.gas_benchmark_value + benchmark_tx = self.code_generator.generate_transaction(self.pre, gas_limit) + + execution_txs = self.split_transaction(benchmark_tx, gas_limit) + execution_block = Block(txs=execution_txs) + + return [execution_block] + def generate_blockchain_test(self, fork: Fork) -> BlockchainTest: """Create a BlockchainTest from this BenchmarkTest.""" - if self.blocks is not None: + if self.code_generator is not None: + generated_blocks = self.generate_blocks_from_code_generator(fork) + return BlockchainTest.from_test( + base_test=self, + genesis_environment=self.env, + pre=self.pre, + post=self.post, + blocks=generated_blocks, + ) + + elif self.benchmark_manager is not None: + all_blocks = [] + gas_limit = fork.transaction_gas_limit_cap() or self.gas_benchmark_value + + if self.benchmark_manager.setup_blocks: + all_blocks.extend(self.benchmark_manager.setup_blocks) + elif self.benchmark_manager.setup_transactions: + setup_txs = [] + for tx in self.benchmark_manager.setup_transactions: + setup_txs.extend(self.split_transaction(tx, gas_limit)) + all_blocks.append(Block(txs=setup_txs)) + + if self.benchmark_manager.execution_blocks: + all_blocks.extend(self.benchmark_manager.execution_blocks) + elif self.benchmark_manager.execution_transactions: + execution_txs = [] + for tx in self.benchmark_manager.execution_transactions: + execution_txs.extend(self.split_transaction(tx, gas_limit)) + all_blocks.append(Block(txs=execution_txs)) + + return BlockchainTest.from_test( + base_test=self, + genesis_environment=self.env, + pre=self.pre, + post=self.post, + blocks=all_blocks, + ) + elif self.blocks is not None: return BlockchainTest.from_test( base_test=self, genesis_environment=self.env, @@ -114,9 +240,9 @@ def generate_blockchain_test(self, fork: Fork) -> BlockchainTest: 
blocks=self.blocks, ) elif self.tx is not None: - gas_limit_cap = fork.transaction_gas_limit_cap() + gas_limit = fork.transaction_gas_limit_cap() or self.gas_benchmark_value - transactions = self.split_transaction(self.tx, gas_limit_cap) + transactions = self.split_transaction(self.tx, gas_limit) blocks = [Block(txs=transactions)] @@ -129,7 +255,7 @@ def generate_blockchain_test(self, fork: Fork) -> BlockchainTest: ) else: raise ValueError( - "Cannot create BlockchainTest without transactions, blocks, or code_generator" + "Cannot create BlockchainTest without transactions, blocks, or benchmark_manager" ) def generate( @@ -162,5 +288,10 @@ def execute( raise Exception(f"Unsupported execute format: {execute_format}") +def create_benchmark_manager() -> BenchmarkManager: + """Create a new BenchmarkManager instance for phase-aware benchmark testing.""" + return BenchmarkManager() + + BenchmarkTestSpec = Callable[[str], Generator[BenchmarkTest, None, None]] BenchmarkTestFiller = Type[BenchmarkTest] diff --git a/src/ethereum_test_tools/__init__.py b/src/ethereum_test_tools/__init__.py index 4c6d7980166..04b1770ac61 100644 --- a/src/ethereum_test_tools/__init__.py +++ b/src/ethereum_test_tools/__init__.py @@ -16,6 +16,11 @@ TestPrivateKey2, ) from ethereum_test_base_types.reference_spec import ReferenceSpec, ReferenceSpecTypes +from ethereum_test_benchmark import ( + BenchmarkCodeGenerator, + ExtCallGenerator, + JumpLoopGenerator, +) from ethereum_test_exceptions import ( BlockException, EngineAPIError, diff --git a/tests/benchmark/conftest.py b/tests/benchmark/conftest.py index 3af1bf9ade7..3f0a67ab556 100644 --- a/tests/benchmark/conftest.py +++ b/tests/benchmark/conftest.py @@ -4,6 +4,9 @@ import pytest +from ethereum_test_forks import Fork +from ethereum_test_specs.benchmark import BenchmarkManager, create_benchmark_manager + DEFAULT_BENCHMARK_FORK = "Prague" @@ -59,3 +62,15 @@ def pytest_collection_modifyitems(config, items): for i in reversed(items_for_removal): 
items.pop(i) + + +@pytest.fixture +def tx_gas_limit_cap(fork: Fork, gas_benchmark_value: int) -> int: + """Return the transaction gas limit cap.""" + return fork.transaction_gas_limit_cap() or gas_benchmark_value + + +@pytest.fixture +def benchmark_manager() -> BenchmarkManager: + """Return a benchmark manager.""" + return create_benchmark_manager() diff --git a/tests/benchmark/test_worst_blocks.py b/tests/benchmark/test_worst_blocks.py index d50ad80b91c..d19f34e8b35 100644 --- a/tests/benchmark/test_worst_blocks.py +++ b/tests/benchmark/test_worst_blocks.py @@ -119,6 +119,7 @@ def test_block_full_of_ether_transfers( iteration_count: int, transfer_amount: int, intrinsic_cost: int, + gas_benchmark_value: int, ): """ Single test for ether transfer scenarios. @@ -159,6 +160,7 @@ def test_block_full_of_ether_transfers( pre=pre, post=post_state, blocks=[Block(txs=txs)], + gas_benchmark_value=gas_benchmark_value, expected_benchmark_gas_used=iteration_count * intrinsic_cost, ) diff --git a/tests/benchmark/test_worst_compute.py b/tests/benchmark/test_worst_compute.py index fa6e8f63432..efa854625fe 100644 --- a/tests/benchmark/test_worst_compute.py +++ b/tests/benchmark/test_worst_compute.py @@ -15,11 +15,12 @@ from py_ecc.bn128 import G1, G2, multiply from ethereum_test_base_types.base_types import Bytes +from ethereum_test_benchmark import JumpLoopGenerator from ethereum_test_forks import Fork from ethereum_test_tools import ( Address, Alloc, - BenchmarkStateTestFiller, + BenchmarkTestFiller, Block, BlockchainTestFiller, Bytecode, @@ -1844,22 +1845,19 @@ def test_worst_jumpis( @pytest.mark.slow def test_worst_jumpdests( - benchmark_state_test: BenchmarkStateTestFiller, + benchmark_test: BenchmarkTestFiller, pre: Alloc, env: Environment, fork: Fork, gas_benchmark_value: int, ): """Test running a JUMPDEST-intensive contract.""" - generator = JumpLoopGenerator(fork, Op.JUMPDEST) - tx = generator.generate_transaction(pre, gas_benchmark_value) - - benchmark_state_test( + 
benchmark_test( env=env, - gas_benchmark_value=gas_benchmark_value, pre=pre, post={}, - tx=tx, + code_generator=JumpLoopGenerator(fork, Op.JUMPDEST), + gas_benchmark_value=gas_benchmark_value, ) @@ -2758,7 +2756,7 @@ def test_worst_calldataload( ], ) def test_worst_swap( - benchmark_state_test: BenchmarkStateTestFiller, + benchmark_test: BenchmarkTestFiller, pre: Alloc, env: Environment, fork: Fork, @@ -2766,25 +2764,11 @@ def test_worst_swap( gas_benchmark_value: int, ): """Test running a block with as many SWAP as possible.""" - max_code_size = fork.max_code_size() - - code_prefix = Op.JUMPDEST + Op.PUSH0 * opcode.min_stack_height - code_suffix = Op.PUSH0 + Op.JUMP - opcode_sequence = opcode * (max_code_size - len(code_prefix) - len(code_suffix)) - code = code_prefix + opcode_sequence + code_suffix - assert len(code) <= max_code_size - - tx = Transaction( - to=pre.deploy_contract(code=code), - sender=pre.fund_eoa(), - ) - - benchmark_state_test( - env=env, + benchmark_test( pre=pre, - gas_benchmark_value=gas_benchmark_value, post={}, - tx=tx, + code_generator=JumpLoopGenerator(fork, opcode, setup=Op.PUSH0 * opcode.min_stack_height), + gas_benchmark_value=gas_benchmark_value, ) diff --git a/tests/benchmark/test_worst_stateful_opcodes.py b/tests/benchmark/test_worst_stateful_opcodes.py index f68783e61c3..01d74b55518 100644 --- a/tests/benchmark/test_worst_stateful_opcodes.py +++ b/tests/benchmark/test_worst_stateful_opcodes.py @@ -10,6 +10,8 @@ import pytest from ethereum_test_forks import Fork +from ethereum_test_specs import BenchmarkTestFiller +from ethereum_test_specs.benchmark import BenchmarkManager from ethereum_test_tools import ( Account, Address, @@ -47,7 +49,8 @@ ], ) def test_worst_address_state_cold( - blockchain_test: BlockchainTestFiller, + benchmark_test: BenchmarkTestFiller, + benchmark_manager: BenchmarkManager, pre: Alloc, fork: Fork, opcode: Op, @@ -67,7 +70,6 @@ def test_worst_address_state_cold( attack_gas_limit - intrinsic_gas_cost_calc() ) 
// gas_costs.G_COLD_ACCOUNT_ACCESS - blocks = [] post = {} # Setup @@ -76,42 +78,53 @@ def test_worst_address_state_cold( # collisions with the addresses indirectly created by the testing framework. addr_offset = int.from_bytes(pre.fund_eoa(amount=0)) + # Create sender accounts upfront so we can include them in post-state + execution_sender = pre.fund_eoa() + if not absent_accounts: + setup_sender = pre.fund_eoa() factory_code = Op.PUSH4(num_target_accounts) + While( body=Op.POP(Op.CALL(address=Op.ADD(addr_offset, Op.DUP6), value=10)), condition=Op.PUSH1(1) + Op.SWAP1 + Op.SUB + Op.DUP1 + Op.ISZERO + Op.ISZERO, ) factory_address = pre.deploy_contract(code=factory_code, balance=10**18) - setup_tx = Transaction( - to=factory_address, - gas_limit=env.gas_limit, - sender=pre.fund_eoa(), - ) - blocks.append(Block(txs=[setup_tx])) + with benchmark_manager.setup(): + setup_tx = Transaction( + to=factory_address, + gas_limit=env.gas_limit, + sender=setup_sender, + ) + benchmark_manager.add_transaction(setup_tx) for i in range(num_target_accounts): addr = Address(i + addr_offset + 1) post[addr] = Account(balance=10) - # Execution + # Include setup sender in post-state + post[setup_sender] = Account() + + # Execution phase op_code = Op.PUSH4(num_target_accounts) + While( body=Op.POP(opcode(Op.ADD(addr_offset, Op.DUP1))), condition=Op.PUSH1(1) + Op.SWAP1 + Op.SUB + Op.DUP1 + Op.ISZERO + Op.ISZERO, ) op_address = pre.deploy_contract(code=op_code) - op_tx = Transaction( - to=op_address, - gas_limit=attack_gas_limit, - sender=pre.fund_eoa(), - ) - blocks.append(Block(txs=[op_tx])) - blockchain_test( + with benchmark_manager.execution(): + benchmark_manager.add_transaction( + Transaction( + to=op_address, + gas_limit=attack_gas_limit, + sender=execution_sender, + ) + ) + + benchmark_test( pre=pre, post=post, - blocks=blocks, - exclude_full_post_state_in_output=True, + benchmark_manager=benchmark_manager, + gas_benchmark_value=gas_benchmark_value, ) From 
51d6817c2d8108f7d804c5036ed5fb4cc8fdc602 Mon Sep 17 00:00:00 2001 From: LouisTsai Date: Thu, 11 Sep 2025 22:41:21 +0800 Subject: [PATCH 06/19] fix: udpate example changes --- tests/benchmark/test_worst_blocks.py | 23 +++--- .../benchmark/test_worst_stateful_opcodes.py | 73 +++++++++---------- 2 files changed, 47 insertions(+), 49 deletions(-) diff --git a/tests/benchmark/test_worst_blocks.py b/tests/benchmark/test_worst_blocks.py index d19f34e8b35..e8d557c1b8e 100644 --- a/tests/benchmark/test_worst_blocks.py +++ b/tests/benchmark/test_worst_blocks.py @@ -10,6 +10,7 @@ import pytest from ethereum_test_forks import Fork +from ethereum_test_specs.benchmark import BenchmarkManager from ethereum_test_tools import ( AccessList, Account, @@ -112,6 +113,7 @@ def ether_transfer_case( ) def test_block_full_of_ether_transfers( benchmark_test: BenchmarkTestFiller, + benchmark_manager: BenchmarkManager, pre: Alloc, env: Environment, case_id: str, @@ -136,17 +138,18 @@ def test_block_full_of_ether_transfers( # Create a single block with all transactions txs = [] balances: dict[Address, int] = {} - for _ in range(iteration_count): - receiver = next(receivers) - balances[receiver] = balances.get(receiver, 0) + transfer_amount - txs.append( - Transaction( - to=receiver, - value=transfer_amount, - gas_limit=intrinsic_cost, - sender=next(senders), + with benchmark_manager.execution(): + for _ in range(iteration_count): + receiver = next(receivers) + balances[receiver] = balances.get(receiver, 0) + transfer_amount + txs.append( + Transaction( + to=receiver, + value=transfer_amount, + gas_limit=intrinsic_cost, + sender=next(senders), + ) ) - ) # Only include post state for non a_to_a cases post_state = ( diff --git a/tests/benchmark/test_worst_stateful_opcodes.py b/tests/benchmark/test_worst_stateful_opcodes.py index 01d74b55518..5f92c7e6a2c 100644 --- a/tests/benchmark/test_worst_stateful_opcodes.py +++ b/tests/benchmark/test_worst_stateful_opcodes.py @@ -49,8 +49,7 @@ ], ) def 
test_worst_address_state_cold( - benchmark_test: BenchmarkTestFiller, - benchmark_manager: BenchmarkManager, + blockchain_test: BlockchainTestFiller, pre: Alloc, fork: Fork, opcode: Op, @@ -70,6 +69,7 @@ def test_worst_address_state_cold( attack_gas_limit - intrinsic_gas_cost_calc() ) // gas_costs.G_COLD_ACCOUNT_ACCESS + blocks = [] post = {} # Setup @@ -78,53 +78,42 @@ def test_worst_address_state_cold( # collisions with the addresses indirectly created by the testing framework. addr_offset = int.from_bytes(pre.fund_eoa(amount=0)) - # Create sender accounts upfront so we can include them in post-state - execution_sender = pre.fund_eoa() - if not absent_accounts: - setup_sender = pre.fund_eoa() factory_code = Op.PUSH4(num_target_accounts) + While( body=Op.POP(Op.CALL(address=Op.ADD(addr_offset, Op.DUP6), value=10)), condition=Op.PUSH1(1) + Op.SWAP1 + Op.SUB + Op.DUP1 + Op.ISZERO + Op.ISZERO, ) factory_address = pre.deploy_contract(code=factory_code, balance=10**18) - with benchmark_manager.setup(): - setup_tx = Transaction( - to=factory_address, - gas_limit=env.gas_limit, - sender=setup_sender, - ) - benchmark_manager.add_transaction(setup_tx) + setup_tx = Transaction( + to=factory_address, + gas_limit=env.gas_limit, + sender=pre.fund_eoa(), + ) + blocks.append(Block(txs=[setup_tx])) for i in range(num_target_accounts): addr = Address(i + addr_offset + 1) post[addr] = Account(balance=10) - # Include setup sender in post-state - post[setup_sender] = Account() - - # Execution phase + # Execution op_code = Op.PUSH4(num_target_accounts) + While( body=Op.POP(opcode(Op.ADD(addr_offset, Op.DUP1))), condition=Op.PUSH1(1) + Op.SWAP1 + Op.SUB + Op.DUP1 + Op.ISZERO + Op.ISZERO, ) op_address = pre.deploy_contract(code=op_code) + op_tx = Transaction( + to=op_address, + gas_limit=attack_gas_limit, + sender=pre.fund_eoa(), + ) + blocks.append(Block(txs=[op_tx])) - with benchmark_manager.execution(): - benchmark_manager.add_transaction( - Transaction( - to=op_address, - 
gas_limit=attack_gas_limit, - sender=execution_sender, - ) - ) - - benchmark_test( + blockchain_test( pre=pre, post=post, - benchmark_manager=benchmark_manager, - gas_benchmark_value=gas_benchmark_value, + blocks=blocks, + exclude_full_post_state_in_output=True, ) @@ -464,30 +453,36 @@ def test_worst_storage_access_warm( def test_worst_blockhash( - blockchain_test: BlockchainTestFiller, + benchmark_test: BenchmarkTestFiller, + benchmark_manager: BenchmarkManager, pre: Alloc, gas_benchmark_value: int, ): """Test running a block with as many blockhash accessing oldest allowed block as possible.""" # Create 256 dummy blocks to fill the blockhash window. - blocks = [Block()] * 256 + with benchmark_manager.setup(): + for _ in range(256): + benchmark_manager.add_block(Block()) # Always ask for the oldest allowed BLOCKHASH block. execution_code = Op.PUSH1(1) + While( body=Op.POP(Op.BLOCKHASH(Op.DUP1)), ) execution_code_address = pre.deploy_contract(code=execution_code) - op_tx = Transaction( - to=execution_code_address, - gas_limit=gas_benchmark_value, - sender=pre.fund_eoa(), - ) - blocks.append(Block(txs=[op_tx])) + with benchmark_manager.execution(): + benchmark_manager.add_transaction( + Transaction( + to=execution_code_address, + gas_limit=gas_benchmark_value, + sender=pre.fund_eoa(), + ) + ) - blockchain_test( + benchmark_test( pre=pre, post={}, - blocks=blocks, + benchmark_manager=benchmark_manager, + gas_benchmark_value=gas_benchmark_value, ) From 99f22d73d53f92559d2c1c3089348b70a85a20df Mon Sep 17 00:00:00 2001 From: LouisTsai Date: Fri, 12 Sep 2025 13:18:37 +0800 Subject: [PATCH 07/19] refactor: resolve typing and update func interface --- .../benchmark_code_generator.py | 59 +++----------- src/ethereum_test_specs/benchmark.py | 79 ++++++++++++++++--- src/ethereum_test_vm/bytecode.py | 19 ++++- src/pytest_plugins/shared/execute_fill.py | 1 + tests/benchmark/test_worst_compute.py | 10 +-- 5 files changed, 102 insertions(+), 66 deletions(-) diff --git 
a/src/ethereum_test_benchmark/benchmark_code_generator.py b/src/ethereum_test_benchmark/benchmark_code_generator.py index 32b73637c5f..dce3ef7392e 100644 --- a/src/ethereum_test_benchmark/benchmark_code_generator.py +++ b/src/ethereum_test_benchmark/benchmark_code_generator.py @@ -1,67 +1,26 @@ """Benchmark code generator classes for creating optimized bytecode patterns.""" -from abc import ABC, abstractmethod -from dataclasses import dataclass, field +from dataclasses import dataclass from ethereum_test_forks import Fork +from ethereum_test_specs.benchmark import BenchmarkCodeGenerator from ethereum_test_types import Alloc, Transaction from ethereum_test_vm import Bytecode from ethereum_test_vm.opcode import Opcodes as Op -@dataclass -class BenchmarkCodeGenerator(ABC): - """Abstract base class for generating benchmark bytecode.""" - - fork: Fork - attack_block: Bytecode - setup: Bytecode = field(default_factory=Bytecode) - - @abstractmethod - def deploy_contracts(self, pre: Alloc) -> None: - """Deploy any contracts needed for the benchmark.""" - pass - - @abstractmethod - def generate_transaction(self, pre: Alloc, gas_limit: int) -> Transaction: - """Generate a transaction with the specified gas limit.""" - pass - - def generate_repeated_code(self, repeated_code: Bytecode, setup: Bytecode) -> Bytecode: - """Calculate the maximum number of iterations that can fit in the code size limit.""" - assert len(repeated_code) > 0, "repeated_code cannot be empty" - max_code_size = self.fork.max_code_size() - - overhead = len(setup) + len(Op.JUMPDEST) + len(Op.JUMP(len(setup))) - available_space = max_code_size - overhead - max_iterations = available_space // len(repeated_code) - - code = setup + Op.JUMPDEST + repeated_code * max_iterations + Op.JUMP(len(setup)) - self._validate_code_size(code) - - return code - - def _validate_code_size(self, code: Bytecode) -> None: - """Validate that the generated code fits within size limits.""" - if len(code) > self.fork.max_code_size(): 
- raise ValueError( - f"Generated code size {len(code)} exceeds maximum allowed size " - f"{self.fork.max_code_size()}" - ) - - @dataclass class JumpLoopGenerator(BenchmarkCodeGenerator): """Generates bytecode that loops execution using JUMP operations.""" - def deploy_contracts(self, pre: Alloc) -> None: + def deploy_contracts(self, pre: Alloc, fork: Fork) -> None: """Deploy the looping contract.""" # Benchmark Test Structure: # setup + JUMPDEST + attack + attack + ... + attack + JUMP(setup_length) - code = self.generate_repeated_code(self.attack_block, self.setup) + code = self.generate_repeated_code(self.attack_block, self.setup, fork) self._contract_address = pre.deploy_contract(code=code) - def generate_transaction(self, pre: Alloc, gas_limit: int) -> Transaction: + def generate_transaction(self, pre: Alloc, gas_limit: int, fork: Fork) -> Transaction: """Generate transaction that executes the looping contract.""" if not hasattr(self, "_contract_address"): raise ValueError("deploy_contracts must be called before generate_transaction") @@ -77,14 +36,14 @@ def generate_transaction(self, pre: Alloc, gas_limit: int) -> Transaction: class ExtCallGenerator(BenchmarkCodeGenerator): """Generates bytecode that fills the contract to maximum allowed code size.""" - def deploy_contracts(self, pre: Alloc) -> None: + def deploy_contracts(self, pre: Alloc, fork: Fork) -> None: """Deploy both target and caller contracts.""" # Benchmark Test Structure: # There are two contracts: # 1. The target contract that executes certain operation but not loop (e.g. PUSH) # 2. The loop contract that calls the target contract in a loop - max_stack_height = self.fork.max_stack_height() + max_stack_height = fork.max_stack_height() # Deploy target contract that contains the actual attack block self._target_contract_address = pre.deploy_contract( @@ -96,10 +55,10 @@ def deploy_contracts(self, pre: Alloc) -> None: # setup + JUMPDEST + attack + attack + ... 
+ attack + JUMP(setup_length) code_sequence = Op.POP(Op.STATICCALL(Op.GAS, self._target_contract_address, 0, 0, 0, 0)) - caller_code = self.generate_repeated_code(code_sequence, Bytecode()) + caller_code = self.generate_repeated_code(code_sequence, Bytecode(), fork) self._contract_address = pre.deploy_contract(code=caller_code) - def generate_transaction(self, pre: Alloc, gas_limit: int) -> Transaction: + def generate_transaction(self, pre: Alloc, gas_limit: int, fork: Fork) -> Transaction: """Generate transaction that executes the caller contract.""" if not hasattr(self, "_contract_address"): raise ValueError("deploy_contracts must be called before generate_transaction") diff --git a/src/ethereum_test_specs/benchmark.py b/src/ethereum_test_specs/benchmark.py index 5bf670e0cce..57c241f7478 100644 --- a/src/ethereum_test_specs/benchmark.py +++ b/src/ethereum_test_specs/benchmark.py @@ -1,12 +1,19 @@ """Ethereum benchmark test spec definition and filler.""" +from abc import ABC, abstractmethod from contextlib import contextmanager from contextvars import ContextVar +from dataclasses import dataclass, field from enum import Enum from typing import Any, Callable, ClassVar, Dict, Generator, List, Optional, Sequence, Type import pytest -from pydantic import ConfigDict, Field +from pydantic import ConfigDict, Field, GetCoreSchemaHandler +from pydantic_core.core_schema import ( + PlainValidatorFunctionSchema, + no_info_plain_validator_function, + to_string_ser_schema, +) from ethereum_clis import TransitionTool from ethereum_test_base_types import HexNumber @@ -27,11 +34,55 @@ ) from ethereum_test_forks import Fork from ethereum_test_types import Alloc, Environment, Transaction +from ethereum_test_vm import Bytecode +from ethereum_test_vm.opcode import Opcodes as Op from .base import BaseTest from .blockchain import Block, BlockchainTest +@dataclass(kw_only=True) +class BenchmarkCodeGenerator(ABC): + """Abstract base class for generating benchmark bytecode.""" + + 
attack_block: Bytecode + setup: Bytecode = field(default_factory=Bytecode) + + @abstractmethod + def deploy_contracts(self, pre: Alloc, fork: Fork) -> None: + """Deploy any contracts needed for the benchmark.""" + pass + + @abstractmethod + def generate_transaction(self, pre: Alloc, gas_limit: int, fork: Fork) -> Transaction: + """Generate a transaction with the specified gas limit.""" + pass + + def generate_repeated_code( + self, repeated_code: Bytecode, setup: Bytecode, fork: Fork + ) -> Bytecode: + """Calculate the maximum number of iterations that can fit in the code size limit.""" + assert len(repeated_code) > 0, "repeated_code cannot be empty" + max_code_size = fork.max_code_size() + + overhead = len(setup) + len(Op.JUMPDEST) + len(Op.JUMP(len(setup))) + available_space = max_code_size - overhead + max_iterations = available_space // len(repeated_code) + + code = setup + Op.JUMPDEST + repeated_code * max_iterations + Op.JUMP(len(setup)) + self._validate_code_size(code, fork) + + return code + + def _validate_code_size(self, code: Bytecode, fork: Fork) -> None: + """Validate that the generated code fits within size limits.""" + if len(code) > fork.max_code_size(): + raise ValueError( + f"Generated code size {len(code)} exceeds maximum allowed size " + f"{fork.max_code_size()}" + ) + + class BenchmarkPhase(Enum): """Phases of a benchmark test.""" @@ -94,6 +145,16 @@ def get_current_phase(self) -> Optional[BenchmarkPhase]: """Get the current benchmark phase.""" return _current_phase.get() + @classmethod + def __get_pydantic_core_schema__( + cls, source_type: Any, handler: GetCoreSchemaHandler + ) -> PlainValidatorFunctionSchema: + """Provide Pydantic core schema for BenchmarkManager serialization and validation.""" + return no_info_plain_validator_function( + cls, + serialization=to_string_ser_schema(), + ) + class BenchmarkTest(BaseTest): """Test type designed specifically for benchmark test cases.""" @@ -101,17 +162,17 @@ class BenchmarkTest(BaseTest): 
model_config = ConfigDict(extra="forbid") pre: Alloc - post: Alloc - tx: Optional[Transaction] = None - blocks: Optional[List[Block]] = None + post: Alloc = Field(default_factory=Alloc) + tx: Transaction | None = None + blocks: List[Block] | None = None block_exception: ( List[TransactionException | BlockException] | TransactionException | BlockException | None ) = None env: Environment = Field(default_factory=Environment) expected_benchmark_gas_used: int | None = None - gas_benchmark_value: int - benchmark_manager: Optional[Any] = Field(default=None, exclude=True) - code_generator: Optional[Any] = Field(default=None, exclude=True) + gas_benchmark_value: int = Field(default_factory=lambda: int(Environment().gas_limit)) + benchmark_manager: BenchmarkManager | None = None + code_generator: BenchmarkCodeGenerator | None = None supported_fixture_formats: ClassVar[Sequence[FixtureFormat | LabeledFixtureFormat]] = [ BlockchainFixture, @@ -183,9 +244,9 @@ def generate_blocks_from_code_generator(self, fork: Fork) -> List[Block]: if self.code_generator is None: return [] - self.code_generator.deploy_contracts(self.pre) + self.code_generator.deploy_contracts(self.pre, fork) gas_limit = fork.transaction_gas_limit_cap() or self.gas_benchmark_value - benchmark_tx = self.code_generator.generate_transaction(self.pre, gas_limit) + benchmark_tx = self.code_generator.generate_transaction(self.pre, gas_limit, fork) execution_txs = self.split_transaction(benchmark_tx, gas_limit) execution_block = Block(txs=execution_txs) diff --git a/src/ethereum_test_vm/bytecode.py b/src/ethereum_test_vm/bytecode.py index e07ab2cad0e..12f07e528d3 100644 --- a/src/ethereum_test_vm/bytecode.py +++ b/src/ethereum_test_vm/bytecode.py @@ -1,6 +1,13 @@ """Ethereum Virtual Machine bytecode primitives and utilities.""" -from typing import SupportsBytes +from typing import Any, SupportsBytes + +from pydantic import GetCoreSchemaHandler +from pydantic_core.core_schema import ( + PlainValidatorFunctionSchema, + 
no_info_plain_validator_function, + to_string_ser_schema, +) from ethereum_test_base_types import Bytes, Hash @@ -217,3 +224,13 @@ def hex(self) -> str: def keccak256(self) -> Hash: """Return the keccak256 hash of the opcode byte representation.""" return Bytes(self._bytes_).keccak256() + + @classmethod + def __get_pydantic_core_schema__( + cls, source_type: Any, handler: GetCoreSchemaHandler + ) -> PlainValidatorFunctionSchema: + """Provide Pydantic core schema for Bytecode serialization and validation.""" + return no_info_plain_validator_function( + cls, + serialization=to_string_ser_schema(), + ) diff --git a/src/pytest_plugins/shared/execute_fill.py b/src/pytest_plugins/shared/execute_fill.py index 98fc765db07..21b9b7ea5ba 100644 --- a/src/pytest_plugins/shared/execute_fill.py +++ b/src/pytest_plugins/shared/execute_fill.py @@ -13,6 +13,7 @@ from ..spec_version_checker.spec_version_checker import EIPSpecTestItem ALL_FIXTURE_PARAMETERS = { + "gas_benchmark_value", "genesis_environment", "env", } diff --git a/tests/benchmark/test_worst_compute.py b/tests/benchmark/test_worst_compute.py index efa854625fe..e4338de3b7d 100644 --- a/tests/benchmark/test_worst_compute.py +++ b/tests/benchmark/test_worst_compute.py @@ -1849,15 +1849,13 @@ def test_worst_jumpdests( pre: Alloc, env: Environment, fork: Fork, - gas_benchmark_value: int, ): """Test running a JUMPDEST-intensive contract.""" benchmark_test( env=env, pre=pre, post={}, - code_generator=JumpLoopGenerator(fork, Op.JUMPDEST), - gas_benchmark_value=gas_benchmark_value, + code_generator=JumpLoopGenerator(attack_block=Op.JUMPDEST), ) @@ -2761,14 +2759,14 @@ def test_worst_swap( env: Environment, fork: Fork, opcode: Opcode, - gas_benchmark_value: int, ): """Test running a block with as many SWAP as possible.""" benchmark_test( pre=pre, post={}, - code_generator=JumpLoopGenerator(fork, opcode, setup=Op.PUSH0 * opcode.min_stack_height), - gas_benchmark_value=gas_benchmark_value, + code_generator=JumpLoopGenerator( + 
attack_block=opcode, setup=Op.PUSH0 * opcode.min_stack_height + ), ) From 67a07d767ca351749f4118ea1762e24ab65106eb Mon Sep 17 00:00:00 2001 From: LouisTsai Date: Fri, 12 Sep 2025 13:23:48 +0800 Subject: [PATCH 08/19] refactor: remove benchmark state test wrapper --- src/ethereum_test_specs/__init__.py | 4 - src/ethereum_test_specs/benchmark_state.py | 226 --------------------- src/ethereum_test_tools/__init__.py | 4 - 3 files changed, 234 deletions(-) delete mode 100644 src/ethereum_test_specs/benchmark_state.py diff --git a/src/ethereum_test_specs/__init__.py b/src/ethereum_test_specs/__init__.py index 9a714640746..e0baf8c5188 100644 --- a/src/ethereum_test_specs/__init__.py +++ b/src/ethereum_test_specs/__init__.py @@ -3,7 +3,6 @@ from .base import BaseTest, TestSpec from .base_static import BaseStaticTest from .benchmark import BenchmarkTest, BenchmarkTestFiller, BenchmarkTestSpec -from .benchmark_state import BenchmarkStateTest, BenchmarkStateTestFiller, BenchmarkStateTestSpec from .blobs import BlobsTest, BlobsTestFiller, BlobsTestSpec from .blockchain import ( BlockchainTest, @@ -28,9 +27,6 @@ "BenchmarkTest", "BenchmarkTestFiller", "BenchmarkTestSpec", - "BenchmarkStateTest", - "BenchmarkStateTestFiller", - "BenchmarkStateTestSpec", "BlobsTest", "BlobsTestFiller", "BlobsTestSpec", diff --git a/src/ethereum_test_specs/benchmark_state.py b/src/ethereum_test_specs/benchmark_state.py deleted file mode 100644 index 454af1a3844..00000000000 --- a/src/ethereum_test_specs/benchmark_state.py +++ /dev/null @@ -1,226 +0,0 @@ -"""Ethereum benchmark state test spec definition and filler.""" - -import math -from pprint import pprint -from typing import Callable, ClassVar, Generator, List, Sequence, Type - -from pydantic import ConfigDict - -from ethereum_clis import TransitionTool -from ethereum_test_base_types import HexNumber -from ethereum_test_execution import ( - BaseExecute, - ExecuteFormat, - LabeledExecuteFormat, - TransactionPost, -) -from ethereum_test_fixtures 
import ( - BaseFixture, - FixtureFormat, - LabeledFixtureFormat, - StateFixture, -) -from ethereum_test_fixtures.common import FixtureBlobSchedule -from ethereum_test_fixtures.state import ( - FixtureConfig, - FixtureEnvironment, - FixtureForkPost, - FixtureTransaction, -) -from ethereum_test_forks import Fork -from ethereum_test_types import Alloc, Environment, Transaction - -from .base import BaseTest, OpMode -from .blockchain import Block, BlockchainTest -from .debugging import print_traces -from .helpers import verify_transactions - - -class BenchmarkStateTest(BaseTest): - """Test type designed specifically for benchmark state test cases with full verification.""" - - pre: Alloc - post: Alloc - tx: Transaction - gas_benchmark_value: int - env: Environment - chain_id: int = 1 - - model_config = ConfigDict(arbitrary_types_allowed=True) - - supported_fixture_formats: ClassVar[Sequence[FixtureFormat | LabeledFixtureFormat]] = [ - StateFixture, - ] + [ - LabeledFixtureFormat( - fixture_format, - f"{fixture_format.format_name}_from_benchmark_state_test", - f"A {fixture_format.format_name} generated from a benchmark_state_test", - ) - for fixture_format in BlockchainTest.supported_fixture_formats - ] - - supported_execute_formats: ClassVar[Sequence[LabeledExecuteFormat]] = [ - LabeledExecuteFormat( - TransactionPost, - "benchmark_state_test_with_verification", - "An execute test derived from a benchmark state test with verification", - ), - ] - - def split_transaction(self, tx: Transaction, gas_limit_cap: int | None) -> List[Transaction]: - """Split a transaction that exceeds the gas limit cap into multiple transactions.""" - if (gas_limit_cap is None) or (tx.gas_limit <= gas_limit_cap): - return [tx] - - total_gas = int(tx.gas_limit) - num_splits = math.ceil(total_gas / gas_limit_cap) - - split_transactions = [] - remaining_gas = total_gas - for i in range(num_splits): - split_tx = tx.model_copy() - split_tx.gas_limit = HexNumber(min(gas_limit_cap, remaining_gas)) - 
split_tx.nonce = HexNumber(tx.nonce + i) - split_transactions.append(split_tx) - remaining_gas -= gas_limit_cap - - return split_transactions - - def make_benchmark_state_test_fixture( - self, - t8n: TransitionTool, - fork: Fork, - ) -> StateFixture: - """Create a fixture from the benchmark state test definition with full verification.""" - # We can't generate a state test fixture that names a transition fork, - # so we get the fork at the block number and timestamp of the state test - fork = fork.fork_at(self.env.number, self.env.timestamp) - - env = self.env.set_fork_requirements(fork) - tx = self.tx.with_signature_and_sender(keep_secret_key=True) - pre_alloc = Alloc.merge( - Alloc.model_validate(fork.pre_allocation()), - self.pre, - ) - - # Verification 1: Check for empty accounts - if empty_accounts := pre_alloc.empty_accounts(): - raise Exception(f"Empty accounts in pre state: {empty_accounts}") - - transition_tool_output = t8n.evaluate( - transition_tool_data=TransitionTool.TransitionToolData( - alloc=pre_alloc, - txs=[tx], - env=env, - fork=fork, - chain_id=self.chain_id, - reward=0, # Reward on state tests is always zero - blob_schedule=fork.blob_schedule(), - state_test=True, - ), - debug_output_path=self.get_next_transition_tool_output_path(), - slow_request=self.is_tx_gas_heavy_test(), - ) - - # Verification 2: Post-allocation verification - try: - self.post.verify_post_alloc(transition_tool_output.alloc) - except Exception as e: - print_traces(t8n.get_traces()) - raise e - - # Verification 3: Transaction verification - try: - verify_transactions( - txs=[tx], - result=transition_tool_output.result, - transition_tool_exceptions_reliable=t8n.exception_mapper.reliable, - ) - except Exception as e: - print_traces(t8n.get_traces()) - pprint(transition_tool_output.result) - pprint(transition_tool_output.alloc) - raise e - - # Verification 4: Benchmark gas validation - if self._operation_mode == OpMode.BENCHMARKING: - expected_benchmark_gas_used = 
self.gas_benchmark_value - gas_used = int(transition_tool_output.result.gas_used) - assert expected_benchmark_gas_used is not None, "gas_benchmark_value is not set" - assert gas_used == expected_benchmark_gas_used, ( - f"gas_used ({gas_used}) does not match gas_benchmark_value " - f"({expected_benchmark_gas_used})" - f", difference: {gas_used - expected_benchmark_gas_used}" - ) - - return StateFixture( - env=FixtureEnvironment(**env.model_dump(exclude_none=True)), - pre=pre_alloc, - post={ - fork: [ - FixtureForkPost( - state_root=transition_tool_output.result.state_root, - logs_hash=transition_tool_output.result.logs_hash, - tx_bytes=tx.rlp(), - expect_exception=tx.error, - state=transition_tool_output.alloc, - ) - ] - }, - transaction=FixtureTransaction.from_transaction(tx), - config=FixtureConfig( - blob_schedule=FixtureBlobSchedule.from_blob_schedule(fork.blob_schedule()), - chain_id=self.chain_id, - ), - ) - - def generate_blockchain_test(self, fork: Fork) -> BlockchainTest: - """Create a BlockchainTest from this BenchmarkStateTestWithVerification.""" - gas_limit_cap = fork.transaction_gas_limit_cap() - - transactions = self.split_transaction(self.tx, gas_limit_cap) - - blocks = [Block(txs=transactions)] - - return BlockchainTest.from_test( - base_test=self, - pre=self.pre, - post=self.post, - blocks=blocks, - genesis_environment=self.env, - ) - - def generate( - self, - t8n: TransitionTool, - fork: Fork, - fixture_format: FixtureFormat, - ) -> BaseFixture: - """Generate the test fixture.""" - self.check_exception_test(exception=self.tx.error is not None) - if fixture_format in BlockchainTest.supported_fixture_formats: - return self.generate_blockchain_test(fork=fork).generate( - t8n=t8n, fork=fork, fixture_format=fixture_format - ) - elif fixture_format == StateFixture: - return self.make_benchmark_state_test_fixture(t8n, fork) - - raise Exception(f"Unknown fixture format: {fixture_format}") - - def execute( - self, - *, - fork: Fork, - execute_format: 
ExecuteFormat, - ) -> BaseExecute: - """Execute the benchmark state test by sending it to the live network.""" - if execute_format == TransactionPost: - return TransactionPost( - blocks=[[self.tx]], - post=self.post, - ) - raise Exception(f"Unsupported execute format: {execute_format}") - - -BenchmarkStateTestFiller = Type[BenchmarkStateTest] -BenchmarkStateTestSpec = Callable[[str], Generator[BenchmarkStateTest, None, None]] diff --git a/src/ethereum_test_tools/__init__.py b/src/ethereum_test_tools/__init__.py index 04b1770ac61..3f99b1b9772 100644 --- a/src/ethereum_test_tools/__init__.py +++ b/src/ethereum_test_tools/__init__.py @@ -30,8 +30,6 @@ from ethereum_test_fixtures import BaseFixture, FixtureCollector from ethereum_test_specs import ( BaseTest, - BenchmarkStateTest, - BenchmarkStateTestFiller, BenchmarkTest, BenchmarkTestFiller, BlobsTest, @@ -129,8 +127,6 @@ "BenchmarkCodeGenerator", "BenchmarkTest", "BenchmarkTestFiller", - "BenchmarkStateTest", - "BenchmarkStateTestFiller", "Blob", "BlockAccessList", "BlobsTest", From 2e34a6a2313ae42f682b6f94cea77adb444532a9 Mon Sep 17 00:00:00 2001 From: LouisTsai Date: Fri, 12 Sep 2025 15:08:28 +0800 Subject: [PATCH 09/19] fix: pydantic model validation for benchmark manager --- src/ethereum_test_specs/benchmark.py | 11 ++++++++++- tests/benchmark/test_worst_stateful_opcodes.py | 1 - 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/ethereum_test_specs/benchmark.py b/src/ethereum_test_specs/benchmark.py index 57c241f7478..67b1152f646 100644 --- a/src/ethereum_test_specs/benchmark.py +++ b/src/ethereum_test_specs/benchmark.py @@ -150,8 +150,17 @@ def __get_pydantic_core_schema__( cls, source_type: Any, handler: GetCoreSchemaHandler ) -> PlainValidatorFunctionSchema: """Provide Pydantic core schema for BenchmarkManager serialization and validation.""" + + def validate_benchmark_manager(value): + if isinstance(value, cls): + return value + if value is None: + return None + # If value is passed as 
arguments, create new instance with no args + return cls() + return no_info_plain_validator_function( - cls, + validate_benchmark_manager, serialization=to_string_ser_schema(), ) diff --git a/tests/benchmark/test_worst_stateful_opcodes.py b/tests/benchmark/test_worst_stateful_opcodes.py index 5f92c7e6a2c..ae7ac31ba22 100644 --- a/tests/benchmark/test_worst_stateful_opcodes.py +++ b/tests/benchmark/test_worst_stateful_opcodes.py @@ -482,7 +482,6 @@ def test_worst_blockhash( pre=pre, post={}, benchmark_manager=benchmark_manager, - gas_benchmark_value=gas_benchmark_value, ) From 6470b46675214aad477b78487c2ecfdcef63bfdb Mon Sep 17 00:00:00 2001 From: LouisTsai Date: Wed, 17 Sep 2025 12:16:03 +0800 Subject: [PATCH 10/19] refactor syntax and parameter --- src/ethereum_test_specs/benchmark.py | 6 +++--- tests/benchmark/test_worst_blocks.py | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/ethereum_test_specs/benchmark.py b/src/ethereum_test_specs/benchmark.py index 67b1152f646..667dda9184d 100644 --- a/src/ethereum_test_specs/benchmark.py +++ b/src/ethereum_test_specs/benchmark.py @@ -51,12 +51,12 @@ class BenchmarkCodeGenerator(ABC): @abstractmethod def deploy_contracts(self, pre: Alloc, fork: Fork) -> None: """Deploy any contracts needed for the benchmark.""" - pass + ... @abstractmethod def generate_transaction(self, pre: Alloc, gas_limit: int, fork: Fork) -> Transaction: """Generate a transaction with the specified gas limit.""" - pass + ... 
def generate_repeated_code( self, repeated_code: Bytecode, setup: Bytecode, fork: Fork @@ -251,7 +251,7 @@ def split_transaction(self, tx: Transaction, gas_limit_cap: int | None) -> List[ def generate_blocks_from_code_generator(self, fork: Fork) -> List[Block]: """Generate blocks using the code generator.""" if self.code_generator is None: - return [] + raise Exception("Code generator is not set") self.code_generator.deploy_contracts(self.pre, fork) gas_limit = fork.transaction_gas_limit_cap() or self.gas_benchmark_value diff --git a/tests/benchmark/test_worst_blocks.py b/tests/benchmark/test_worst_blocks.py index e8d557c1b8e..4f1039a04ca 100644 --- a/tests/benchmark/test_worst_blocks.py +++ b/tests/benchmark/test_worst_blocks.py @@ -163,7 +163,6 @@ def test_block_full_of_ether_transfers( pre=pre, post=post_state, blocks=[Block(txs=txs)], - gas_benchmark_value=gas_benchmark_value, expected_benchmark_gas_used=iteration_count * intrinsic_cost, ) From 56e3b280ac795352edbd8319fbba85c257809e5e Mon Sep 17 00:00:00 2001 From: LouisTsai Date: Wed, 17 Sep 2025 15:11:07 +0800 Subject: [PATCH 11/19] refactor: remove benchmark manager feature --- src/ethereum_test_specs/benchmark.py | 131 +----------------- tests/benchmark/conftest.py | 7 - tests/benchmark/test_worst_blocks.py | 33 ++--- .../benchmark/test_worst_stateful_opcodes.py | 27 ++-- 4 files changed, 28 insertions(+), 170 deletions(-) diff --git a/src/ethereum_test_specs/benchmark.py b/src/ethereum_test_specs/benchmark.py index 667dda9184d..5f875ccc45d 100644 --- a/src/ethereum_test_specs/benchmark.py +++ b/src/ethereum_test_specs/benchmark.py @@ -1,19 +1,11 @@ """Ethereum benchmark test spec definition and filler.""" from abc import ABC, abstractmethod -from contextlib import contextmanager -from contextvars import ContextVar from dataclasses import dataclass, field -from enum import Enum -from typing import Any, Callable, ClassVar, Dict, Generator, List, Optional, Sequence, Type +from typing import Callable, 
ClassVar, Dict, Generator, List, Sequence, Type import pytest -from pydantic import ConfigDict, Field, GetCoreSchemaHandler -from pydantic_core.core_schema import ( - PlainValidatorFunctionSchema, - no_info_plain_validator_function, - to_string_ser_schema, -) +from pydantic import ConfigDict, Field from ethereum_clis import TransitionTool from ethereum_test_base_types import HexNumber @@ -83,88 +75,6 @@ def _validate_code_size(self, code: Bytecode, fork: Fork) -> None: ) -class BenchmarkPhase(Enum): - """Phases of a benchmark test.""" - - SETUP = "setup" - EXECUTION = "execution" - - -_current_phase: ContextVar[Optional[BenchmarkPhase]] = ContextVar("benchmark_phase", default=None) - - -class BenchmarkManager: - """Context manager for managing benchmark test phases.""" - - def __init__(self): - """Initialize the BenchmarkManager with empty transaction and block lists.""" - self.setup_transactions: List[Transaction] = [] - self.setup_blocks: List[Block] = [] - self.execution_transactions: List[Transaction] = [] - self.execution_blocks: List[Block] = [] - - @contextmanager - def setup(self): - """Context manager for the setup phase of a benchmark test.""" - token = _current_phase.set(BenchmarkPhase.SETUP) - try: - yield self - finally: - _current_phase.reset(token) - - @contextmanager - def execution(self): - """Context manager for the execution phase of a benchmark test.""" - token = _current_phase.set(BenchmarkPhase.EXECUTION) - try: - yield self - finally: - _current_phase.reset(token) - - def add_transaction(self, tx: Transaction): - """Add a transaction to the current phase.""" - current_phase = _current_phase.get() - if current_phase == BenchmarkPhase.SETUP: - self.setup_transactions.append(tx) - elif current_phase == BenchmarkPhase.EXECUTION: - self.execution_transactions.append(tx) - else: - self.setup_transactions.append(tx) - - def add_block(self, block: Block): - """Add a block to the current phase.""" - current_phase = _current_phase.get() - if 
current_phase == BenchmarkPhase.SETUP: - self.setup_blocks.append(block) - elif current_phase == BenchmarkPhase.EXECUTION: - self.execution_blocks.append(block) - else: - self.setup_blocks.append(block) - - def get_current_phase(self) -> Optional[BenchmarkPhase]: - """Get the current benchmark phase.""" - return _current_phase.get() - - @classmethod - def __get_pydantic_core_schema__( - cls, source_type: Any, handler: GetCoreSchemaHandler - ) -> PlainValidatorFunctionSchema: - """Provide Pydantic core schema for BenchmarkManager serialization and validation.""" - - def validate_benchmark_manager(value): - if isinstance(value, cls): - return value - if value is None: - return None - # If value is passed as arguments, create new instance with no args - return cls() - - return no_info_plain_validator_function( - validate_benchmark_manager, - serialization=to_string_ser_schema(), - ) - - class BenchmarkTest(BaseTest): """Test type designed specifically for benchmark test cases.""" @@ -180,7 +90,6 @@ class BenchmarkTest(BaseTest): env: Environment = Field(default_factory=Environment) expected_benchmark_gas_used: int | None = None gas_benchmark_value: int = Field(default_factory=lambda: int(Environment().gas_limit)) - benchmark_manager: BenchmarkManager | None = None code_generator: BenchmarkCodeGenerator | None = None supported_fixture_formats: ClassVar[Sequence[FixtureFormat | LabeledFixtureFormat]] = [ @@ -274,33 +183,6 @@ def generate_blockchain_test(self, fork: Fork) -> BlockchainTest: blocks=generated_blocks, ) - elif self.benchmark_manager is not None: - all_blocks = [] - gas_limit = fork.transaction_gas_limit_cap() or self.gas_benchmark_value - - if self.benchmark_manager.setup_blocks: - all_blocks.extend(self.benchmark_manager.setup_blocks) - elif self.benchmark_manager.setup_transactions: - setup_txs = [] - for tx in self.benchmark_manager.setup_transactions: - setup_txs.extend(self.split_transaction(tx, gas_limit)) - all_blocks.append(Block(txs=setup_txs)) - - 
if self.benchmark_manager.execution_blocks: - all_blocks.extend(self.benchmark_manager.execution_blocks) - elif self.benchmark_manager.execution_transactions: - execution_txs = [] - for tx in self.benchmark_manager.execution_transactions: - execution_txs.extend(self.split_transaction(tx, gas_limit)) - all_blocks.append(Block(txs=execution_txs)) - - return BlockchainTest.from_test( - base_test=self, - genesis_environment=self.env, - pre=self.pre, - post=self.post, - blocks=all_blocks, - ) elif self.blocks is not None: return BlockchainTest.from_test( base_test=self, @@ -324,9 +206,7 @@ def generate_blockchain_test(self, fork: Fork) -> BlockchainTest: genesis_environment=self.env, ) else: - raise ValueError( - "Cannot create BlockchainTest without transactions, blocks, or benchmark_manager" - ) + raise ValueError("Cannot create BlockchainTest without transactions or blocks") def generate( self, @@ -358,10 +238,5 @@ def execute( raise Exception(f"Unsupported execute format: {execute_format}") -def create_benchmark_manager() -> BenchmarkManager: - """Create a new BenchmarkManager instance for phase-aware benchmark testing.""" - return BenchmarkManager() - - BenchmarkTestSpec = Callable[[str], Generator[BenchmarkTest, None, None]] BenchmarkTestFiller = Type[BenchmarkTest] diff --git a/tests/benchmark/conftest.py b/tests/benchmark/conftest.py index 3f0a67ab556..1e2e7813817 100644 --- a/tests/benchmark/conftest.py +++ b/tests/benchmark/conftest.py @@ -5,7 +5,6 @@ import pytest from ethereum_test_forks import Fork -from ethereum_test_specs.benchmark import BenchmarkManager, create_benchmark_manager DEFAULT_BENCHMARK_FORK = "Prague" @@ -68,9 +67,3 @@ def pytest_collection_modifyitems(config, items): def tx_gas_limit_cap(fork: Fork, gas_benchmark_value: int) -> int: """Return the transaction gas limit cap.""" return fork.transaction_gas_limit_cap() or gas_benchmark_value - - -@pytest.fixture -def benchmark_manager() -> BenchmarkManager: - """Return a benchmark manager.""" - 
return create_benchmark_manager() diff --git a/tests/benchmark/test_worst_blocks.py b/tests/benchmark/test_worst_blocks.py index 4f1039a04ca..b71f3035067 100644 --- a/tests/benchmark/test_worst_blocks.py +++ b/tests/benchmark/test_worst_blocks.py @@ -9,15 +9,14 @@ import pytest +from ethereum_test_base_types import Account from ethereum_test_forks import Fork -from ethereum_test_specs.benchmark import BenchmarkManager from ethereum_test_tools import ( AccessList, - Account, Address, Alloc, - BenchmarkTestFiller, Block, + BlockchainTestFiller, Environment, Hash, StateTestFiller, @@ -112,8 +111,7 @@ def ether_transfer_case( ["a_to_a", "a_to_b", "diff_acc_to_b", "a_to_diff_acc", "diff_acc_to_diff_acc"], ) def test_block_full_of_ether_transfers( - benchmark_test: BenchmarkTestFiller, - benchmark_manager: BenchmarkManager, + blockchain_test: BlockchainTestFiller, pre: Alloc, env: Environment, case_id: str, @@ -138,18 +136,17 @@ def test_block_full_of_ether_transfers( # Create a single block with all transactions txs = [] balances: dict[Address, int] = {} - with benchmark_manager.execution(): - for _ in range(iteration_count): - receiver = next(receivers) - balances[receiver] = balances.get(receiver, 0) + transfer_amount - txs.append( - Transaction( - to=receiver, - value=transfer_amount, - gas_limit=intrinsic_cost, - sender=next(senders), - ) + for _ in range(iteration_count): + receiver = next(receivers) + balances[receiver] = balances.get(receiver, 0) + transfer_amount + txs.append( + Transaction( + to=receiver, + value=transfer_amount, + gas_limit=intrinsic_cost, + sender=next(senders), ) + ) # Only include post state for non a_to_a cases post_state = ( @@ -158,8 +155,8 @@ def test_block_full_of_ether_transfers( else {receiver: Account(balance=balance) for receiver, balance in balances.items()} ) - benchmark_test( - env=env, + blockchain_test( + genesis_environment=env, pre=pre, post=post_state, blocks=[Block(txs=txs)], diff --git 
a/tests/benchmark/test_worst_stateful_opcodes.py b/tests/benchmark/test_worst_stateful_opcodes.py index ae7ac31ba22..f68783e61c3 100644 --- a/tests/benchmark/test_worst_stateful_opcodes.py +++ b/tests/benchmark/test_worst_stateful_opcodes.py @@ -10,8 +10,6 @@ import pytest from ethereum_test_forks import Fork -from ethereum_test_specs import BenchmarkTestFiller -from ethereum_test_specs.benchmark import BenchmarkManager from ethereum_test_tools import ( Account, Address, @@ -453,35 +451,30 @@ def test_worst_storage_access_warm( def test_worst_blockhash( - benchmark_test: BenchmarkTestFiller, - benchmark_manager: BenchmarkManager, + blockchain_test: BlockchainTestFiller, pre: Alloc, gas_benchmark_value: int, ): """Test running a block with as many blockhash accessing oldest allowed block as possible.""" # Create 256 dummy blocks to fill the blockhash window. - with benchmark_manager.setup(): - for _ in range(256): - benchmark_manager.add_block(Block()) + blocks = [Block()] * 256 # Always ask for the oldest allowed BLOCKHASH block. 
execution_code = Op.PUSH1(1) + While( body=Op.POP(Op.BLOCKHASH(Op.DUP1)), ) execution_code_address = pre.deploy_contract(code=execution_code) - with benchmark_manager.execution(): - benchmark_manager.add_transaction( - Transaction( - to=execution_code_address, - gas_limit=gas_benchmark_value, - sender=pre.fund_eoa(), - ) - ) + op_tx = Transaction( + to=execution_code_address, + gas_limit=gas_benchmark_value, + sender=pre.fund_eoa(), + ) + blocks.append(Block(txs=[op_tx])) - benchmark_test( + blockchain_test( pre=pre, post={}, - benchmark_manager=benchmark_manager, + blocks=blocks, ) From d88f680f12b862e5a1e6ae2c848a198506a3b3fe Mon Sep 17 00:00:00 2001 From: LouisTsai Date: Wed, 17 Sep 2025 16:02:09 +0800 Subject: [PATCH 12/19] refactor: update logic and add benchmark tests --- .../benchmark_code_generator.py | 10 +- src/ethereum_test_specs/benchmark.py | 5 +- .../tests/test_benchmark.py | 105 ++++++++++++++++++ 3 files changed, 112 insertions(+), 8 deletions(-) create mode 100644 src/ethereum_test_specs/tests/test_benchmark.py diff --git a/src/ethereum_test_benchmark/benchmark_code_generator.py b/src/ethereum_test_benchmark/benchmark_code_generator.py index dce3ef7392e..bae71051e16 100644 --- a/src/ethereum_test_benchmark/benchmark_code_generator.py +++ b/src/ethereum_test_benchmark/benchmark_code_generator.py @@ -1,7 +1,5 @@ """Benchmark code generator classes for creating optimized bytecode patterns.""" -from dataclasses import dataclass - from ethereum_test_forks import Fork from ethereum_test_specs.benchmark import BenchmarkCodeGenerator from ethereum_test_types import Alloc, Transaction @@ -9,7 +7,6 @@ from ethereum_test_vm.opcode import Opcodes as Op -@dataclass class JumpLoopGenerator(BenchmarkCodeGenerator): """Generates bytecode that loops execution using JUMP operations.""" @@ -32,7 +29,6 @@ def generate_transaction(self, pre: Alloc, gas_limit: int, fork: Fork) -> Transa ) -@dataclass class ExtCallGenerator(BenchmarkCodeGenerator): """Generates bytecode 
that fills the contract to maximum allowed code size.""" @@ -43,11 +39,13 @@ def deploy_contracts(self, pre: Alloc, fork: Fork) -> None: # 1. The target contract that executes certain operation but not loop (e.g. PUSH) # 2. The loop contract that calls the target contract in a loop - max_stack_height = fork.max_stack_height() + max_iterations = min( + fork.max_stack_height(), fork.max_code_size() // len(self.attack_block) + ) # Deploy target contract that contains the actual attack block self._target_contract_address = pre.deploy_contract( - code=self.attack_block * max_stack_height + code=self.attack_block * max_iterations ) # Create caller contract that repeatedly calls the target contract diff --git a/src/ethereum_test_specs/benchmark.py b/src/ethereum_test_specs/benchmark.py index 5f875ccc45d..90beec4dd55 100644 --- a/src/ethereum_test_specs/benchmark.py +++ b/src/ethereum_test_specs/benchmark.py @@ -1,5 +1,6 @@ """Ethereum benchmark test spec definition and filler.""" +import math from abc import ABC, abstractmethod from dataclasses import dataclass, field from typing import Callable, ClassVar, Dict, Generator, List, Sequence, Type @@ -141,11 +142,11 @@ def split_transaction(self, tx: Transaction, gas_limit_cap: int | None) -> List[ return [tx] if gas_limit_cap >= self.gas_benchmark_value: - tx.gas_limit = HexNumber(min(tx.gas_limit, self.gas_benchmark_value)) + tx.gas_limit = HexNumber(self.gas_benchmark_value) return [tx] + num_splits = math.ceil(self.gas_benchmark_value / gas_limit_cap) remaining_gas = self.gas_benchmark_value - num_splits = remaining_gas // gas_limit_cap + int(remaining_gas % gas_limit_cap) split_transactions = [] for i in range(num_splits): diff --git a/src/ethereum_test_specs/tests/test_benchmark.py b/src/ethereum_test_specs/tests/test_benchmark.py new file mode 100644 index 00000000000..bd4a699720b --- /dev/null +++ b/src/ethereum_test_specs/tests/test_benchmark.py @@ -0,0 +1,105 @@ +"""Tests for the BenchmarkTest class and its 
transaction splitting functionality.""" + +import pytest + +from ethereum_test_base_types import HexNumber +from ethereum_test_specs.benchmark import BenchmarkTest +from ethereum_test_types import Alloc, Environment, Transaction + + +@pytest.mark.parametrize( + "gas_benchmark_value_millions,expected_splits", + [ + (1, 1), # 1M / 16M = 1 transaction + (10, 1), # 10M / 16M = 1 transaction + (30, 2), # 30M / 16M = 2 transactions (16M + 14M) + (45, 3), # 45M / 16M = 3 transactions (16M + 16M + 13M) + (60, 4), # 60M / 16M = 4 transactions (16M + 16M + 16M + 12M) + (100, 7), # 100M / 16M = 7 transactions (6x16M + 4M) + (150, 10), # 150M / 16M = 10 transactions (9x16M + 6M) + ], +) +def test_split_transaction(gas_benchmark_value_millions: int, expected_splits: int): + """Test that transaction splitting works correctly for Osaka fork gas cap.""" + gas_benchmark_value = gas_benchmark_value_millions * 1_000_000 + gas_limit_cap = 16_000_000 # Osaka's transaction gas limit cap + + # Create a minimal BenchmarkTest instance + benchmark_test = BenchmarkTest( + pre=Alloc(), + post=Alloc(), + tx=Transaction(sender=HexNumber(0), to=HexNumber(0), nonce=0), + env=Environment(), + gas_benchmark_value=gas_benchmark_value, + ) + + # Test the split_transaction method + assert benchmark_test.tx is not None, "Transaction should not be None" + split_txs = benchmark_test.split_transaction(benchmark_test.tx, gas_limit_cap) + + # Verify the number of transactions + assert len(split_txs) == expected_splits, ( + f"Expected {expected_splits} transactions for {gas_benchmark_value_millions}M gas, " + f"got {len(split_txs)}" + ) + + # Verify total gas equals the benchmark value + total_gas = sum(tx.gas_limit for tx in split_txs) + assert total_gas == gas_benchmark_value, ( + f"Total gas {total_gas} doesn't match benchmark value {gas_benchmark_value}" + ) + + # Verify no transaction exceeds the cap + for i, tx in enumerate(split_txs): + assert tx.gas_limit <= gas_limit_cap, ( + f"Transaction {i} gas 
limit {tx.gas_limit} exceeds cap {gas_limit_cap}" + ) + + # Verify nonces increment correctly + for i, tx in enumerate(split_txs): + assert tx.nonce == i, f"Transaction {i} has incorrect nonce {tx.nonce}" + + # Verify gas distribution + for i, tx in enumerate(split_txs[:-1]): # All but last should be at cap + assert tx.gas_limit == gas_limit_cap, ( + f"Transaction {i} should have gas limit {gas_limit_cap}, got {tx.gas_limit}" + ) + + # Last transaction should have the remainder + if expected_splits > 1: + expected_last_gas = gas_benchmark_value - (gas_limit_cap * (expected_splits - 1)) + assert split_txs[-1].gas_limit == expected_last_gas, ( + f"Last transaction should have {expected_last_gas} gas, got {split_txs[-1].gas_limit}" + ) + + +@pytest.mark.parametrize( + "gas_benchmark_value,gas_limit_cap", + [ + (50_000_000, None), # No cap - should return single transaction + (50_000_000, 100_000_000), # Cap higher than benchmark value + ], +) +def test_split_transaction_edge_cases(gas_benchmark_value: int, gas_limit_cap: int | None): + """Test edge cases for transaction splitting.""" + benchmark_test = BenchmarkTest( + pre=Alloc(), + post=Alloc(), + tx=Transaction(sender=HexNumber(0), to=HexNumber(0), nonce=0, gas_limit=1_000_000_000), + env=Environment(), + gas_benchmark_value=gas_benchmark_value, + ) + + assert benchmark_test.tx is not None, "Transaction should not be None" + split_txs = benchmark_test.split_transaction(benchmark_test.tx, gas_limit_cap) + + # Should return single transaction in both cases + assert len(split_txs) == 1, f"Expected 1 transaction, got {len(split_txs)}" + + if gas_limit_cap is None: + # When no cap, gas_limit should be benchmark value + assert split_txs[0].gas_limit == gas_benchmark_value + else: + # When cap > benchmark, gas_limit should be min of tx.gas_limit and benchmark + assert benchmark_test.tx is not None, "Transaction should not be None" + assert split_txs[0].gas_limit == min(benchmark_test.tx.gas_limit, gas_benchmark_value) 
From 80281e9cc3669206396b836e4b7835c541e7444d Mon Sep 17 00:00:00 2001 From: LouisTsai Date: Thu, 18 Sep 2025 15:59:25 +0800 Subject: [PATCH 13/19] refactor: enforce single property requirement in blockchain test generation --- src/ethereum_test_specs/benchmark.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/ethereum_test_specs/benchmark.py b/src/ethereum_test_specs/benchmark.py index 90beec4dd55..6c3b915891d 100644 --- a/src/ethereum_test_specs/benchmark.py +++ b/src/ethereum_test_specs/benchmark.py @@ -174,6 +174,21 @@ def generate_blocks_from_code_generator(self, fork: Fork) -> List[Block]: def generate_blockchain_test(self, fork: Fork) -> BlockchainTest: """Create a BlockchainTest from this BenchmarkTest.""" + set_props = [ + name + for name, val in [ + ("code_generator", self.code_generator), + ("blocks", self.blocks), + ("tx", self.tx), + ] + if val is not None + ] + + if len(set_props) != 1: + raise ValueError( + f"Exactly one must be set, but got {len(set_props)}: {', '.join(set_props)}" + ) + if self.code_generator is not None: generated_blocks = self.generate_blocks_from_code_generator(fork) return BlockchainTest.from_test( @@ -183,7 +198,6 @@ def generate_blockchain_test(self, fork: Fork) -> BlockchainTest: post=self.post, blocks=generated_blocks, ) - elif self.blocks is not None: return BlockchainTest.from_test( base_test=self, From 0a0c149ccf6be1ce76edd2ec32b7282e13bde9f7 Mon Sep 17 00:00:00 2001 From: LouisTsai Date: Thu, 18 Sep 2025 16:02:39 +0800 Subject: [PATCH 14/19] refactor: update Bytecode serialization schema to use format_ser_schema --- src/ethereum_test_vm/bytecode.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ethereum_test_vm/bytecode.py b/src/ethereum_test_vm/bytecode.py index 12f07e528d3..d93188ee2ca 100644 --- a/src/ethereum_test_vm/bytecode.py +++ b/src/ethereum_test_vm/bytecode.py @@ -5,8 +5,8 @@ from pydantic import GetCoreSchemaHandler from 
pydantic_core.core_schema import ( PlainValidatorFunctionSchema, + format_ser_schema, no_info_plain_validator_function, - to_string_ser_schema, ) from ethereum_test_base_types import Bytes, Hash @@ -232,5 +232,5 @@ def __get_pydantic_core_schema__( """Provide Pydantic core schema for Bytecode serialization and validation.""" return no_info_plain_validator_function( cls, - serialization=to_string_ser_schema(), + serialization=format_ser_schema("0x{}"), ) From f5ca3e5c036f17d65d07ee8449b85095f606fcb0 Mon Sep 17 00:00:00 2001 From: LouisTsai Date: Mon, 22 Sep 2025 11:39:26 +0800 Subject: [PATCH 15/19] refactor: update import paths --- src/ethereum_test_benchmark/benchmark_code_generator.py | 2 +- src/ethereum_test_specs/benchmark.py | 2 +- src/ethereum_test_tools/__init__.py | 5 ----- tests/benchmark/test_worst_compute.py | 3 +-- 4 files changed, 3 insertions(+), 9 deletions(-) diff --git a/src/ethereum_test_benchmark/benchmark_code_generator.py b/src/ethereum_test_benchmark/benchmark_code_generator.py index bae71051e16..9c2c9b7814a 100644 --- a/src/ethereum_test_benchmark/benchmark_code_generator.py +++ b/src/ethereum_test_benchmark/benchmark_code_generator.py @@ -4,7 +4,7 @@ from ethereum_test_specs.benchmark import BenchmarkCodeGenerator from ethereum_test_types import Alloc, Transaction from ethereum_test_vm import Bytecode -from ethereum_test_vm.opcode import Opcodes as Op +from ethereum_test_vm.opcodes import Opcodes as Op class JumpLoopGenerator(BenchmarkCodeGenerator): diff --git a/src/ethereum_test_specs/benchmark.py b/src/ethereum_test_specs/benchmark.py index 6c3b915891d..440faa8b844 100644 --- a/src/ethereum_test_specs/benchmark.py +++ b/src/ethereum_test_specs/benchmark.py @@ -28,7 +28,7 @@ from ethereum_test_forks import Fork from ethereum_test_types import Alloc, Environment, Transaction from ethereum_test_vm import Bytecode -from ethereum_test_vm.opcode import Opcodes as Op +from ethereum_test_vm.opcodes import Opcodes as Op from .base import BaseTest 
from .blockchain import Block, BlockchainTest diff --git a/src/ethereum_test_tools/__init__.py b/src/ethereum_test_tools/__init__.py index 3f99b1b9772..fc8057c0017 100644 --- a/src/ethereum_test_tools/__init__.py +++ b/src/ethereum_test_tools/__init__.py @@ -89,11 +89,6 @@ call_return_code, ) -from .benchmark_code_generator import ( - BenchmarkCodeGenerator, - ExtCallGenerator, - JumpLoopGenerator, -) from .tools_code import ( CalldataCase, Case, diff --git a/tests/benchmark/test_worst_compute.py b/tests/benchmark/test_worst_compute.py index e4338de3b7d..4c1f74b5137 100644 --- a/tests/benchmark/test_worst_compute.py +++ b/tests/benchmark/test_worst_compute.py @@ -15,7 +15,7 @@ from py_ecc.bn128 import G1, G2, multiply from ethereum_test_base_types.base_types import Bytes -from ethereum_test_benchmark import JumpLoopGenerator +from ethereum_test_benchmark.benchmark_code_generator import JumpLoopGenerator from ethereum_test_forks import Fork from ethereum_test_tools import ( Address, @@ -29,7 +29,6 @@ Transaction, add_kzg_version, ) -from ethereum_test_tools.benchmark_code_generator import JumpLoopGenerator from ethereum_test_types import TransactionType from ethereum_test_vm import Opcode from ethereum_test_vm import Opcodes as Op From c4e8fbd337319a00620dd5b8b04902c1aec4b8ba Mon Sep 17 00:00:00 2001 From: LouisTsai Date: Mon, 22 Sep 2025 12:03:24 +0800 Subject: [PATCH 16/19] refactor: update serialization schema --- src/ethereum_test_vm/bytecode.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/ethereum_test_vm/bytecode.py b/src/ethereum_test_vm/bytecode.py index d93188ee2ca..5eea5b0cce3 100644 --- a/src/ethereum_test_vm/bytecode.py +++ b/src/ethereum_test_vm/bytecode.py @@ -5,8 +5,8 @@ from pydantic import GetCoreSchemaHandler from pydantic_core.core_schema import ( PlainValidatorFunctionSchema, - format_ser_schema, no_info_plain_validator_function, + plain_serializer_function_ser_schema, ) from ethereum_test_base_types import Bytes, 
Hash @@ -232,5 +232,8 @@ def __get_pydantic_core_schema__( """Provide Pydantic core schema for Bytecode serialization and validation.""" return no_info_plain_validator_function( cls, - serialization=format_ser_schema("0x{}"), + serialization=plain_serializer_function_ser_schema( + lambda bytecode: "0x" + bytecode.hex(), + info_arg=False, + ), ) From 1df840bac7fcf6f9d42257caa2b5db3ba2f8bcd7 Mon Sep 17 00:00:00 2001 From: LouisTsai Date: Mon, 22 Sep 2025 12:10:14 +0800 Subject: [PATCH 17/19] refactor: remove unused parameters --- tests/benchmark/test_worst_compute.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/benchmark/test_worst_compute.py b/tests/benchmark/test_worst_compute.py index 4c1f74b5137..d7437a77e4c 100644 --- a/tests/benchmark/test_worst_compute.py +++ b/tests/benchmark/test_worst_compute.py @@ -1846,12 +1846,9 @@ def test_worst_jumpis( def test_worst_jumpdests( benchmark_test: BenchmarkTestFiller, pre: Alloc, - env: Environment, - fork: Fork, ): """Test running a JUMPDEST-intensive contract.""" benchmark_test( - env=env, pre=pre, post={}, code_generator=JumpLoopGenerator(attack_block=Op.JUMPDEST), @@ -2755,8 +2752,6 @@ def test_worst_calldataload( def test_worst_swap( benchmark_test: BenchmarkTestFiller, pre: Alloc, - env: Environment, - fork: Fork, opcode: Opcode, ): """Test running a block with as many SWAP as possible.""" From e2f462b03406ec7b6b7d9dbe227c84f3758aefe9 Mon Sep 17 00:00:00 2001 From: LouisTsai Date: Thu, 25 Sep 2025 01:01:53 +0800 Subject: [PATCH 18/19] doc: add changelog entry --- docs/CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index c2bc908362e..c075e66cb9d 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -10,6 +10,8 @@ Test fixtures for use by clients are available for each release on the [Github r ### 🛠️ Framework +- ✨ Add benchmark specify test wrapper (`benchmark_test`) that supports **EIP-7825** and create a benchmark code generator for common 
test pattern ([#1945](https://github.com/ethereum/execution-spec-tests/pull/1945)). + #### `fill` - Move pytest marker registration for `fill` and `execute-*` from their respective ini files to the shared `pytest_plugins.shared.execute_fill` pytest plugin ([#2110](https://github.com/ethereum/execution-spec-tests/pull/2110)). From 0e597d5a0e032f889a767d97019fcce767196ac4 Mon Sep 17 00:00:00 2001 From: LouisTsai Date: Thu, 25 Sep 2025 01:17:15 +0800 Subject: [PATCH 19/19] fix typo --- docs/CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index c075e66cb9d..f24e5611029 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -10,7 +10,7 @@ Test fixtures for use by clients are available for each release on the [Github r ### 🛠️ Framework -- ✨ Add benchmark specify test wrapper (`benchmark_test`) that supports **EIP-7825** and create a benchmark code generator for common test pattern ([#1945](https://github.com/ethereum/execution-spec-tests/pull/1945)). +- ✨ Add benchmark-specific test wrapper (`benchmark_test`) that supports **EIP-7825** and create a benchmark code generator for common test pattern ([#1945](https://github.com/ethereum/execution-spec-tests/pull/1945)). #### `fill`