-
Notifications
You must be signed in to change notification settings - Fork 178
feat(benchmark): add benchmark_test
test type
#1945
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
19 commits
Select commit
Hold shift + click to select a range
a83ee49
feat: wrap blockchain test for benchmark
LouisTsai-Csie 09c09cb
feat: wrap state test for benchmark
LouisTsai-Csie 87bd45d
feat(benchmark): add code generator to generate transaction
LouisTsai-Csie 8ca027f
fix: resolve typing issue
LouisTsai-Csie d76104f
refactor: update benchmark code generator and test wrapper
LouisTsai-Csie 51d6817
fix: update example changes
LouisTsai-Csie 99f22d7
refactor: resolve typing and update func interface
LouisTsai-Csie 67a07d7
refactor: remove benchmark state test wrapper
LouisTsai-Csie 2e34a6a
fix: pydantic model validation for benchmark manager
LouisTsai-Csie 6470b46
refactor syntax and parameter
LouisTsai-Csie 56e3b28
refactor: remove benchmark manager feature
LouisTsai-Csie d88f680
refactor: update logic and add benchmark tests
LouisTsai-Csie 80281e9
refactor: enforce single property requirement in blockchain test gene…
LouisTsai-Csie 0a0c149
refactor: update Bytecode serialization schema to use format_ser_schema
LouisTsai-Csie f5ca3e5
refactor: update import paths
LouisTsai-Csie c4e8fbd
refactor: update serialization schema
LouisTsai-Csie 1df840b
refactor: remove unused parameters
LouisTsai-Csie e2f462b
doc: add changelog entry
LouisTsai-Csie 0e597d5
fix typo
LouisTsai-Csie File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
"""Benchmark code generator classes for creating optimized bytecode patterns.""" | ||
|
||
from .benchmark_code_generator import ( | ||
BenchmarkCodeGenerator, | ||
ExtCallGenerator, | ||
JumpLoopGenerator, | ||
) | ||
|
||
__all__ = ( | ||
"BenchmarkCodeGenerator", | ||
"ExtCallGenerator", | ||
"JumpLoopGenerator", | ||
) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
"""Benchmark code generator classes for creating optimized bytecode patterns.""" | ||
|
||
from ethereum_test_forks import Fork | ||
from ethereum_test_specs.benchmark import BenchmarkCodeGenerator | ||
from ethereum_test_types import Alloc, Transaction | ||
from ethereum_test_vm import Bytecode | ||
from ethereum_test_vm.opcodes import Opcodes as Op | ||
|
||
|
||
class JumpLoopGenerator(BenchmarkCodeGenerator):
    """Benchmark generator whose single contract re-runs the attack block via a backwards JUMP."""

    def deploy_contracts(self, pre: Alloc, fork: Fork) -> None:
        """Build the looping bytecode and deploy it through ``pre``."""
        # Layout produced by generate_repeated_code:
        #   setup + JUMPDEST + attack + attack + ... + attack + JUMP(len(setup))
        looping_code = self.generate_repeated_code(
            repeated_code=self.attack_block,
            setup=self.setup,
            fork=fork,
        )
        # Remembered so generate_transaction can target the deployed contract.
        self._contract_address = pre.deploy_contract(code=looping_code)

    def generate_transaction(self, pre: Alloc, gas_limit: int, fork: Fork) -> Transaction:
        """
        Return a transaction that calls the deployed looping contract.

        The sender is a freshly funded EOA obtained from ``pre``.

        Raises:
            ValueError: If ``deploy_contracts`` has not been called on this instance yet.

        """
        if hasattr(self, "_contract_address"):
            return Transaction(
                to=self._contract_address,
                gas_limit=gas_limit,
                sender=pre.fund_eoa(),
            )
        raise ValueError("deploy_contracts must be called before generate_transaction")
|
||
|
||
class ExtCallGenerator(BenchmarkCodeGenerator):
    """
    Benchmark generator that uses two contracts: a target and a looping caller.

    The target contract holds the attack block repeated back to back (no loop).
    The caller contract repeatedly ``STATICCALL``s the target inside a JUMP loop.
    """

    def deploy_contracts(self, pre: Alloc, fork: Fork) -> None:
        """
        Deploy the target contract and the caller contract through ``pre``.

        Raises:
            ValueError: If ``attack_block`` is empty, since the repetition count
                would otherwise be computed with a division by zero.

        """
        # NOTE(review): `self.setup` is not used by this generator (the caller is
        # built with an empty setup) -- confirm that this is intended.
        attack_size = len(self.attack_block)
        if attack_size == 0:
            raise ValueError("attack_block cannot be empty")

        # Repetitions are capped both by the code size limit and by the maximum
        # stack height.
        # NOTE(review): the stack cap presumably assumes every repetition leaves at
        # most one item on the stack (e.g. a PUSH) -- confirm for other attack blocks.
        max_iterations = min(fork.max_stack_height(), fork.max_code_size() // attack_size)

        # 1. Target contract: the attack block repeated, executed once per call.
        self._target_contract_address = pre.deploy_contract(
            code=self.attack_block * max_iterations
        )

        # 2. Caller contract: loops over a call into the target.
        #    call   = POP(STATICCALL(GAS, target_contract_address, 0, 0, 0, 0))
        #    layout = JUMPDEST + call + call + ... + call + JUMP(0)   (setup is empty)
        code_sequence = Op.POP(Op.STATICCALL(Op.GAS, self._target_contract_address, 0, 0, 0, 0))

        caller_code = self.generate_repeated_code(code_sequence, Bytecode(), fork)
        # Remembered so generate_transaction can target the caller contract.
        self._contract_address = pre.deploy_contract(code=caller_code)

    def generate_transaction(self, pre: Alloc, gas_limit: int, fork: Fork) -> Transaction:
        """
        Return a transaction that calls the deployed caller contract.

        The sender is a freshly funded EOA obtained from ``pre``.

        Raises:
            ValueError: If ``deploy_contracts`` has not been called on this instance yet.

        """
        if not hasattr(self, "_contract_address"):
            raise ValueError("deploy_contracts must be called before generate_transaction")

        return Transaction(
            to=self._contract_address,
            gas_limit=gas_limit,
            sender=pre.fund_eoa(),
        )
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,257 @@ | ||
"""Ethereum benchmark test spec definition and filler.""" | ||
|
||
import math | ||
from abc import ABC, abstractmethod | ||
from dataclasses import dataclass, field | ||
from typing import Callable, ClassVar, Dict, Generator, List, Sequence, Type | ||
|
||
import pytest | ||
from pydantic import ConfigDict, Field | ||
|
||
from ethereum_clis import TransitionTool | ||
from ethereum_test_base_types import HexNumber | ||
from ethereum_test_exceptions import BlockException, TransactionException | ||
from ethereum_test_execution import ( | ||
BaseExecute, | ||
ExecuteFormat, | ||
LabeledExecuteFormat, | ||
TransactionPost, | ||
) | ||
from ethereum_test_fixtures import ( | ||
BaseFixture, | ||
BlockchainEngineFixture, | ||
BlockchainEngineXFixture, | ||
BlockchainFixture, | ||
FixtureFormat, | ||
LabeledFixtureFormat, | ||
) | ||
from ethereum_test_forks import Fork | ||
from ethereum_test_types import Alloc, Environment, Transaction | ||
from ethereum_test_vm import Bytecode | ||
from ethereum_test_vm.opcodes import Opcodes as Op | ||
|
||
from .base import BaseTest | ||
from .blockchain import Block, BlockchainTest | ||
|
||
|
||
@dataclass(kw_only=True)
class BenchmarkCodeGenerator(ABC):
    """
    Abstract base class for generating benchmark bytecode.

    Subclasses implement ``deploy_contracts`` and ``generate_transaction``; the
    shared ``generate_repeated_code`` helper builds a JUMP loop around a block
    of bytecode while respecting the fork's maximum code size.
    """

    # Bytecode under benchmark; subclasses decide how it is repeated.
    attack_block: Bytecode
    # Bytecode that runs once before the loop; defaults to empty bytecode.
    setup: Bytecode = field(default_factory=Bytecode)

    @abstractmethod
    def deploy_contracts(self, pre: Alloc, fork: Fork) -> None:
        """Deploy any contracts needed for the benchmark."""
        ...

    @abstractmethod
    def generate_transaction(self, pre: Alloc, gas_limit: int, fork: Fork) -> Transaction:
        """Generate a transaction with the specified gas limit."""
        ...

    def generate_repeated_code(
        self, repeated_code: Bytecode, setup: Bytecode, fork: Fork
    ) -> Bytecode:
        """
        Build looping bytecode that repeats ``repeated_code`` as often as the code size allows.

        The returned bytecode has the layout
        ``setup + JUMPDEST + repeated_code * n + JUMP(len(setup))``, where ``n`` is the
        largest repetition count that keeps the total within ``fork.max_code_size()``.
        The trailing JUMP targets the JUMPDEST, which sits right after ``setup``.

        Args:
            repeated_code: Non-empty bytecode forming one iteration of the loop body.
            setup: Bytecode executed once before the loop starts (may be empty).
            fork: Fork that supplies the maximum code size.

        Returns:
            The assembled bytecode.

        Raises:
            AssertionError: If ``repeated_code`` is empty.
            ValueError: If ``setup`` plus the loop scaffolding alone exceeds the
                maximum code size, or if the assembled code does.

        """
        assert len(repeated_code) > 0, "repeated_code cannot be empty"
        max_code_size = fork.max_code_size()

        loop_start = Op.JUMPDEST
        jump_back = Op.JUMP(len(setup))

        # Bytes that are always present regardless of how often the body repeats.
        overhead = len(setup) + len(loop_start) + len(jump_back)
        available_space = max_code_size - overhead
        if available_space < 0:
            # Fail early with a clear message instead of deriving a negative
            # repetition count from the floor division below.
            raise ValueError(
                f"Setup code and loop overhead need {overhead} bytes, which exceeds the "
                f"maximum allowed code size {max_code_size}"
            )
        max_iterations = available_space // len(repeated_code)

        code = setup + loop_start + repeated_code * max_iterations + jump_back
        self._validate_code_size(code, fork)

        return code

    def _validate_code_size(self, code: Bytecode, fork: Fork) -> None:
        """
        Validate that the generated code fits within size limits.

        Raises:
            ValueError: If ``len(code)`` is greater than ``fork.max_code_size()``.

        """
        if len(code) > fork.max_code_size():
            raise ValueError(
                f"Generated code size {len(code)} exceeds maximum allowed size "
                f"{fork.max_code_size()}"
            )
|
||
|
||
class BenchmarkTest(BaseTest):
    """
    Test type designed specifically for benchmark test cases.

    The workload is described by exactly one of ``tx``, ``blocks`` or
    ``code_generator``. Fixtures are produced by converting the benchmark into a
    ``BlockchainTest``.
    """

    model_config = ConfigDict(extra="forbid")

    pre: Alloc
    post: Alloc = Field(default_factory=Alloc)
    # Workload sources; exactly one of `tx`, `blocks`, `code_generator` must be set.
    tx: Transaction | None = None
    blocks: List[Block] | None = None
    block_exception: (
        List[TransactionException | BlockException] | TransactionException | BlockException | None
    ) = None
    env: Environment = Field(default_factory=Environment)
    expected_benchmark_gas_used: int | None = None
    # Total gas the benchmark transaction(s) are given; defaults to the gas limit
    # of a default `Environment`.
    gas_benchmark_value: int = Field(default_factory=lambda: int(Environment().gas_limit))
    code_generator: BenchmarkCodeGenerator | None = None

    supported_fixture_formats: ClassVar[Sequence[FixtureFormat | LabeledFixtureFormat]] = [
        BlockchainFixture,
        BlockchainEngineFixture,
        BlockchainEngineXFixture,
    ]

    supported_execute_formats: ClassVar[Sequence[LabeledExecuteFormat]] = [
        LabeledExecuteFormat(
            TransactionPost,
            "benchmark_test",
            "An execute test derived from a benchmark test",
        ),
    ]

    supported_markers: ClassVar[Dict[str, str]] = {
        "blockchain_test_engine_only": "Only generate a blockchain test engine fixture",
        "blockchain_test_only": "Only generate a blockchain test fixture",
    }

    @classmethod
    def pytest_parameter_name(cls) -> str:
        """Return the parameter name used in pytest to select this spec type."""
        return "benchmark_test"

    @classmethod
    def discard_fixture_format_by_marks(
        cls,
        fixture_format: FixtureFormat,
        fork: Fork,
        markers: List[pytest.Mark],
    ) -> bool:
        """
        Discard a fixture format from filling if the appropriate marker is used.

        Returns ``True`` when ``fixture_format`` should be skipped:
        ``blockchain_test_only`` keeps only ``BlockchainFixture`` and
        ``blockchain_test_engine_only`` keeps only ``BlockchainEngineFixture``.
        ``blockchain_test_only`` takes precedence when both markers are present.
        """
        marker_names = {marker.name for marker in markers}
        if "blockchain_test_only" in marker_names:
            return fixture_format != BlockchainFixture
        if "blockchain_test_engine_only" in marker_names:
            return fixture_format != BlockchainEngineFixture
        return False

    def get_genesis_environment(self, fork: Fork) -> Environment:
        """Get the genesis environment for this benchmark test (``self.env``)."""
        return self.env

    def split_transaction(self, tx: Transaction, gas_limit_cap: int | None) -> List[Transaction]:
        """
        Split a transaction that exceeds the gas limit cap into multiple transactions.

        When no split is needed (``gas_limit_cap`` is ``None`` or at least
        ``gas_benchmark_value``), ``tx`` itself is updated in place to use
        ``gas_benchmark_value`` as its gas limit and returned as the only element.

        Otherwise ``tx`` is copied once per chunk. Every chunk gets ``gas_limit_cap``
        gas except the last one, which gets the remainder, so the gas limits add up
        to ``gas_benchmark_value``. Nonces count up from ``tx.nonce``.

        Raises:
            ValueError: If a split is required but ``gas_limit_cap`` is not positive.

        """
        if gas_limit_cap is None or gas_limit_cap >= self.gas_benchmark_value:
            tx.gas_limit = HexNumber(self.gas_benchmark_value)
            return [tx]

        if gas_limit_cap <= 0:
            # A zero cap would divide by zero below; a negative cap would yield an
            # empty transaction list and silently benchmark nothing.
            raise ValueError(f"gas_limit_cap must be positive, but got {gas_limit_cap}")

        num_splits = math.ceil(self.gas_benchmark_value / gas_limit_cap)
        remaining_gas = self.gas_benchmark_value

        split_transactions = []
        for i in range(num_splits):
            split_tx = tx.model_copy()
            is_last = i == num_splits - 1
            split_tx.gas_limit = HexNumber(remaining_gas if is_last else gas_limit_cap)
            remaining_gas -= gas_limit_cap
            split_tx.nonce = HexNumber(tx.nonce + i)
            split_transactions.append(split_tx)

        return split_transactions

    def generate_blocks_from_code_generator(self, fork: Fork) -> List[Block]:
        """
        Generate blocks using the code generator.

        Deploys the generator's contracts into ``self.pre``, asks it for the
        benchmark transaction and splits that transaction if the fork defines a
        transaction gas limit cap. All resulting transactions go into one block.

        Raises:
            Exception: If ``code_generator`` is not set.

        """
        if self.code_generator is None:
            raise Exception("Code generator is not set")

        self.code_generator.deploy_contracts(self.pre, fork)
        # Fall back to the full benchmark gas when the fork has no (or a zero) cap.
        gas_limit = fork.transaction_gas_limit_cap() or self.gas_benchmark_value
        benchmark_tx = self.code_generator.generate_transaction(self.pre, gas_limit, fork)

        execution_txs = self.split_transaction(benchmark_tx, gas_limit)
        execution_block = Block(txs=execution_txs)

        return [execution_block]

    def _resolve_blocks(self, fork: Fork) -> List[Block]:
        """
        Return the blocks for whichever single workload source is set.

        Raises:
            ValueError: If not exactly one of ``code_generator``, ``blocks`` and
                ``tx`` is set.

        """
        set_props = [
            name
            for name, val in [
                ("code_generator", self.code_generator),
                ("blocks", self.blocks),
                ("tx", self.tx),
            ]
            if val is not None
        ]

        if len(set_props) != 1:
            raise ValueError(
                f"Exactly one must be set, but got {len(set_props)}: {', '.join(set_props)}"
            )

        if self.code_generator is not None:
            return self.generate_blocks_from_code_generator(fork)
        elif self.blocks is not None:
            return self.blocks
        elif self.tx is not None:
            # Fall back to the full benchmark gas when the fork has no (or a zero) cap.
            gas_limit = fork.transaction_gas_limit_cap() or self.gas_benchmark_value
            return [Block(txs=self.split_transaction(self.tx, gas_limit))]
        else:
            # Not reachable after the check above; kept as a defensive fallback.
            raise ValueError("Cannot create BlockchainTest without transactions or blocks")

    def generate_blockchain_test(self, fork: Fork) -> BlockchainTest:
        """
        Create a BlockchainTest from this BenchmarkTest.

        Raises:
            ValueError: If not exactly one of ``code_generator``, ``blocks`` and
                ``tx`` is set.

        """
        return BlockchainTest.from_test(
            base_test=self,
            genesis_environment=self.env,
            pre=self.pre,
            post=self.post,
            blocks=self._resolve_blocks(fork),
        )

    def generate(
        self,
        t8n: TransitionTool,
        fork: Fork,
        fixture_format: FixtureFormat,
    ) -> BaseFixture:
        """
        Generate the blockchain test fixture.

        Raises:
            Exception: If ``fixture_format`` is not one of the formats supported
                by ``BlockchainTest``.

        """
        # Only a directly supplied `tx` is inspected for an expected error here.
        expects_exception = self.tx.error is not None if self.tx else False
        self.check_exception_test(exception=expects_exception)
        if fixture_format in BlockchainTest.supported_fixture_formats:
            return self.generate_blockchain_test(fork=fork).generate(
                t8n=t8n, fork=fork, fixture_format=fixture_format
            )
        else:
            raise Exception(f"Unsupported fixture format: {fixture_format}")

    def execute(
        self,
        *,
        fork: Fork,
        execute_format: ExecuteFormat,
    ) -> BaseExecute:
        """
        Execute the benchmark test by sending it to the live network.

        When ``tx`` is set it is sent as-is in a single batch. Otherwise the
        transactions are taken from the blocks produced by ``blocks`` or
        ``code_generator``, one batch per block, so that ``None`` is never
        passed on as a transaction.

        Raises:
            ValueError: If ``tx`` is unset and not exactly one of ``blocks`` and
                ``code_generator`` is set.
            Exception: If ``execute_format`` is not supported.

        """
        if execute_format == TransactionPost:
            if self.tx is not None:
                tx_batches = [[self.tx]]
            else:
                tx_batches = [list(block.txs) for block in self._resolve_blocks(fork)]
            return TransactionPost(
                blocks=tx_batches,
                post=self.post,
            )
        raise Exception(f"Unsupported execute format: {execute_format}")
|
||
|
||
# Type of a benchmark test spec: a callable that takes one `str` argument and returns a generator yielding `BenchmarkTest` objects. | |
BenchmarkTestSpec = Callable[[str], Generator[BenchmarkTest, None, None]] | |
# Alias for the `BenchmarkTest` class object itself (`Type[...]`), not an instance of it. | |
BenchmarkTestFiller = Type[BenchmarkTest] |
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.