Commit 1f36913

Merge branch 'main' into litellm_model_changes
2 parents: 6e8b0c2 + 6ee4ff8

115 files changed (+6554 −352 lines)


.gitattributes

Lines changed: 1 addition & 0 deletions
```diff
@@ -1 +1,2 @@
 *.json filter=lfs diff=lfs merge=lfs -text
+tests/unit/metrics/test_cases/*.json -filter -diff -merge text
```
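A note on the new rule: `-filter -diff -merge` unsets the Git LFS attributes applied by the first line, and `text` restores normal text handling, so the metrics test-case fixtures under `tests/unit/metrics/test_cases/` are presumably meant to live in the repository as plain, diffable JSON rather than as LFS pointers.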

README.md

Lines changed: 4 additions & 9 deletions
```diff
@@ -164,15 +164,10 @@ results = pipeline.get_results()
 
 ## 🙏 Acknowledgements
 
-Lighteval started as an extension of the *fantastic* [Eleuther AI
-Harness](https://github.com/EleutherAI/lm-evaluation-harness) (which powers the
-[Open LLM
-Leaderboard](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard))
-and draws inspiration from the *amazing*
-[HELM](https://crfm.stanford.edu/helm/latest/) framework.
-
-While evolving Lighteval into its own *standalone tool*, we are grateful to the
-Harness and HELM teams for their **pioneering work** on LLM evaluations.
+Lighteval took inspiration from the following *amazing* frameworks: Eleuther's [AI Harness](https://github.com/EleutherAI/lm-evaluation-harness) and Stanford's
+[HELM](https://crfm.stanford.edu/helm/latest/). We are grateful to their teams for their **pioneering work** on LLM evaluations.
+
+We'd also like to offer our thanks to all the community members who have contributed to the library, adding new features and reporting or fixing bugs.
 
 ## 🌟 Contributions Welcome 💙💚💛💜🧡
 
```

docs/source/using-the-python-api.mdx

Lines changed: 2 additions & 2 deletions
```diff
@@ -12,9 +12,9 @@ import lighteval
 from lighteval.logging.evaluation_tracker import EvaluationTracker
 from lighteval.models.vllm.vllm_model import VLLMModelConfig
 from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters
-from lighteval.utils.imports import is_accelerate_available
+from lighteval.utils.imports import is_package_available
 
-if is_accelerate_available():
+if is_package_available("accelerate"):
     from datetime import timedelta
     from accelerate import Accelerator, InitProcessGroupKwargs
     accelerator = Accelerator(kwargs_handlers=[InitProcessGroupKwargs(timeout=timedelta(seconds=3000))])
```
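For context, a membership check like `is_package_available` can be built on `importlib` without importing the package itself; this is a hypothetical sketch for illustration, not lighteval's actual implementation:

```python
import importlib.util
from functools import lru_cache


@lru_cache(maxsize=None)
def is_package_available(name: str) -> bool:
    """Report whether `name` is importable, without actually importing it."""
    return importlib.util.find_spec(name) is not None
```

One string-keyed helper like this replaces the family of per-package `is_*_available` functions the diff is migrating away from.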

pyproject.toml

Lines changed: 3 additions & 1 deletion
```diff
@@ -84,6 +84,7 @@ dependencies = [
     "fsspec>=2023.12.2",
     "httpx>=0.27.2",
     "latex2sympy2_extended==1.0.6",
+    "langcodes",
 ]
 
 [project.optional-dependencies]
@@ -98,8 +99,9 @@ nanotron = [
 ]
 tensorboardX = ["tensorboardX"]
 vllm = ["vllm>=0.10.0,<0.10.2", "ray", "more_itertools"]
+sglang = ["sglang"]
 quality = ["ruff>=v0.11.0","pre-commit"]
-tests = ["pytest>=7.4.0","deepdiff"]
+tests = ["pytest>=7.4.0","deepdiff","pip>=25.2"]
 dev = ["lighteval[accelerate,quality,tests,multilingual,math,extended_tasks,vllm]"]
 docs = ["hf-doc-builder", "watchdog"]
 extended_tasks = [
```
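In practical terms: `langcodes` is added to the core dependencies, the sglang backend becomes an opt-in extra installable as `pip install "lighteval[sglang]"`, and the tests extra now additionally pins `pip>=25.2` (the motivation for the pin isn't visible in this commit).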

src/lighteval/logging/evaluation_tracker.py

Lines changed: 5 additions & 5 deletions
```diff
@@ -43,13 +43,13 @@
     TaskConfigLogger,
     VersionsLogger,
 )
-from lighteval.utils.imports import NO_TENSORBOARDX_WARN_MSG, is_nanotron_available, is_tensorboardX_available
+from lighteval.utils.imports import is_package_available, not_installed_error_message
 from lighteval.utils.utils import obj_to_markdown
 
 
 logger = logging.getLogger(__name__)
 
-if is_nanotron_available():
+if is_package_available("nanotron"):
     from nanotron.config import GeneralArgs  # type: ignore
 
 try:
@@ -659,11 +659,11 @@ def recreate_metadata_card(self, repo_id: str) -> None:  # noqa: C901
     def push_to_tensorboard(  # noqa: C901
         self, results: dict[str, dict[str, float]], details: dict[str, DetailsLogger.CompiledDetail]
     ):
-        if not is_tensorboardX_available:
-            logger.warning(NO_TENSORBOARDX_WARN_MSG)
+        if not is_package_available("tensorboardX"):
+            logger.warning(not_installed_error_message("tensorboardX"))
             return
 
-        if not is_nanotron_available():
+        if not is_package_available("nanotron"):
             logger.warning("You cannot push results to tensorboard without having nanotron installed. Skipping")
             return
 
```
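One detail worth noting in the second hunk: the old guard read `if not is_tensorboardX_available:` without calling the function. A function object is always truthy, so the warning branch could never run; the switch to `is_package_available("tensorboardX")` also fixes that latent bug. A minimal illustration:

```python
def is_tensorboardX_available() -> bool:
    return False  # pretend the package is missing

if not is_tensorboardX_available:    # old guard: the function object is truthy, so this never runs
    print("warn: tensorboardX missing")

if not is_tensorboardX_available():  # intended check: this branch does run
    print("warn: tensorboardX missing")
```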

src/lighteval/logging/info_loggers.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -34,13 +34,13 @@
 from lighteval.models.model_output import ModelResponse
 from lighteval.tasks.lighteval_task import LightevalTask, LightevalTaskConfig
 from lighteval.tasks.requests import Doc
-from lighteval.utils.imports import is_nanotron_available
+from lighteval.utils.imports import is_package_available
 
 
 logger = logging.getLogger(__name__)
 
 
-if is_nanotron_available():
+if is_package_available("nanotron"):
     pass
 
 
```

src/lighteval/main_nanotron.py

Lines changed: 5 additions & 6 deletions
```diff
@@ -32,11 +32,13 @@
     reasoning_tags,
     remove_reasoning_tags,
 )
+from lighteval.utils.imports import requires
 
 
 SEED = 1234
 
 
+@requires("nanotron")
 def nanotron(
     checkpoint_config_path: Annotated[
         str, Option(help="Path to the nanotron checkpoint YAML or python config file, potentially on s3.")
@@ -45,12 +47,9 @@ def nanotron(
     remove_reasoning_tags: remove_reasoning_tags.type = remove_reasoning_tags.default,
     reasoning_tags: reasoning_tags.type = reasoning_tags.default,
 ):
-    """Evaluate models using nanotron as backend."""
-    from lighteval.utils.imports import NO_NANOTRON_ERROR_MSG, is_nanotron_available
-
-    if not is_nanotron_available():
-        raise ImportError(NO_NANOTRON_ERROR_MSG)
-
+    """
+    Evaluate models using nanotron as backend.
+    """
     from nanotron.config import GeneralArgs, ModelArgs, TokenizerArgs, get_config_from_dict, get_config_from_file
 
     from lighteval.logging.evaluation_tracker import EvaluationTracker
```
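The new `@requires("nanotron")` decorator replaces the availability check that previously lived in the function body, so a missing backend fails fast with an `ImportError` before any work is done. The real implementation isn't part of this diff; a minimal sketch of the idea, under that assumption, might look like:

```python
import functools
import importlib.util


def requires(*packages: str):
    """Hypothetical sketch: raise ImportError if a required package is absent."""
    def decorator(fn):
        @functools.wraps(fn)
        def wrapper(*args, **kwargs):
            missing = [p for p in packages if importlib.util.find_spec(p) is None]
            if missing:
                raise ImportError(f"{fn.__name__} requires: {', '.join(missing)}")
            return fn(*args, **kwargs)
        return wrapper
    return decorator
```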

src/lighteval/metrics/imports/data_stats_metric.py

Lines changed: 2 additions & 3 deletions
```diff
@@ -30,7 +30,7 @@
 from typing import Literal
 
 from lighteval.metrics.imports.data_stats_utils import Fragments
-from lighteval.utils.imports import NO_SPACY_ERROR_MSG, is_spacy_available
+from lighteval.utils.imports import Extra, requires
 
 
 logger = logging.getLogger(__name__)
@@ -55,6 +55,7 @@ def find_ngrams(input_list, n):
     return zip(*[input_list[i:] for i in range(n)])
 
 
+@requires(Extra.MULTILINGUAL)
 class DataStatsMetric(Metric):
     def __init__(
         self,
@@ -86,8 +87,6 @@ def __init__(
         determines the spaCy model used for tokenization. Currently supports English,
         German, French, and Italian.
         """
-        if not is_spacy_available():
-            raise ImportError(NO_SPACY_ERROR_MSG)
         import spacy
 
         self.n_gram = n_gram
```
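Read together with `main_nanotron.py`, this suggests `requires` accepts both bare package names and an `Extra` dependency group, and works on classes as well as functions, presumably deferring the `ImportError` until `DataStatsMetric` is instantiated. That reading is an inference from the diff, not confirmed here.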

src/lighteval/metrics/imports/summac.py

Lines changed: 0 additions & 1 deletion
```diff
@@ -221,7 +221,6 @@ def build_image(self, original, generated):
             truncation=True,
             max_length=self.max_input_length,
             return_tensors="pt",
-            truncation_strategy="only_first",
         )
         batch_tokens = {k: v.to(self.device) for k, v in batch_tokens.items()}
         with torch.no_grad():
```
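`truncation_strategy` is the legacy keyword from older `transformers` tokenizer APIs, superseded by the `truncation` argument that already appears just above as `truncation=True`; the removed line was presumably redundant and, on recent `transformers` versions, a source of deprecation warnings.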

src/lighteval/metrics/metrics.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -390,7 +390,7 @@ class Metrics(Enum):
         metric_name="mf1",
         sample_level_fn=LoglikelihoodPreparator(is_single_token=True),
         category=SamplingMethod.LOGPROBS,
-        corpus_level_fn=CorpusLevelF1Score(average=None, num_classes=3),
+        corpus_level_fn=CorpusLevelF1Score(average="micro", num_classes=3),
         higher_is_better=True,
     )
     pass_at_k = SampleLevelMetric(
```
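With `average=None`, scikit-learn-style F1 returns one score per class rather than a scalar, which is awkward for a corpus-level metric named `mf1`; `average="micro"` pools the counts into a single global score. Assuming `CorpusLevelF1Score` mirrors scikit-learn's averaging semantics, the difference looks like:

```python
from sklearn.metrics import f1_score

y_true = [0, 1, 2, 0, 1, 2]
y_pred = [0, 2, 1, 0, 0, 1]

print(f1_score(y_true, y_pred, average=None))     # per-class scores, e.g. array([0.8, 0., 0.])
print(f1_score(y_true, y_pred, average="micro"))  # one pooled score, ~0.333
```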
