Commit 1f36913

Merge branch 'main' into litellm_model_changes
2 parents: 6e8b0c2 + 6ee4ff8

115 files changed (+6554 −352 lines)


.gitattributes

Lines changed: 1 addition & 0 deletions
```diff
@@ -1 +1,2 @@
 *.json filter=lfs diff=lfs merge=lfs -text
+tests/unit/metrics/test_cases/*.json -filter -diff -merge text
```
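A note on the new rule: `-filter -diff -merge` unsets the Git LFS attributes applied by the first line, and `text` restores normal text handling, so the metrics test-case fixtures under `tests/unit/metrics/test_cases/` are presumably meant to live in the repository as plain, diffable JSON rather than as LFS pointers.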

README.md

Lines changed: 4 additions & 9 deletions
```diff
@@ -164,15 +164,10 @@ results = pipeline.get_results()
 
 ## 🙏 Acknowledgements
 
-Lighteval started as an extension of the *fantastic* [Eleuther AI
-Harness](https://github.com/EleutherAI/lm-evaluation-harness) (which powers the
-[Open LLM
-Leaderboard](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard))
-and draws inspiration from the *amazing*
-[HELM](https://crfm.stanford.edu/helm/latest/) framework.
-
-While evolving Lighteval into its own *standalone tool*, we are grateful to the
-Harness and HELM teams for their **pioneering work** on LLM evaluations.
+Lighteval took inspiration from the following *amazing* frameworks: Eleuther's [AI Harness](https://github.com/EleutherAI/lm-evaluation-harness) and Stanford's
+[HELM](https://crfm.stanford.edu/helm/latest/). We are grateful to their teams for their **pioneering work** on LLM evaluations.
+
+We'd also like to offer our thanks to all the community members who have contributed to the library, adding new features and reporting or fixing bugs.
 
 ## 🌟 Contributions Welcome 💙💚💛💜🧡
 
```

docs/source/using-the-python-api.mdx

Lines changed: 2 additions & 2 deletions
```diff
@@ -12,9 +12,9 @@ import lighteval
 from lighteval.logging.evaluation_tracker import EvaluationTracker
 from lighteval.models.vllm.vllm_model import VLLMModelConfig
 from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters
-from lighteval.utils.imports import is_accelerate_available
+from lighteval.utils.imports import is_package_available
 
-if is_accelerate_available():
+if is_package_available("accelerate"):
     from datetime import timedelta
     from accelerate import Accelerator, InitProcessGroupKwargs
     accelerator = Accelerator(kwargs_handlers=[InitProcessGroupKwargs(timeout=timedelta(seconds=3000))])
```
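For context, a membership check like `is_package_available` can be built on `importlib` without importing the package itself; this is a hypothetical sketch for illustration, not lighteval's actual implementation:

```python
import importlib.util
from functools import lru_cache


@lru_cache(maxsize=None)
def is_package_available(name: str) -> bool:
    """Report whether `name` is importable, without actually importing it."""
    return importlib.util.find_spec(name) is not None
```

One string-keyed helper like this replaces the family of per-package `is_*_available` functions the diff is migrating away from.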

pyproject.toml

Lines changed: 3 additions & 1 deletion
```diff
@@ -84,6 +84,7 @@ dependencies = [
     "fsspec>=2023.12.2",
     "httpx>=0.27.2",
     "latex2sympy2_extended==1.0.6",
+    "langcodes",
 ]
 
 [project.optional-dependencies]
@@ -98,8 +99,9 @@ nanotron = [
 ]
 tensorboardX = ["tensorboardX"]
 vllm = ["vllm>=0.10.0,<0.10.2", "ray", "more_itertools"]
+sglang = ["sglang"]
 quality = ["ruff>=v0.11.0","pre-commit"]
-tests = ["pytest>=7.4.0","deepdiff"]
+tests = ["pytest>=7.4.0","deepdiff","pip>=25.2"]
 dev = ["lighteval[accelerate,quality,tests,multilingual,math,extended_tasks,vllm]"]
 docs = ["hf-doc-builder", "watchdog"]
 extended_tasks = [
```
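In practical terms: `langcodes` is added to the core dependencies, the sglang backend becomes an opt-in extra installable as `pip install "lighteval[sglang]"`, and the tests extra now additionally pins `pip>=25.2` (the motivation for the pin isn't visible in this commit).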

src/lighteval/logging/evaluation_tracker.py

Lines changed: 5 additions & 5 deletions
```diff
@@ -43,13 +43,13 @@
     TaskConfigLogger,
     VersionsLogger,
 )
-from lighteval.utils.imports import NO_TENSORBOARDX_WARN_MSG, is_nanotron_available, is_tensorboardX_available
+from lighteval.utils.imports import is_package_available, not_installed_error_message
 from lighteval.utils.utils import obj_to_markdown
 
 
 logger = logging.getLogger(__name__)
 
-if is_nanotron_available():
+if is_package_available("nanotron"):
     from nanotron.config import GeneralArgs  # type: ignore
 
 try:
@@ -659,11 +659,11 @@ def recreate_metadata_card(self, repo_id: str) -> None:  # noqa: C901
     def push_to_tensorboard(  # noqa: C901
         self, results: dict[str, dict[str, float]], details: dict[str, DetailsLogger.CompiledDetail]
     ):
-        if not is_tensorboardX_available:
-            logger.warning(NO_TENSORBOARDX_WARN_MSG)
+        if not is_package_available("tensorboardX"):
+            logger.warning(not_installed_error_message("tensorboardX"))
             return
 
-        if not is_nanotron_available():
+        if not is_package_available("nanotron"):
             logger.warning("You cannot push results to tensorboard without having nanotron installed. Skipping")
             return
 
```
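One detail worth noting in the second hunk: the old guard read `if not is_tensorboardX_available:` without calling the function. A function object is always truthy, so the warning branch could never run; the switch to `is_package_available("tensorboardX")` also fixes that latent bug. A minimal illustration:

```python
def is_tensorboardX_available() -> bool:
    return False  # pretend the package is missing

if not is_tensorboardX_available:    # old guard: the function object is truthy, so this never runs
    print("warn: tensorboardX missing")

if not is_tensorboardX_available():  # intended check: this branch does run
    print("warn: tensorboardX missing")
```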

src/lighteval/logging/info_loggers.py

Lines changed: 2 additions & 2 deletions
```diff
@@ -34,13 +34,13 @@
 from lighteval.models.model_output import ModelResponse
 from lighteval.tasks.lighteval_task import LightevalTask, LightevalTaskConfig
 from lighteval.tasks.requests import Doc
-from lighteval.utils.imports import is_nanotron_available
+from lighteval.utils.imports import is_package_available
 
 
 logger = logging.getLogger(__name__)
 
 
-if is_nanotron_available():
+if is_package_available("nanotron"):
     pass
 
 
```

src/lighteval/main_nanotron.py

Lines changed: 5 additions & 6 deletions
```diff
@@ -32,11 +32,13 @@
     reasoning_tags,
     remove_reasoning_tags,
 )
+from lighteval.utils.imports import requires
 
 
 SEED = 1234
 
 
+@requires("nanotron")
 def nanotron(
     checkpoint_config_path: Annotated[
         str, Option(help="Path to the nanotron checkpoint YAML or python config file, potentially on s3.")
@@ -45,12 +47,9 @@ def nanotron(
     remove_reasoning_tags: remove_reasoning_tags.type = remove_reasoning_tags.default,
     reasoning_tags: reasoning_tags.type = reasoning_tags.default,
 ):
-    """Evaluate models using nanotron as backend."""
-    from lighteval.utils.imports import NO_NANOTRON_ERROR_MSG, is_nanotron_available
-
-    if not is_nanotron_available():
-        raise ImportError(NO_NANOTRON_ERROR_MSG)
-
+    """
+    Evaluate models using nanotron as backend.
+    """
     from nanotron.config import GeneralArgs, ModelArgs, TokenizerArgs, get_config_from_dict, get_config_from_file
 
     from lighteval.logging.evaluation_tracker import EvaluationTracker
```
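The new `@requires("nanotron")` decorator replaces the availability check that previously lived in the function body, so a missing backend fails fast with an `ImportError` before any work is done. The real implementation isn't part of this diff; a minimal sketch of the idea, under that assumption, might look like:

```python
import functools
import importlib.util


def requires(*packages: str):
    """Hypothetical sketch: raise ImportError if a required package is absent."""
    def decorator(fn):
        @functools.wraps(fn)
        def wrapper(*args, **kwargs):
            missing = [p for p in packages if importlib.util.find_spec(p) is None]
            if missing:
                raise ImportError(f"{fn.__name__} requires: {', '.join(missing)}")
            return fn(*args, **kwargs)
        return wrapper
    return decorator
```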

src/lighteval/metrics/imports/data_stats_metric.py

Lines changed: 2 additions & 3 deletions
```diff
@@ -30,7 +30,7 @@
 from typing import Literal
 
 from lighteval.metrics.imports.data_stats_utils import Fragments
-from lighteval.utils.imports import NO_SPACY_ERROR_MSG, is_spacy_available
+from lighteval.utils.imports import Extra, requires
 
 
 logger = logging.getLogger(__name__)
@@ -55,6 +55,7 @@ def find_ngrams(input_list, n):
     return zip(*[input_list[i:] for i in range(n)])
 
 
+@requires(Extra.MULTILINGUAL)
 class DataStatsMetric(Metric):
     def __init__(
         self,
@@ -86,8 +87,6 @@ def __init__(
         determines the spaCy model used for tokenization. Currently supports English,
         German, French, and Italian.
         """
-        if not is_spacy_available():
-            raise ImportError(NO_SPACY_ERROR_MSG)
         import spacy
 
         self.n_gram = n_gram
```
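Read together with `main_nanotron.py`, this suggests `requires` accepts both bare package names and an `Extra` dependency group, and works on classes as well as functions, presumably deferring the `ImportError` until `DataStatsMetric` is instantiated. That reading is an inference from the diff, not confirmed here.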

src/lighteval/metrics/imports/summac.py

Lines changed: 0 additions & 1 deletion
```diff
@@ -221,7 +221,6 @@ def build_image(self, original, generated):
             truncation=True,
             max_length=self.max_input_length,
             return_tensors="pt",
-            truncation_strategy="only_first",
         )
         batch_tokens = {k: v.to(self.device) for k, v in batch_tokens.items()}
         with torch.no_grad():
```
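`truncation_strategy` is the legacy keyword from older `transformers` tokenizer APIs, superseded by the `truncation` argument that already appears just above as `truncation=True`; the removed line was presumably redundant and, on recent `transformers` versions, a source of deprecation warnings.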

src/lighteval/metrics/metrics.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -390,7 +390,7 @@ class Metrics(Enum):
         metric_name="mf1",
         sample_level_fn=LoglikelihoodPreparator(is_single_token=True),
         category=SamplingMethod.LOGPROBS,
-        corpus_level_fn=CorpusLevelF1Score(average=None, num_classes=3),
+        corpus_level_fn=CorpusLevelF1Score(average="micro", num_classes=3),
         higher_is_better=True,
     )
     pass_at_k = SampleLevelMetric(
```
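With `average=None`, scikit-learn-style F1 returns one score per class rather than a scalar, which is awkward for a corpus-level metric named `mf1`; `average="micro"` pools the counts into a single global score. Assuming `CorpusLevelF1Score` mirrors scikit-learn's averaging semantics, the difference looks like:

```python
from sklearn.metrics import f1_score

y_true = [0, 1, 2, 0, 1, 2]
y_pred = [0, 2, 1, 0, 0, 1]

print(f1_score(y_true, y_pred, average=None))     # per-class scores, e.g. array([0.8, 0., 0.])
print(f1_score(y_true, y_pred, average="micro"))  # one pooled score, ~0.333
```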
