From d722e78702de1d5a3ce68cb373b2ece1207c0fa2 Mon Sep 17 00:00:00 2001
From: cmpatino <carlos.patino@huggingface.co>
Date: Mon, 29 Sep 2025 21:09:38 +0200
Subject: [PATCH 1/4] Revert extraction setting for IndicesExtractionConfig

Revert the default of `try_extract_without_anchor` to `True` in `IndicesExtractionConfig` to avoid issues in the `gpqa:diamond` eval.
---
 src/lighteval/metrics/utils/extractive_match_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/lighteval/metrics/utils/extractive_match_utils.py b/src/lighteval/metrics/utils/extractive_match_utils.py
index cce2b1793..1a3dc518c 100644
--- a/src/lighteval/metrics/utils/extractive_match_utils.py
+++ b/src/lighteval/metrics/utils/extractive_match_utils.py
@@ -90,7 +90,7 @@ class IndicesExtractionConfig:
     """
 
     prefix_for_extraction: ChoicePrefix
-    try_extract_without_anchor: bool = False
+    try_extract_without_anchor: bool = True
 
 
 ExtractionTarget = LatexExtractionConfig | ExprExtractionConfig | IndicesExtractionConfig

From 6c5af4274320f56649400b70d88a330971ed0323 Mon Sep 17 00:00:00 2001
From: cmpatino <carlos.patino@huggingface.co>
Date: Tue, 30 Sep 2025 11:33:12 +0200
Subject: [PATCH 2/4] Set `try_extract_without_anchor=True` only for GPQA

---
 src/lighteval/metrics/metrics.py                      | 8 ++++----
 src/lighteval/metrics/utils/extractive_match_utils.py | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/lighteval/metrics/metrics.py b/src/lighteval/metrics/metrics.py
index 0674d2df1..fbe15dbf4 100644
--- a/src/lighteval/metrics/metrics.py
+++ b/src/lighteval/metrics/metrics.py
@@ -526,8 +526,8 @@ class Metrics(Enum):
         metric_name="extractive_match",
         sample_level_fn=MultilingualExtractiveMatchMetric(
             language=Language.ENGLISH,
-            gold_extraction_target=[IndicesExtractionConfig(prefix_for_extraction="NativeLetters")],
-            pred_extraction_target=[IndicesExtractionConfig(prefix_for_extraction="NativeLetters")],
+            gold_extraction_target=[IndicesExtractionConfig(prefix_for_extraction="NativeLetters", try_extract_without_anchor=True)],
+            pred_extraction_target=[IndicesExtractionConfig(prefix_for_extraction="NativeLetters", try_extract_without_anchor=True)],
             precision=6,
         ),
         category=SamplingMethod.GENERATIVE,
@@ -539,8 +539,8 @@ class Metrics(Enum):
         sample_level_fn=PassAtK(
             sample_scoring_function=MultilingualExtractiveMatchMetric(
                 language=Language.ENGLISH,
-                gold_extraction_target=[IndicesExtractionConfig(prefix_for_extraction="NativeLetters")],
-                pred_extraction_target=[IndicesExtractionConfig(prefix_for_extraction="NativeLetters")],
+                gold_extraction_target=[IndicesExtractionConfig(prefix_for_extraction="NativeLetters", try_extract_without_anchor=True)],
+                pred_extraction_target=[IndicesExtractionConfig(prefix_for_extraction="NativeLetters", try_extract_without_anchor=True)],
                 precision=6,
             ),
         ),
diff --git a/src/lighteval/metrics/utils/extractive_match_utils.py b/src/lighteval/metrics/utils/extractive_match_utils.py
index 1a3dc518c..cce2b1793 100644
--- a/src/lighteval/metrics/utils/extractive_match_utils.py
+++ b/src/lighteval/metrics/utils/extractive_match_utils.py
@@ -90,7 +90,7 @@ class IndicesExtractionConfig:
     """
 
     prefix_for_extraction: ChoicePrefix
-    try_extract_without_anchor: bool = True
+    try_extract_without_anchor: bool = False
 
 
 ExtractionTarget = LatexExtractionConfig | ExprExtractionConfig | IndicesExtractionConfig

From 5b213cbdd47213c956743dde31753b5babeb6d6f Mon Sep 17 00:00:00 2001
From: cmpatino <carlos.patino@huggingface.co>
Date: Tue, 30 Sep 2025 11:44:07 +0200
Subject: [PATCH 3/4] Fix style: wrap long `IndicesExtractionConfig` lines

---
 src/lighteval/metrics/metrics.py | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/src/lighteval/metrics/metrics.py b/src/lighteval/metrics/metrics.py
index fbe15dbf4..167919974 100644
--- a/src/lighteval/metrics/metrics.py
+++ b/src/lighteval/metrics/metrics.py
@@ -526,8 +526,12 @@ class Metrics(Enum):
         metric_name="extractive_match",
         sample_level_fn=MultilingualExtractiveMatchMetric(
             language=Language.ENGLISH,
-            gold_extraction_target=[IndicesExtractionConfig(prefix_for_extraction="NativeLetters", try_extract_without_anchor=True)],
-            pred_extraction_target=[IndicesExtractionConfig(prefix_for_extraction="NativeLetters", try_extract_without_anchor=True)],
+            gold_extraction_target=[
+                IndicesExtractionConfig(prefix_for_extraction="NativeLetters", try_extract_without_anchor=True)
+            ],
+            pred_extraction_target=[
+                IndicesExtractionConfig(prefix_for_extraction="NativeLetters", try_extract_without_anchor=True)
+            ],
             precision=6,
         ),
         category=SamplingMethod.GENERATIVE,
@@ -539,8 +543,12 @@ class Metrics(Enum):
         sample_level_fn=PassAtK(
             sample_scoring_function=MultilingualExtractiveMatchMetric(
                 language=Language.ENGLISH,
-                gold_extraction_target=[IndicesExtractionConfig(prefix_for_extraction="NativeLetters", try_extract_without_anchor=True)],
-                pred_extraction_target=[IndicesExtractionConfig(prefix_for_extraction="NativeLetters", try_extract_without_anchor=True)],
+                gold_extraction_target=[
+                    IndicesExtractionConfig(prefix_for_extraction="NativeLetters", try_extract_without_anchor=True)
+                ],
+                pred_extraction_target=[
+                    IndicesExtractionConfig(prefix_for_extraction="NativeLetters", try_extract_without_anchor=True)
+                ],
                 precision=6,
             ),
         ),

From 1b6354425b0f6a9651b6dfb07e869d4e10b75646 Mon Sep 17 00:00:00 2001
From: cmpatino <carlos.patino@huggingface.co>
Date: Tue, 30 Sep 2025 12:17:58 +0200
Subject: [PATCH 4/4] Update GPQA test to reflect the `try_extract_without_anchor` setting

---
 tests/unit/metrics/test_cases/gpqa_instruct_metric.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/unit/metrics/test_cases/gpqa_instruct_metric.json b/tests/unit/metrics/test_cases/gpqa_instruct_metric.json
index af68ba3e5..4dddf83cc 100644
--- a/tests/unit/metrics/test_cases/gpqa_instruct_metric.json
+++ b/tests/unit/metrics/test_cases/gpqa_instruct_metric.json
@@ -249,7 +249,7 @@
         ]
       },
       "expected_output": {
-        "extractive_match": 0.0
+        "extractive_match": 1.0
       },
       "tolerance": 0.01,
       "description": "Answer with quotes but still extractable"