diff --git a/src/lighteval/metrics/metrics.py b/src/lighteval/metrics/metrics.py index 0674d2df1..167919974 100644 --- a/src/lighteval/metrics/metrics.py +++ b/src/lighteval/metrics/metrics.py @@ -526,8 +526,12 @@ class Metrics(Enum): metric_name="extractive_match", sample_level_fn=MultilingualExtractiveMatchMetric( language=Language.ENGLISH, - gold_extraction_target=[IndicesExtractionConfig(prefix_for_extraction="NativeLetters")], - pred_extraction_target=[IndicesExtractionConfig(prefix_for_extraction="NativeLetters")], + gold_extraction_target=[ + IndicesExtractionConfig(prefix_for_extraction="NativeLetters", try_extract_without_anchor=True) + ], + pred_extraction_target=[ + IndicesExtractionConfig(prefix_for_extraction="NativeLetters", try_extract_without_anchor=True) + ], precision=6, ), category=SamplingMethod.GENERATIVE, @@ -539,8 +543,12 @@ class Metrics(Enum): sample_level_fn=PassAtK( sample_scoring_function=MultilingualExtractiveMatchMetric( language=Language.ENGLISH, - gold_extraction_target=[IndicesExtractionConfig(prefix_for_extraction="NativeLetters")], - pred_extraction_target=[IndicesExtractionConfig(prefix_for_extraction="NativeLetters")], + gold_extraction_target=[ + IndicesExtractionConfig(prefix_for_extraction="NativeLetters", try_extract_without_anchor=True) + ], + pred_extraction_target=[ + IndicesExtractionConfig(prefix_for_extraction="NativeLetters", try_extract_without_anchor=True) + ], precision=6, ), ), diff --git a/tests/unit/metrics/test_cases/gpqa_instruct_metric.json b/tests/unit/metrics/test_cases/gpqa_instruct_metric.json index af68ba3e5..4dddf83cc 100644 --- a/tests/unit/metrics/test_cases/gpqa_instruct_metric.json +++ b/tests/unit/metrics/test_cases/gpqa_instruct_metric.json @@ -249,7 +249,7 @@ ] }, "expected_output": { - "extractive_match": 0.0 + "extractive_match": 1.0 }, "tolerance": 0.01, "description": "Answer with quotes but still extractable"