From 281e88bcc5cd11a69bba88310d4b479d4a5b32ce Mon Sep 17 00:00:00 2001
From: Roman Solomatin <36135455+Samoed@users.noreply.github.com>
Date: Sat, 20 Sep 2025 22:35:18 +0300
Subject: [PATCH 1/2] enable `PTH` rule

---
 docs/mmteb/create_points_table.py             | 13 +++----
 docs/mmteb/validate_points.py                 | 39 +++++++++----------
 mteb/MTEB.py                                  |  4 +-
 mteb/_evaluators/_download.py                 |  4 +-
 mteb/cli/generate_readme.py                   |  2 +-
 .../model_implementations/cache_wrapper.py    |  8 ++--
 .../model_implementations/evaclip_models.py   |  4 +-
 .../ZeroShotClassification/eng/Country211.py  |  6 +--
 .../Image/ZeroShotClassification/eng/GTSRB.py |  6 +--
 .../ZeroShotClassification/eng/Imagenet1k.py  |  6 +--
 .../eng/PatchCamelyon.py                      |  6 +--
 mteb/tasks/Retrieval/eng/MLQuestions.py       | 10 +++--
 pyproject.toml                                |  7 ++--
 tests/test_models/model_loading.py            |  2 +-
 14 files changed, 58 insertions(+), 59 deletions(-)

diff --git a/docs/mmteb/create_points_table.py b/docs/mmteb/create_points_table.py
index 0e4a2a04dd..740e7912c7 100644
--- a/docs/mmteb/create_points_table.py
+++ b/docs/mmteb/create_points_table.py
@@ -6,18 +6,15 @@
 import pandas as pd
 
 
-def load_data() -> pd.DataFrame:
-    file_path = Path(__file__).parent / "points"
+def load_data(file_path: Path) -> pd.DataFrame:
     files = file_path.glob("*.jsonl")
 
     json_data = []
     for file in files:
-        with open(file) as f:
+        with file.open() as f:
             for line in f:
                 json_data.append(json.loads(line))
-
-    df = pd.DataFrame(json_data)
-    return df
+    return pd.DataFrame(json_data)
 
 
 def save_to_markdown(df: pd.DataFrame, file_path: Path) -> None:
@@ -30,7 +27,7 @@ def save_to_markdown(df: pd.DataFrame, file_path: Path) -> None:
     md = df.to_markdown()
     # add title
     md = f"# Points\n\n_Note_: this table is **autogenerated** and should not be edited. It is intended to get an overview of contributions.\n\n {md}"
-    with open(file_path, "w") as f:
+    with file_path.open("w") as f:
         f.write(md)
 
 
@@ -38,5 +35,5 @@ def save_to_markdown(df: pd.DataFrame, file_path: Path) -> None:
     file_path = Path(__file__).parent / "points"
     save_path = Path(__file__).parent / "points_table.md"
 
-    df = load_data()
+    df = load_data(file_path)
     save_to_markdown(df, save_path)
diff --git a/docs/mmteb/validate_points.py b/docs/mmteb/validate_points.py
index 5b2a4614ac..1975b396e1 100644
--- a/docs/mmteb/validate_points.py
+++ b/docs/mmteb/validate_points.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import logging
-import os
+from pathlib import Path
 from typing import Optional
 
 from jsonlines import Reader
@@ -44,32 +44,29 @@ def check_max_points(obj: JsonObject, commit_n: str):
 
 
 # Function to validate JSONL files in a folder
 def validate_jsonl_files(folder_path):
-    for filename in os.listdir(folder_path):
-        if filename.endswith(".jsonl"):
-            file_path = os.path.join(folder_path, filename)
-            commit_n = os.path.splitext(filename)[0]
-            with open(file_path, encoding="utf-8") as file:
+    folder_path = Path(folder_path)
+    for file_path in folder_path.glob("*.jsonl"):
+        commit_n = file_path.stem
+        with file_path.open(encoding="utf-8") as file:
+            try:
+                # Read JSONL file
+                reader = Reader(file)
+            except Exception:
+                raise Exception("Error reading file:", file_path)
+            for line in reader:
                 try:
-                    # Read JSONL file
-                    reader = Reader(file)
-                except Exception:
-                    raise Exception("Error reading file:", file_path)
-                for line in reader:
-                    try:
-                        # Validate JSON object against schema
-                        x = JsonObject(**line)
-                        logging.debug(x)
+                    # Validate JSON object against schema
+                    x = JsonObject(**line)
+                    logging.debug(x)
+                    check_max_points(x, commit_n)
 
-                    except ValidationError as e:
-                        raise Exception(
-                            "Validation Error in file:", file_path, line
-                        ) from e
+                except ValidationError as e:
+                    raise Exception("Validation Error in file:", file_path, line) from e
 
 
 # Main function
 def main():
-    folder_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "points")
+    folder_path = Path(__file__).parent / "points"
     validate_jsonl_files(folder_path)
 
diff --git a/mteb/MTEB.py b/mteb/MTEB.py
index 7757043e9d..fdb97ed787 100644
--- a/mteb/MTEB.py
+++ b/mteb/MTEB.py
@@ -73,7 +73,7 @@ def __init__(
         self.benchmarks = tasks
         self.tasks = list(chain.from_iterable(self.tasks))
 
-        self.err_logs_path = err_logs_path
+        self.err_logs_path = Path(err_logs_path)
         self.last_evaluated_splits = {}
 
     @property
@@ -541,7 +541,7 @@ def run(
                     logger.error(
                         f"Please check all the error logs at: {self.err_logs_path}"
                     )
-                with open(self.err_logs_path, "a") as f_out:
+                with self.err_logs_path.open("a") as f_out:
                     f_out.write(f"{datetime.now()} >>> {task.metadata.name}\n")
                     f_out.write(traceback.format_exc())
                     f_out.write("\n\n")
diff --git a/mteb/_evaluators/_download.py b/mteb/_evaluators/_download.py
index 1c053c7153..8366c1cda1 100644
--- a/mteb/_evaluators/_download.py
+++ b/mteb/_evaluators/_download.py
@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+from pathlib import Path
+
 import requests
 import tqdm
 
@@ -9,7 +11,7 @@ def download(url: str, fname: str):
     resp = requests.get(url, stream=True)
     total = int(resp.headers.get("content-length", 0))
     with (
-        open(fname, "wb") as file,
+        Path(fname).open("wb") as file,
         tqdm.tqdm(
             desc=fname,
             total=total,
diff --git a/mteb/cli/generate_readme.py b/mteb/cli/generate_readme.py
index 66a55ce1fa..7c77b719d3 100644
--- a/mteb/cli/generate_readme.py
+++ b/mteb/cli/generate_readme.py
@@ -144,7 +144,7 @@ def _merge_yamls(
     if not existing_readme.name.lower().endswith(".md"):
         raise ValueError("Readme file should be markdown and end with '.md'")
 
-    with open(existing_readme) as f:
+    with existing_readme.open() as f:
         existing_file = f.read()
 
     existing_yaml_dict, readme_end = _extract_yaml_and_content(existing_file)
diff --git a/mteb/models/model_implementations/cache_wrapper.py b/mteb/models/model_implementations/cache_wrapper.py
index b3644ac668..879ecee465 100644
--- a/mteb/models/model_implementations/cache_wrapper.py
+++ b/mteb/models/model_implementations/cache_wrapper.py
@@ -114,7 +114,7 @@ def _double_vectors_file(self) -> None:
         self.vectors = new_vectors
 
     def _save_dimension(self) -> None:
-        with open(self.dimension_file, "w") as f:
+        with self.dimension_file.open("w") as f:
             f.write(str(self.vector_dim))
         logger.info(
             f"Saved vector dimension {self.vector_dim} to {self.dimension_file}"
@@ -122,7 +122,7 @@
 
     def _load_dimension(self) -> None:
         if self.dimension_file.exists():
-            with open(self.dimension_file) as f:
+            with self.dimension_file.open() as f:
                 self.vector_dim = int(f.read().strip())
             logger.info(
                 f"Loaded vector dimension {self.vector_dim} from {self.dimension_file}"
@@ -144,7 +144,7 @@ def save(self) -> None:
             for hash_, index in self.hash_to_index.items()
         }
 
-        with open(self.index_file, "w", encoding="utf-8") as f:
+        with self.index_file.open("w", encoding="utf-8") as f:
             json.dump(serializable_index, f, indent=2)
         self._save_dimension()
         logger.info(f"Saved VectorCacheMap to {self.directory}")
@@ -156,7 +156,7 @@ def load(self, name: str | None = None) -> None:
         try:
            self._load_dimension()
            if self.index_file.exists() and self.vectors_file.exists():
-                with open(self.index_file, encoding="utf-8") as f:
+                with self.index_file.open(encoding="utf-8") as f:
                     loaded_index = json.load(f)
                     self.hash_to_index = {
                         str(hash_): int(index)  # Ensure we maintain the correct types
diff --git a/mteb/models/model_implementations/evaclip_models.py b/mteb/models/model_implementations/evaclip_models.py
index 0763ccce91..207dc38341 100644
--- a/mteb/models/model_implementations/evaclip_models.py
+++ b/mteb/models/model_implementations/evaclip_models.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+from pathlib import Path
 from typing import Any
 
 import torch
@@ -15,10 +16,9 @@
 
 def evaclip_loader(model_name, **kwargs):
     try:
-        import os
         import sys
 
-        sys.path.insert(0, os.path.join(os.getcwd(), "EVA/EVA-CLIP/rei"))
+        sys.path.insert(0, str(Path.cwd() / "EVA" / "EVA-CLIP" / "rei"))
 
         from eva_clip import create_model_and_transforms, get_tokenizer
     except ImportError:
diff --git a/mteb/tasks/Image/ZeroShotClassification/eng/Country211.py b/mteb/tasks/Image/ZeroShotClassification/eng/Country211.py
index a7949b8e19..c6dd599be3 100644
--- a/mteb/tasks/Image/ZeroShotClassification/eng/Country211.py
+++ b/mteb/tasks/Image/ZeroShotClassification/eng/Country211.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-import os
+from pathlib import Path
 
 from mteb.abstasks.AbsTaskAnyZeroShotClassification import (
     AbsTaskAnyZeroShotClassification,
@@ -47,8 +47,8 @@ class Country211ZeroShotClassification(AbsTaskAnyZeroShotClassification):
     label_column_name: str = "cls"
 
     def get_candidate_labels(self) -> list[str]:
-        path = os.path.dirname(__file__)
-        with open(os.path.join(path, "templates/Country211_labels.txt")) as f:
+        path = Path(__file__).parent / "templates" / "Country211_labels.txt"
+        with path.open() as f:
             labels = f.readlines()
         return [f"a photo showing the country of {c}." for c in labels]
 
diff --git a/mteb/tasks/Image/ZeroShotClassification/eng/GTSRB.py b/mteb/tasks/Image/ZeroShotClassification/eng/GTSRB.py
index 30d6878ed2..925e44bec0 100644
--- a/mteb/tasks/Image/ZeroShotClassification/eng/GTSRB.py
+++ b/mteb/tasks/Image/ZeroShotClassification/eng/GTSRB.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-import os
+from pathlib import Path
 
 from mteb.abstasks.AbsTaskAnyZeroShotClassification import (
     AbsTaskAnyZeroShotClassification,
@@ -52,8 +52,8 @@ class GTSRBZeroShotClassification(AbsTaskAnyZeroShotClassification):
     label_column_name: str = "cls"
 
     def get_candidate_labels(self) -> list[str]:
-        path = os.path.dirname(__file__)
-        with open(os.path.join(path, "templates/GTSRB_labels.txt")) as f:
+        path = Path(__file__).parent / "templates" / "GTSRB_labels.txt"
+        with path.open() as f:
             labels = f.readlines()
         return [f"a close up photo of a '{c}' traffic sign." for c in labels]
 
diff --git a/mteb/tasks/Image/ZeroShotClassification/eng/Imagenet1k.py b/mteb/tasks/Image/ZeroShotClassification/eng/Imagenet1k.py
index 3ed110a2ac..24626d7df2 100644
--- a/mteb/tasks/Image/ZeroShotClassification/eng/Imagenet1k.py
+++ b/mteb/tasks/Image/ZeroShotClassification/eng/Imagenet1k.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-import os
+from pathlib import Path
 
 from mteb.abstasks.AbsTaskAnyZeroShotClassification import (
     AbsTaskAnyZeroShotClassification,
@@ -48,8 +48,8 @@ class Imagenet1kZeroShotClassification(AbsTaskAnyZeroShotClassification):
     label_column_name: str = "cls"
 
     def get_candidate_labels(self) -> list[str]:
-        path = os.path.dirname(__file__)
-        with open(os.path.join(path, "templates/Imagenet1k_labels.txt")) as f:
+        path = Path(__file__).parent / "templates" / "Imagenet1k_labels.txt"
+        with path.open() as f:
             labels = f.readlines()
         return [f"a photo of {c}." for c in labels]
 
diff --git a/mteb/tasks/Image/ZeroShotClassification/eng/PatchCamelyon.py b/mteb/tasks/Image/ZeroShotClassification/eng/PatchCamelyon.py
index 7727faef59..2b47cdddc3 100644
--- a/mteb/tasks/Image/ZeroShotClassification/eng/PatchCamelyon.py
+++ b/mteb/tasks/Image/ZeroShotClassification/eng/PatchCamelyon.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-import os
+from pathlib import Path
 
 from mteb.abstasks.AbsTaskAnyZeroShotClassification import (
     AbsTaskAnyZeroShotClassification,
@@ -60,8 +60,8 @@ class PatchCamelyonZeroShotClassification(AbsTaskAnyZeroShotClassification):
     label_column_name = "cls"
 
     def get_candidate_labels(self) -> list[str]:
-        path = os.path.dirname(__file__)
-        with open(os.path.join(path, "templates/PatchCamelyon_labels.txt")) as f:
+        path = Path(__file__).parent / "templates" / "PatchCamelyon_labels.txt"
+        with path.open() as f:
             labels = f.readlines()
         return [f"histopathology image of {c}" for c in labels]
 
diff --git a/mteb/tasks/Retrieval/eng/MLQuestions.py b/mteb/tasks/Retrieval/eng/MLQuestions.py
index 49519ebd20..6995228a21 100644
--- a/mteb/tasks/Retrieval/eng/MLQuestions.py
+++ b/mteb/tasks/Retrieval/eng/MLQuestions.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import csv
+from pathlib import Path
 
 from huggingface_hub import snapshot_download
 
@@ -80,8 +81,9 @@ def load_data(self) -> None:
 
     def _load_data_for_split(self, download_dir, split):
         queries, corpus, qrels = {}, {}, {}
-        dataset_path = f"{download_dir}/{split}.csv"
-        with open(dataset_path) as csvfile:
+        download_dir = Path(download_dir)
+        dataset_path = download_dir / f"{split}.csv"
+        with dataset_path.open() as csvfile:
             reader = csv.DictReader(csvfile)
             for i, row in enumerate(reader):
                 query_id = f"Q{str(i)}"
@@ -91,8 +93,8 @@
             qrels[query_id] = {f"C{doc_id}": 1}
 
         # Same corpus for all splits
-        corpus_path = f"{download_dir}/test_passages.csv"
-        with open(corpus_path) as csvfile:
+        corpus_path = download_dir / "test_passages.csv"
+        with corpus_path.open() as csvfile:
             reader = csv.DictReader(csvfile)
             for i, row in enumerate(reader):
                 doc_id = f"C{str(i)}"
diff --git a/pyproject.toml b/pyproject.toml
index 2e4da63816..b027e26b68 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -161,7 +161,6 @@ namespaces = false
 "mteb.tasks.Image.ZeroShotClassification.eng.templates" = ["*.txt"]
 
 [tool.ruff]
-
 target-version = "py39"
 
 
@@ -193,10 +192,9 @@ select = [
 
     # would exclude:
     # "N806", # probably not worth it
    # "N812" # disallows: import torch.nn.functional as F which is standard
-
+    "PTH",
 ]
-
 ignore = [
     "E501", # line too long
     "E741", # ambiguous variable name
@@ -213,6 +211,9 @@ ignore = [
     "C408", # don't use unecc. collection call, e.g. dict over {}
 ]
 
+[tool.ruff.lint.per-file-ignores]
+"scripts/*" = ["PTH"]
+
 [tool.ruff.lint.flake8-implicit-str-concat]
 allow-multiline = false
 
diff --git a/tests/test_models/model_loading.py b/tests/test_models/model_loading.py
index a922b01c30..61f0d026ed 100644
--- a/tests/test_models/model_loading.py
+++ b/tests/test_models/model_loading.py
@@ -95,7 +95,7 @@ def parse_args():
     elif args.model_name_file:
         all_model_names = []
         if Path(args.model_name_file).exists():
-            with open(args.model_name_file) as f:
+            with args.model_name_file.open() as f:
                 all_model_names = f.read().strip().split()
         else:
             logging.warning(

From c0914da12c9fa0457f6786886042b2f97a948019 Mon Sep 17 00:00:00 2001
From: Roman Solomatin <36135455+Samoed@users.noreply.github.com>
Date: Sat, 20 Sep 2025 22:54:04 +0300
Subject: [PATCH 2/2] fix script

---
 tests/test_models/model_loading.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/test_models/model_loading.py b/tests/test_models/model_loading.py
index 61f0d026ed..b1c330d73d 100644
--- a/tests/test_models/model_loading.py
+++ b/tests/test_models/model_loading.py
@@ -94,8 +94,9 @@ def parse_args():
         all_model_names = args.model_name
     elif args.model_name_file:
         all_model_names = []
-        if Path(args.model_name_file).exists():
-            with args.model_name_file.open() as f:
+        model_name_file = Path(args.model_name_file)
+        if model_name_file.exists():
+            with model_name_file.open() as f:
                 all_model_names = f.read().strip().split()
         else:
             logging.warning(