From 200852ecfb53af843bb9d920b4108e9f324bb4c4 Mon Sep 17 00:00:00 2001
From: Wauplin
Date: Mon, 15 Sep 2025 16:05:49 +0200
Subject: [PATCH 01/30] Adapt and test huggingface_hub v1.0.0.rc0

---
 src/transformers/testing_utils.py | 16 ++++++++--------
 src/transformers/utils/hub.py     | 14 +++++---------
 2 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py
index afc0c3e6d794..c0a14ddcce01 100644
--- a/src/transformers/testing_utils.py
+++ b/src/transformers/testing_utils.py
@@ -43,8 +43,7 @@
 from unittest import mock
 from unittest.mock import patch

-import huggingface_hub.utils
-import requests
+import httpx
 import urllib3
 from huggingface_hub import delete_repo
 from packaging import version
@@ -2748,13 +2747,14 @@ def wrapper(*args, **kwargs):
         while retry_count < max_attempts:
             try:
                 return test_func_ref(*args, **kwargs)
-            # We catch all exceptions related to network issues from requests
+            # We catch all exceptions related to network issues from httpx
             except (
-                requests.exceptions.ConnectionError,
-                requests.exceptions.Timeout,
-                requests.exceptions.ReadTimeout,
-                requests.exceptions.HTTPError,
-                requests.exceptions.RequestException,
+                httpx.HTTPError,
+                httpx.RequestError,
+                httpx.TimeoutException,
+                httpx.ReadTimeout,
+                httpx.ConnectError,
+                httpx.NetworkError,
             ) as err:
                 logger.error(
                     f"Test failed with {err} at try {retry_count}/{max_attempts} as it couldn't connect to the specified Hub repository."
diff --git a/src/transformers/utils/hub.py b/src/transformers/utils/hub.py
index 4beacbe25aeb..3cfaf2503579 100644
--- a/src/transformers/utils/hub.py
+++ b/src/transformers/utils/hub.py
@@ -27,6 +27,7 @@
 from urllib.parse import urlparse
 from uuid import uuid4

+import httpx
 import huggingface_hub
 import requests
 from huggingface_hub import (
@@ -58,7 +59,6 @@
     hf_raise_for_status,
     send_telemetry,
 )
-from requests.exceptions import HTTPError

 from . import __version__, logging
 from .generic import working_or_temp_dir
@@ -176,7 +176,7 @@ def list_repo_templates(
         ]
     except (GatedRepoError, RepositoryNotFoundError, RevisionNotFoundError):
         raise  # valid errors => do not catch
-    except (HTTPError, OfflineModeIsEnabled, requests.exceptions.ConnectionError):
+    except (HfHubHTTPError, OfflineModeIsEnabled, httpx.NetworkError):
         pass  # offline mode, internet down, etc. => try local files

     # check local files
@@ -683,14 +683,10 @@ def has_file(
             proxies=proxies,
             timeout=10,
         )
-    except (requests.exceptions.SSLError, requests.exceptions.ProxyError):
+    except httpx.ProxyError:
         # Actually raise for those subclasses of ConnectionError
         raise
-    except (
-        requests.exceptions.ConnectionError,
-        requests.exceptions.Timeout,
-        OfflineModeIsEnabled,
-    ):
+    except (httpx.ConnectError, httpx.TimeoutException, OfflineModeIsEnabled):
         return has_file_in_cache

     try:
@@ -714,7 +710,7 @@ def has_file(
         ) from e
     except EntryNotFoundError:
         return False  # File does not exist
-    except requests.HTTPError:
+    except HfHubHttpError:
         # Any authentication/authorization error will be caught here => default to cache
         return has_file_in_cache

From 9d681efde05b71bffa1cdb20770a52c54f0b653d Mon Sep 17 00:00:00 2001
From: Wauplin
Date: Mon, 15 Sep 2025 16:47:33 +0200
Subject: [PATCH 02/30] forgot to bump hfh

---
 setup.py                                      | 2 +-
 src/transformers/dependency_versions_table.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index b4feedbc77a1..efd0ad5bc2e7 100644
--- a/setup.py
+++ b/setup.py
@@ -115,7 +115,7 @@
     "GitPython<3.1.19",
     "hf-doc-builder>=0.3.0",
     "hf_xet",
-    "huggingface-hub>=0.34.0,<1.0",
+    "huggingface-hub==1.0.0.rc0",
     "importlib_metadata",
     "ipadic>=1.0.0,<2.0",
     "jax>=0.4.1,<=0.4.13",
diff --git a/src/transformers/dependency_versions_table.py b/src/transformers/dependency_versions_table.py
index ab6e747d14db..2ecb7682cd3e 100644
--- a/src/transformers/dependency_versions_table.py
+++ b/src/transformers/dependency_versions_table.py
@@ -24,7 +24,7 @@
     "GitPython": "GitPython<3.1.19",
     "hf-doc-builder": "hf-doc-builder>=0.3.0",
     "hf_xet": "hf_xet",
-    "huggingface-hub": "huggingface-hub>=0.34.0,<1.0",
+    "huggingface-hub": "huggingface-hub==1.0.0.rc0",
     "importlib_metadata": "importlib_metadata",
     "ipadic": "ipadic>=1.0.0,<2.0",
     "jax": "jax>=0.4.1,<=0.4.13",

From 3e9c1d6dc67b6440960fc555bc51a187dfbc8c6f Mon Sep 17 00:00:00 2001
From: Wauplin
Date: Mon, 15 Sep 2025 18:39:59 +0200
Subject: [PATCH 03/30] bump

---
 .circleci/config.yml                         |   4 +
 setup.py                                     |   2 +-
 src/transformers/file_utils.py               |   3 -
 src/transformers/keras_callbacks.py          |  19 ++--
 src/transformers/utils/__init__.py           |   2 -
 tests/generation/test_configuration_utils.py |   3 +-
 tests/models/auto/test_modeling_auto.py      |  39 ++++---
 tests/models/auto/test_processor_auto.py     | 103 +++++++++----------
 tests/pipelines/test_pipelines_common.py     |  15 ++-
 tests/trainer/test_trainer.py                |   3 +-
 tests/utils/test_configuration_utils.py      |   2 -
 tests/utils/test_feature_extraction_utils.py |   2 -
 tests/utils/test_image_processing_utils.py   |   2 -
 tests/utils/test_modeling_utils.py           |   3 +-
 tests/utils/test_tokenization_utils.py       |   2 -
 utils/create_dummy_models.py                 |   2 +-
 utils/fetch_hub_objects_for_ci.py            |   2 +-
 utils/update_tiny_models.py                  |   6 +-
 18 files changed, 98 insertions(+), 116 deletions(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 5616355415b4..034f514138c1 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -44,6 +44,7 @@ jobs:
     steps:
       - checkout
       - run: uv pip install -U -e .
+      - run: uv pip install --prerelease allow "huggingface_hub==1.0.0.rc0"
       - run: echo 'export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)"' >> "$BASH_ENV" && source "$BASH_ENV"
       - run: mkdir -p test_preparation
       - run: python utils/tests_fetcher.py | tee tests_fetched_summary.txt
@@ -96,6 +97,7 @@ jobs:
     steps:
       - checkout
      - run: uv pip install -U -e .
+      - run: uv pip install --prerelease allow "huggingface_hub==1.0.0.rc0"
       - run: echo 'export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)"' >> "$BASH_ENV" && source "$BASH_ENV"
       - run: mkdir -p test_preparation
       - run: python utils/tests_fetcher.py --fetch_all | tee tests_fetched_summary.txt
@@ -149,6 +151,7 @@ jobs:
     steps:
       - checkout
       - run: uv pip install -e ".[quality]"
+      - run: uv pip install --prerelease allow "huggingface_hub==1.0.0.rc0"
       - run:
           name: Show installed libraries and their versions
           command: pip freeze | tee installed.txt
@@ -174,6 +177,7 @@ jobs:
     steps:
       - checkout
       - run: uv pip install -e ".[quality]"
+      - run: uv pip install --prerelease allow "huggingface_hub==1.0.0.rc0"
       - run:
           name: Show installed libraries and their versions
           command: pip freeze | tee installed.txt
diff --git a/setup.py b/setup.py
index efd0ad5bc2e7..5b43228cccba 100644
--- a/setup.py
+++ b/setup.py
@@ -115,7 +115,7 @@
     "GitPython<3.1.19",
     "hf-doc-builder>=0.3.0",
     "hf_xet",
-    "huggingface-hub==1.0.0.rc0",
+    "huggingface-hub",
     "importlib_metadata",
     "ipadic>=1.0.0,<2.0",
     "jax>=0.4.1,<=0.4.13",
diff --git a/src/transformers/file_utils.py b/src/transformers/file_utils.py
index fc6f722262d9..6d7b3c6f88fc 100644
--- a/src/transformers/file_utils.py
+++ b/src/transformers/file_utils.py
@@ -17,9 +17,6 @@
 This module should not be update anymore and is only left for backward compatibility.
 """

-from huggingface_hub import get_full_repo_name  # for backward compatibility
-from huggingface_hub.constants import HF_HUB_DISABLE_TELEMETRY as DISABLE_TELEMETRY  # for backward compatibility
-
 from . import __version__

 # Backward compatibility imports, to make sure all those objects can be found in file_utils
diff --git a/src/transformers/keras_callbacks.py b/src/transformers/keras_callbacks.py
index ab7fc4615b47..4491e9386e04 100644
--- a/src/transformers/keras_callbacks.py
+++ b/src/transformers/keras_callbacks.py
@@ -6,7 +6,7 @@

 import numpy as np
 import tensorflow as tf
-from huggingface_hub import Repository, create_repo
+from huggingface_hub import create_repo
 from packaging.version import parse

 from . import IntervalStrategy, PreTrainedTokenizerBase
@@ -340,7 +340,8 @@ def __init__(
         self.hub_model_id = create_repo(repo_id=hub_model_id, exist_ok=True, token=hub_token).repo_id

         self.output_dir = output_dir
-        self.repo = Repository(str(self.output_dir), clone_from=self.hub_model_id, token=hub_token)
+        # TODO: remove Repository (or even deprecate Keras altogether?)
+        # self.repo = Repository(str(self.output_dir), clone_from=self.hub_model_id, token=hub_token)
         self.tokenizer = tokenizer
         self.last_job = None

@@ -360,9 +361,9 @@ def on_train_batch_end(self, batch, logs=None):
             self.model.save_pretrained(self.output_dir)
             if self.tokenizer is not None:
                 self.tokenizer.save_pretrained(self.output_dir)
-            _, self.last_job = self.repo.push_to_hub(
-                commit_message=f"Training in progress steps {batch}", blocking=False
-            )
+            # _, self.last_job = self.repo.push_to_hub(
+            #     commit_message=f"Training in progress steps {batch}", blocking=False
+            # )

     def on_epoch_end(self, epoch, logs=None):
         logs = logs.copy()  # Don't accidentally write things that Keras will read later
@@ -387,9 +388,9 @@ def on_epoch_end(self, epoch, logs=None):
             model_card = train_summary.to_model_card()
             with (self.output_dir / "README.md").open("w") as f:
                 f.write(model_card)
-            _, self.last_job = self.repo.push_to_hub(
-                commit_message=f"Training in progress epoch {epoch}", blocking=False
-            )
+            # _, self.last_job = self.repo.push_to_hub(
+            #     commit_message=f"Training in progress epoch {epoch}", blocking=False
+            # )

     def on_train_end(self, logs=None):
         # Makes sure the latest version of the model is uploaded
@@ -410,4 +411,4 @@ def on_train_end(self, logs=None):
             model_card = train_summary.to_model_card()
             with (self.output_dir / "README.md").open("w") as f:
                 f.write(model_card)
-        self.repo.push_to_hub(commit_message="End of training", blocking=True)
+        # self.repo.push_to_hub(commit_message="End of training", blocking=True)
diff --git a/src/transformers/utils/__init__.py b/src/transformers/utils/__init__.py
index 1b4671a55e8c..bc1b760dd84b 100644
--- a/src/transformers/utils/__init__.py
+++ b/src/transformers/utils/__init__.py
@@ -16,8 +16,6 @@

 from functools import lru_cache

-from huggingface_hub import get_full_repo_name  # for backward compatibility
-from huggingface_hub.constants import HF_HUB_DISABLE_TELEMETRY as DISABLE_TELEMETRY  # for backward compatibility
 from packaging import version

 from .. import __version__
diff --git a/tests/generation/test_configuration_utils.py b/tests/generation/test_configuration_utils.py
index bb4c2a52986c..adfa1af09d5c 100644
--- a/tests/generation/test_configuration_utils.py
+++ b/tests/generation/test_configuration_utils.py
@@ -19,7 +19,7 @@
 import unittest
 import warnings

-from huggingface_hub import HfFolder, create_pull_request
+from huggingface_hub import create_pull_request
 from parameterized import parameterized

 from transformers import AutoConfig, GenerationConfig, WatermarkingConfig, is_torch_available
@@ -688,7 +688,6 @@ class ConfigPushToHubTester(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         cls._token = TOKEN
-        HfFolder.save_token(TOKEN)

     def test_push_to_hub(self):
         with TemporaryHubRepo(token=self._token) as tmp_repo:
diff --git a/tests/models/auto/test_modeling_auto.py b/tests/models/auto/test_modeling_auto.py
index 7af5315f844c..4e3d2196f812 100644
--- a/tests/models/auto/test_modeling_auto.py
+++ b/tests/models/auto/test_modeling_auto.py
@@ -23,7 +23,6 @@
 from pathlib import Path

 import pytest
-from huggingface_hub import Repository

 import transformers
 from transformers import BertConfig, GPT2Model, is_safetensors_available, is_torch_available
@@ -558,25 +557,25 @@ def test_attr_not_existing(self):
         _MODEL_MAPPING = _LazyAutoMapping(_CONFIG_MAPPING_NAMES, _MODEL_MAPPING_NAMES)
         self.assertEqual(_MODEL_MAPPING[BertConfig], GPT2Model)

-    def test_dynamic_saving_from_local_repo(self):
-        with tempfile.TemporaryDirectory() as tmp_dir, tempfile.TemporaryDirectory() as tmp_dir_out:
-            # `Repository` is deprecated and will be removed in `huggingface_hub v1.0`.
-            # TODO: Remove this test when this comes.
-            # Here is a ugly approach to avoid `too many requests`
-            repo_id = url_to_local_path("hf-internal-testing/tiny-random-custom-architecture")
-            if os.path.isdir(repo_id):
-                shutil.copytree(repo_id, tmp_dir, dirs_exist_ok=True)
-            else:
-                _ = Repository(
-                    local_dir=tmp_dir,
-                    clone_from=url_to_local_path("hf-internal-testing/tiny-random-custom-architecture"),
-                )
-
-            model = AutoModelForCausalLM.from_pretrained(tmp_dir, trust_remote_code=True)
-            model.save_pretrained(tmp_dir_out)
-            _ = AutoModelForCausalLM.from_pretrained(tmp_dir_out, trust_remote_code=True)
-            self.assertTrue((Path(tmp_dir_out) / "modeling_fake_custom.py").is_file())
-            self.assertTrue((Path(tmp_dir_out) / "configuration_fake_custom.py").is_file())
+    # def test_dynamic_saving_from_local_repo(self):
+    #     with tempfile.TemporaryDirectory() as tmp_dir, tempfile.TemporaryDirectory() as tmp_dir_out:
+    #         # `Repository` is deprecated and will be removed in `huggingface_hub v1.0`.
+    #         # TODO: Remove this test when this comes.
+    #         # Here is an ugly approach to avoid `too many requests`
+    #         repo_id = url_to_local_path("hf-internal-testing/tiny-random-custom-architecture")
+    #         if os.path.isdir(repo_id):
+    #             shutil.copytree(repo_id, tmp_dir, dirs_exist_ok=True)
+    #         else:
+    #             _ = Repository(
+    #                 local_dir=tmp_dir,
+    #                 clone_from=url_to_local_path("hf-internal-testing/tiny-random-custom-architecture"),
+    #             )
+
+    #         model = AutoModelForCausalLM.from_pretrained(tmp_dir, trust_remote_code=True)
+    #         model.save_pretrained(tmp_dir_out)
+    #         _ = AutoModelForCausalLM.from_pretrained(tmp_dir_out, trust_remote_code=True)
+    #         self.assertTrue((Path(tmp_dir_out) / "modeling_fake_custom.py").is_file())
+    #         self.assertTrue((Path(tmp_dir_out) / "configuration_fake_custom.py").is_file())

     def test_custom_model_patched_generation_inheritance(self):
         """
         Tests that our inheritance patching for generate-compatible models works as expected.
diff --git a/tests/models/auto/test_processor_auto.py b/tests/models/auto/test_processor_auto.py
index 60500001a3b6..5335779b7ac7 100644
--- a/tests/models/auto/test_processor_auto.py
+++ b/tests/models/auto/test_processor_auto.py
@@ -20,8 +20,6 @@
 from pathlib import Path
 from shutil import copyfile

-from huggingface_hub import HfFolder, Repository
-
 import transformers
 from transformers import (
     CONFIG_MAPPING,
@@ -423,7 +421,6 @@ class ProcessorPushToHubTester(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         cls._token = TOKEN
-        HfFolder.save_token(TOKEN)

     def test_push_to_hub_via_save_pretrained(self):
         with TemporaryHubRepo(token=self._token) as tmp_repo:
@@ -454,53 +451,53 @@ def test_push_to_hub_in_organization_via_save_pretrained(self):
                 self.assertEqual(v, getattr(new_processor.feature_extractor, k))
             self.assertDictEqual(new_processor.tokenizer.get_vocab(), processor.tokenizer.get_vocab())

-    def test_push_to_hub_dynamic_processor(self):
-        with TemporaryHubRepo(token=self._token) as tmp_repo:
-            CustomFeatureExtractor.register_for_auto_class()
-            CustomTokenizer.register_for_auto_class()
-            CustomProcessor.register_for_auto_class()
-
-            feature_extractor = CustomFeatureExtractor.from_pretrained(SAMPLE_PROCESSOR_CONFIG_DIR)
-
-            with tempfile.TemporaryDirectory() as tmp_dir:
-                vocab_file = os.path.join(tmp_dir, "vocab.txt")
-                with open(vocab_file, "w", encoding="utf-8") as vocab_writer:
-                    vocab_writer.write("".join([x + "\n" for x in self.vocab_tokens]))
-                tokenizer = CustomTokenizer(vocab_file)
-
-            processor = CustomProcessor(feature_extractor, tokenizer)
-
-            with tempfile.TemporaryDirectory() as tmp_dir:
-                repo = Repository(tmp_dir, clone_from=tmp_repo, token=self._token)
-                processor.save_pretrained(tmp_dir)
-
-                # This has added the proper auto_map field to the feature extractor config
-                self.assertDictEqual(
-                    processor.feature_extractor.auto_map,
-                    {
-                        "AutoFeatureExtractor": "custom_feature_extraction.CustomFeatureExtractor",
-                        "AutoProcessor": "custom_processing.CustomProcessor",
-                    },
-                )
-
-                # This has added the proper auto_map field to the tokenizer config
-                with open(os.path.join(tmp_dir, "tokenizer_config.json")) as f:
-                    tokenizer_config = json.load(f)
-                self.assertDictEqual(
-                    tokenizer_config["auto_map"],
-                    {
-                        "AutoTokenizer": ["custom_tokenization.CustomTokenizer", None],
-                        "AutoProcessor": "custom_processing.CustomProcessor",
-                    },
-                )
-
-                # The code has been copied from fixtures
-                self.assertTrue(os.path.isfile(os.path.join(tmp_dir, "custom_feature_extraction.py")))
-                self.assertTrue(os.path.isfile(os.path.join(tmp_dir, "custom_tokenization.py")))
-                self.assertTrue(os.path.isfile(os.path.join(tmp_dir, "custom_processing.py")))
-
-                repo.push_to_hub()
-
-                new_processor = AutoProcessor.from_pretrained(tmp_repo.repo_id, trust_remote_code=True)
-                # Can't make an isinstance check because the new_processor is from the CustomProcessor class of a dynamic module
-                self.assertEqual(new_processor.__class__.__name__, "CustomProcessor")
+    # def test_push_to_hub_dynamic_processor(self):
+    #     with TemporaryHubRepo(token=self._token) as tmp_repo:
+    #         CustomFeatureExtractor.register_for_auto_class()
+    #         CustomTokenizer.register_for_auto_class()
+    #         CustomProcessor.register_for_auto_class()
+
+    #         feature_extractor = CustomFeatureExtractor.from_pretrained(SAMPLE_PROCESSOR_CONFIG_DIR)
+
+    #         with tempfile.TemporaryDirectory() as tmp_dir:
+    #             vocab_file = os.path.join(tmp_dir, "vocab.txt")
+    #             with open(vocab_file, "w", encoding="utf-8") as vocab_writer:
+    #                 vocab_writer.write("".join([x + "\n" for x in self.vocab_tokens]))
+    #             tokenizer = CustomTokenizer(vocab_file)
+
+    #         processor = CustomProcessor(feature_extractor, tokenizer)
+
+    #         with tempfile.TemporaryDirectory() as tmp_dir:
+    #             repo = Repository(tmp_dir, clone_from=tmp_repo, token=self._token)
+    #             processor.save_pretrained(tmp_dir)
+
+    #             # This has added the proper auto_map field to the feature extractor config
+    #             self.assertDictEqual(
+    #                 processor.feature_extractor.auto_map,
+    #                 {
+    #                     "AutoFeatureExtractor": "custom_feature_extraction.CustomFeatureExtractor",
+    #                     "AutoProcessor": "custom_processing.CustomProcessor",
+    #                 },
+    #             )
+
+    #             # This has added the proper auto_map field to the tokenizer config
+    #             with open(os.path.join(tmp_dir, "tokenizer_config.json")) as f:
+    #                 tokenizer_config = json.load(f)
+    #             self.assertDictEqual(
+    #                 tokenizer_config["auto_map"],
+    #                 {
+    #                     "AutoTokenizer": ["custom_tokenization.CustomTokenizer", None],
+    #                     "AutoProcessor": "custom_processing.CustomProcessor",
+    #                 },
+    #             )
+
+    #             # The code has been copied from fixtures
+    #             self.assertTrue(os.path.isfile(os.path.join(tmp_dir, "custom_feature_extraction.py")))
+    #             self.assertTrue(os.path.isfile(os.path.join(tmp_dir, "custom_tokenization.py")))
+    #             self.assertTrue(os.path.isfile(os.path.join(tmp_dir, "custom_processing.py")))
+
+    #             repo.push_to_hub()
+
+    #             new_processor = AutoProcessor.from_pretrained(tmp_repo.repo_id, trust_remote_code=True)
+    #             # Can't make an isinstance check because the new_processor is from the CustomProcessor class of a dynamic module
+    #             self.assertEqual(new_processor.__class__.__name__, "CustomProcessor")
diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py
index 8fb87136a024..453d07a2f755 100644
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -22,7 +22,7 @@

 import datasets
 import numpy as np
-from huggingface_hub import HfFolder, Repository, delete_repo
+from huggingface_hub import delete_repo
 from requests.exceptions import HTTPError

 from transformers import (
@@ -211,13 +211,13 @@ def test_dtype_property(self):
         pipe.model = None
         self.assertIsNone(pipe.dtype)

-    @require_torch
-    def test_auto_model_pipeline_registration_from_local_dir(self):
-        with tempfile.TemporaryDirectory() as tmp_dir:
-            _ = Repository(local_dir=tmp_dir, clone_from="hf-internal-testing/tiny-random-custom-architecture")
-            pipe = pipeline("text-generation", tmp_dir, trust_remote_code=True)
+    # @require_torch
+    # def test_auto_model_pipeline_registration_from_local_dir(self):
+    #     with tempfile.TemporaryDirectory() as tmp_dir:
+    #         _ = Repository(local_dir=tmp_dir, clone_from="hf-internal-testing/tiny-random-custom-architecture")
+    #         pipe = pipeline("text-generation", tmp_dir, trust_remote_code=True)

-        self.assertIsInstance(pipe, TextGenerationPipeline)  # Assert successful load
+    #     self.assertIsInstance(pipe, TextGenerationPipeline)  # Assert successful load

     @require_torch
     def test_pipeline_with_task_parameters_no_side_effects(self):
@@ -900,7 +900,6 @@ class DynamicPipelineTester(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         cls._token = TOKEN
-        HfFolder.save_token(TOKEN)

     @classmethod
     def tearDownClass(cls):
diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py
index 8fc1628c7f6d..62cc6a5bd5b4 100644
--- a/tests/trainer/test_trainer.py
+++ b/tests/trainer/test_trainer.py
@@ -32,7 +32,7 @@

 import numpy as np
 import pytest
-from huggingface_hub import HfFolder, ModelCard, create_branch, list_repo_commits, list_repo_files
+from huggingface_hub import ModelCard, create_branch, list_repo_commits, list_repo_files
 from packaging import version
 from parameterized import parameterized

@@ -5168,7 +5168,6 @@ class TrainerIntegrationWithHubTester(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         cls._token = TOKEN
-        HfFolder.save_token(TOKEN)

     def test_push_to_hub(self):
         with TemporaryHubRepo(token=self._token) as tmp_repo:
diff --git a/tests/utils/test_configuration_utils.py b/tests/utils/test_configuration_utils.py
index 60e8703937cd..72ad96877441 100644
--- a/tests/utils/test_configuration_utils.py
+++ b/tests/utils/test_configuration_utils.py
@@ -21,7 +21,6 @@
 import warnings
 from pathlib import Path

-from huggingface_hub import HfFolder
 from requests.exceptions import HTTPError

 from transformers import AutoConfig, BertConfig, Florence2Config, GPT2Config
@@ -95,7 +94,6 @@ class ConfigPushToHubTester(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         cls._token = TOKEN
-        HfFolder.save_token(TOKEN)

     def test_push_to_hub(self):
         with TemporaryHubRepo(token=self._token) as tmp_repo:
diff --git a/tests/utils/test_feature_extraction_utils.py b/tests/utils/test_feature_extraction_utils.py
index b3d8932e7cda..1181c0de3287 100644
--- a/tests/utils/test_feature_extraction_utils.py
+++ b/tests/utils/test_feature_extraction_utils.py
@@ -19,7 +19,6 @@
 import unittest.mock as mock
 from pathlib import Path

-from huggingface_hub import HfFolder
 from requests.exceptions import HTTPError

 from transformers import AutoFeatureExtractor, Wav2Vec2FeatureExtractor
@@ -57,7 +56,6 @@ class FeatureExtractorPushToHubTester(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         cls._token = TOKEN
-        HfFolder.save_token(TOKEN)

     def test_push_to_hub(self):
         with TemporaryHubRepo(token=self._token) as tmp_repo:
diff --git a/tests/utils/test_image_processing_utils.py b/tests/utils/test_image_processing_utils.py
index ea8c06fa9930..ebe256a534af 100644
--- a/tests/utils/test_image_processing_utils.py
+++ b/tests/utils/test_image_processing_utils.py
@@ -18,7 +18,6 @@
 import unittest.mock as mock
 from pathlib import Path

-from huggingface_hub import HfFolder
 from requests.exceptions import HTTPError

 from transformers import AutoImageProcessor, ViTImageProcessor, ViTImageProcessorFast
@@ -71,7 +70,6 @@ class ImageProcessorPushToHubTester(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         cls._token = TOKEN
-        HfFolder.save_token(TOKEN)

     def test_push_to_hub(self):
         with TemporaryHubRepo(token=self._token) as tmp_repo:
diff --git a/tests/utils/test_modeling_utils.py b/tests/utils/test_modeling_utils.py
index 7f24c9882540..6242d9faa7ab 100644
--- a/tests/utils/test_modeling_utils.py
+++ b/tests/utils/test_modeling_utils.py
@@ -29,7 +29,7 @@
 import pytest
 import requests
-from huggingface_hub import HfApi, HfFolder
+from huggingface_hub import HfApi
 from parameterized import parameterized
 from pytest import mark
 from requests.exceptions import HTTPError
@@ -2265,7 +2265,6 @@ class ModelPushToHubTester(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         cls._token = TOKEN
-        HfFolder.save_token(TOKEN)

     @unittest.skip(reason="This test is flaky")
     def test_push_to_hub(self):
diff --git a/tests/utils/test_tokenization_utils.py b/tests/utils/test_tokenization_utils.py
index ff0f6499fe13..b30a2b79af01 100644
--- a/tests/utils/test_tokenization_utils.py
+++ b/tests/utils/test_tokenization_utils.py
@@ -19,7 +19,6 @@
 import unittest.mock as mock
 from pathlib import Path

-from huggingface_hub import HfFolder
 from huggingface_hub.file_download import http_get
 from requests.exceptions import HTTPError

@@ -115,7 +114,6 @@ class TokenizerPushToHubTester(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
         cls._token = TOKEN
-        HfFolder.save_token(TOKEN)

     def test_push_to_hub(self):
         with TemporaryHubRepo(token=self._token) as tmp_repo:
diff --git a/utils/create_dummy_models.py b/utils/create_dummy_models.py
index 53ee7597d89c..0b3cdc9d4df7 100644
--- a/utils/create_dummy_models.py
+++ b/utils/create_dummy_models.py
@@ -28,7 +28,7 @@
 from check_config_docstrings import get_checkpoint_from_config_class
 from datasets import load_dataset
 from get_test_info import get_model_to_tester_mapping, get_tester_classes_for_model
-from huggingface_hub import Repository, create_repo, hf_api, upload_folder
+from huggingface_hub import Repository, create_repo, hf_api, upload_folder  # TODO: remove Repository

 from transformers import (
     CONFIG_MAPPING,
diff --git a/utils/fetch_hub_objects_for_ci.py b/utils/fetch_hub_objects_for_ci.py
index 349fd8ed58cd..b6686547d378 100644
--- a/utils/fetch_hub_objects_for_ci.py
+++ b/utils/fetch_hub_objects_for_ci.py
@@ -1,7 +1,7 @@
 import os

 import requests
-from huggingface_hub import Repository, hf_hub_download
+from huggingface_hub import Repository, hf_hub_download  # TODO: remove Repository

 from transformers.testing_utils import _run_pipeline_tests, _run_staging
 from transformers.utils.import_utils import is_mistral_common_available
diff --git a/utils/update_tiny_models.py b/utils/update_tiny_models.py
index 9dc4cdf6e6b2..27acc7261917 100644
--- a/utils/update_tiny_models.py
+++ b/utils/update_tiny_models.py
@@ -28,7 +28,7 @@
 import time

 from create_dummy_models import COMPOSITE_MODELS, create_tiny_models
-from huggingface_hub import ModelFilter, hf_api
+from huggingface_hub import hf_api

 import transformers
 from transformers import AutoFeatureExtractor, AutoImageProcessor, AutoTokenizer
@@ -88,9 +88,7 @@ def get_tiny_model_summary_from_hub(output_path):
     # All tiny model base names on Hub
     model_names = get_all_model_names()
     models = hf_api.list_models(
-        filter=ModelFilter(
-            author="hf-internal-testing",
-        )
+        author="hf-internal-testing",
     )
     _models = set()
     for x in models:

From b562554722a1f1ad3509a2c8c972fab7fee48af7 Mon Sep 17 00:00:00 2001
From: Wauplin
Date: Mon, 15 Sep 2025 18:54:44 +0200
Subject: [PATCH 04/30] code quality

---
 src/transformers/testing_utils.py       | 4 ++--
 src/transformers/utils/hub.py           | 4 ++--
 tests/models/auto/test_modeling_auto.py | 4 ----
 3 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py
index c0a14ddcce01..5033991178ae 100644
--- a/src/transformers/testing_utils.py
+++ b/src/transformers/testing_utils.py
@@ -45,7 +45,7 @@
 import httpx
 import urllib3
-from huggingface_hub import delete_repo
+from huggingface_hub import delete_repo, create_repo
 from packaging import version

 from transformers import Trainer
@@ -1933,7 +1933,7 @@ def __init__(self, namespace: Optional[str] = None, token: Optional[str] = None):
         repo_id = Path(tmp_dir).name
         if namespace is not None:
             repo_id = f"{namespace}/{repo_id}"
-        self.repo_url = huggingface_hub.create_repo(repo_id, token=self.token)
+        self.repo_url = create_repo(repo_id, token=self.token)

     def __enter__(self):
         return self.repo_url
diff --git a/src/transformers/utils/hub.py b/src/transformers/utils/hub.py
index 3cfaf2503579..b56859b6e918 100644
--- a/src/transformers/utils/hub.py
+++ b/src/transformers/utils/hub.py
@@ -556,7 +556,7 @@ def cached_files(
             ) from e
     # snapshot_download will not raise EntryNotFoundError, but hf_hub_download can. If this is the case, it will be treated
     # later on anyway and re-raised if needed
-    elif isinstance(e, HTTPError) and not isinstance(e, EntryNotFoundError):
+    elif isinstance(e, HfHubHTTPError) and not isinstance(e, EntryNotFoundError):
         if not _raise_exceptions_for_connection_errors:
             return None
         raise OSError(f"There was a specific connection error when trying to load {path_or_repo_id}:\n{e}") from e
@@ -710,7 +710,7 @@ def has_file(
         ) from e
     except EntryNotFoundError:
         return False  # File does not exist
-    except HfHubHttpError:
+    except HfHubHTTPError:
         # Any authentication/authorization error will be caught here => default to cache
         return has_file_in_cache
diff --git a/tests/models/auto/test_modeling_auto.py b/tests/models/auto/test_modeling_auto.py
index 4e3d2196f812..d96b7c7a6b98 100644
--- a/tests/models/auto/test_modeling_auto.py
+++ b/tests/models/auto/test_modeling_auto.py
@@ -13,9 +13,6 @@
 # limitations under the License.

 import copy
-import os
-import os.path
-import shutil
 import sys
 import tempfile
 import unittest
@@ -41,7 +38,6 @@
 sys.path.append(str(Path(__file__).parent.parent.parent.parent / "utils"))

 from test_module.custom_configuration import CustomConfig  # noqa E402
-from utils.fetch_hub_objects_for_ci import url_to_local_path


 if is_torch_available():

From 8a0dd1b9fdc46a794e9cc08167c7e68adeb7095d Mon Sep 17 00:00:00 2001
From: Wauplin
Date: Mon, 15 Sep 2025 19:17:27 +0200
Subject: [PATCH 05/30] code quality

---
 src/transformers/testing_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py
index 5033991178ae..b68d7ea39eee 100644
--- a/src/transformers/testing_utils.py
+++ b/src/transformers/testing_utils.py
@@ -45,7 +45,7 @@
 import httpx
 import urllib3
-from huggingface_hub import delete_repo, create_repo
+from huggingface_hub import create_repo, delete_repo
 from packaging import version

 from transformers import Trainer

From c4ad0b2a637e32ed111f4f0040c57ba1385e5a3b Mon Sep 17 00:00:00 2001
From: Wauplin
Date: Tue, 16 Sep 2025 16:17:44 +0200
Subject: [PATCH 06/30] relax dependency table

---
 src/transformers/dependency_versions_table.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/transformers/dependency_versions_table.py b/src/transformers/dependency_versions_table.py
index 2ecb7682cd3e..78dd436b1c11 100644
--- a/src/transformers/dependency_versions_table.py
+++ b/src/transformers/dependency_versions_table.py
@@ -24,7 +24,7 @@
     "GitPython": "GitPython<3.1.19",
     "hf-doc-builder": "hf-doc-builder>=0.3.0",
     "hf_xet": "hf_xet",
-    "huggingface-hub": "huggingface-hub==1.0.0.rc0",
+    "huggingface-hub": "huggingface-hub",
     "importlib_metadata": "importlib_metadata",
     "ipadic": "ipadic>=1.0.0,<2.0",
     "jax": "jax>=0.4.1,<=0.4.13",

From 9b7d4044c1bef786eb086870f8f758efec08d4cb Mon Sep 17 00:00:00 2001
From: Wauplin
Date: Tue, 16 Sep 2025 16:31:51 +0200
Subject: [PATCH 07/30] fix has_file

---
 src/transformers/utils/hub.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/transformers/utils/hub.py b/src/transformers/utils/hub.py
index b56859b6e918..9b114ea68a58 100644
--- a/src/transformers/utils/hub.py
+++ b/src/transformers/utils/hub.py
@@ -679,7 +679,7 @@ def has_file(
         response = get_session().head(
             hf_hub_url(path_or_repo, filename=filename, revision=revision, repo_type=repo_type),
             headers=build_hf_headers(token=token, user_agent=http_user_agent()),
-            allow_redirects=False,
+            follow_redirects=False,
             proxies=proxies,
             timeout=10,
         )

From 6dc72c2871ce68e972e14771274f80a967359bce Mon Sep 17 00:00:00 2001
From: Wauplin
Date: Tue, 16 Sep 2025 17:23:23 +0200
Subject: [PATCH 08/30] install hfh 1.0.0.rc0 in circle ci jobs

---
 .circleci/create_circleci_config.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.circleci/create_circleci_config.py b/.circleci/create_circleci_config.py
index aff69510d636..624b6a3b3e65 100644
--- a/.circleci/create_circleci_config.py
+++ b/.circleci/create_circleci_config.py
@@ -112,6 +112,7 @@ def __post_init__(self):
             self.install_steps = ["uv pip install ."]
         # Use a custom patched pytest to force exit the process at the end, to avoid `Too long with no output (exceeded 10m0s): context deadline exceeded`
         self.install_steps.append("uv pip install git+https://github.com/ydshieh/pytest.git@8.4.1-ydshieh")
+        self.install_steps.append("uv pip install --prerelease allow 'huggingface_hub==1.0.0.rc0'")
         if self.pytest_options is None:
             self.pytest_options = {}
         if isinstance(self.tests_to_run, str):

From 1592a16b8211ec2b1b2170d1b758560ea8993277 Mon Sep 17 00:00:00 2001
From: Wauplin
Date: Tue, 16 Sep 2025 17:34:13 +0200
Subject: [PATCH 09/30] repository

---
 utils/fetch_hub_objects_for_ci.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/utils/fetch_hub_objects_for_ci.py b/utils/fetch_hub_objects_for_ci.py
index b6686547d378..0f41ec344f5d 100644
--- a/utils/fetch_hub_objects_for_ci.py
+++ b/utils/fetch_hub_objects_for_ci.py
@@ -1,7 +1,7 @@
 import os

 import requests
-from huggingface_hub import Repository, hf_hub_download  # TODO: remove Repository
+from huggingface_hub import hf_hub_download, snapshot_download

 from transformers.testing_utils import _run_pipeline_tests, _run_staging
 from transformers.utils.import_utils import is_mistral_common_available
@@ -173,9 +173,9 @@ def url_to_local_path(url, return_url_if_not_found=True):
 # But this repo. is never used in a test decorated by `is_staging_test`.
 if not _run_staging:
     if not os.path.isdir("tiny-random-custom-architecture"):
-        _ = Repository(
+        snapshot_download(
+            "hf-internal-testing/tiny-random-custom-architecture",
             local_dir="tiny-random-custom-architecture",
-            clone_from="hf-internal-testing/tiny-random-custom-architecture",
         )

 # For `tests/test_tokenization_mistral_common.py:TestMistralCommonTokenizer`, which eventually calls

From 61f353034b7254020dab884d4502cc1725deebcf Mon Sep 17 00:00:00 2001
From: Wauplin
Date: Thu, 18 Sep 2025 11:17:57 +0200
Subject: [PATCH 10/30] push to hub now returns a commit url

---
 tests/trainer/test_trainer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py
index 62cc6a5bd5b4..7e23626464d5 100644
--- a/tests/trainer/test_trainer.py
+++ b/tests/trainer/test_trainer.py
@@ -5355,7 +5355,7 @@ def test_push_to_hub_with_revision(self):
             url = trainer.push_to_hub(revision=branch)

             # Extract branch from the url
-            re_search = re.search(r"tree/([^/]+)/", url)
+            re_search = re.search(r"commit/([^/]+)/", url)
             self.assertIsNotNone(re_search)

             branch_name = re_search.groups()[0]

From fc3c183b2c667540d5d0adf40bd1603d8881cfcb Mon Sep 17 00:00:00 2001
From: Wauplin
Date: Thu, 18 Sep 2025 11:19:49 +0200
Subject: [PATCH 11/30] catch HfHubHTTPError

---
 tests/pipelines/test_pipelines_common.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py
index 453d07a2f755..304cb28d5796 100644
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -23,7 +23,7 @@
 import datasets
 import numpy as np
 from huggingface_hub import delete_repo
-from requests.exceptions import HTTPError
+from huggingface_hub.errors import HfHubHTTPError

 from transformers import (
     AutomaticSpeechRecognitionPipeline,
@@ -905,7 +905,7 @@ def tearDownClass(cls):
         try:
             delete_repo(token=cls._token, repo_id="test-dynamic-pipeline")
-        except HTTPError:
+        except HfHubHTTPError:
             pass

     @unittest.skip("Broken, TODO @Yih-Dar")

From 990d6d09e28d0307a646f10e496278a23a8085a8 Mon Sep 17 00:00:00 2001
From: Wauplin
Date: Thu, 18 Sep 2025 11:34:24 +0200
Subject: [PATCH 12/30] check commit on branch

---
 tests/trainer/test_trainer.py | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py
index 7e23626464d5..f2ab2d3bed49 100644
--- a/tests/trainer/test_trainer.py
+++ b/tests/trainer/test_trainer.py
@@ -5352,14 +5352,10 @@ def test_push_to_hub_with_revision(self):
             )
             branch = "v1.0"
             create_branch(repo_id=trainer.hub_model_id, branch=branch, token=self._token, exist_ok=True)
-            url = trainer.push_to_hub(revision=branch)
+            trainer.push_to_hub(revision=branch)

-            # Extract branch from the url
-            re_search = re.search(r"commit/([^/]+)/", url)
-            self.assertIsNotNone(re_search)
-
-            branch_name = re_search.groups()[0]
-            self.assertEqual(branch_name, branch)
+            commits = list_repo_commits(repo_id=trainer.hub_model_id, revision=branch, token=self._token)
+            self.assertEqual(commits[0].message, "End of training")


 @require_torch

From 10d4fb0aca7b26e67fef308b29f5694d3e2ec6d7 Mon Sep 17 00:00:00 2001
From: Wauplin
Date: Thu, 18 Sep 2025 11:48:33 +0200
Subject: [PATCH 13/30] add it back

---
 tests/models/auto/test_processor_auto.py | 102 ++++++++++++-----------
 1 file changed, 52 insertions(+), 50 deletions(-)

diff --git a/tests/models/auto/test_processor_auto.py b/tests/models/auto/test_processor_auto.py
index 5335779b7ac7..6eabd690eed9 100644
--- a/tests/models/auto/test_processor_auto.py
+++ b/tests/models/auto/test_processor_auto.py
@@ -20,6 +20,8 @@
 from pathlib import Path
 from shutil import copyfile

+from huggingface_hub import snapshot_download, upload_folder
+
 import transformers
 from transformers import (
     CONFIG_MAPPING,
@@ -451,53 +453,53 @@ def test_push_to_hub_in_organization_via_save_pretrained(self):
                 self.assertEqual(v, getattr(new_processor.feature_extractor, k))
             self.assertDictEqual(new_processor.tokenizer.get_vocab(), processor.tokenizer.get_vocab())

-    # def test_push_to_hub_dynamic_processor(self):
-    #     with TemporaryHubRepo(token=self._token) as tmp_repo:
-    #         CustomFeatureExtractor.register_for_auto_class()
-    #         CustomTokenizer.register_for_auto_class()
-    #         CustomProcessor.register_for_auto_class()
-
-    #         feature_extractor = CustomFeatureExtractor.from_pretrained(SAMPLE_PROCESSOR_CONFIG_DIR)
-
-    #         with tempfile.TemporaryDirectory() as tmp_dir:
-    #             vocab_file = os.path.join(tmp_dir, "vocab.txt")
-    #             with open(vocab_file, "w", encoding="utf-8") as vocab_writer:
-    #                 vocab_writer.write("".join([x + "\n" for x in self.vocab_tokens]))
-    #             tokenizer = CustomTokenizer(vocab_file)
-
-    #         processor = CustomProcessor(feature_extractor, tokenizer)
-
-    #         with tempfile.TemporaryDirectory() as tmp_dir:
-    #             repo = Repository(tmp_dir, clone_from=tmp_repo, token=self._token)
-    #             processor.save_pretrained(tmp_dir)
-
-    #             # This has added the proper auto_map field to the feature extractor config
-    #             self.assertDictEqual(
-    #                 processor.feature_extractor.auto_map,
-    #                 {
-    #                     "AutoFeatureExtractor": "custom_feature_extraction.CustomFeatureExtractor",
-    #                     "AutoProcessor": "custom_processing.CustomProcessor",
-    #                 },
-    #             )
-
-    #             # This has added the proper auto_map field to the tokenizer config
-    #             with open(os.path.join(tmp_dir, "tokenizer_config.json")) as f:
-    #                 tokenizer_config = json.load(f)
-    #             self.assertDictEqual(
-    #                 tokenizer_config["auto_map"],
-    #                 {
-    #                     "AutoTokenizer": ["custom_tokenization.CustomTokenizer", None],
-    #                     "AutoProcessor": "custom_processing.CustomProcessor",
-    #                 },
-    #             )
-
-    #             # The code has been copied from fixtures
-    #             self.assertTrue(os.path.isfile(os.path.join(tmp_dir, "custom_feature_extraction.py")))
-    #             self.assertTrue(os.path.isfile(os.path.join(tmp_dir, "custom_tokenization.py")))
-    #             self.assertTrue(os.path.isfile(os.path.join(tmp_dir, "custom_processing.py")))
-
-    #             repo.push_to_hub()
-
-    #             new_processor = AutoProcessor.from_pretrained(tmp_repo.repo_id, trust_remote_code=True)
-    #             # Can't make an isinstance check because the new_processor is from the CustomProcessor class of a dynamic module
-    #             self.assertEqual(new_processor.__class__.__name__, "CustomProcessor")
+    def test_push_to_hub_dynamic_processor(self):
+        with TemporaryHubRepo(token=self._token) as tmp_repo:
+            CustomFeatureExtractor.register_for_auto_class()
+            CustomTokenizer.register_for_auto_class()
+            CustomProcessor.register_for_auto_class()
+
+            feature_extractor = CustomFeatureExtractor.from_pretrained(SAMPLE_PROCESSOR_CONFIG_DIR)
+
+            with tempfile.TemporaryDirectory() as tmp_dir:
+                vocab_file = os.path.join(tmp_dir, "vocab.txt")
+                with open(vocab_file, "w", encoding="utf-8") as vocab_writer:
+                    vocab_writer.write("".join([x + "\n" for x in self.vocab_tokens]))
+                tokenizer = CustomTokenizer(vocab_file)
+
+            processor = CustomProcessor(feature_extractor, tokenizer)
+
+            with tempfile.TemporaryDirectory() as tmp_dir:
+                snapshot_download(tmp_repo.repo_id, token=self._token)
+                processor.save_pretrained(tmp_dir)
+
+                # This has added the proper auto_map field to the feature extractor config
+                self.assertDictEqual(
+                    processor.feature_extractor.auto_map,
+                    {
+                        "AutoFeatureExtractor": "custom_feature_extraction.CustomFeatureExtractor",
+                        "AutoProcessor": "custom_processing.CustomProcessor",
+                    },
+                )
+
+                # This has added the proper auto_map field to the tokenizer config
+                with open(os.path.join(tmp_dir, "tokenizer_config.json")) as f:
+                    tokenizer_config = json.load(f)
+                self.assertDictEqual(
+                    tokenizer_config["auto_map"],
+                    {
+                        "AutoTokenizer": ["custom_tokenization.CustomTokenizer", None],
+                        "AutoProcessor": "custom_processing.CustomProcessor",
+                    },
+                )
+
+                # The code has been copied from fixtures
+                self.assertTrue(os.path.isfile(os.path.join(tmp_dir, "custom_feature_extraction.py")))
+                self.assertTrue(os.path.isfile(os.path.join(tmp_dir, "custom_tokenization.py")))
+                self.assertTrue(os.path.isfile(os.path.join(tmp_dir, "custom_processing.py")))
+
+                upload_folder(repo_id=tmp_repo.repo_id, folder_path=tmp_dir, token=self._token)
+
+                new_processor = AutoProcessor.from_pretrained(tmp_repo.repo_id, trust_remote_code=True)
+                # Can't make an isinstance check because the new_processor is from the CustomProcessor class of a dynamic module
+                self.assertEqual(new_processor.__class__.__name__, "CustomProcessor")

From 98daf342ab70f16e75f0e1eb890b3139ddd1aaa4 Mon Sep 17 00:00:00 2001
From: Wauplin
Date: Thu, 18 Sep 2025 11:52:59 +0200
Subject: [PATCH 14/30] fix ?

---
 src/transformers/trainer.py   | 4 ++--
 tests/trainer/test_trainer.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py
index 49e14ce56574..bd8b7d767caf 100755
--- a/src/transformers/trainer.py
+++ b/src/transformers/trainer.py
@@ -49,7 +49,7 @@
 import numpy as np
 import torch
 import torch.distributed as dist
-from huggingface_hub import ModelCard, create_repo, upload_folder
+from huggingface_hub import CommitInfo, ModelCard, create_repo, upload_folder
 from packaging import version
 from torch import nn
 from torch.utils.data import DataLoader, Dataset, IterableDataset, RandomSampler, SequentialSampler
@@ -5125,7 +5125,7 @@ def push_to_hub(
         token: Optional[str] = None,
         revision: Optional[str] = None,
         **kwargs,
-    ) -> str:
+    ) -> CommitInfo:
         """
         Upload `self.model` and `self.processing_class` to the 🤗 model hub on the repo `self.args.hub_model_id`.

diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py
index f2ab2d3bed49..03112e2d3446 100644
--- a/tests/trainer/test_trainer.py
+++ b/tests/trainer/test_trainer.py
@@ -5352,10 +5352,10 @@ def test_push_to_hub_with_revision(self):
             )
             branch = "v1.0"
             create_branch(repo_id=trainer.hub_model_id, branch=branch, token=self._token, exist_ok=True)
-            trainer.push_to_hub(revision=branch)
+            push_commit = trainer.push_to_hub(revision=branch)

             commits = list_repo_commits(repo_id=trainer.hub_model_id, revision=branch, token=self._token)
-            self.assertEqual(commits[0].message, "End of training")
+            self.assertEqual(commits[0].commit_id, push_commit.oid)


 @require_torch

From e760990db481eaddd0ff8beb380585cb7d07fab6 Mon Sep 17 00:00:00 2001
From: Wauplin
Date: Thu, 18 Sep 2025 11:57:48 +0200
Subject: [PATCH 15/30] remove deprecated test

---
 tests/models/auto/test_modeling_auto.py | 20 --------------------
 1 file changed, 20 deletions(-)

diff --git a/tests/models/auto/test_modeling_auto.py b/tests/models/auto/test_modeling_auto.py
index d96b7c7a6b98..006ee30e6f49 100644
--- a/tests/models/auto/test_modeling_auto.py
+++ b/tests/models/auto/test_modeling_auto.py
@@ -553,26 +553,6 @@ def test_attr_not_existing(self):
         _MODEL_MAPPING = _LazyAutoMapping(_CONFIG_MAPPING_NAMES, _MODEL_MAPPING_NAMES)
         self.assertEqual(_MODEL_MAPPING[BertConfig], GPT2Model)

-    # def test_dynamic_saving_from_local_repo(self):
-    #     with tempfile.TemporaryDirectory() as tmp_dir, tempfile.TemporaryDirectory() as tmp_dir_out:
-    #         # `Repository` is deprecated and will be removed in `huggingface_hub v1.0`.
-    #         # TODO: Remove this test when this comes.
-    #         # Here is an ugly approach to avoid `too many requests`
-    #         repo_id = url_to_local_path("hf-internal-testing/tiny-random-custom-architecture")
-    #         if os.path.isdir(repo_id):
-    #             shutil.copytree(repo_id, tmp_dir, dirs_exist_ok=True)
-    #         else:
-    #             _ = Repository(
-    #                 local_dir=tmp_dir,
-    #                 clone_from=url_to_local_path("hf-internal-testing/tiny-random-custom-architecture"),
-    #             )
-
-    #         model = AutoModelForCausalLM.from_pretrained(tmp_dir, trust_remote_code=True)
-    #         model.save_pretrained(tmp_dir_out)
-    #         _ = AutoModelForCausalLM.from_pretrained(tmp_dir_out, trust_remote_code=True)
-    #         self.assertTrue((Path(tmp_dir_out) / "modeling_fake_custom.py").is_file())
-    #         self.assertTrue((Path(tmp_dir_out) / "configuration_fake_custom.py").is_file())
-
     def test_custom_model_patched_generation_inheritance(self):
         """
         Tests that our inheritance patching for generate-compatible models works as expected. Without this feature,

From 5e92130bbb51ca2a4307d079aa65b406c33e3a1f Mon Sep 17 00:00:00 2001
From: Wauplin
Date: Thu, 18 Sep 2025 12:00:36 +0200
Subject: [PATCH 16/30] uncomment another test

---
 tests/pipelines/test_pipelines_common.py | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py
index 304cb28d5796..c909c2d8ea30 100644
--- a/tests/pipelines/test_pipelines_common.py
+++ b/tests/pipelines/test_pipelines_common.py
@@ -22,7 +22,7 @@

 import datasets
 import numpy as np
-from huggingface_hub import delete_repo
+from huggingface_hub import delete_repo, snapshot_download
 from huggingface_hub.errors import HfHubHTTPError

 from transformers import (
@@ -211,13 +211,13 @@ def test_dtype_property(self):
         pipe.model = None
         self.assertIsNone(pipe.dtype)

-    # @require_torch
-    # def test_auto_model_pipeline_registration_from_local_dir(self):
-    #     with tempfile.TemporaryDirectory() as tmp_dir:
-    #         _ = Repository(local_dir=tmp_dir, clone_from="hf-internal-testing/tiny-random-custom-architecture")
-    #         pipe = pipeline("text-generation", tmp_dir, trust_remote_code=True)
+    @require_torch
+    def test_auto_model_pipeline_registration_from_local_dir(self):
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            snapshot_download("hf-internal-testing/tiny-random-custom-architecture", local_dir=tmp_dir)
+            pipe = pipeline("text-generation", tmp_dir, trust_remote_code=True)

-    #     self.assertIsInstance(pipe, TextGenerationPipeline)  # Assert successful load
+        self.assertIsInstance(pipe, TextGenerationPipeline)  # Assert successful load

     @require_torch
     def test_pipeline_with_task_parameters_no_side_effects(self):

From 3deeadd4c9393330f77e50deff3b65c2a7f412f8 Mon Sep 17 00:00:00 2001
From: Wauplin
Date: Thu, 18 Sep 2025 15:11:55 +0200
Subject: [PATCH 17/30] trigger

---
 utils/update_tiny_models.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/utils/update_tiny_models.py b/utils/update_tiny_models.py
index 27acc7261917..3d502efdf951 100644
--- a/utils/update_tiny_models.py
+++ b/utils/update_tiny_models.py
@@ -28,7 +28,7 @@
 import time

 from create_dummy_models import COMPOSITE_MODELS, create_tiny_models
-from huggingface_hub import hf_api
+from huggingface_hub import HfApi

 import transformers
 from transformers import AutoFeatureExtractor, AutoImageProcessor, AutoTokenizer
@@ -83,13 +83,12 @@ def get_tiny_model_names_from_repo():


 def get_tiny_model_summary_from_hub(output_path):
+    api = HfApi()
     special_models = COMPOSITE_MODELS.values()

     # All tiny model base names on Hub
     model_names = get_all_model_names()
-    models = hf_api.list_models(
-        author="hf-internal-testing",
-    )
+    models = api.list_models(author="hf-internal-testing")
     _models = set()
     for x in models:
         model = x.id
@@ -110,7 +109,7 @@ def get_tiny_model_summary_from_hub(output_path):
             repo_id = f"hf-internal-testing/tiny-random-{model}"
             model = model.split("-")[0]
             try:
-                repo_info = hf_api.repo_info(repo_id)
+                repo_info = api.repo_info(repo_id)
                 content = {
                     "tokenizer_classes": set(),
                     "processor_classes": set(),

From d010fbc39956455adc2cee1409ea899471304d50 Mon Sep 17 00:00:00 2001
From: Wauplin
Date: Thu, 18 Sep 2025 16:47:53 +0200
Subject: [PATCH 18/30] no proxies

---
 src/transformers/utils/hub.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/transformers/utils/hub.py b/src/transformers/utils/hub.py
index 9b114ea68a58..ef68a8f3a4c0 100644
--- a/src/transformers/utils/hub.py
+++ b/src/transformers/utils/hub.py
@@ -680,7 +680,6 @@ def has_file(
             hf_hub_url(path_or_repo, filename=filename, revision=revision, repo_type=repo_type),
             headers=build_hf_headers(token=token, user_agent=http_user_agent()),
             follow_redirects=False,
-            proxies=proxies,
             timeout=10,
         )
     except httpx.ProxyError:

From 528e84bc5e7878fca8201a47b303705905a8d9f9 Mon Sep 17 00:00:00 2001
From: Wauplin
Date: Thu, 18 Sep 2025 17:32:14 +0200
Subject: [PATCH 19/30] many more small changes

---
 src/transformers/audio_utils.py               |  8 +++++---
 src/transformers/image_utils.py               |  4 ++--
 src/transformers/modelcard.py                 |  9 ++-------
 .../pipelines/audio_classification.py         |  4 ++--
 .../pipelines/automatic_speech_recognition.py |  4 ++--
 src/transformers/pipelines/image_to_image.py  |  4 ++--
 .../pipelines/video_classification.py         |  4 ++--
 .../zero_shot_audio_classification.py         |  4 ++--
 src/transformers/safetensors_conversion.py    |  6 +++---
 .../utils/attention_visualizer.py             |  4 ++--
 src/transformers/utils/hub.py                 |  3 +--
 src/transformers/video_utils.py               |  4 ++--
 tests/commands/test_serving.py                | 14 +++++++-------
 .../test_pipelines_image_segmentation.py      |  4 ++--
 .../pipelines/test_pipelines_image_to_text.py | 12 ++++++------
 tests/test_image_processing_common.py         |  4 ++--
 tests/test_tokenization_mistral_common.py     |  2 +-
 tests/utils/test_configuration_utils.py       |  6 +++---
 tests/utils/test_feature_extraction_utils.py  |  6 +++---
 tests/utils/test_hub_utils.py                 |  8 +++++---
 tests/utils/test_image_processing_utils.py    |  6 +++---
 tests/utils/test_image_utils.py               |  7 +++----
 tests/utils/test_modeling_utils.py            | 18 +++++++-----------
 tests/utils/test_tokenization_utils.py        | 10 +++++-----
 24 files changed, 74 insertions(+), 81 deletions(-)

diff --git a/src/transformers/audio_utils.py b/src/transformers/audio_utils.py
index e848f558738c..e32b32dfc108 100644
--- a/src/transformers/audio_utils.py
+++ b/src/transformers/audio_utils.py
@@ -25,8 +25,8 @@
 from io import BytesIO
 from typing import Any, Optional, Union

+import httpx
 import numpy as np
-import requests
 from packaging import version

 from .utils import (
@@ -131,7 +131,9 @@ def load_audio_librosa(audio: Union[str, np.ndarray], sampling_rate=16000, timeout=None):
     # Load audio from URL (e.g https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen2-Audio/audio/translate_to_chinese.wav)
     if audio.startswith("http://") or audio.startswith("https://"):
-        audio = librosa.load(BytesIO(requests.get(audio, timeout=timeout).content), sr=sampling_rate)[0]
+        audio = librosa.load(
+            BytesIO(httpx.get(audio, follow_redirects=True, timeout=timeout).content), sr=sampling_rate
+        )[0]
     elif os.path.isfile(audio):
         audio = librosa.load(audio, sr=sampling_rate)[0]
     return audio
@@ -173,7 +175,7 @@ def load_audio_as(
     # Load audio bytes from URL or file
     audio_bytes = None
     if audio.startswith(("http://", "https://")):
-        response = requests.get(audio, timeout=timeout)
+        response = httpx.get(audio, follow_redirects=True, timeout=timeout)
         response.raise_for_status()
         audio_bytes = response.content
     elif os.path.isfile(audio):
diff --git a/src/transformers/image_utils.py b/src/transformers/image_utils.py
index cb7c4bbf422a..1a16cebd90b2 100644
--- a/src/transformers/image_utils.py
+++ b/src/transformers/image_utils.py
@@ -19,8 +19,8 @@
 from io import BytesIO
 from typing import Optional, Union

+import httpx
 import numpy as np
-import requests

 from .utils import (
     ExplicitEnum,
@@ -471,7 +471,7 @@ def load_image(image: Union[str, "PIL.Image.Image"], timeout: Optional[float] = None) -> "PIL.Image.Image":
     if image.startswith("http://") or image.startswith("https://"):
         # We need to actually check for a real protocol, otherwise it's impossible to use a local file
         # like http_huggingface_co.png
-        image = PIL.Image.open(BytesIO(requests.get(image, timeout=timeout).content))
+        image = PIL.Image.open(BytesIO(httpx.get(image, timeout=timeout, follow_redirects=True).content))
     elif os.path.isfile(image):
         image = PIL.Image.open(image)
     else:
diff --git a/src/transformers/modelcard.py b/src/transformers/modelcard.py
index 8c68d8b8af10..eeac086cffec 100644
--- a/src/transformers/modelcard.py
+++ b/src/transformers/modelcard.py
@@ -21,7 +21,7 @@
 from pathlib import Path
 from typing import Any, Optional, Union

-import requests
+import httpx
 import yaml
 from huggingface_hub import model_info
 from huggingface_hub.errors import OfflineModeIsEnabled
@@ -386,12 +386,7 @@ def __post_init__(self):
             for tag in info.tags:
                 if tag.startswith("license:"):
                     self.license = tag[8:]
-        except (
-            requests.exceptions.HTTPError,
-            requests.exceptions.ConnectionError,
-            HFValidationError,
-            OfflineModeIsEnabled,
-        ):
+        except (httpx.HTTPError, HFValidationError, OfflineModeIsEnabled):
             pass

     def create_model_index(self, metric_mapping):
diff --git a/src/transformers/pipelines/audio_classification.py b/src/transformers/pipelines/audio_classification.py
index 9f4822e2b2be..827504da18d7 100644
--- a/src/transformers/pipelines/audio_classification.py
+++ b/src/transformers/pipelines/audio_classification.py
@@ -14,8 +14,8 @@
 import subprocess
 from typing import Any, Union

+import httpx
 import numpy as np
-import requests

 from ..utils import add_end_docstrings, is_torch_available, is_torchaudio_available, is_torchcodec_available, logging
 from .base import Pipeline, build_pipeline_init_args
@@ -171,7 +171,7 @@ def preprocess(self, inputs):
         if inputs.startswith("http://") or inputs.startswith("https://"):
             # We need to actually check for a real protocol, otherwise it's impossible to use a local file
             # like http_huggingface_co.png
-            inputs = requests.get(inputs).content
+            inputs = httpx.get(inputs, follow_redirects=True).content
         else:
             with open(inputs, "rb") as f:
                 inputs = f.read()
diff --git a/src/transformers/pipelines/automatic_speech_recognition.py b/src/transformers/pipelines/automatic_speech_recognition.py
index b4d1b96ea87f..9b73a2cbd6fc 100644
--- a/src/transformers/pipelines/automatic_speech_recognition.py
+++ b/src/transformers/pipelines/automatic_speech_recognition.py
@@ -14,8 +14,8 @@
 from collections import defaultdict
 from typing import TYPE_CHECKING, Any, Optional, Union

+import httpx
 import numpy as np
-import requests

 from ..generation import GenerationConfig
 from ..tokenization_utils import PreTrainedTokenizer
@@ -360,7 +360,7 @@ def preprocess(self, inputs, chunk_length_s=0, stride_length_s=None):
         if inputs.startswith("http://") or inputs.startswith("https://"):
             # We need to actually check for a real protocol, otherwise it's impossible to use a local file
             # like http_huggingface_co.png
-            inputs = requests.get(inputs).content
+            inputs = httpx.get(inputs, follow_redirects=True).content
         else:
             with open(inputs, "rb") as f:
                 inputs = f.read()
diff --git a/src/transformers/pipelines/image_to_image.py b/src/transformers/pipelines/image_to_image.py
index d469024bff17..e68b49049825 100644
--- a/src/transformers/pipelines/image_to_image.py
+++ b/src/transformers/pipelines/image_to_image.py
@@ -46,12 +46,12 @@ class ImageToImagePipeline(Pipeline):
     ```python
     >>> from PIL import Image
-    >>> import requests
+    >>> import httpx

     >>> from transformers import pipeline

     >>> upscaler = pipeline("image-to-image", model="caidas/swin2SR-classical-sr-x2-64")
-    >>> img = Image.open(requests.get("http://images.cocodataset.org/val2017/000000039769.jpg", stream=True).raw)
+    >>> img = Image.open(httpx.get("http://images.cocodataset.org/val2017/000000039769.jpg").content)
     >>> img = img.resize((64, 64))
     >>> upscaled_img = upscaler(img)
     >>> img.size
diff --git a/src/transformers/pipelines/video_classification.py b/src/transformers/pipelines/video_classification.py
index 1ee8dc86e161..dc2c60480562 100644
--- a/src/transformers/pipelines/video_classification.py
+++ b/src/transformers/pipelines/video_classification.py
@@ -15,7 +15,7 @@
 from io import BytesIO
 from typing import Any, Optional, Union, overload

-import requests
+import httpx

 from ..utils import (
     add_end_docstrings,
@@ -142,7 +142,7 @@ def preprocess(self, video, num_frames=None, frame_sampling_rate=1):
             num_frames = self.model.config.num_frames

         if video.startswith("http://") or video.startswith("https://"):
-            video = BytesIO(requests.get(video).content)
+            video = BytesIO(httpx.get(video, follow_redirects=True).content)

         container = av.open(video)
diff --git a/src/transformers/pipelines/zero_shot_audio_classification.py b/src/transformers/pipelines/zero_shot_audio_classification.py
index 9c21681a0d8e..7934acf634ba 100644
--- a/src/transformers/pipelines/zero_shot_audio_classification.py
+++ b/src/transformers/pipelines/zero_shot_audio_classification.py
@@ -15,8 +15,8 @@
 from collections import UserDict
 from typing import Any, Union

+import httpx
 import numpy as np
-import requests

 from ..utils import (
     add_end_docstrings,
@@ -111,7 +111,7 @@ def preprocess(self, audio, candidate_labels=None, hypothesis_template="This is a sound of {}."):
         if audio.startswith("http://") or audio.startswith("https://"):
             # We need to actually check for a real protocol, otherwise it's impossible to use a local file
             # like http_huggingface_co.png
-            audio = requests.get(audio).content
+            audio = httpx.get(audio, follow_redirects=True).content
         else:
             with open(audio, "rb") as f:
                 audio = f.read()
diff --git a/src/transformers/safetensors_conversion.py b/src/transformers/safetensors_conversion.py
index f1612d3ea57c..397240cadc9f 100644
--- a/src/transformers/safetensors_conversion.py
+++ b/src/transformers/safetensors_conversion.py
@@ -1,6 +1,6 @@
 from typing import Optional

-import requests
+import httpx
 from huggingface_hub import Discussion, HfApi, get_repo_discussions

 from .utils import cached_file, http_user_agent, logging
@@ -44,10 +44,10 @@ def start(_sse_connection):

     data = {"data": [model_id, private, token]}

-    result = requests.post(sse_url, stream=True, json=data).json()
+    result = httpx.post(sse_url, follow_redirects=True, json=data).json()
     event_id = result["event_id"]

-    with requests.get(f"{sse_url}/{event_id}", stream=True) as sse_connection:
+    with httpx.stream("GET", f"{sse_url}/{event_id}") as sse_connection:
         try:
             logger.debug("Spawning safetensors automatic conversion.")
             start(sse_connection)
diff --git a/src/transformers/utils/attention_visualizer.py b/src/transformers/utils/attention_visualizer.py
index 2d0783ae2d4e..acddc1ecbe98 100644
--- a/src/transformers/utils/attention_visualizer.py
+++ b/src/transformers/utils/attention_visualizer.py
@@ -13,7 +13,7 @@
 # limitations under the License.

-import requests
+import httpx
 from PIL import Image

 from ..masking_utils import create_causal_mask
@@ -180,7 +180,7 @@ def visualize_attention_mask(self, input_sentence: str, suffix=""):
         image_seq_length = None
         if self.config.model_type in PROCESSOR_MAPPING_NAMES:
             img = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg?download=true"
-            img = Image.open(requests.get(img, stream=True).raw)
+            img = Image.open(httpx.get(img, follow_redirects=True).content)
             image_seq_length = 5
             processor = AutoProcessor.from_pretrained(self.repo_id, image_seq_length=image_seq_length)
             if hasattr(processor, "image_token"):
diff --git a/src/transformers/utils/hub.py b/src/transformers/utils/hub.py
index ef68a8f3a4c0..ca37c2ca651f 100644
--- a/src/transformers/utils/hub.py
+++ b/src/transformers/utils/hub.py
@@ -29,7 +29,6 @@

 import httpx
 import huggingface_hub
-import requests
 from huggingface_hub import (
     _CACHED_NO_EXIST,
     CommitOperationAdd,
@@ -198,7 +197,7 @@ def is_remote_url(url_or_filename):

 def define_sagemaker_information():
     try:
-        instance_data = requests.get(os.environ["ECS_CONTAINER_METADATA_URI"]).json()
+        instance_data = httpx.get(os.environ["ECS_CONTAINER_METADATA_URI"]).json()
         dlc_container_used = instance_data["Image"]
         dlc_tag = instance_data["Image"].split(":")[1]
     except Exception:
diff --git a/src/transformers/video_utils.py b/src/transformers/video_utils.py
index 1749b0b3b1c5..5948b0e8c031 100644
--- a/src/transformers/video_utils.py
+++ b/src/transformers/video_utils.py
@@ -22,8 +22,8 @@
 from typing import Callable, NewType, Optional, Union
 from urllib.parse import urlparse

+import httpx
 import numpy as np
-import requests

 from .image_transforms import PaddingMode, to_channel_dimension_format
 from .image_utils import ChannelDimension, infer_channel_dimension_format, is_valid_image
@@ -683,7 +683,7 @@ def sample_indices_fn_func(metadata, **fn_kwargs):
         bytes_obj = buffer.getvalue()
         file_obj = BytesIO(bytes_obj)
     elif video.startswith("http://") or video.startswith("https://"):
-        file_obj = BytesIO(requests.get(video).content)
+        file_obj = BytesIO(httpx.get(video, follow_redirects=True).content)
     elif os.path.isfile(video):
         file_obj = video
     else:
diff --git a/tests/commands/test_serving.py b/tests/commands/test_serving.py
index 9fbd79464b17..e745dad3c885 100644
--- a/tests/commands/test_serving.py
+++ b/tests/commands/test_serving.py
@@ -19,7 +19,7 @@
 from unittest.mock import patch

 import aiohttp.client_exceptions
-import requests
+import httpx
 from huggingface_hub import AsyncInferenceClient, ChatCompletionStreamOutput
 from parameterized import parameterized

@@ -509,17 +509,18 @@ def _call_healthcheck(base_url: str):
     retries = 10
     while retries > 0:
         try:
-            response = requests.get(f"{base_url}/health")
+            response = httpx.get(f"{base_url}/health")
             break
-        except requests.exceptions.ConnectionError:
+        except httpx.NetworkError:
             time.sleep(0.1)
             retries -= 1
     return response


 def _open_stream_and_cancel(base_url: str, request_id: str):
-    with requests.Session() as s:
-        with s.post(
+    with httpx.Client() as s:
+        with s.stream(
+            "POST",
             f"{base_url}/v1/chat/completions",
             headers={"X-Request-ID": request_id},
             json={
@@ -527,13 +528,12 @@ def _open_stream_and_cancel(base_url: str, request_id: str):
                 "stream": True,
                 "messages": [{"role": "user", "content": "Count slowly so I can cancel you."}],
             },
-            stream=True,
             timeout=30,
         ) as resp:
             assert resp.status_code == 200

             wait_for_n_chunks = 3
-            for i, _ in enumerate(resp.iter_content(chunk_size=None)):
+            for i, _ in enumerate(resp.iter_bytes(chunk_size=None)):
                 if i >= wait_for_n_chunks:
                     resp.close()
                     break
diff --git a/tests/pipelines/test_pipelines_image_segmentation.py b/tests/pipelines/test_pipelines_image_segmentation.py
index b1f0dd14057e..17426130e6be 100644
--- a/tests/pipelines/test_pipelines_image_segmentation.py
+++ b/tests/pipelines/test_pipelines_image_segmentation.py
@@ -16,8 +16,8 @@
 import unittest

 import datasets
+import httpx
 import numpy as np
-import requests
 from datasets import load_dataset
 from huggingface_hub import ImageSegmentationOutputElement
 from huggingface_hub.utils import insecure_hashlib
@@ -318,7 +318,7 @@ def test_small_model_pt(self):
         ]
         # actual links to get files
         expected_masks = [x.replace("/blob/", "/resolve/") for x in expected_masks]
-        expected_masks = [Image.open(requests.get(image, stream=True).raw) for image in expected_masks]
+        expected_masks = [Image.open(httpx.get(image, follow_redirects=True).content) for image in expected_masks]

         # Convert masks to numpy array
         output_masks = [np.array(x) for x in output_masks]
diff --git a/tests/pipelines/test_pipelines_image_to_text.py b/tests/pipelines/test_pipelines_image_to_text.py
index ee73a1dfb63b..f079b8cda029 100644
--- a/tests/pipelines/test_pipelines_image_to_text.py
+++ b/tests/pipelines/test_pipelines_image_to_text.py
@@ -14,7 +14,7 @@

 import unittest

-import requests
+import httpx

 from transformers import MODEL_FOR_VISION_2_SEQ_MAPPING, TF_MODEL_FOR_VISION_2_SEQ_MAPPING, is_vision_available
 from transformers.pipelines import ImageToTextPipeline, pipeline
@@ -173,7 +173,7 @@ def test_large_model_pt(self):
     def test_generation_pt_blip(self):
         pipe = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
         url = "https://huggingface.co/datasets/sayakpaul/sample-datasets/resolve/main/pokemon.png"
-        image = Image.open(requests.get(url, stream=True).raw)
+        image = Image.open(httpx.get(url, follow_redirects=True).content)

         outputs = pipe(image)
         self.assertEqual(outputs, [{"generated_text": "a pink pokemon pokemon with a blue shirt and a blue shirt"}])
@@ -183,7 +183,7 @@ def test_generation_pt_blip(self):
     def test_generation_pt_git(self):
         pipe = pipeline("image-to-text", model="microsoft/git-base-coco")
         url = "https://huggingface.co/datasets/sayakpaul/sample-datasets/resolve/main/pokemon.png"
-        image = Image.open(requests.get(url, stream=True).raw)
+        image = Image.open(httpx.get(url, follow_redirects=True).content)

         outputs = pipe(image)
         self.assertEqual(outputs, [{"generated_text": "a cartoon of a purple character."}])
@@ -193,7 +193,7 @@ def test_generation_pt_git(self):
     def test_conditional_generation_pt_blip(self):
         pipe = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
         url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg"
-        image = Image.open(requests.get(url, stream=True).raw)
+        image = Image.open(httpx.get(url, follow_redirects=True).content)

         prompt = "a photography of"

@@ -208,7 +208,7 @@ def test_conditional_generation_pt_blip(self):
     def test_conditional_generation_pt_git(self):
         pipe = pipeline("image-to-text", model="microsoft/git-base-coco")
         url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg"
-        image = Image.open(requests.get(url, stream=True).raw)
+        image = Image.open(httpx.get(url, follow_redirects=True).content)

         prompt = "a photo of a"

@@ -223,7 +223,7 @@ def test_conditional_generation_pt_git(self):
     def 
test_conditional_generation_pt_pix2struct(self): pipe = pipeline("image-to-text", model="google/pix2struct-ai2d-base") url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg" - image = Image.open(requests.get(url, stream=True).raw) + image = Image.open(httpx.get(url, follow_redirects=True).content) prompt = "What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud" diff --git a/tests/test_image_processing_common.py b/tests/test_image_processing_common.py index 635d6a35dc85..e072f88955b3 100644 --- a/tests/test_image_processing_common.py +++ b/tests/test_image_processing_common.py @@ -21,9 +21,9 @@ import warnings from copy import deepcopy +import httpx import numpy as np import pytest -import requests from packaging import version from transformers import AutoImageProcessor, BatchFeature @@ -182,7 +182,7 @@ def test_slow_fast_equivalence(self): self.skipTest(reason="Skipping slow/fast equivalence test as one of the image processors is not defined") dummy_image = Image.open( - requests.get("http://images.cocodataset.org/val2017/000000039769.jpg", stream=True).raw + httpx.get("http://images.cocodataset.org/val2017/000000039769.jpg", follow_redirects=True).content ) image_processor_slow = self.image_processing_class(**self.image_processor_dict) image_processor_fast = self.fast_image_processing_class(**self.image_processor_dict) diff --git a/tests/test_tokenization_mistral_common.py b/tests/test_tokenization_mistral_common.py index 05b9ae636fb3..e34083177587 100644 --- a/tests/test_tokenization_mistral_common.py +++ b/tests/test_tokenization_mistral_common.py @@ -34,7 +34,7 @@ from mistral_common.tokens.tokenizers.mistral import MistralTokenizer from mistral_common.tokens.tokenizers.utils import list_local_hf_repo_files - # To avoid unnecessary `requests.get` calls which give us `Error: Too Many Requests for url` on CircleCI + # To avoid unnecessary `httpx.get` calls which give us `Error: Too Many Requests for url` on CircleCI mistral_common.tokens.tokenizers.image.download_image = load_image diff --git a/tests/utils/test_configuration_utils.py b/tests/utils/test_configuration_utils.py index 72ad96877441..d79558f9194e 100644 --- a/tests/utils/test_configuration_utils.py +++ b/tests/utils/test_configuration_utils.py @@ -21,7 +21,7 @@ import warnings from pathlib import Path -from requests.exceptions import HTTPError +import httpx from transformers import AutoConfig, BertConfig, Florence2Config, GPT2Config from transformers.configuration_utils import PretrainedConfig @@ -222,14 +222,14 @@ def test_cached_files_are_used_when_internet_is_down(self): response_mock = mock.Mock() response_mock.status_code = 500 response_mock.headers = {} - response_mock.raise_for_status.side_effect = HTTPError + response_mock.raise_for_status.side_effect = httpx.HTTPError("failed") response_mock.json.return_value = {} # Download this model to make sure it's in the cache. _ = BertConfig.from_pretrained("hf-internal-testing/tiny-random-bert") # Under the mock environment we get a 500 error when trying to reach the model. 
- with mock.patch("requests.Session.request", return_value=response_mock) as mock_head: + with mock.patch("httpx.Client.request", return_value=response_mock) as mock_head: _ = BertConfig.from_pretrained("hf-internal-testing/tiny-random-bert") # This check we did call the fake head request mock_head.assert_called() diff --git a/tests/utils/test_feature_extraction_utils.py b/tests/utils/test_feature_extraction_utils.py index 1181c0de3287..a39aadc5f36d 100644 --- a/tests/utils/test_feature_extraction_utils.py +++ b/tests/utils/test_feature_extraction_utils.py @@ -19,7 +19,7 @@ import unittest.mock as mock from pathlib import Path -from requests.exceptions import HTTPError +import httpx from transformers import AutoFeatureExtractor, Wav2Vec2FeatureExtractor from transformers.testing_utils import TOKEN, TemporaryHubRepo, get_tests_dir, is_staging_test @@ -39,13 +39,13 @@ def test_cached_files_are_used_when_internet_is_down(self): response_mock = mock.Mock() response_mock.status_code = 500 response_mock.headers = {} - response_mock.raise_for_status.side_effect = HTTPError + response_mock.raise_for_status.side_effect = httpx.HTTPError("failed") response_mock.json.return_value = {} # Download this model to make sure it's in the cache. _ = Wav2Vec2FeatureExtractor.from_pretrained("hf-internal-testing/tiny-random-wav2vec2") # Under the mock environment we get a 500 error when trying to reach the model. - with mock.patch("requests.Session.request", return_value=response_mock) as mock_head: + with mock.patch("httpx.Client.request", return_value=response_mock) as mock_head: _ = Wav2Vec2FeatureExtractor.from_pretrained("hf-internal-testing/tiny-random-wav2vec2") # This check we did call the fake head request mock_head.assert_called() diff --git a/tests/utils/test_hub_utils.py b/tests/utils/test_hub_utils.py index b86773793a84..3c7b96fc9f89 100644 --- a/tests/utils/test_hub_utils.py +++ b/tests/utils/test_hub_utils.py @@ -19,8 +19,7 @@ from pathlib import Path from huggingface_hub import hf_hub_download -from huggingface_hub.errors import LocalEntryNotFoundError, OfflineModeIsEnabled -from requests.exceptions import HTTPError +from huggingface_hub.errors import HfHubHTTPError, LocalEntryNotFoundError, OfflineModeIsEnabled from transformers.utils import ( CONFIG_NAME, @@ -89,7 +88,10 @@ def test_non_existence_is_cached(self): self.assertIsNone(path) # Under the mock environment, hf_hub_download will always raise an HTTPError - with mock.patch("transformers.utils.hub.hf_hub_download", side_effect=HTTPError) as mock_head: + with mock.patch( + "transformers.utils.hub.hf_hub_download", + side_effect=HfHubHTTPError("failed", response=mock.Mock(status_code=404)), + ) as mock_head: path = cached_file(RANDOM_BERT, "conf", _raise_exceptions_for_connection_errors=False) self.assertIsNone(path) # This check we did call the fake head request diff --git a/tests/utils/test_image_processing_utils.py b/tests/utils/test_image_processing_utils.py index ebe256a534af..7483e7de1f6e 100644 --- a/tests/utils/test_image_processing_utils.py +++ b/tests/utils/test_image_processing_utils.py @@ -18,7 +18,7 @@ import unittest.mock as mock from pathlib import Path -from requests.exceptions import HTTPError +import httpx from transformers import AutoImageProcessor, ViTImageProcessor, ViTImageProcessorFast from transformers.image_processing_utils import get_size_dict @@ -39,7 +39,7 @@ def test_cached_files_are_used_when_internet_is_down(self): response_mock = mock.Mock() response_mock.status_code = 500 response_mock.headers = {} - 
response_mock.raise_for_status.side_effect = HTTPError + response_mock.raise_for_status.side_effect = httpx.HTTPError("failed") response_mock.json.return_value = {} # Download this model to make sure it's in the cache. @@ -47,7 +47,7 @@ def test_cached_files_are_used_when_internet_is_down(self): _ = ViTImageProcessorFast.from_pretrained("hf-internal-testing/tiny-random-vit") # Under the mock environment we get a 500 error when trying to reach the model. - with mock.patch("requests.Session.request", return_value=response_mock) as mock_head: + with mock.patch("httpx.Client.request", return_value=response_mock) as mock_head: _ = ViTImageProcessor.from_pretrained("hf-internal-testing/tiny-random-vit") _ = ViTImageProcessorFast.from_pretrained("hf-internal-testing/tiny-random-vit") # This check we did call the fake head request diff --git a/tests/utils/test_image_utils.py b/tests/utils/test_image_utils.py index 8d124d361c2a..6c2db861ffe2 100644 --- a/tests/utils/test_image_utils.py +++ b/tests/utils/test_image_utils.py @@ -19,11 +19,10 @@ from io import BytesIO from typing import Optional +import httpx import numpy as np import pytest -import requests from huggingface_hub.file_download import hf_hub_url, http_get -from requests import ConnectTimeout, ReadTimeout from tests.pipelines.test_pipelines_document_question_answering import INVOICE_URL from transformers import is_torch_available, is_vision_available @@ -49,7 +48,7 @@ def get_image_from_hub_dataset(dataset_id: str, filename: str, revision: Optional[str] = None) -> "PIL.Image.Image": url = hf_hub_url(dataset_id, filename, repo_type="dataset", revision=revision) - return PIL.Image.open(BytesIO(requests.get(url).content)) + return PIL.Image.open(BytesIO(httpx.get(url, follow_redirects=True).content)) def get_random_image(height, width): @@ -727,7 +726,7 @@ def test_load_img_url(self): @is_flaky() def test_load_img_url_timeout(self): - with self.assertRaises((ReadTimeout, ConnectTimeout)): + with self.assertRaises(httpx.ConnectTimeout): load_image(INVOICE_URL, timeout=0.001) def test_load_img_local(self): diff --git a/tests/utils/test_modeling_utils.py b/tests/utils/test_modeling_utils.py index 6242d9faa7ab..85a6709787f6 100644 --- a/tests/utils/test_modeling_utils.py +++ b/tests/utils/test_modeling_utils.py @@ -27,12 +27,11 @@ import warnings from pathlib import Path +import httpx import pytest -import requests from huggingface_hub import HfApi from parameterized import parameterized from pytest import mark -from requests.exceptions import HTTPError from transformers import ( AutoConfig, @@ -382,7 +381,7 @@ def test_func(): # First attempt will fail with a connection error if not hasattr(test_func, "attempt"): test_func.attempt = 1 - raise requests.exceptions.ConnectionError("Connection failed") + raise httpx.ConnectError("Connection failed") # Second attempt will succeed return True @@ -1135,14 +1134,14 @@ def test_cached_files_are_used_when_internet_is_down(self): response_mock = mock.Mock() response_mock.status_code = 500 response_mock.headers = {} - response_mock.raise_for_status.side_effect = HTTPError + response_mock.raise_for_status.side_effect = httpx.HTTPError("failed") response_mock.json.return_value = {} # Download this model to make sure it's in the cache. _ = BertModel.from_pretrained("hf-internal-testing/tiny-random-bert") # Under the mock environment we get a 500 error when trying to reach the model. 
- with mock.patch("requests.Session.request", return_value=response_mock) as mock_head: + with mock.patch("httpx.Client.request", return_value=response_mock) as mock_head: _ = BertModel.from_pretrained("hf-internal-testing/tiny-random-bert") # This check we did call the fake head request mock_head.assert_called() @@ -2082,10 +2081,7 @@ def test_safetensors_on_the_fly_conversion_gated(self): initial_model = BertModel(config) initial_model.push_to_hub(self.repo_name, token=self.token, safe_serialization=False) - headers = {"Authorization": f"Bearer {self.token}"} - requests.put( - f"https://huggingface.co/api/models/{self.repo_name}/settings", json={"gated": "auto"}, headers=headers - ) + self.api.update_repo_settings(self.repo_name, gated="auto") converted_model = BertModel.from_pretrained(self.repo_name, use_safetensors=True, token=self.token) with self.subTest("Initial and converted models are equal"): @@ -2146,7 +2142,7 @@ def test_safetensors_on_the_fly_sharded_conversion_gated(self): initial_model.push_to_hub(self.repo_name, token=self.token, max_shard_size="200kb", safe_serialization=False) headers = {"Authorization": f"Bearer {self.token}"} - requests.put( + httpx.put( f"https://huggingface.co/api/models/{self.repo_name}/settings", json={"gated": "auto"}, headers=headers ) converted_model = BertModel.from_pretrained(self.repo_name, use_safetensors=True, token=self.token) @@ -2245,7 +2241,7 @@ def test_absence_of_safetensors_triggers_conversion(self): @mock.patch("transformers.safetensors_conversion.spawn_conversion") def test_absence_of_safetensors_triggers_conversion_failed(self, spawn_conversion_mock): - spawn_conversion_mock.side_effect = HTTPError() + spawn_conversion_mock.side_effect = httpx.HTTPError("failed") config = BertConfig( vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37 diff --git a/tests/utils/test_tokenization_utils.py b/tests/utils/test_tokenization_utils.py index b30a2b79af01..e11564090a4d 100644 --- a/tests/utils/test_tokenization_utils.py +++ b/tests/utils/test_tokenization_utils.py @@ -19,8 +19,8 @@ import unittest.mock as mock from pathlib import Path +import httpx from huggingface_hub.file_download import http_get -from requests.exceptions import HTTPError from transformers import ( AlbertTokenizer, @@ -49,14 +49,14 @@ def test_cached_files_are_used_when_internet_is_down(self): response_mock = mock.Mock() response_mock.status_code = 500 response_mock.headers = {} - response_mock.raise_for_status.side_effect = HTTPError + response_mock.raise_for_status.side_effect = httpx.HTTPError("failed") response_mock.json.return_value = {} # Download this model to make sure it's in the cache. _ = BertTokenizer.from_pretrained("hf-internal-testing/tiny-random-bert") # Under the mock environment we get a 500 error when trying to reach the tokenizer. 
- with mock.patch("requests.Session.request", return_value=response_mock) as mock_head: + with mock.patch("httpx.Client.request", return_value=response_mock) as mock_head: _ = BertTokenizer.from_pretrained("hf-internal-testing/tiny-random-bert") # This check we did call the fake head request mock_head.assert_called() @@ -67,14 +67,14 @@ def test_cached_files_are_used_when_internet_is_down_missing_files(self): response_mock = mock.Mock() response_mock.status_code = 500 response_mock.headers = {} - response_mock.raise_for_status.side_effect = HTTPError + response_mock.raise_for_status.side_effect = httpx.HTTPError("failed") response_mock.json.return_value = {} # Download this model to make sure it's in the cache. _ = GPT2TokenizerFast.from_pretrained("openai-community/gpt2") # Under the mock environment we get a 500 error when trying to reach the tokenizer. - with mock.patch("requests.Session.request", return_value=response_mock) as mock_head: + with mock.patch("httpx.Client.request", return_value=response_mock) as mock_head: _ = GPT2TokenizerFast.from_pretrained("openai-community/gpt2") # This check we did call the fake head request mock_head.assert_called() From b45ebc764efcfe92ce08437dde3d92b37949cbc8 Mon Sep 17 00:00:00 2001 From: Wauplin Date: Fri, 19 Sep 2025 14:40:36 +0200 Subject: [PATCH 20/30] fix load PIL Image from httpx --- src/transformers/pipelines/image_to_image.py | 3 ++- src/transformers/utils/attention_visualizer.py | 4 ++-- tests/pipelines/test_pipelines_image_segmentation.py | 6 ++++-- tests/pipelines/test_pipelines_image_to_text.py | 12 ++++++------ tests/test_image_processing_common.py | 6 ++++-- 5 files changed, 18 insertions(+), 13 deletions(-) diff --git a/src/transformers/pipelines/image_to_image.py b/src/transformers/pipelines/image_to_image.py index e68b49049825..c5fe67c2b406 100644 --- a/src/transformers/pipelines/image_to_image.py +++ b/src/transformers/pipelines/image_to_image.py @@ -47,11 +47,12 @@ class ImageToImagePipeline(Pipeline): ```python >>> from PIL import Image >>> import httpx + >>> import io >>> from transformers import pipeline >>> upscaler = pipeline("image-to-image", model="caidas/swin2SR-classical-sr-x2-64") - >>> img = Image.open(httpx.get("http://images.cocodataset.org/val2017/000000039769.jpg").content) + >>> img = Image.open(io.BytesIO(httpx.get("http://images.cocodataset.org/val2017/000000039769.jpg").content)) >>> img = img.resize((64, 64)) >>> upscaled_img = upscaler(img) >>> img.size diff --git a/src/transformers/utils/attention_visualizer.py b/src/transformers/utils/attention_visualizer.py index acddc1ecbe98..4ddcbd021a62 100644 --- a/src/transformers/utils/attention_visualizer.py +++ b/src/transformers/utils/attention_visualizer.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- +import io import httpx from PIL import Image @@ -180,7 +180,7 @@ def visualize_attention_mask(self, input_sentence: str, suffix=""): image_seq_length = None if self.config.model_type in PROCESSOR_MAPPING_NAMES: img = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/bee.jpg?download=true" - img = Image.open(httpx.get(img, follow_redirects=True).content) + img = Image.open(io.BytesIO(httpx.get(img, follow_redirects=True).content)) image_seq_length = 5 processor = AutoProcessor.from_pretrained(self.repo_id, image_seq_length=image_seq_length) if hasattr(processor, "image_token"): diff --git a/tests/pipelines/test_pipelines_image_segmentation.py b/tests/pipelines/test_pipelines_image_segmentation.py index 17426130e6be..c926dd004f32 100644 --- a/tests/pipelines/test_pipelines_image_segmentation.py +++ b/tests/pipelines/test_pipelines_image_segmentation.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +import io import tempfile import unittest @@ -318,7 +318,9 @@ def test_small_model_pt(self): ] # actual links to get files expected_masks = [x.replace("/blob/", "/resolve/") for x in expected_masks] - expected_masks = [Image.open(httpx.get(image, follow_redirects=True).content) for image in expected_masks] + expected_masks = [ + Image.open(io.BytesIO(httpx.get(image, follow_redirects=True).content)) for image in expected_masks + ] # Convert masks to numpy array output_masks = [np.array(x) for x in output_masks] diff --git a/tests/pipelines/test_pipelines_image_to_text.py b/tests/pipelines/test_pipelines_image_to_text.py index f079b8cda029..2b504b314cce 100644 --- a/tests/pipelines/test_pipelines_image_to_text.py +++ b/tests/pipelines/test_pipelines_image_to_text.py @@ -11,7 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- +import io import unittest import httpx @@ -173,7 +173,7 @@ def test_large_model_pt(self): def test_generation_pt_blip(self): pipe = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base") url = "https://huggingface.co/datasets/sayakpaul/sample-datasets/resolve/main/pokemon.png" - image = Image.open(httpx.get(url, follow_redirects=True).content) + image = Image.open(io.BytesIO(httpx.get(url, follow_redirects=True).content)) outputs = pipe(image) self.assertEqual(outputs, [{"generated_text": "a pink pokemon pokemon with a blue shirt and a blue shirt"}]) @@ -183,7 +183,7 @@ def test_generation_pt_blip(self): def test_generation_pt_git(self): pipe = pipeline("image-to-text", model="microsoft/git-base-coco") url = "https://huggingface.co/datasets/sayakpaul/sample-datasets/resolve/main/pokemon.png" - image = Image.open(httpx.get(url, follow_redirects=True).content) + image = Image.open(io.BytesIO(httpx.get(url, follow_redirects=True).content)) outputs = pipe(image) self.assertEqual(outputs, [{"generated_text": "a cartoon of a purple character."}]) @@ -193,7 +193,7 @@ def test_generation_pt_git(self): def test_conditional_generation_pt_blip(self): pipe = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base") url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg" - image = Image.open(httpx.get(url, follow_redirects=True).content) + image = Image.open(io.BytesIO(httpx.get(url, follow_redirects=True).content)) prompt = "a photography of" @@ -208,7 +208,7 @@ def test_conditional_generation_pt_blip(self): def test_conditional_generation_pt_git(self): pipe = pipeline("image-to-text", model="microsoft/git-base-coco") url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg" - image = Image.open(httpx.get(url, follow_redirects=True).content) + image = Image.open(io.BytesIO(httpx.get(url, follow_redirects=True).content)) prompt = "a photo of a" @@ -223,7 +223,7 @@ def test_conditional_generation_pt_git(self): def test_conditional_generation_pt_pix2struct(self): pipe = pipeline("image-to-text", model="google/pix2struct-ai2d-base") url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/ai2d-demo.jpg" - image = Image.open(httpx.get(url, follow_redirects=True).content) + image = Image.open(io.BytesIO(httpx.get(url, follow_redirects=True).content)) prompt = "What does the label 15 represent? (1) lava (2) core (3) tunnel (4) ash cloud" diff --git a/tests/test_image_processing_common.py b/tests/test_image_processing_common.py index e072f88955b3..1d772a92aba3 100644 --- a/tests/test_image_processing_common.py +++ b/tests/test_image_processing_common.py @@ -11,8 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- import inspect +import io import json import os import pathlib @@ -182,7 +182,9 @@ def test_slow_fast_equivalence(self): self.skipTest(reason="Skipping slow/fast equivalence test as one of the image processors is not defined") dummy_image = Image.open( - httpx.get("http://images.cocodataset.org/val2017/000000039769.jpg", follow_redirects=True).content + io.BytesIO( + httpx.get("http://images.cocodataset.org/val2017/000000039769.jpg", follow_redirects=True).content + ) ) image_processor_slow = self.image_processing_class(**self.image_processor_dict) image_processor_fast = self.fast_image_processing_class(**self.image_processor_dict) From 084ea01eabab3b61ad10a59a8836d644b1a154cd Mon Sep 17 00:00:00 2001 From: Wauplin Date: Fri, 19 Sep 2025 14:45:14 +0200 Subject: [PATCH 21/30] require 1.0.0.rc0 --- .circleci/config.yml | 3 --- .circleci/create_circleci_config.py | 4 ++-- setup.py | 2 +- src/transformers/dependency_versions_table.py | 2 +- 4 files changed, 4 insertions(+), 7 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 034f514138c1..893b87ed84d6 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -44,7 +44,6 @@ jobs: steps: - checkout - run: uv pip install -U -e . - - run: uv pip install --prerelease allow "huggingface_hub==1.0.0.rc0" - run: echo 'export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)"' >> "$BASH_ENV" && source "$BASH_ENV" - run: mkdir -p test_preparation - run: python utils/tests_fetcher.py | tee tests_fetched_summary.txt @@ -97,7 +96,6 @@ jobs: steps: - checkout - run: uv pip install -U -e . - - run: uv pip install --prerelease allow "huggingface_hub==1.0.0.rc0" - run: echo 'export "GIT_COMMIT_MESSAGE=$(git show -s --format=%s)"' >> "$BASH_ENV" && source "$BASH_ENV" - run: mkdir -p test_preparation - run: python utils/tests_fetcher.py --fetch_all | tee tests_fetched_summary.txt @@ -151,7 +149,6 @@ jobs: steps: - checkout - run: uv pip install -e ".[quality]" - - run: uv pip install --prerelease allow "huggingface_hub==1.0.0.rc0" - run: name: Show installed libraries and their versions command: pip freeze | tee installed.txt diff --git a/.circleci/create_circleci_config.py b/.circleci/create_circleci_config.py index 624b6a3b3e65..7680da8387f0 100644 --- a/.circleci/create_circleci_config.py +++ b/.circleci/create_circleci_config.py @@ -15,11 +15,12 @@ import argparse import copy +import glob import os import random from dataclasses import dataclass from typing import Any, Dict, List, Optional -import glob + import yaml @@ -112,7 +113,6 @@ def __post_init__(self): self.install_steps = ["uv pip install ."] # Use a custom patched pytest to force exit the process at the end, to avoid `Too long with no output (exceeded 10m0s): context deadline exceeded` self.install_steps.append("uv pip install git+https://github.com/ydshieh/pytest.git@8.4.1-ydshieh") - self.install_steps.append("uv pip install --prerelease allow 'huggingface_hub==1.0.0.rc0'") if self.pytest_options is None: self.pytest_options = {} if isinstance(self.tests_to_run, str): diff --git a/setup.py b/setup.py index 5b43228cccba..efd0ad5bc2e7 100644 --- a/setup.py +++ b/setup.py @@ -115,7 +115,7 @@ "GitPython<3.1.19", "hf-doc-builder>=0.3.0", "hf_xet", - "huggingface-hub", + "huggingface-hub==1.0.0.rc0", "importlib_metadata", "ipadic>=1.0.0,<2.0", "jax>=0.4.1,<=0.4.13", diff --git a/src/transformers/dependency_versions_table.py b/src/transformers/dependency_versions_table.py index 78dd436b1c11..2ecb7682cd3e 100644 --- a/src/transformers/dependency_versions_table.py +++ 
b/src/transformers/dependency_versions_table.py @@ -24,7 +24,7 @@ "GitPython": "GitPython<3.1.19", "hf-doc-builder": "hf-doc-builder>=0.3.0", "hf_xet": "hf_xet", - "huggingface-hub": "huggingface-hub", + "huggingface-hub": "huggingface-hub==1.0.0.rc0", "importlib_metadata": "importlib_metadata", "ipadic": "ipadic>=1.0.0,<2.0", "jax": "jax>=0.4.1,<=0.4.13", From 14e99075c4c1ceda6836809c9b1668e42c59e6a9 Mon Sep 17 00:00:00 2001 From: Wauplin Date: Fri, 19 Sep 2025 15:52:21 +0200 Subject: [PATCH 22/30] fix mocked tests --- tests/utils/test_tokenization_utils.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/utils/test_tokenization_utils.py b/tests/utils/test_tokenization_utils.py index e11564090a4d..ecda49fd9bd8 100644 --- a/tests/utils/test_tokenization_utils.py +++ b/tests/utils/test_tokenization_utils.py @@ -49,7 +49,9 @@ def test_cached_files_are_used_when_internet_is_down(self): response_mock = mock.Mock() response_mock.status_code = 500 response_mock.headers = {} - response_mock.raise_for_status.side_effect = httpx.HTTPError("failed") + response_mock.raise_for_status.side_effect = httpx.HTTPStatusError( + "failed", request=mock.Mock(), response=mock.Mock() + ) response_mock.json.return_value = {} # Download this model to make sure it's in the cache. @@ -67,7 +69,9 @@ def test_cached_files_are_used_when_internet_is_down_missing_files(self): response_mock = mock.Mock() response_mock.status_code = 500 response_mock.headers = {} - response_mock.raise_for_status.side_effect = httpx.HTTPError("failed") + response_mock.raise_for_status.side_effect = httpx.HTTPStatusError( + "failed", request=mock.Mock(), response=mock.Mock() + ) response_mock.json.return_value = {} # Download this model to make sure it's in the cache. From e7c94d58764ad63d3b8c192443b866d1b07c18f8 Mon Sep 17 00:00:00 2001 From: Wauplin Date: Fri, 19 Sep 2025 16:07:49 +0200 Subject: [PATCH 23/30] fix others --- tests/utils/test_configuration_utils.py | 4 +++- tests/utils/test_feature_extraction_utils.py | 4 +++- tests/utils/test_image_processing_utils.py | 4 +++- tests/utils/test_modeling_utils.py | 4 +++- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/tests/utils/test_configuration_utils.py b/tests/utils/test_configuration_utils.py index 9c47297bb422..069ca6729bbd 100644 --- a/tests/utils/test_configuration_utils.py +++ b/tests/utils/test_configuration_utils.py @@ -220,7 +220,9 @@ def test_cached_files_are_used_when_internet_is_down(self): response_mock = mock.Mock() response_mock.status_code = 500 response_mock.headers = {} - response_mock.raise_for_status.side_effect = httpx.HTTPError("failed") + response_mock.raise_for_status.side_effect = httpx.HTTPStatusError( + "failed", request=mock.Mock(), response=mock.Mock() + ) response_mock.json.return_value = {} # Download this model to make sure it's in the cache. 
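The same two-line mock fix recurs verbatim across the four test files touched by this patch (and in test_tokenization_utils.py in the previous one). The root cause: httpx's Response.raise_for_status() raises httpx.HTTPStatusError, whose constructor takes keyword-only `request` and `response` arguments, so the earlier bare `httpx.HTTPError("failed")` was not a faithful stand-in for what the calling code actually catches. A minimal sketch of the shared pattern; the patched target and mock shape are taken from the hunks above, and the test body is elided:

    import unittest.mock as mock

    import httpx

    # Simulate "internet is down": every HTTP call returns a 500 response whose
    # raise_for_status() raises the same exception type a real httpx.Response would.
    response_mock = mock.Mock()
    response_mock.status_code = 500
    response_mock.headers = {}
    response_mock.raise_for_status.side_effect = httpx.HTTPStatusError(
        "failed", request=mock.Mock(), response=mock.Mock()
    )
    response_mock.json.return_value = {}

    with mock.patch("httpx.Client.request", return_value=response_mock):
        ...  # a from_pretrained(...) call here must fall back to the local cache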
diff --git a/tests/utils/test_feature_extraction_utils.py b/tests/utils/test_feature_extraction_utils.py index a39aadc5f36d..b0a6a193d10d 100644 --- a/tests/utils/test_feature_extraction_utils.py +++ b/tests/utils/test_feature_extraction_utils.py @@ -39,7 +39,9 @@ def test_cached_files_are_used_when_internet_is_down(self): response_mock = mock.Mock() response_mock.status_code = 500 response_mock.headers = {} - response_mock.raise_for_status.side_effect = httpx.HTTPError("failed") + response_mock.raise_for_status.side_effect = httpx.HTTPStatusError( + "failed", request=mock.Mock(), response=mock.Mock() + ) response_mock.json.return_value = {} # Download this model to make sure it's in the cache. diff --git a/tests/utils/test_image_processing_utils.py b/tests/utils/test_image_processing_utils.py index 7483e7de1f6e..17e5e305c610 100644 --- a/tests/utils/test_image_processing_utils.py +++ b/tests/utils/test_image_processing_utils.py @@ -39,7 +39,9 @@ def test_cached_files_are_used_when_internet_is_down(self): response_mock = mock.Mock() response_mock.status_code = 500 response_mock.headers = {} - response_mock.raise_for_status.side_effect = httpx.HTTPError("failed") + response_mock.raise_for_status.side_effect = httpx.HTTPStatusError( + "failed", request=mock.Mock(), response=mock.Mock() + ) response_mock.json.return_value = {} # Download this model to make sure it's in the cache. diff --git a/tests/utils/test_modeling_utils.py b/tests/utils/test_modeling_utils.py index f104efd01d1d..88424d7e9673 100644 --- a/tests/utils/test_modeling_utils.py +++ b/tests/utils/test_modeling_utils.py @@ -1145,7 +1145,9 @@ def test_cached_files_are_used_when_internet_is_down(self): response_mock = mock.Mock() response_mock.status_code = 500 response_mock.headers = {} - response_mock.raise_for_status.side_effect = httpx.HTTPError("failed") + response_mock.raise_for_status.side_effect = httpx.HTTPStatusError( + "failed", request=mock.Mock(), response=mock.Mock() + ) response_mock.json.return_value = {} # Download this model to make sure it's in the cache. 
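A note on the io.BytesIO changes introduced in PATCH 20 above: with `requests`, a response opened with `stream=True` exposes a file-like object via `.raw`, which PIL.Image.open can consume directly; `httpx` responses only expose the body as bytes via `.content`, and httpx does not follow redirects unless asked. Hence every image-loading call site gains a BytesIO wrapper and `follow_redirects=True`. A minimal before/after sketch, assuming the COCO URL used throughout these tests is reachable:

    import io

    import httpx
    from PIL import Image

    url = "http://images.cocodataset.org/val2017/000000039769.jpg"

    # requests (old): Image.open(requests.get(url, stream=True).raw)  # file-like .raw
    # httpx (new): wrap the raw bytes and opt in to redirect following
    img = Image.open(io.BytesIO(httpx.get(url, follow_redirects=True).content))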
From ff9d9418e90990250af717083aa2f176dcff309e Mon Sep 17 00:00:00 2001 From: Wauplin Date: Fri, 19 Sep 2025 16:43:08 +0200 Subject: [PATCH 24/30] unchange --- .circleci/create_circleci_config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/create_circleci_config.py b/.circleci/create_circleci_config.py index e9f54e85dd5d..32ce4115a39e 100644 --- a/.circleci/create_circleci_config.py +++ b/.circleci/create_circleci_config.py @@ -15,11 +15,11 @@ import argparse import copy +import glob import os -import random from dataclasses import dataclass from typing import Any, Dict, List, Optional -import glob + import yaml From fd41991940607f98f7e9317bf68fc9cc9ca97ba9 Mon Sep 17 00:00:00 2001 From: Wauplin Date: Fri, 19 Sep 2025 16:43:40 +0200 Subject: [PATCH 25/30] unchange --- .circleci/create_circleci_config.py | 178 ++++++++-------------------- 1 file changed, 52 insertions(+), 126 deletions(-) diff --git a/.circleci/create_circleci_config.py b/.circleci/create_circleci_config.py index 32ce4115a39e..0e8c8e01950a 100644 --- a/.circleci/create_circleci_config.py +++ b/.circleci/create_circleci_config.py @@ -15,10 +15,9 @@ import argparse import copy -import glob import os from dataclasses import dataclass -from typing import Any, Dict, List, Optional +from typing import Any, Optional import yaml @@ -32,7 +31,7 @@ "RUN_FLAKY": True, } # Disable the use of {"s": None} as the output is way too long, causing the navigation on CircleCI impractical -COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "vvv": None, "rsfE": None} +COMMON_PYTEST_OPTIONS = {"max-worker-restart": 0, "vvv": None, "rsfE":None} DEFAULT_DOCKER_IMAGE = [{"image": "cimg/python:3.8.12"}] # Strings that commonly appear in the output of flaky tests when they fail. 
These are used with `pytest-rerunfailures` @@ -59,18 +58,14 @@ class EmptyJob: job_name = "empty" def to_dict(self): - steps = [{"run": "ls -la"}] + steps = [{"run": 'ls -la'}] if self.job_name == "collection_job": steps.extend( [ "checkout", {"run": "pip install requests || true"}, - { - "run": """while [[ $(curl --location --request GET "https://circleci.com/api/v2/workflow/$CIRCLE_WORKFLOW_ID/job" --header "Circle-Token: $CCI_TOKEN"| jq -r '.items[]|select(.name != "collection_job")|.status' | grep -c "running") -gt 0 ]]; do sleep 5; done || true""" - }, - { - "run": "python utils/process_circleci_workflow_test_reports.py --workflow_id $CIRCLE_WORKFLOW_ID || true" - }, + {"run": """while [[ $(curl --location --request GET "https://circleci.com/api/v2/workflow/$CIRCLE_WORKFLOW_ID/job" --header "Circle-Token: $CCI_TOKEN"| jq -r '.items[]|select(.name != "collection_job")|.status' | grep -c "running") -gt 0 ]]; do sleep 5; done || true"""}, + {"run": 'python utils/process_circleci_workflow_test_reports.py --workflow_id $CIRCLE_WORKFLOW_ID || true'}, {"store_artifacts": {"path": "outputs"}}, {"run": 'echo "All required jobs have now completed"'}, ] @@ -109,10 +104,7 @@ def __post_init__(self): else: # BIG HACK WILL REMOVE ONCE FETCHER IS UPDATED print(os.environ.get("GIT_COMMIT_MESSAGE")) - if ( - "[build-ci-image]" in os.environ.get("GIT_COMMIT_MESSAGE", "") - or os.environ.get("GIT_COMMIT_MESSAGE", "") == "dev-ci" - ): + if "[build-ci-image]" in os.environ.get("GIT_COMMIT_MESSAGE", "") or os.environ.get("GIT_COMMIT_MESSAGE", "") == "dev-ci": self.docker_image[0]["image"] = f"{self.docker_image[0]['image']}:dev" print(f"Using {self.docker_image} docker image") if self.install_steps is None: @@ -124,7 +116,7 @@ def __post_init__(self): if isinstance(self.tests_to_run, str): self.tests_to_run = [self.tests_to_run] else: - test_file = os.path.join("test_preparation", f"{self.job_name}_test_list.txt") + test_file = os.path.join("test_preparation" , f"{self.job_name}_test_list.txt") print("Looking for ", test_file) if os.path.exists(test_file): with open(test_file) as f: @@ -138,7 +130,7 @@ def __post_init__(self): def to_dict(self): env = COMMON_ENV_VARIABLES.copy() # Do not run tests decorated by @is_flaky on pull requests - env["RUN_FLAKY"] = os.environ.get("CIRCLE_PULL_REQUEST", "") == "" + env['RUN_FLAKY'] = os.environ.get("CIRCLE_PULL_REQUEST", "") == "" env.update(self.additional_env) job = { @@ -149,84 +141,50 @@ def to_dict(self): job["resource_class"] = self.resource_class all_options = {**COMMON_PYTEST_OPTIONS, **self.pytest_options} - pytest_flags = [ - f"--{key}={value}" if (value is not None or key in ["doctest-modules"]) else f"-{key}" - for key, value in all_options.items() - ] + pytest_flags = [f"--{key}={value}" if (value is not None or key in ["doctest-modules"]) else f"-{key}" for key, value in all_options.items()] pytest_flags.append( f"--make-reports={self.name}" if "examples" in self.name else f"--make-reports=tests_{self.name}" ) - # Examples special case: we need to download NLTK files in advance to avoid cuncurrency issues + # Examples special case: we need to download NLTK files in advance to avoid cuncurrency issues timeout_cmd = f"timeout {self.command_timeout} " if self.command_timeout else "" marker_cmd = f"-m '{self.marker}'" if self.marker is not None else "" junit_flags = " -p no:warning -o junit_family=xunit1 --junitxml=test-results/junit.xml" joined_flaky_patterns = "|".join(FLAKY_TEST_FAILURE_PATTERNS) repeat_on_failure_flags = f"--reruns 5 --reruns-delay 2 
--only-rerun '({joined_flaky_patterns})'" - parallel = f" << pipeline.parameters.{self.job_name}_parallelism >> " + parallel = f' << pipeline.parameters.{self.job_name}_parallelism >> ' steps = [ "checkout", {"attach_workspace": {"at": "test_preparation"}}, {"run": "apt-get update && apt-get install -y curl"}, {"run": " && ".join(self.install_steps)}, - { - "run": { - "name": "Download NLTK files", - "command": """python -c "import nltk; nltk.download('punkt', quiet=True)" """, - } - if "example" in self.name - else "echo Skipping" - }, - { - "run": { + {"run": {"name": "Download NLTK files", "command": """python -c "import nltk; nltk.download('punkt', quiet=True)" """} if "example" in self.name else "echo Skipping"}, + {"run": { "name": "Show installed libraries and their size", - "command": """du -h -d 1 "$(pip -V | cut -d ' ' -f 4 | sed 's/pip//g')" | grep -vE "dist-info|_distutils_hack|__pycache__" | sort -h | tee installed.txt || true""", - } + "command": """du -h -d 1 "$(pip -V | cut -d ' ' -f 4 | sed 's/pip//g')" | grep -vE "dist-info|_distutils_hack|__pycache__" | sort -h | tee installed.txt || true"""} }, - { - "run": { - "name": "Show installed libraries and their versions", - "command": """pip list --format=freeze | tee installed.txt || true""", - } + {"run": { + "name": "Show installed libraries and their versions", + "command": """pip list --format=freeze | tee installed.txt || true"""} }, - { - "run": { - "name": "Show biggest libraries", - "command": """dpkg-query --show --showformat='${Installed-Size}\t${Package}\n' | sort -rh | head -25 | sort -h | awk '{ package=$2; sub(".*/", "", package); printf("%.5f GB %s\n", $1/1024/1024, package)}' || true""", - } + {"run": { + "name": "Show biggest libraries", + "command": """dpkg-query --show --showformat='${Installed-Size}\t${Package}\n' | sort -rh | head -25 | sort -h | awk '{ package=$2; sub(".*/", "", package); printf("%.5f GB %s\n", $1/1024/1024, package)}' || true"""} }, {"run": {"name": "Create `test-results` directory", "command": "mkdir test-results"}}, - { - "run": { - "name": "Get files to test", - "command": f'curl -L -o {self.job_name}_test_list.txt <> --header "Circle-Token: $CIRCLE_TOKEN"' - if self.name != "pr_documentation_tests" - else 'echo "Skipped"', - } - }, - { - "run": { - "name": "Split tests across parallel nodes: show current parallel tests", - "command": f"TESTS=$(circleci tests split --split-by=timings {self.job_name}_test_list.txt) && echo $TESTS > splitted_tests.txt && echo $TESTS | tr ' ' '\n'" - if self.parallelism - else f"awk '{{printf \"%s \", $0}}' {self.job_name}_test_list.txt > splitted_tests.txt", - } + {"run": {"name": "Get files to test", "command":f'curl -L -o {self.job_name}_test_list.txt <> --header "Circle-Token: $CIRCLE_TOKEN"' if self.name != "pr_documentation_tests" else 'echo "Skipped"'}}, + {"run": {"name": "Split tests across parallel nodes: show current parallel tests", + "command": f"TESTS=$(circleci tests split --split-by=timings {self.job_name}_test_list.txt) && echo $TESTS > splitted_tests.txt && echo $TESTS | tr ' ' '\n'" if self.parallelism else f"awk '{{printf \"%s \", $0}}' {self.job_name}_test_list.txt > splitted_tests.txt" + } }, # During the CircleCI docker images build time, we might already (or not) download the data. # If it's done already, the files are inside the directory `/test_data/`. - { - "run": { - "name": "fetch hub objects before pytest", - "command": "cp -r /test_data/* . 
2>/dev/null || true; python3 utils/fetch_hub_objects_for_ci.py", - } + {"run": {"name": "fetch hub objects before pytest", "command": "cp -r /test_data/* . 2>/dev/null || true; python3 utils/fetch_hub_objects_for_ci.py"}}, + {"run": { + "name": "Run tests", + "command": f"({timeout_cmd} python3 -m pytest {marker_cmd} -n {self.pytest_num_workers} {junit_flags} {repeat_on_failure_flags} {' '.join(pytest_flags)} $(cat splitted_tests.txt) | tee tests_output.txt)"} }, - { - "run": { - "name": "Run tests", - "command": f"({timeout_cmd} python3 -m pytest {marker_cmd} -n {self.pytest_num_workers} {junit_flags} {repeat_on_failure_flags} {' '.join(pytest_flags)} $(cat splitted_tests.txt) | tee tests_output.txt)", - } - }, - { - "run": { + {"run": + { "name": "Check for test crashes", "when": "always", "command": """if [ ! -f tests_output.txt ]; then @@ -238,30 +196,12 @@ def to_dict(self): exit 1 else echo "Tests output file exists and no worker crashes detected" - fi""", + fi""" }, }, - { - "run": { - "name": "Expand to show skipped tests", - "when": "always", - "command": "python3 .circleci/parse_test_outputs.py --file tests_output.txt --skip", - } - }, - { - "run": { - "name": "Failed tests: show reasons", - "when": "always", - "command": "python3 .circleci/parse_test_outputs.py --file tests_output.txt --fail", - } - }, - { - "run": { - "name": "Errors", - "when": "always", - "command": "python3 .circleci/parse_test_outputs.py --file tests_output.txt --errors", - } - }, + {"run": {"name": "Expand to show skipped tests", "when": "always", "command": "python3 .circleci/parse_test_outputs.py --file tests_output.txt --skip"}}, + {"run": {"name": "Failed tests: show reasons", "when": "always", "command": "python3 .circleci/parse_test_outputs.py --file tests_output.txt --fail"}}, + {"run": {"name": "Errors", "when": "always", "command": "python3 .circleci/parse_test_outputs.py --file tests_output.txt --errors"}}, {"store_test_results": {"path": "test-results"}}, {"store_artifacts": {"path": "test-results/junit.xml"}}, {"store_artifacts": {"path": "reports"}}, @@ -276,11 +216,7 @@ def to_dict(self): @property def job_name(self): - return ( - self.name - if ("examples" in self.name or "pipeline" in self.name or "pr_documentation" in self.name) - else f"tests_{self.name}" - ) + return self.name if ("examples" in self.name or "pipeline" in self.name or "pr_documentation" in self.name) else f"tests_{self.name}" # JOBS @@ -316,7 +252,7 @@ def job_name(self): pipelines_torch_job = CircleCIJob( "pipelines_torch", additional_env={"RUN_PIPELINE_TESTS": True}, - docker_image=[{"image": "huggingface/transformers-torch-light"}], + docker_image=[{"image":"huggingface/transformers-torch-light"}], marker="is_pipeline_test", parallelism=4, ) @@ -330,7 +266,7 @@ def job_name(self): examples_torch_job = CircleCIJob( "examples_torch", additional_env={"OMP_NUM_THREADS": 8}, - docker_image=[{"image": "huggingface/transformers-examples-torch"}], + docker_image=[{"image":"huggingface/transformers-examples-torch"}], # TODO @ArthurZucker remove this once docker is easier to build install_steps=["uv pip install . 
&& uv pip install -r examples/pytorch/_tests_requirements.txt"], pytest_num_workers=4, @@ -339,9 +275,9 @@ def job_name(self): hub_job = CircleCIJob( "hub", additional_env={"HUGGINGFACE_CO_STAGING": True}, - docker_image=[{"image": "huggingface/transformers-torch-light"}], + docker_image=[{"image":"huggingface/transformers-torch-light"}], install_steps=[ - "uv pip install .", + 'uv pip install .', 'git config --global user.email "ci@dummy.com"', 'git config --global user.name "ci"', ], @@ -352,14 +288,14 @@ def job_name(self): exotic_models_job = CircleCIJob( "exotic_models", - docker_image=[{"image": "huggingface/transformers-exotic-models"}], + docker_image=[{"image":"huggingface/transformers-exotic-models"}], parallelism=4, pytest_options={"durations": 100}, ) repo_utils_job = CircleCIJob( "repo_utils", - docker_image=[{"image": "huggingface/transformers-consistency"}], + docker_image=[{"image":"huggingface/transformers-consistency"}], pytest_num_workers=4, resource_class="large", ) @@ -383,7 +319,7 @@ def job_name(self): command = f'echo """{py_command}""" > pr_documentation_tests_temp.txt' doc_test_job = CircleCIJob( "pr_documentation_tests", - docker_image=[{"image": "huggingface/transformers-consistency"}], + docker_image=[{"image":"huggingface/transformers-consistency"}], additional_env={"TRANSFORMERS_VERBOSITY": "error", "DATASETS_VERBOSITY": "error", "SKIP_CUDA_DOCTEST": "1"}, install_steps=[ # Add an empty file to keep the test step running correctly even no file is selected to be tested. @@ -391,7 +327,7 @@ def job_name(self): "touch dummy.py", command, "cat pr_documentation_tests_temp.txt", - "tail -n1 pr_documentation_tests_temp.txt | tee pr_documentation_tests_test_list.txt", + "tail -n1 pr_documentation_tests_temp.txt | tee pr_documentation_tests_test_list.txt" ], tests_to_run="$(cat pr_documentation_tests.txt)", # noqa pytest_options={"-doctest-modules": None, "doctest-glob": "*.md", "dist": "loadfile", "rvsA": None}, @@ -399,7 +335,7 @@ def job_name(self): pytest_num_workers=1, ) -REGULAR_TESTS = [torch_job, hub_job, tokenization_job, processor_job, generate_job, non_model_job] # fmt: skip +REGULAR_TESTS = [torch_job, hub_job, tokenization_job, processor_job, generate_job, non_model_job] # fmt: skip EXAMPLES_TESTS = [examples_torch_job] PIPELINE_TESTS = [pipelines_torch_job] REPO_UTIL_TESTS = [repo_utils_job] @@ -411,16 +347,13 @@ def create_circleci_config(folder=None): if folder is None: folder = os.getcwd() os.environ["test_preparation_dir"] = folder - jobs = [k for k in ALL_TESTS if os.path.isfile(os.path.join("test_preparation", f"{k.job_name}_test_list.txt"))] + jobs = [k for k in ALL_TESTS if os.path.isfile(os.path.join("test_preparation" , f"{k.job_name}_test_list.txt") )] print("The following jobs will be run ", jobs) if len(jobs) == 0: jobs = [EmptyJob()] else: - print( - "Full list of job name inputs", - {j.job_name + "_test_list": {"type": "string", "default": ""} for j in jobs}, - ) + print("Full list of job name inputs", {j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs}) # Add a job waiting all the test jobs and aggregate their test summary files at the end collection_job = EmptyJob() collection_job.job_name = "collection_job" @@ -437,26 +370,19 @@ def create_circleci_config(folder=None): "GHA_Event": {"type": "string", "default": ""}, "GHA_Meta": {"type": "string", "default": ""}, "tests_to_run": {"type": "string", "default": ""}, - **{j.job_name + "_test_list": {"type": "string", "default": ""} for j in jobs}, - **{j.job_name + 
"_parallelism": {"type": "integer", "default": 1} for j in jobs}, + **{j.job_name + "_test_list":{"type":"string", "default":''} for j in jobs}, + **{j.job_name + "_parallelism":{"type":"integer", "default":1} for j in jobs}, }, - "jobs": {j.job_name: j.to_dict() for j in jobs}, + "jobs": {j.job_name: j.to_dict() for j in jobs} } if "CIRCLE_TOKEN" in os.environ: # For private forked repo. (e.g. new model addition) - config["workflows"] = { - "version": 2, - "run_tests": {"jobs": [{j.job_name: {"context": ["TRANSFORMERS_CONTEXT"]}} for j in jobs]}, - } + config["workflows"] = {"version": 2, "run_tests": {"jobs": [{j.job_name: {"context": ["TRANSFORMERS_CONTEXT"]}} for j in jobs]}} else: # For public repo. (e.g. `transformers`) config["workflows"] = {"version": 2, "run_tests": {"jobs": [j.job_name for j in jobs]}} with open(os.path.join(folder, "generated_config.yml"), "w") as f: - f.write( - yaml.dump(config, sort_keys=False, default_flow_style=False) - .replace("' << pipeline", " << pipeline") - .replace(">> '", " >>") - ) + f.write(yaml.dump(config, sort_keys=False, default_flow_style=False).replace("' << pipeline", " << pipeline").replace(">> '", " >>")) if __name__ == "__main__": @@ -466,4 +392,4 @@ def create_circleci_config(folder=None): ) args = parser.parse_args() - create_circleci_config(args.fetcher_folder) + create_circleci_config(args.fetcher_folder) \ No newline at end of file From 4f2e0725ba90a7a7f776106c39bcace6a9f47390 Mon Sep 17 00:00:00 2001 From: Wauplin Date: Fri, 19 Sep 2025 16:44:12 +0200 Subject: [PATCH 26/30] args --- .circleci/create_circleci_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.circleci/create_circleci_config.py b/.circleci/create_circleci_config.py index 0e8c8e01950a..1e39aa4751a5 100644 --- a/.circleci/create_circleci_config.py +++ b/.circleci/create_circleci_config.py @@ -392,4 +392,4 @@ def create_circleci_config(folder=None): ) args = parser.parse_args() - create_circleci_config(args.fetcher_folder) \ No newline at end of file + create_circleci_config(args.fetcher_folder) From 79c1b767aaed9a20792f027c210cebd44ce304f0 Mon Sep 17 00:00:00 2001 From: Lucain Date: Mon, 22 Sep 2025 17:19:22 +0200 Subject: [PATCH 27/30] Update .circleci/config.yml --- .circleci/config.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 893b87ed84d6..5616355415b4 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -174,7 +174,6 @@ jobs: steps: - checkout - run: uv pip install -e ".[quality]" - - run: uv pip install --prerelease allow "huggingface_hub==1.0.0.rc0" - run: name: Show installed libraries and their versions command: pip freeze | tee installed.txt From ea561698a27fc6016810d64596b799191f454167 Mon Sep 17 00:00:00 2001 From: Wauplin Date: Wed, 24 Sep 2025 11:57:51 +0200 Subject: [PATCH 28/30] Bump to 1.0.0.rc1 --- .circleci/config.yml | 2 +- setup.py | 2 +- src/transformers/dependency_versions_table.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 893b87ed84d6..c2ce772283d4 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -174,7 +174,7 @@ jobs: steps: - checkout - run: uv pip install -e ".[quality]" - - run: uv pip install --prerelease allow "huggingface_hub==1.0.0.rc0" + - run: uv pip install --prerelease allow "huggingface_hub==1.0.0.rc1" - run: name: Show installed libraries and their versions command: pip freeze | tee installed.txt diff --git a/setup.py b/setup.py index 
415fe2952294..d76495ed6a56 100644 --- a/setup.py +++ b/setup.py @@ -114,7 +114,7 @@ "GitPython<3.1.19", "hf-doc-builder>=0.3.0", "hf_xet", - "huggingface-hub==1.0.0.rc0", + "huggingface-hub==1.0.0.rc1", "importlib_metadata", "ipadic>=1.0.0,<2.0", "jinja2>=3.1.0", diff --git a/src/transformers/dependency_versions_table.py b/src/transformers/dependency_versions_table.py index 89a9e8cbf3ec..a7dc6f57f3fb 100644 --- a/src/transformers/dependency_versions_table.py +++ b/src/transformers/dependency_versions_table.py @@ -23,7 +23,7 @@ "GitPython": "GitPython<3.1.19", "hf-doc-builder": "hf-doc-builder>=0.3.0", "hf_xet": "hf_xet", - "huggingface-hub": "huggingface-hub==1.0.0.rc0", + "huggingface-hub": "huggingface-hub==1.0.0.rc1", "importlib_metadata": "importlib_metadata", "ipadic": "ipadic>=1.0.0,<2.0", "jinja2": "jinja2>=3.1.0", From 9c13643cbe41374bd6c15bf4aa72521747615ae0 Mon Sep 17 00:00:00 2001 From: Wauplin Date: Wed, 24 Sep 2025 12:13:37 +0200 Subject: [PATCH 29/30] bump kernels version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index c30d0f882c6e..3945537c49ff 100644 --- a/setup.py +++ b/setup.py @@ -119,7 +119,7 @@ "ipadic>=1.0.0,<2.0", "jinja2>=3.1.0", "kenlm", - "kernels>=0.6.1,<=0.9", + "kernels>=0.10.2,<0.11", "librosa", "natten>=0.14.6,<0.15.0", "nltk<=3.8.1", From 1ab8c3ea2ee9358edce1c72af015771180b24200 Mon Sep 17 00:00:00 2001 From: Wauplin Date: Wed, 24 Sep 2025 12:27:22 +0200 Subject: [PATCH 30/30] fix deps --- src/transformers/dependency_versions_table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/dependency_versions_table.py b/src/transformers/dependency_versions_table.py index b47b4417eaaa..80b107d93c4d 100644 --- a/src/transformers/dependency_versions_table.py +++ b/src/transformers/dependency_versions_table.py @@ -28,7 +28,7 @@ "ipadic": "ipadic>=1.0.0,<2.0", "jinja2": "jinja2>=3.1.0", "kenlm": "kenlm", - "kernels": "kernels>=0.6.1,<=0.9", + "kernels": "kernels>=0.10.2,<0.11", "librosa": "librosa", "natten": "natten>=0.14.6,<0.15.0", "nltk": "nltk<=3.8.1",
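A closing note on the version pins: under PEP 440, `1.0.0.rc1` normalizes to the pre-release `1.0.0rc1`, which sorts below the final `1.0.0`. That is why plain `uv pip install` steps need an explicit opt-in such as `--prerelease allow` to resolve the pin, and why PATCH 30 brings `dependency_versions_table.py` back in sync with `setup.py` after the `kernels` bump in PATCH 29 (the table is generated from setup.py, so the two must carry identical specifiers). A quick self-check using `packaging`, which transformers already depends on:

    from packaging.version import Version

    # "1.0.0.rc1" normalizes to the PEP 440 pre-release "1.0.0rc1"
    assert Version("1.0.0.rc1") == Version("1.0.0rc1")
    assert Version("1.0.0rc1").is_prerelease
    # Pre-releases sort below the final release, and rc0 below rc1
    assert Version("1.0.0.rc0") < Version("1.0.0.rc1") < Version("1.0.0")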