huggingface · ArthurZucker · Sep 16, 2025 · Jul 31, 2025 · Aug 28, 2025 · Aug 29, 2025
diff --git a/bindings/python/py_src/tokenizers/trainers/__init__.pyi b/bindings/python/py_src/tokenizers/trainers/__init__.pyi
@@ -45,6 +45,20 @@ class BpeTrainer(Trainer):
             highly repetitive tokens like `======` for wikipedia
 
     """
+    def __init__(
+        self,
+        vocab_size=30000,
+        min_frequency=0,
+        show_progress=True,
+        special_tokens=[],
+        limit_alphabet=None,
+        initial_alphabet=[],
+        continuing_subword_prefix=None,
+        end_of_word_suffix=None,
+        max_token_length=None,
+        words={},
+    ):
+        pass
 
 class UnigramTrainer(Trainer):
     """
@@ -85,6 +99,7 @@ class UnigramTrainer(Trainer):
         vocab_size=8000,
         show_progress=True,
         special_tokens=[],
+        initial_alphabet=[],
         shrinking_factor=0.75,
         unk_token=None,
         max_piece_length=16,
@@ -109,6 +124,8 @@ class WordLevelTrainer(Trainer):
         special_tokens (:obj:`List[Union[str, AddedToken]]`):
             A list of special tokens the model should know of.
     """
+    def __init__(self, vocab_size=30000, min_frequency=0, show_progress=True, special_tokens=[]):
+        pass
 
 class WordPieceTrainer(Trainer):
     """

diff --git a/bindings/python/src/trainers.rs b/bindings/python/src/trainers.rs
@@ -312,7 +312,10 @@ impl PyBpeTrainer {
     }
 
     #[new]
-    #[pyo3(signature = (**kwargs), text_signature = None)]
+    #[pyo3(
+        signature = (**kwargs),
+        text_signature = "(self, vocab_size=30000, min_frequency=0, show_progress=True, special_tokens=[], limit_alphabet=None, initial_alphabet=[], continuing_subword_prefix=None, end_of_word_suffix=None, max_token_length=None, words={})"
+    )]
     pub fn new(kwargs: Option<&Bound<'_, PyDict>>) -> PyResult<(Self, PyTrainer)> {
         let mut builder = tk::models::bpe::BpeTrainer::builder();
         if let Some(kwargs) = kwargs {
@@ -518,7 +521,7 @@ impl PyWordPieceTrainer {
     #[new]
     #[pyo3(
         signature = (** kwargs),
-        text_signature = "(self, vocab_size=30000, min_frequency=0, show_progress=True, special_tokens=[], limit_alphabet=None, initial_alphabet= [],continuing_subword_prefix=\"##\", end_of_word_suffix=None)"
+        text_signature = "(self, vocab_size=30000, min_frequency=0, show_progress=True, special_tokens=[], limit_alphabet=None, initial_alphabet=[], continuing_subword_prefix=\"##\", end_of_word_suffix=None)"
     )]
     pub fn new(kwargs: Option<&Bound<'_, PyDict>>) -> PyResult<(Self, PyTrainer)> {
         let mut builder = tk::models::wordpiece::WordPieceTrainer::builder();
@@ -659,7 +662,10 @@ impl PyWordLevelTrainer {
     }
 
     #[new]
-    #[pyo3(signature = (**kwargs), text_signature = None)]
+    #[pyo3(
+        signature = (**kwargs),
+        text_signature = "(self, vocab_size=30000, min_frequency=0, show_progress=True, special_tokens=[])"
+    )]
     pub fn new(kwargs: Option<&Bound<'_, PyDict>>) -> PyResult<(Self, PyTrainer)> {
         let mut builder = tk::models::wordlevel::WordLevelTrainer::builder();
 
@@ -826,7 +832,7 @@ impl PyUnigramTrainer {
     #[new]
     #[pyo3(
         signature = (**kwargs),
-        text_signature = "(self, vocab_size=8000, show_progress=True, special_tokens=[], shrinking_factor=0.75, unk_token=None, max_piece_length=16, n_sub_iterations=2)"
+        text_signature = "(self, vocab_size=8000, show_progress=True, special_tokens=[], initial_alphabet=[], shrinking_factor=0.75, unk_token=None, max_piece_length=16, n_sub_iterations=2)"
     )]
     pub fn new(kwargs: Option<Bound<'_, PyDict>>) -> PyResult<(Self, PyTrainer)> {
         let mut builder = tk::models::unigram::UnigramTrainer::builder();