
Commit 975d3df

Update bert benchmark: replace deprecated API (#22611)
### Description

1. `tokenizer.max_model_input_sizes` was deprecated; use `tokenizer.model_max_length` instead.
2. ONNX opset updated to 16 (instead of 11/12) for the models.
3. Updated a few comments related to PyTorch installation.
4. Test GPU instead of CPU in dev_benchmark.cmd.

### Motivation and Context

Update the BERT benchmark scripts so that they can run with the latest Hugging Face transformers package.
1 parent dd28f09 commit 975d3df
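The core API change is illustrated by the sketch below (not part of the commit itself); it assumes the Hugging Face transformers package is installed and uses bert-base-cased only as an example:

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

# Deprecated: per-model lookup table with a hard-coded fallback.
# max_input_size = tokenizer.max_model_input_sizes.get("bert-base-cased", 1024)

# Replacement used by this commit: the tokenizer exposes the limit directly.
max_input_size = tokenizer.model_max_length
print(max_input_size)  # typically 512 for BERT-style tokenizers
```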

File tree

5 files changed: +55 -156 lines changed


onnxruntime/python/tools/transformers/benchmark.py

Lines changed: 2 additions & 2 deletions
@@ -348,7 +348,7 @@ def run_pytorch(
     else:
         tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)

-    max_input_size = tokenizer.max_model_input_sizes.get(model_name, 1024)
+    max_input_size = tokenizer.model_max_length

     logger.debug(f"Model {model}")
     logger.debug(f"Number of parameters {model.num_parameters()}")
@@ -500,7 +500,7 @@ def run_tensorflow(

     tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)

-    max_input_size = tokenizer.max_model_input_sizes.get(model_name, 1024)
+    max_input_size = tokenizer.model_max_length

     for batch_size in batch_sizes:
         if batch_size <= 0:

onnxruntime/python/tools/transformers/dev_benchmark.cmd

Lines changed: 7 additions & 11 deletions
@@ -3,9 +3,7 @@
 REM Run benchmark in Windows for developing purpose. For official benchmark, please use run_benchmark.sh.
 REM Settings are different from run_benchmark.sh: no cli, batch and sequence, input counts, average over 100, no fp16, less models etc.

-REM Please install PyTorch (see https://pytorch.org/) before running this benchmark. Like the following:
-REM GPU: conda install pytorch torchvision cudatoolkit=10.1 -c pytorch
-REM CPU: conda install pytorch torchvision cpuonly -c pytorch
+REM Please install PyTorch (see https://pytorch.org/) before running this benchmark.

 REM When use_package=true, you need not copy other files to run benchmarks except this sh file.
 REM Otherwise, it will use python script (*.py) files in this directory.
@@ -21,12 +19,12 @@ set run_torchscript=false

 REM Devices to test.
 REM Attention: You cannot run both CPU and GPU at the same time: gpu need onnxruntime-gpu, and CPU need onnxruntime.
-set run_gpu_fp32=false
-set run_gpu_fp16=false
-set run_cpu_fp32=true
-set run_cpu_int8=true
+set run_gpu_fp32=true
+set run_gpu_fp16=true
+set run_cpu_fp32=false
+set run_cpu_int8=false

-set average_over=100
+set average_over=1000

 REM Enable optimizer (use script instead of OnnxRuntime for graph optimization)
 set use_optimizer=true
@@ -36,7 +34,7 @@ set sequence_length=8 128

 REM Number of inputs (input_ids, token_type_ids, attention_mask) for ONNX model.
 REM Note that different input count might lead to different performance
-set input_counts=1
+set input_counts=3

 REM Pretrained transformers models can be a subset of: bert-base-cased roberta-base gpt2 distilgpt2 distilbert-base-uncased
 set models_to_test=bert-base-cased
@@ -57,7 +55,6 @@ if %run_cpu_int8% == true if %run_gpu_fp32% == true echo cannot test cpu and gpu
 if %run_cpu_int8% == true if %run_gpu_fp16% == true echo cannot test cpu and gpu at same time & goto :EOF

 if %run_install% == true (
-    pip uninstall --yes ort_nightly
     pip uninstall --yes onnxruntime
     pip uninstall --yes onnxruntime-gpu
     if %run_cpu_fp32% == true (
@@ -70,7 +67,6 @@ if %run_install% == true (
         )
     )

-    pip install --upgrade onnxconverter_common
     pip install --upgrade transformers
 )

onnxruntime/python/tools/transformers/huggingface_models.py

Lines changed: 42 additions & 135 deletions
@@ -13,155 +13,62 @@
     "AutoModelForCausalLM",
 ]

-# List of pretrained models: https://huggingface.co/transformers/pretrained_models.html
 # Pretrained model name to a tuple of input names, opset_version, use_external_data_format, optimization model type
+# Some models like GPT, T5, Bart etc has its own convert_to_onnx.py in models sub-directory, and they are excluded here.
 MODELS = {
     # BERT
-    "bert-base-uncased": (
-        ["input_ids", "attention_mask", "token_type_ids"],
-        12,
-        False,
-        "bert",
-    ),
-    "bert-large-uncased": (
-        ["input_ids", "attention_mask", "token_type_ids"],
-        12,
-        False,
-        "bert",
-    ),
-    "bert-base-cased": (
-        ["input_ids", "attention_mask", "token_type_ids"],
-        12,
-        False,
-        "bert",
-    ),
-    # "bert-large-cased": (["input_ids", "attention_mask", "token_type_ids"], 12, False, "bert"),
-    # "bert-base-multilingual-uncased": (["input_ids", "attention_mask", "token_type_ids"], 12, False, "bert"),
-    # "bert-base-multilingual-cased": (["input_ids", "attention_mask", "token_type_ids"], 12, False, "bert"),
-    # "bert-base-chinese": (["input_ids", "attention_mask", "token_type_ids"], 12, False, "bert"),
-    # "bert-base-german-cased": (["input_ids", "attention_mask", "token_type_ids"], 12, False, "bert"),
-    # "bert-large-uncased-whole-word-masking": (["input_ids", "attention_mask", "token_type_ids"], 12, False, "bert"),
-    # "bert-large-cased-whole-word-masking": (["input_ids", "attention_mask", "token_type_ids"], 12, False, "bert"),
-    # "bert-large-uncased-whole-word-masking-finetuned-squad": (["input_ids", "attention_mask",
-    #  "token_type_ids"], 12, False, "bert"),
-    # "bert-large-cased-whole-word-masking-finetuned-squad": (["input_ids", "attention_mask",
-    #  "token_type_ids"], 12, False, "bert"),
-    # "bert-base-cased-finetuned-mrpc": (["input_ids", "attention_mask", "token_type_ids"], 12, False, "bert"),
-    # "bert-base-german-dbmdz-cased": (["input_ids", "attention_mask", "token_type_ids"], 12, False, "bert"),
-    # "bert-base-german-dbmdz-uncased": (["input_ids", "attention_mask", "token_type_ids"], 12, False, "bert"),
-    # todo: more models to add
-    # GPT (no past state)
-    "openai-gpt": (["input_ids"], 11, False, "gpt2"),
-    # GPT-2 (no past state, use benchmark_gpt2.py for past_key_values)
-    "gpt2": (["input_ids"], 11, False, "gpt2"),
-    "gpt2-medium": (["input_ids"], 11, False, "gpt2"),
-    "gpt2-large": (["input_ids"], 11, True, "gpt2"),
-    "gpt2-xl": (["input_ids"], 11, True, "gpt2"),
-    "distilgpt2": (["input_ids"], 11, False, "gpt2"),
-    # Transformer-XL (Models uses Einsum, which need opset version 12 or later.)
-    "transfo-xl-wt103": (["input_ids", "mems"], 12, False, "bert"),
+    "bert-base-cased": (["input_ids", "attention_mask", "token_type_ids"], 16, False, "bert"),
+    "bert-large-cased": (["input_ids", "attention_mask", "token_type_ids"], 16, False, "bert"),
+    # Transformer-XL (Models uses Einsum, which need opset version 16 or later.)
+    "transfo-xl-wt103": (["input_ids", "mems"], 16, False, "bert"),
     # XLNet
-    "xlnet-base-cased": (["input_ids"], 12, False, "bert"),
-    "xlnet-large-cased": (["input_ids"], 12, False, "bert"),
+    "xlnet-base-cased": (["input_ids"], 16, False, "bert"),
+    "xlnet-large-cased": (["input_ids"], 16, False, "bert"),
     # XLM
-    "xlm-mlm-en-2048": (["input_ids"], 11, True, "bert"),
-    "xlm-mlm-ende-1024": (["input_ids"], 11, False, "bert"),
-    "xlm-mlm-enfr-1024": (["input_ids"], 11, False, "bert"),
+    "xlm-mlm-en-2048": (["input_ids"], 16, True, "bert"),
+    "xlm-mlm-ende-1024": (["input_ids"], 16, False, "bert"),
+    "xlm-mlm-enfr-1024": (["input_ids"], 16, False, "bert"),
     # RoBERTa
-    "roberta-base": (["input_ids", "attention_mask"], 12, False, "bert"),
-    "roberta-large": (["input_ids", "attention_mask"], 12, False, "bert"),
-    "roberta-large-mnli": (["input_ids", "attention_mask"], 12, False, "bert"),
-    "deepset/roberta-base-squad2": (["input_ids", "attention_mask"], 11, False, "bert"),
-    "distilroberta-base": (["input_ids", "attention_mask"], 12, False, "bert"),
+    "roberta-base": (["input_ids", "attention_mask"], 16, False, "bert"),
+    "roberta-large": (["input_ids", "attention_mask"], 16, False, "bert"),
+    "roberta-large-mnli": (["input_ids", "attention_mask"], 16, False, "bert"),
+    "deepset/roberta-base-squad2": (["input_ids", "attention_mask"], 16, False, "bert"),
+    "distilroberta-base": (["input_ids", "attention_mask"], 16, False, "bert"),
     # DistilBERT
-    "distilbert-base-uncased": (["input_ids", "attention_mask"], 11, False, "bert"),
-    "distilbert-base-uncased-distilled-squad": (
-        ["input_ids", "attention_mask"],
-        11,
-        False,
-        "bert",
-    ),
+    "distilbert-base-uncased": (["input_ids", "attention_mask"], 16, False, "bert"),
+    "distilbert-base-uncased-distilled-squad": (["input_ids", "attention_mask"], 16, False, "bert"),
     # CTRL
-    "ctrl": (["input_ids"], 11, True, "bert"),
+    "ctrl": (["input_ids"], 16, True, "bert"),
     # CamemBERT
-    "camembert-base": (["input_ids"], 11, False, "bert"),
+    "camembert-base": (["input_ids"], 16, False, "bert"),
     # ALBERT
-    "albert-base-v1": (["input_ids"], 12, False, "bert"),
-    "albert-large-v1": (["input_ids"], 12, False, "bert"),
-    "albert-xlarge-v1": (["input_ids"], 12, True, "bert"),
-    # "albert-xxlarge-v1": (["input_ids"], 12, True, "bert"),
-    "albert-base-v2": (["input_ids"], 12, False, "bert"),
-    "albert-large-v2": (["input_ids"], 12, False, "bert"),
-    "albert-xlarge-v2": (["input_ids"], 12, True, "bert"),
-    # "albert-xxlarge-v2": (["input_ids"], 12, True, "bert"),
-    # T5 (use benchmark_t5.py instead)
-    # "t5-small": (["input_ids", "decoder_input_ids"], 12, False, "bert"),
-    # "t5-base": (["input_ids", "decoder_input_ids"], 12, False, "bert"),
-    # "t5-large": (["input_ids", "decoder_input_ids"], 12, True, "bert"),
-    # "t5-3b": (["input_ids", "decoder_input_ids"], 12, True, "bert"),
-    # "t5-11b": (["input_ids", "decoder_input_ids"], 12, True, "bert"),
-    # "valhalla/t5-small-qa-qg-hl": (["input_ids"], 12, True, "bert"),
+    "albert-base-v1": (["input_ids"], 16, False, "bert"),
+    "albert-large-v1": (["input_ids"], 16, False, "bert"),
+    "albert-xlarge-v1": (["input_ids"], 16, True, "bert"),
+    # "albert-xxlarge-v1": (["input_ids"], 16, True, "bert"),
+    "albert-base-v2": (["input_ids"], 16, False, "bert"),
+    "albert-large-v2": (["input_ids"], 16, False, "bert"),
+    "albert-xlarge-v2": (["input_ids"], 16, True, "bert"),
+    # "albert-xxlarge-v2": (["input_ids"], 16, True, "bert"),
     # XLM-RoBERTa
-    "xlm-roberta-base": (["input_ids"], 11, False, "bert"),
-    "xlm-roberta-large": (["input_ids"], 11, True, "bert"),
+    "xlm-roberta-base": (["input_ids"], 16, False, "bert"),
+    "xlm-roberta-large": (["input_ids"], 16, True, "bert"),
     # FlauBERT
-    "flaubert/flaubert_small_cased": (["input_ids"], 11, False, "bert"),
-    # "flaubert/flaubert_base_uncased": (["input_ids"], 11, False, "bert"),
-    "flaubert/flaubert_base_cased": (["input_ids"], 11, False, "bert"),
-    # "flaubert/flaubert_large_cased": (["input_ids"], 11, False, "bert"),
-    # Bart
-    "facebook/bart-large": (["input_ids", "attention_mask"], 11, False, "bart"),
-    "facebook/bart-base": (["input_ids", "attention_mask"], 11, False, "bart"),
-    "facebook/bart-large-mnli": (["input_ids", "attention_mask"], 11, False, "bart"),
-    "facebook/bart-large-cnn": (["input_ids", "attention_mask"], 11, False, "bart"),
-    # DialoGPT
-    "microsoft/DialoGPT-small": (["input_ids"], 11, False, "gpt2"),
-    "microsoft/DialoGPT-medium": (["input_ids"], 11, False, "gpt2"),
-    # "microsoft/DialoGPT-large": (["input_ids"], 11, True, "gpt2"),
-    # Reformer
-    # "google/reformer-enwik8": (["input_ids"], 11, False, "bert"),
-    # "google/reformer-crime-and-punishment": (["input_ids"], 11, False, "bert"),
-    # MarianMT
-    # "Helsinki-NLP/opus-mt-ROMANCE-en": (["input_ids"], 12, False, "bert"),
-    # Longformer (use benchmark_longformer.py instead)
-    # "allenai/longformer-base-4096": (["input_ids"], 12, False, "bert"),
-    # "allenai/longformer-large-4096": (["input_ids"], 12, False, "bert"),
-    # MBart
-    "facebook/mbart-large-cc25": (["input_ids"], 11, True, "bert"),
-    "facebook/mbart-large-en-ro": (["input_ids"], 11, True, "bert"),
-    # "Helsinki-NLP/opus-mt-ROMANCE-en": (["input_ids"], 12, False, "bert"),
-    # # Longformer
-    # "allenai/longformer-base-4096": (["input_ids"], 12, False, "bert"),
-    # "allenai/longformer-large-4096": (["input_ids"], 12, True, "bert"),
-    # "funnel-transformer/small": (["input_ids"], 12, False, "bert"),
-    # "funnel-transformer/small-base": (["input_ids"], 12, False, "bert"),
-    # "funnel-transformer/medium": (["input_ids"], 12, False, "bert"),
-    # "funnel-transformer/medium-base": (["input_ids"], 12, False, "bert"),
-    # "funnel-transformer/intermediate": (["input_ids"], 12, False, "bert"),
-    # "funnel-transformer/intermediate-base": (["input_ids"], 12, False, "bert"),
-    # "funnel-transformer/large": (["input_ids"], 12, True, "bert"),
-    # "funnel-transformer/large-base": (["input_ids"], 12, True, "bert"),
-    # "funnel-transformer/xlarge": (["input_ids"], 12, True, "bert"),
-    # "funnel-transformer/xlarge-base": (["input_ids"], 12, True, "bert"),
+    "flaubert/flaubert_small_cased": (["input_ids"], 16, False, "bert"),
+    "flaubert/flaubert_base_cased": (["input_ids"], 16, False, "bert"),
+    # "flaubert/flaubert_large_cased": (["input_ids"], 16, False, "bert"),
     # Layoutlm
-    "microsoft/layoutlm-base-uncased": (["input_ids"], 11, False, "bert"),
-    "microsoft/layoutlm-large-uncased": (["input_ids"], 11, False, "bert"),
+    "microsoft/layoutlm-base-uncased": (["input_ids"], 16, False, "bert"),
+    "microsoft/layoutlm-large-uncased": (["input_ids"], 16, False, "bert"),
     # Squeezebert
-    "squeezebert/squeezebert-uncased": (["input_ids"], 11, False, "bert"),
-    "squeezebert/squeezebert-mnli": (["input_ids"], 11, False, "bert"),
-    "squeezebert/squeezebert-mnli-headless": (["input_ids"], 11, False, "bert"),
-    "unc-nlp/lxmert-base-uncased": (
-        ["input_ids", "visual_feats", "visual_pos"],
-        11,
-        False,
-        "bert",
-    ),
-    # "google/pegasus-xsum": (["input_ids"], 11, False, "bert"),
-    # "google/pegasus-large": (["input_ids"], 11, False, "bert"),
+    "squeezebert/squeezebert-uncased": (["input_ids"], 16, False, "bert"),
+    "squeezebert/squeezebert-mnli": (["input_ids"], 16, False, "bert"),
+    "squeezebert/squeezebert-mnli-headless": (["input_ids"], 16, False, "bert"),
+    "unc-nlp/lxmert-base-uncased": (["input_ids", "visual_feats", "visual_pos"], 16, False, "bert"),
     # ViT
-    "google/vit-base-patch16-224": (["pixel_values"], 12, False, "vit"),
+    "google/vit-base-patch16-224": (["pixel_values"], 16, False, "vit"),
     # Swin
-    "microsoft/swin-base-patch4-window7-224": (["pixel_values"], 12, False, "swin"),
-    "microsoft/swin-small-patch4-window7-224": (["pixel_values"], 12, False, "swin"),
-    "microsoft/swin-tiny-patch4-window7-224": (["pixel_values"], 12, False, "swin"),
+    "microsoft/swin-base-patch4-window7-224": (["pixel_values"], 16, False, "swin"),
+    "microsoft/swin-small-patch4-window7-224": (["pixel_values"], 16, False, "swin"),
+    "microsoft/swin-tiny-patch4-window7-224": (["pixel_values"], 16, False, "swin"),
 }
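For readers unfamiliar with the table layout, here is a small self-contained sketch (illustrative only, not code from the repository) of how one MODELS entry unpacks into the fields named in the comment above:

```python
# Tuple layout per the comment in huggingface_models.py:
# (input names, opset_version, use_external_data_format, optimization model type)
MODELS = {
    "bert-base-cased": (["input_ids", "attention_mask", "token_type_ids"], 16, False, "bert"),
}

input_names, opset_version, use_external_data_format, model_type = MODELS["bert-base-cased"]
print(input_names)               # ['input_ids', 'attention_mask', 'token_type_ids']
print(opset_version)             # 16 after this commit (was 11/12)
print(use_external_data_format)  # False: weights small enough for a single ONNX file
print(model_type)                # 'bert': which optimization/fusion profile to apply
```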

onnxruntime/python/tools/transformers/onnx_exporter.py

Lines changed: 2 additions & 2 deletions
@@ -492,7 +492,7 @@ def export_onnx_model_from_pt(
         example_inputs = image_processor(data, return_tensors="pt")
     else:
         tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
-        max_input_size = tokenizer.max_model_input_sizes.get(model_name, 1024)
+        max_input_size = tokenizer.model_max_length
         example_inputs = tokenizer.encode_plus("This is a sample input", return_tensors="pt")

     example_inputs = filter_inputs(example_inputs, input_names)
@@ -596,7 +596,7 @@ def export_onnx_model_from_tf(
     # Fix "Using pad_token, but it is not set yet" error.
     if tokenizer.pad_token is None:
         tokenizer.add_special_tokens({"pad_token": "[PAD]"})
-    max_input_size = tokenizer.max_model_input_sizes.get(model_name, 1024)
+    max_input_size = tokenizer.model_max_length

     config, model = load_tf_model(model_name, model_class, cache_dir, config_modifier)
     model.resize_token_embeddings(len(tokenizer))
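Both hunks feed max_input_size from tokenizer.model_max_length into the export path. As a rough, hypothetical end-to-end sketch of such an export with the new opset (this is not the repository's actual export code; the model name, output file name, and axis names are placeholders):

```python
import torch
from transformers import AutoModel, AutoTokenizer

model_name = "bert-base-cased"  # placeholder example
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name, return_dict=False)  # tuple outputs trace more cleanly
model.eval()

# Updated API: read the limit from the tokenizer instead of max_model_input_sizes.
max_input_size = tokenizer.model_max_length
example = tokenizer(
    "This is a sample input", return_tensors="pt", truncation=True, max_length=max_input_size
)

# Export with opset 16, matching the opset bump in this commit.
torch.onnx.export(
    model,
    (example["input_ids"], example["attention_mask"], example["token_type_ids"]),
    "bert-base-cased.onnx",
    opset_version=16,
    input_names=["input_ids", "attention_mask", "token_type_ids"],
    output_names=["last_hidden_state", "pooler_output"],
    dynamic_axes={name: {0: "batch_size", 1: "seq_len"}
                  for name in ["input_ids", "attention_mask", "token_type_ids"]},
)
```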

onnxruntime/python/tools/transformers/run_benchmark.sh

Lines changed: 2 additions & 6 deletions
@@ -5,10 +5,7 @@
 # license information.
 # --------------------------------------------------------------------------
 # This measures the performance of OnnxRuntime, PyTorch and TorchScript on transformer models.
-# Please install PyTorch (see https://pytorch.org/) before running this benchmark. Like the following:
-# GPU: conda install pytorch torchvision cudatoolkit=11.0 -c pytorch
-# CPU: conda install pytorch torchvision cpuonly -c pytorch
-# To use torch2, please install the nightly PyTorch by replacing pytorch with pytorch-nightly.
+# Please install PyTorch (see https://pytorch.org/) before running this benchmark.

 # When use_package=true, you need not copy other files to run benchmarks except this sh file.
 # Otherwise, it will use python script (*.py) files in this directory.
@@ -60,7 +57,6 @@ sequence_lengths="8 16 32 64 128 256 512 1024"
 # Here we only test one input (input_ids) for fair comparison with PyTorch.
 input_counts=1

-# Pretrained transformers models can be a subset of: bert-base-cased roberta-base gpt2 distilgpt2 distilbert-base-uncased
 models_to_test="bert-base-cased roberta-base distilbert-base-uncased"

 # If you have multiple GPUs, you can choose one GPU for test. Here is an example to use the second GPU:
@@ -99,7 +95,7 @@ if [ "$run_install" = true ] ; then
     else
         pip install onnxruntime-gpu
     fi
-    pip install --upgrade onnx coloredlogs packaging psutil py3nvml onnxconverter_common numpy transformers sympy
+    pip install --upgrade onnx coloredlogs packaging psutil py3nvml numpy transformers sympy
 fi

 if [ "$use_package" = true ] ; then
