     "AutoModelForCausalLM",
 ]

-# List of pretrained models: https://huggingface.co/transformers/pretrained_models.html
 # Pretrained model name to a tuple of input names, opset_version, use_external_data_format, optimization model type
+# Models such as GPT, T5, and Bart have their own convert_to_onnx.py in the models subdirectory, so they are excluded here.
 MODELS = {
     # BERT
-    "bert-base-uncased": (
-        ["input_ids", "attention_mask", "token_type_ids"],
-        12,
-        False,
-        "bert",
-    ),
-    "bert-large-uncased": (
-        ["input_ids", "attention_mask", "token_type_ids"],
-        12,
-        False,
-        "bert",
-    ),
-    "bert-base-cased": (
-        ["input_ids", "attention_mask", "token_type_ids"],
-        12,
-        False,
-        "bert",
-    ),
-    # "bert-large-cased": (["input_ids", "attention_mask", "token_type_ids"], 12, False, "bert"),
-    # "bert-base-multilingual-uncased": (["input_ids", "attention_mask", "token_type_ids"], 12, False, "bert"),
-    # "bert-base-multilingual-cased": (["input_ids", "attention_mask", "token_type_ids"], 12, False, "bert"),
-    # "bert-base-chinese": (["input_ids", "attention_mask", "token_type_ids"], 12, False, "bert"),
-    # "bert-base-german-cased": (["input_ids", "attention_mask", "token_type_ids"], 12, False, "bert"),
-    # "bert-large-uncased-whole-word-masking": (["input_ids", "attention_mask", "token_type_ids"], 12, False, "bert"),
-    # "bert-large-cased-whole-word-masking": (["input_ids", "attention_mask", "token_type_ids"], 12, False, "bert"),
-    # "bert-large-uncased-whole-word-masking-finetuned-squad": (["input_ids", "attention_mask",
-    #     "token_type_ids"], 12, False, "bert"),
-    # "bert-large-cased-whole-word-masking-finetuned-squad": (["input_ids", "attention_mask",
-    #     "token_type_ids"], 12, False, "bert"),
-    # "bert-base-cased-finetuned-mrpc": (["input_ids", "attention_mask", "token_type_ids"], 12, False, "bert"),
-    # "bert-base-german-dbmdz-cased": (["input_ids", "attention_mask", "token_type_ids"], 12, False, "bert"),
-    # "bert-base-german-dbmdz-uncased": (["input_ids", "attention_mask", "token_type_ids"], 12, False, "bert"),
-    # todo: more models to add
-    # GPT (no past state)
-    "openai-gpt": (["input_ids"], 11, False, "gpt2"),
-    # GPT-2 (no past state, use benchmark_gpt2.py for past_key_values)
-    "gpt2": (["input_ids"], 11, False, "gpt2"),
-    "gpt2-medium": (["input_ids"], 11, False, "gpt2"),
-    "gpt2-large": (["input_ids"], 11, True, "gpt2"),
-    "gpt2-xl": (["input_ids"], 11, True, "gpt2"),
-    "distilgpt2": (["input_ids"], 11, False, "gpt2"),
-    # Transformer-XL (Models uses Einsum, which need opset version 12 or later.)
-    "transfo-xl-wt103": (["input_ids", "mems"], 12, False, "bert"),
+    "bert-base-cased": (["input_ids", "attention_mask", "token_type_ids"], 16, False, "bert"),
+    "bert-large-cased": (["input_ids", "attention_mask", "token_type_ids"], 16, False, "bert"),
+    # Transformer-XL (the model uses Einsum, which needs opset version 16 or later)
+    "transfo-xl-wt103": (["input_ids", "mems"], 16, False, "bert"),
     # XLNet
-    "xlnet-base-cased": (["input_ids"], 12, False, "bert"),
-    "xlnet-large-cased": (["input_ids"], 12, False, "bert"),
+    "xlnet-base-cased": (["input_ids"], 16, False, "bert"),
+    "xlnet-large-cased": (["input_ids"], 16, False, "bert"),
     # XLM
-    "xlm-mlm-en-2048": (["input_ids"], 11, True, "bert"),
-    "xlm-mlm-ende-1024": (["input_ids"], 11, False, "bert"),
-    "xlm-mlm-enfr-1024": (["input_ids"], 11, False, "bert"),
+    "xlm-mlm-en-2048": (["input_ids"], 16, True, "bert"),
+    "xlm-mlm-ende-1024": (["input_ids"], 16, False, "bert"),
+    "xlm-mlm-enfr-1024": (["input_ids"], 16, False, "bert"),
     # RoBERTa
-    "roberta-base": (["input_ids", "attention_mask"], 12, False, "bert"),
-    "roberta-large": (["input_ids", "attention_mask"], 12, False, "bert"),
-    "roberta-large-mnli": (["input_ids", "attention_mask"], 12, False, "bert"),
-    "deepset/roberta-base-squad2": (["input_ids", "attention_mask"], 11, False, "bert"),
-    "distilroberta-base": (["input_ids", "attention_mask"], 12, False, "bert"),
+    "roberta-base": (["input_ids", "attention_mask"], 16, False, "bert"),
+    "roberta-large": (["input_ids", "attention_mask"], 16, False, "bert"),
+    "roberta-large-mnli": (["input_ids", "attention_mask"], 16, False, "bert"),
+    "deepset/roberta-base-squad2": (["input_ids", "attention_mask"], 16, False, "bert"),
+    "distilroberta-base": (["input_ids", "attention_mask"], 16, False, "bert"),
     # DistilBERT
-    "distilbert-base-uncased": (["input_ids", "attention_mask"], 11, False, "bert"),
-    "distilbert-base-uncased-distilled-squad": (
-        ["input_ids", "attention_mask"],
-        11,
-        False,
-        "bert",
-    ),
+    "distilbert-base-uncased": (["input_ids", "attention_mask"], 16, False, "bert"),
+    "distilbert-base-uncased-distilled-squad": (["input_ids", "attention_mask"], 16, False, "bert"),
     # CTRL
-    "ctrl": (["input_ids"], 11, True, "bert"),
+    "ctrl": (["input_ids"], 16, True, "bert"),
     # CamemBERT
-    "camembert-base": (["input_ids"], 11, False, "bert"),
+    "camembert-base": (["input_ids"], 16, False, "bert"),
     # ALBERT
-    "albert-base-v1": (["input_ids"], 12, False, "bert"),
-    "albert-large-v1": (["input_ids"], 12, False, "bert"),
-    "albert-xlarge-v1": (["input_ids"], 12, True, "bert"),
-    # "albert-xxlarge-v1": (["input_ids"], 12, True, "bert"),
-    "albert-base-v2": (["input_ids"], 12, False, "bert"),
-    "albert-large-v2": (["input_ids"], 12, False, "bert"),
-    "albert-xlarge-v2": (["input_ids"], 12, True, "bert"),
-    # "albert-xxlarge-v2": (["input_ids"], 12, True, "bert"),
-    # T5 (use benchmark_t5.py instead)
-    # "t5-small": (["input_ids", "decoder_input_ids"], 12, False, "bert"),
-    # "t5-base": (["input_ids", "decoder_input_ids"], 12, False, "bert"),
-    # "t5-large": (["input_ids", "decoder_input_ids"], 12, True, "bert"),
-    # "t5-3b": (["input_ids", "decoder_input_ids"], 12, True, "bert"),
-    # "t5-11b": (["input_ids", "decoder_input_ids"], 12, True, "bert"),
-    # "valhalla/t5-small-qa-qg-hl": (["input_ids"], 12, True, "bert"),
+    "albert-base-v1": (["input_ids"], 16, False, "bert"),
+    "albert-large-v1": (["input_ids"], 16, False, "bert"),
+    "albert-xlarge-v1": (["input_ids"], 16, True, "bert"),
+    # "albert-xxlarge-v1": (["input_ids"], 16, True, "bert"),
+    "albert-base-v2": (["input_ids"], 16, False, "bert"),
+    "albert-large-v2": (["input_ids"], 16, False, "bert"),
+    "albert-xlarge-v2": (["input_ids"], 16, True, "bert"),
+    # "albert-xxlarge-v2": (["input_ids"], 16, True, "bert"),
     # XLM-RoBERTa
-    "xlm-roberta-base": (["input_ids"], 11, False, "bert"),
-    "xlm-roberta-large": (["input_ids"], 11, True, "bert"),
+    "xlm-roberta-base": (["input_ids"], 16, False, "bert"),
+    "xlm-roberta-large": (["input_ids"], 16, True, "bert"),
     # FlauBERT
-    "flaubert/flaubert_small_cased": (["input_ids"], 11, False, "bert"),
-    # "flaubert/flaubert_base_uncased": (["input_ids"], 11, False, "bert"),
-    "flaubert/flaubert_base_cased": (["input_ids"], 11, False, "bert"),
-    # "flaubert/flaubert_large_cased": (["input_ids"], 11, False, "bert"),
-    # Bart
-    "facebook/bart-large": (["input_ids", "attention_mask"], 11, False, "bart"),
-    "facebook/bart-base": (["input_ids", "attention_mask"], 11, False, "bart"),
-    "facebook/bart-large-mnli": (["input_ids", "attention_mask"], 11, False, "bart"),
-    "facebook/bart-large-cnn": (["input_ids", "attention_mask"], 11, False, "bart"),
-    # DialoGPT
-    "microsoft/DialoGPT-small": (["input_ids"], 11, False, "gpt2"),
-    "microsoft/DialoGPT-medium": (["input_ids"], 11, False, "gpt2"),
-    # "microsoft/DialoGPT-large": (["input_ids"], 11, True, "gpt2"),
-    # Reformer
-    # "google/reformer-enwik8": (["input_ids"], 11, False, "bert"),
-    # "google/reformer-crime-and-punishment": (["input_ids"], 11, False, "bert"),
-    # MarianMT
-    # "Helsinki-NLP/opus-mt-ROMANCE-en": (["input_ids"], 12, False, "bert"),
-    # Longformer (use benchmark_longformer.py instead)
-    # "allenai/longformer-base-4096": (["input_ids"], 12, False, "bert"),
-    # "allenai/longformer-large-4096": (["input_ids"], 12, False, "bert"),
-    # MBart
-    "facebook/mbart-large-cc25": (["input_ids"], 11, True, "bert"),
-    "facebook/mbart-large-en-ro": (["input_ids"], 11, True, "bert"),
-    # "Helsinki-NLP/opus-mt-ROMANCE-en": (["input_ids"], 12, False, "bert"),
-    # # Longformer
-    # "allenai/longformer-base-4096": (["input_ids"], 12, False, "bert"),
-    # "allenai/longformer-large-4096": (["input_ids"], 12, True, "bert"),
-    # "funnel-transformer/small": (["input_ids"], 12, False, "bert"),
-    # "funnel-transformer/small-base": (["input_ids"], 12, False, "bert"),
-    # "funnel-transformer/medium": (["input_ids"], 12, False, "bert"),
-    # "funnel-transformer/medium-base": (["input_ids"], 12, False, "bert"),
-    # "funnel-transformer/intermediate": (["input_ids"], 12, False, "bert"),
-    # "funnel-transformer/intermediate-base": (["input_ids"], 12, False, "bert"),
-    # "funnel-transformer/large": (["input_ids"], 12, True, "bert"),
-    # "funnel-transformer/large-base": (["input_ids"], 12, True, "bert"),
-    # "funnel-transformer/xlarge": (["input_ids"], 12, True, "bert"),
-    # "funnel-transformer/xlarge-base": (["input_ids"], 12, True, "bert"),
+    "flaubert/flaubert_small_cased": (["input_ids"], 16, False, "bert"),
+    "flaubert/flaubert_base_cased": (["input_ids"], 16, False, "bert"),
+    # "flaubert/flaubert_large_cased": (["input_ids"], 16, False, "bert"),
     # Layoutlm
-    "microsoft/layoutlm-base-uncased": (["input_ids"], 11, False, "bert"),
-    "microsoft/layoutlm-large-uncased": (["input_ids"], 11, False, "bert"),
+    "microsoft/layoutlm-base-uncased": (["input_ids"], 16, False, "bert"),
+    "microsoft/layoutlm-large-uncased": (["input_ids"], 16, False, "bert"),
     # Squeezebert
-    "squeezebert/squeezebert-uncased": (["input_ids"], 11, False, "bert"),
-    "squeezebert/squeezebert-mnli": (["input_ids"], 11, False, "bert"),
-    "squeezebert/squeezebert-mnli-headless": (["input_ids"], 11, False, "bert"),
-    "unc-nlp/lxmert-base-uncased": (
-        ["input_ids", "visual_feats", "visual_pos"],
-        11,
-        False,
-        "bert",
-    ),
-    # "google/pegasus-xsum": (["input_ids"], 11, False, "bert"),
-    # "google/pegasus-large": (["input_ids"], 11, False, "bert"),
+    "squeezebert/squeezebert-uncased": (["input_ids"], 16, False, "bert"),
+    "squeezebert/squeezebert-mnli": (["input_ids"], 16, False, "bert"),
+    "squeezebert/squeezebert-mnli-headless": (["input_ids"], 16, False, "bert"),
+    "unc-nlp/lxmert-base-uncased": (["input_ids", "visual_feats", "visual_pos"], 16, False, "bert"),
     # ViT
-    "google/vit-base-patch16-224": (["pixel_values"], 12, False, "vit"),
+    "google/vit-base-patch16-224": (["pixel_values"], 16, False, "vit"),
     # Swin
-    "microsoft/swin-base-patch4-window7-224": (["pixel_values"], 12, False, "swin"),
-    "microsoft/swin-small-patch4-window7-224": (["pixel_values"], 12, False, "swin"),
-    "microsoft/swin-tiny-patch4-window7-224": (["pixel_values"], 12, False, "swin"),
+    "microsoft/swin-base-patch4-window7-224": (["pixel_values"], 16, False, "swin"),
+    "microsoft/swin-small-patch4-window7-224": (["pixel_values"], 16, False, "swin"),
+    "microsoft/swin-tiny-patch4-window7-224": (["pixel_values"], 16, False, "swin"),
 }
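
Each MODELS value unpacks into the four fields named in the comment above the dict: input names, ONNX opset version, use_external_data_format, and the optimization model type. The sketch below shows how an entry could drive an export; it is illustrative only, not the repository's benchmark code. export_model is a hypothetical helper, and the dummy-input construction assumes a text model whose inputs all come from the tokenizer (e.g. "bert-base-cased").

import torch
from transformers import AutoModel, AutoTokenizer

def export_model(model_name: str) -> str:
    # Unpack one MODELS entry: input names, ONNX opset, external-data flag, optimizer type.
    input_names, opset_version, use_external_data_format, model_type = MODELS[model_name]

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)
    model.eval()

    # Build dummy tensors for exactly the inputs the entry lists.
    encoded = tokenizer("Hello world", return_tensors="pt")
    dummy_inputs = tuple(encoded[name] for name in input_names)

    # Keep batch and sequence dimensions dynamic in the exported graph.
    dynamic_axes = {name: {0: "batch_size", 1: "sequence_length"} for name in input_names}

    # use_external_data_format marks models whose weights exceed the 2 GB
    # protobuf limit; PyTorch 1.x exposed a torch.onnx.export argument of the
    # same name, while PyTorch 2.x writes external data files automatically.
    output_path = model_name.replace("/", "_") + ".onnx"
    torch.onnx.export(
        model,
        dummy_inputs,
        output_path,
        input_names=input_names,
        opset_version=opset_version,
        dynamic_axes=dynamic_axes,
    )

    # model_type selects the fusion schema for onnxruntime's graph optimizer,
    # e.g. optimize_model(output_path, model_type=model_type) from
    # onnxruntime.transformers.optimizer.
    return output_path

With the table above, export_model("bert-base-cased") would write bert-base-cased.onnx at opset 16, matching that entry's fields.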