mosaicml · nik-mosaic · Oct 1, 2022 · Sep 23, 2022 · Sep 23, 2022 · Sep 23, 2022
diff --git a/composer/callbacks/export_for_inference.py b/composer/callbacks/export_for_inference.py
@@ -86,5 +86,5 @@ def export_model(self, state: State, logger: Logger):
                            save_path=self.save_path,
                            logger=logger,
                            save_object_store=self.save_object_store,
-                           sample_input=(self.sample_input,),
+                           sample_input=(self.sample_input, {}),
                            transforms=self.transforms)
diff --git a/composer/trainer/trainer.py b/composer/trainer/trainer.py
@@ -2581,5 +2581,5 @@ def export_for_inference(
                            save_path=save_path,
                            logger=self.logger,
                            save_object_store=save_object_store,
-                           sample_input=(sample_input,),
+                           sample_input=(sample_input, {}),
                            transforms=transforms)
diff --git a/composer/utils/inference.py b/composer/utils/inference.py
@@ -180,11 +180,17 @@ def export_for_inference(
                     raise ValueError(f'sample_input argument is required for onnx export')
                 sample_input = ensure_tuple(sample_input)
 
+                input_names = ['input']
+
+                # Extract input names from sample_input if it is a dict
+                if isinstance(sample_input[0], dict):
+                    input_names = list(sample_input[0].keys())
+
                 torch.onnx.export(
                     model,
                     sample_input,
                     local_save_path,
-                    input_names=['input'],
+                    input_names=input_names,
                     output_names=['output'],
                 )
 

diff --git a/examples/exporting_for_inference.ipynb b/examples/exporting_for_inference.ipynb
@@ -413,6 +413,14 @@
     "print(f\"The predicted classes are {np.argmax(outputs[0], axis=1)}\")"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "0bc52f62",
+   "metadata": {},
+   "source": [
+    "If our input is a dictionary, as is often the case when using a Composer [HuggingFaceModel](https://docs.mosaicml.com/en/stable/examples/huggingface_models.html), we'll need to make sure all the elements of our input dictionary are numpy arrays before calling `ort_session.run()`."
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "ca091f8e",
@@ -454,7 +462,7 @@
     "export_for_inference(model=model, \n",
     "                     save_format=save_format, \n",
     "                     save_path=model_save_path, \n",
-    "                     sample_input=(input,),\n",
+    "                     sample_input=(input, {}),\n",
     "                     surgery_algs=[cf.apply_squeeze_excite],\n",
     "                     load_path=checkpoint_path)"
    ]

diff --git a/tests/callbacks/test_inference.py b/tests/callbacks/test_inference.py
@@ -47,7 +47,7 @@ def test_inference_callback_torchscript(model_cls):
                 save_path=save_path,
                 logger=trainer.logger,
                 save_object_store=None,
-                sample_input=(exp_for_inf_callback.sample_input,),
+                sample_input=(exp_for_inf_callback.sample_input, {}),
                 transforms=None)
 
 
@@ -78,5 +78,5 @@ def test_inference_callback_onnx(model_cls):
                 save_path=save_path,
                 logger=trainer.logger,
                 save_object_store=None,
-                sample_input=(exp_for_inf_callback.sample_input,),
+                sample_input=(exp_for_inf_callback.sample_input, {}),
                 transforms=None)
diff --git a/tests/utils/test_inference.py b/tests/utils/test_inference.py
@@ -62,6 +62,68 @@ def test_export_for_inference_torchscript(model_cls, sample_input):
         )
 
 
+def test_huggingface_export_for_inference_onnx():
+    """Export a HuggingFace BERT classifier to ONNX and compare onnxruntime logits with eager-mode logits."""
+    pytest.importorskip('onnx')
+    pytest.importorskip('onnxruntime')
+    pytest.importorskip('transformers')
+
+    import onnx
+    import onnx.checker
+    import onnxruntime as ort
+    import transformers
+
+    from composer.models import HuggingFaceModel
+
+    # HuggingFace Bert Model
+    # Dummy batch: 2 sequences of length 32, binary labels, and token ids drawn from the 30522-entry BERT vocab.
+    input_ids = torch.randint(low=0, high=30522, size=(2, 32))
+    labels = torch.randint(low=0, high=2, size=(2,))  # `high` is exclusive, so high=2 yields both classes
+    token_type_ids = torch.zeros(size=(2, 32), dtype=torch.int64)
+    attention_mask = torch.randint(low=0, high=2, size=(2, 32))  # mix of masked and attended positions
+    sample_input = {
+        'input_ids': input_ids,
+        'labels': labels,
+        'token_type_ids': token_type_ids,
+        'attention_mask': attention_mask,
+    }
+
+    # Build from config only (no pretrained weights) to avoid a slow test that downloads the weights.
+    config = transformers.AutoConfig.from_pretrained('bert-base-uncased', num_labels=2, hidden_act='gelu_new')
+    hf_model = transformers.AutoModelForSequenceClassification.from_config(config)  # type: ignore (thirdparty)
+
+    model = HuggingFaceModel(hf_model)
+    model.eval()
+    orig_out = model(sample_input)
+
+    save_format = 'onnx'
+    with tempfile.TemporaryDirectory() as tempdir:
+        save_path = os.path.join(tempdir, f'model.{save_format}')
+        inference.export_for_inference(
+            model=model,
+            save_format=save_format,
+            save_path=save_path,
+            sample_input=(sample_input, {}),
+        )
+        loaded_model = onnx.load(save_path)
+
+        onnx.checker.check_model(loaded_model)
+
+        ort_session = ort.InferenceSession(save_path)
+
+        # onnxruntime expects numpy inputs; build a separate feed so the torch sample_input stays intact.
+        ort_inputs = {key: value.numpy() for key, value in sample_input.items()}
+
+        loaded_model_out = ort_session.run(None, ort_inputs)
+
+        torch.testing.assert_close(
+            orig_out['logits'].detach().numpy(),
+            loaded_model_out[1],  # presumably outputs are (loss, logits) because labels are supplied
+            rtol=1e-4,  # relaxed tolerance for ONNX
+            atol=1e-3,  # relaxed tolerance for ONNX
+            msg=f'output mismatch with {save_format}',
+        )
+
+
 @pytest.mark.parametrize(
     'model_cls, sample_input',
     [
@@ -87,7 +149,7 @@ def test_export_for_inference_onnx(model_cls, sample_input):
             model=model,
             save_format=save_format,
             save_path=save_path,
-            sample_input=(sample_input,),
+            sample_input=(sample_input, {}),
         )
         loaded_model = onnx.load(save_path)
         onnx.checker.check_model(loaded_model)
@@ -152,7 +214,7 @@ def test_export_for_inference_onnx_ddp(model_cls, sample_input):
                 model=state.model.module,
                 save_format=save_format,
                 save_path=save_path,
-                sample_input=(sample_input,),
+                sample_input=(sample_input, {}),
             )
 
             loaded_model = onnx.load(save_path)
@@ -247,7 +309,7 @@ def test_export_with_file_artifact_logger(model_cls, sample_input):
                 model=model,
                 save_format=save_format,
                 save_path=save_path,
-                sample_input=(sample_input,),
+                sample_input=(sample_input, {}),
                 logger=mock_logger,
             )
 
@@ -292,7 +354,6 @@ def test_export_with_other_logger(model_cls, sample_input):
                 model=model,
                 save_format=save_format,
                 save_path=save_path,
-                sample_input=(sample_input,),
                 logger=mock_logger,
             )