diff --git a/com.unity.ml-agents/CHANGELOG.md b/com.unity.ml-agents/CHANGELOG.md index 756083a32a..1dcfd3b77f 100755 --- a/com.unity.ml-agents/CHANGELOG.md +++ b/com.unity.ml-agents/CHANGELOG.md @@ -54,6 +54,8 @@ and this project adheres to - Removed several memory allocations that happened during inference. On a test scene, this reduced the amount of memory allocated by approximately 25%. (#4887) - Properly catch permission errors when writing timer files. (#4921) +- Unexpected gRPC exceptions during training are now logged before stopping training. If you see + "noisy" logs, please let us know! (#4930) #### ml-agents / ml-agents-envs / gym-unity (Python) - Fixed a bug that would cause an exception when `RunOptions` was deserialized via `pickle`. (#4842) diff --git a/com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs b/com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs index 683dfebd17..6a97e1467b 100644 --- a/com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs +++ b/com.unity.ml-agents/Runtime/Communicator/RpcCommunicator.cs @@ -440,6 +440,7 @@ UnityInputProto Exchange(UnityOutputProto unityOutput) { return null; } + try { var message = m_Client.Exchange(WrapMessage(unityOutput, 200)); @@ -455,8 +456,33 @@ UnityInputProto Exchange(UnityOutputProto unityOutput) QuitCommandReceived?.Invoke(); return message.UnityInput; } - catch + catch (RpcException rpcException) + { + // Log more verbose errors if they're something the user can possibly do something about. + switch (rpcException.Status.StatusCode) + { + case StatusCode.Unavailable: + // This can happen when python disconnects. Ignore it to avoid noisy logs. + break; + case StatusCode.ResourceExhausted: + // This happens if the message body is too large. There's no way to + // gracefully handle this, but at least we can show the message and the + // user can try to reduce the number of agents or observation sizes. + Debug.LogError($"GRPC Exception: {rpcException.Message}. 
Disconnecting from trainer."); + break; + default: + // Other unknown errors. Log at INFO level. + Debug.Log($"GRPC Exception: {rpcException.Message}. Disconnecting from trainer."); + break; + } + m_IsOpen = false; + QuitCommandReceived?.Invoke(); + return null; + } + catch (Exception ex) { + // Fall-through for other error types + Debug.LogError($"GRPC Exception: {ex.Message}. Disconnecting from trainer."); m_IsOpen = false; QuitCommandReceived?.Invoke(); return null;