-
Notifications
You must be signed in to change notification settings - Fork 4.4k
Team manager prototype #4850
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Team manager prototype #4850
Changes from all commits
cd84fe3
eed2fce
f706a91
cee5466
195978c
10f336e
f0bf657
e03c79e
1118089
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
using System.Collections.Generic; | ||
using Unity.MLAgents; | ||
using Unity.MLAgents.Extensions.Teams; | ||
using Unity.MLAgents.Sensors; | ||
|
||
/// <summary>
/// A <see cref="BaseTeamManager"/> that keeps a list of the agents registered with it
/// and, when one of them is done, immediately calls
/// <see cref="Agent.SendDoneToTrainer"/> on that agent.
/// </summary>
public class HallwayTeamManager : BaseTeamManager
{
    // Agents registered with this manager, in registration order, without duplicates.
    readonly List<Agent> m_AgentList = new List<Agent>();

    /// <summary>
    /// Adds <paramref name="agent"/> to this manager's list of agents.
    /// Registering an agent that is already in the list is a no-op, so repeated
    /// registration of the same agent does not create duplicate entries.
    /// </summary>
    /// <param name="agent">The agent to register.</param>
    public override void RegisterAgent(Agent agent)
    {
        if (m_AgentList.Contains(agent))
        {
            return;
        }

        m_AgentList.Add(agent);
    }

    /// <summary>
    /// Called when <paramref name="agent"/> is done; forwards the done signal by
    /// calling <see cref="Agent.SendDoneToTrainer"/> on it.
    /// </summary>
    /// <param name="agent">The agent that is done.</param>
    /// <param name="doneReason">Why the agent is done. Not used by this implementation.</param>
    /// <param name="sensors">The agent's sensors. Not used by this implementation.</param>
    public override void OnAgentDone(Agent agent, Agent.DoneReason doneReason, List<ISensor> sensors)
    {
        agent.SendDoneToTrainer();
    }

    /// <summary>
    /// Adds a reward for the team. No team-reward handling is implemented yet,
    /// so the value is currently ignored.
    /// </summary>
    /// <param name="reward">The reward amount.</param>
    public override void AddTeamReward(float reward)
    {
    }
}
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
using System.Collections.Generic; | ||
using Unity.MLAgents; | ||
using Unity.MLAgents.Sensors; | ||
|
||
namespace Unity.MLAgents.Extensions.Teams
{
    /// <summary>
    /// Base implementation of <see cref="ITeamManager"/>. Each instance gets its own
    /// identifier; registration and done-handling are left for subclasses to override.
    /// </summary>
    public class BaseTeamManager : ITeamManager
    {
        // Identifier for this manager instance: a freshly generated GUID rendered as a string.
        readonly string m_Id = System.Guid.NewGuid().ToString();

        /// <summary>
        /// Returns the identifier generated for this manager when it was constructed.
        /// </summary>
        /// <returns>The GUID string held by this instance.</returns>
        public string GetId() => m_Id;

        /// <summary>
        /// Registers an agent with this manager. The base implementation is not provided.
        /// </summary>
        /// <param name="agent">The agent to register.</param>
        /// <exception cref="System.NotImplementedException">Always, unless overridden.</exception>
        public virtual void RegisterAgent(Agent agent) => throw new System.NotImplementedException();

        /// <summary>
        /// Called when an agent is done. The base implementation is not provided.
        /// </summary>
        /// <param name="agent">The agent that is done.</param>
        /// <param name="doneReason">Why the agent is done.</param>
        /// <param name="sensors">The agent's sensors.</param>
        /// <exception cref="System.NotImplementedException">Always, unless overridden.</exception>
        public virtual void OnAgentDone(Agent agent, Agent.DoneReason doneReason, List<ISensor> sensors)
        {
            // Design note: one possible implementation is to keep a reference to the
            // Agent's IPolicy so IPolicy.RequestDecision can keep being called on the
            // Agent's behalf after it is dead. That would require dummy sensor
            // implementations with the same shape as the originals.
            throw new System.NotImplementedException();
        }

        /// <summary>
        /// Adds a reward for the team. The base implementation does nothing.
        /// </summary>
        /// <param name="reward">The reward amount.</param>
        public virtual void AddTeamReward(float reward)
        {
        }
    }
}
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -50,6 +50,11 @@ internal struct AgentInfo | |
/// </summary> | ||
public int episodeId; | ||
|
||
/// <summary> | ||
/// Team Manager identifier. | ||
/// </summary> | ||
public string teamManagerId; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This shouldn't be public. I think a better approach is to add an accessor for m_TeamManager, and add a method to the interface for an ID. |
||
|
||
public void ClearActions() | ||
{ | ||
storedActions.Clear(); | ||
|
@@ -312,6 +317,8 @@ internal struct AgentParameters | |
/// </summary> | ||
float[] m_LegacyActionCache; | ||
|
||
private ITeamManager m_TeamManager; | ||
|
||
/// <summary> | ||
/// Called when the attached [GameObject] becomes enabled and active. | ||
/// [GameObject]: https://docs.unity3d.com/Manual/GameObjects.html | ||
|
@@ -443,6 +450,11 @@ public void LazyInitialize() | |
new int[m_ActuatorManager.NumDiscreteActions] | ||
); | ||
|
||
if (m_TeamManager != null) | ||
{ | ||
m_Info.teamManagerId = m_TeamManager.GetId(); | ||
} | ||
|
||
// The first time the Academy resets, all Agents in the scene will be | ||
// forced to reset through the <see cref="AgentForceReset"/> event. | ||
// To avoid the Agent resetting twice, the Agents will not begin their | ||
|
@@ -459,7 +471,7 @@ public void LazyInitialize() | |
/// <summary> | ||
/// The reason that the Agent has been set to "done". | ||
/// </summary> | ||
enum DoneReason | ||
public enum DoneReason | ||
{ | ||
/// <summary> | ||
/// The episode was ended manually by calling <see cref="EndEpisode"/>. | ||
|
@@ -535,9 +547,17 @@ void NotifyAgentDone(DoneReason doneReason) | |
} | ||
} | ||
// Request the last decision with no callbacks | ||
// We request a decision so Python knows the Agent is done immediately | ||
m_Brain?.RequestDecision(m_Info, sensors); | ||
ResetSensors(); | ||
if (m_TeamManager != null) | ||
{ | ||
// Send final observations to TeamManager if it exists. | ||
// The TeamManager is responsible for keeping track of the Agent after it's | ||
// done, including propagating any "posthumous" rewards. | ||
m_TeamManager.OnAgentDone(this, doneReason, sensors); | ||
} | ||
else | ||
{ | ||
SendDoneToTrainer(); | ||
} | ||
|
||
// We also have to write to any DemonstrationStores so that they get the "done" flag. | ||
foreach (var demoWriter in DemonstrationWriters) | ||
|
@@ -560,6 +580,13 @@ void NotifyAgentDone(DoneReason doneReason) | |
m_Info.storedActions.Clear(); | ||
} | ||
|
||
public void SendDoneToTrainer() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. internal |
||
{ | ||
// We request a decision so Python knows the Agent is done immediately | ||
m_Brain?.RequestDecision(m_Info, sensors); | ||
ResetSensors(); | ||
} | ||
|
||
/// <summary> | ||
/// Updates the Model assigned to this Agent instance. | ||
/// </summary> | ||
|
@@ -1344,5 +1371,12 @@ void DecideAction() | |
m_Info.CopyActions(actions); | ||
m_ActuatorManager.UpdateActions(actions); | ||
} | ||
|
||
/// <summary>
/// Assigns the team manager for this Agent, records the manager's ID in the
/// Agent's info, and registers this Agent with the manager.
/// Passing <c>null</c> clears both the manager reference and the stored ID.
/// </summary>
/// <param name="teamManager">The manager to attach, or <c>null</c> for none.</param>
public void SetTeamManager(ITeamManager teamManager)
{
    m_TeamManager = teamManager;

    if (teamManager == null)
    {
        // No manager: there is no ID to report and nothing to register with.
        m_Info.teamManagerId = null;
        return;
    }

    m_Info.teamManagerId = teamManager.GetId();
    teamManager.RegisterAgent(this);
}
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
using System.Collections.Generic; | ||
using Unity.MLAgents.Sensors; | ||
|
||
namespace Unity.MLAgents
{
    /// <summary>
    /// Contract for an object that Agents can be registered with and that is
    /// notified when one of those Agents is done.
    /// </summary>
    public interface ITeamManager
    {
        /// <summary>
        /// Returns the identifier of this team manager.
        /// </summary>
        /// <returns>The manager's ID as a string.</returns>
        string GetId();

        /// <summary>
        /// Registers <paramref name="agent"/> with this team manager.
        /// </summary>
        /// <param name="agent">The agent to register.</param>
        void RegisterAgent(Agent agent);

        /// <summary>
        /// Notifies the manager that <paramref name="agent"/> is done.
        /// </summary>
        /// <param name="agent">The agent that is done.</param>
        /// <param name="doneReason">Why the agent is done.</param>
        /// <param name="sensors">The agent's sensors.</param>
        // TODO: this may not be all the information needed; consider passing a class/struct instead.
        void OnAgentDone(Agent agent, Agent.DoneReason doneReason, List<ISensor> sensors);
    }
}
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You could also make this an integer - have a global static ID and increment with
Interlocked.Increment
(which is threadsafe)