|
using System.Collections.Generic; |
|
using System.Linq; |
|
using Unity.MLAgents.Inference.Utils; |
|
using Unity.MLAgents.Actuators; |
|
using Unity.Barracuda; |
|
using UnityEngine; |
|
|
|
namespace Unity.MLAgents.Inference |
|
{ |
|
|
|
|
|
|
|
|
|
/// <summary>
/// Copies per-agent rows of an output tensor into the continuous part of each agent's
/// <see cref="ActionBuffers"/>.
/// </summary>
internal class ContinuousActionOutputApplier : TensorApplier.IApplier
{
    readonly ActionSpec m_ActionSpec;

    /// <summary>
    /// Creates the applier.
    /// </summary>
    /// <param name="actionSpec">Spec used to allocate a new <see cref="ActionBuffers"/> for an
    /// agent whose current entry reports <c>IsEmpty()</c>.</param>
    public ContinuousActionOutputApplier(ActionSpec actionSpec)
    {
        m_ActionSpec = actionSpec;
    }

    /// <summary>
    /// Writes row <c>i</c> of <paramref name="tensorProxy"/> into the continuous actions of the
    /// agent with id <c>actionIds[i]</c>.
    /// </summary>
    /// <param name="tensorProxy">Tensor to read from. Its last shape dimension is used as the
    /// number of values copied per agent.</param>
    /// <param name="actionIds">Agent ids; the position of an id in this list is the tensor row
    /// read for that agent.</param>
    /// <param name="lastActions">Per-agent action buffers. Ids that are not present are skipped;
    /// empty buffers are replaced by a newly allocated one before being written.</param>
    public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
    {
        var actionSize = tensorProxy.shape[tensorProxy.shape.Length - 1];

        // The row index always tracks the position in actionIds, even when an agent is skipped.
        for (var agentIndex = 0; agentIndex < actionIds.Count; agentIndex++)
        {
            var agentId = actionIds[agentIndex];

            // Single lookup instead of ContainsKey followed by the indexer.
            ActionBuffers actionBuffer;
            if (!lastActions.TryGetValue(agentId, out actionBuffer))
            {
                continue;
            }

            if (actionBuffer.IsEmpty())
            {
                actionBuffer = new ActionBuffers(m_ActionSpec);
                lastActions[agentId] = actionBuffer;
            }

            var continuousBuffer = actionBuffer.ContinuousActions;
            for (var j = 0; j < actionSize; j++)
            {
                continuousBuffer[j] = tensorProxy.data[agentIndex, j];
            }
        }
    }
}
|
|
|
|
|
|
|
|
|
/// <summary>
/// Copies per-agent rows of an output tensor into the discrete part of each agent's
/// <see cref="ActionBuffers"/>, converting each value to <c>int</c>.
/// </summary>
internal class DiscreteActionOutputApplier : TensorApplier.IApplier
{
    readonly ActionSpec m_ActionSpec;

    /// <summary>
    /// Creates the applier.
    /// </summary>
    /// <param name="actionSpec">Spec used to allocate a new <see cref="ActionBuffers"/> for an
    /// agent whose current entry reports <c>IsEmpty()</c>.</param>
    /// <param name="seed">Not used by this class; kept so the signature stays unchanged.</param>
    /// <param name="allocator">Not used by this class; kept so the signature stays unchanged.</param>
    public DiscreteActionOutputApplier(ActionSpec actionSpec, int seed, ITensorAllocator allocator)
    {
        m_ActionSpec = actionSpec;
    }

    /// <summary>
    /// Writes row <c>i</c> of <paramref name="tensorProxy"/> into the discrete actions of the
    /// agent with id <c>actionIds[i]</c>.
    /// </summary>
    /// <param name="tensorProxy">Tensor to read from. Its last shape dimension is used as the
    /// number of values copied per agent.</param>
    /// <param name="actionIds">Agent ids; the position of an id in this list is the tensor row
    /// read for that agent.</param>
    /// <param name="lastActions">Per-agent action buffers. Ids that are not present are skipped;
    /// empty buffers are replaced by a newly allocated one before being written.</param>
    public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
    {
        var actionSize = tensorProxy.shape[tensorProxy.shape.Length - 1];

        // The row index always tracks the position in actionIds, even when an agent is skipped.
        for (var agentIndex = 0; agentIndex < actionIds.Count; agentIndex++)
        {
            var agentId = actionIds[agentIndex];

            // Single lookup instead of ContainsKey followed by the indexer.
            ActionBuffers actionBuffer;
            if (!lastActions.TryGetValue(agentId, out actionBuffer))
            {
                continue;
            }

            if (actionBuffer.IsEmpty())
            {
                actionBuffer = new ActionBuffers(m_ActionSpec);
                lastActions[agentId] = actionBuffer;
            }

            var discreteBuffer = actionBuffer.DiscreteActions;
            for (var j = 0; j < actionSize; j++)
            {
                // The explicit cast truncates the tensor value toward zero.
                discreteBuffer[j] = (int)tensorProxy.data[agentIndex, j];
            }
        }
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Fills the discrete part of each agent's <see cref="ActionBuffers"/> by sampling one action
/// per branch from the values in the output tensor.
/// </summary>
/// <remarks>
/// NOTE(review): the tensor values are treated as log-probabilities (see the
/// <c>logProbs</c> parameter of <see cref="ComputeCdf"/>) — confirm against the model export.
/// </remarks>
internal class LegacyDiscreteActionOutputApplier : TensorApplier.IApplier
{
    // Number of choices in each branch, taken from ActionSpec.BranchSizes.
    readonly int[] m_ActionSize;
    readonly Multinomial m_Multinomial;
    readonly ActionSpec m_ActionSpec;

    // Passed to ComputeCdf as the channel offset of each branch.
    // Presumably the starting column of every branch in the tensor row — verify Utilities.CumSum.
    readonly int[] m_StartActionIndices;

    // Scratch buffer reused for every branch; sized to the largest branch.
    readonly float[] m_CdfBuffer;

    /// <summary>
    /// Creates the applier and allocates the reusable CDF scratch buffer.
    /// </summary>
    /// <param name="actionSpec">Spec providing the branch sizes; also used to allocate a new
    /// <see cref="ActionBuffers"/> for an agent whose current entry reports <c>IsEmpty()</c>.</param>
    /// <param name="seed">Seed passed to the <see cref="Multinomial"/> sampler.</param>
    /// <param name="allocator">Not used by this class; kept so the signature stays unchanged.</param>
    public LegacyDiscreteActionOutputApplier(ActionSpec actionSpec, int seed, ITensorAllocator allocator)
    {
        m_ActionSize = actionSpec.BranchSizes;
        m_Multinomial = new Multinomial(seed);
        m_ActionSpec = actionSpec;
        m_StartActionIndices = Utilities.CumSum(m_ActionSize);

        // One buffer large enough for any branch avoids allocating inside Apply.
        var largestBranch = Mathf.Max(m_ActionSize);
        m_CdfBuffer = new float[largestBranch];
    }

    /// <summary>
    /// For each agent id found in <paramref name="lastActions"/>, computes a CDF per branch from
    /// that agent's tensor row and stores the sampled index in the agent's discrete actions.
    /// </summary>
    /// <param name="tensorProxy">Tensor to read from; row <c>i</c> belongs to
    /// <c>actionIds[i]</c>.</param>
    /// <param name="actionIds">Agent ids; the position of an id in this list is the tensor row
    /// read for that agent.</param>
    /// <param name="lastActions">Per-agent action buffers. Ids that are not present are skipped;
    /// empty buffers are replaced by a newly allocated one before being written.</param>
    public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
    {
        // The row index always tracks the position in actionIds, even when an agent is skipped.
        for (var agentIndex = 0; agentIndex < actionIds.Count; agentIndex++)
        {
            var agentId = actionIds[agentIndex];

            // Single lookup instead of ContainsKey followed by the indexer.
            ActionBuffers actionBuffer;
            if (!lastActions.TryGetValue(agentId, out actionBuffer))
            {
                continue;
            }

            if (actionBuffer.IsEmpty())
            {
                actionBuffer = new ActionBuffers(m_ActionSpec);
                lastActions[agentId] = actionBuffer;
            }

            var discreteBuffer = actionBuffer.DiscreteActions;
            for (var j = 0; j < m_ActionSize.Length; j++)
            {
                // ComputeCdf overwrites the first m_ActionSize[j] entries of m_CdfBuffer,
                // which the sampler then reads.
                ComputeCdf(tensorProxy, agentIndex, m_StartActionIndices[j], m_ActionSize[j]);
                discreteBuffer[j] = m_Multinomial.Sample(m_CdfBuffer, m_ActionSize[j]);
            }
        }
    }

    /// <summary>
    /// Fills the first <paramref name="branchSize"/> entries of the internal CDF buffer with the
    /// running sum of <c>exp(x - max)</c>, where <c>x</c> ranges over the branch's tensor values
    /// and <c>max</c> is the largest of them.
    /// </summary>
    /// <remarks>
    /// The result is not normalized: the last written entry is the total of the shifted
    /// exponentials rather than 1.
    /// </remarks>
    /// <param name="logProbs">Tensor holding the values for all branches.</param>
    /// <param name="batch">Row of the tensor to read.</param>
    /// <param name="channelOffset">Column at which the branch starts.</param>
    /// <param name="branchSize">Number of consecutive columns belonging to the branch.</param>
    internal void ComputeCdf(TensorProxy logProbs, int batch, int channelOffset, int branchSize)
    {
        // First pass: find the largest value of the branch.
        var maxProb = float.NegativeInfinity;
        for (var cls = 0; cls < branchSize; ++cls)
        {
            maxProb = Mathf.Max(logProbs.data[batch, cls + channelOffset], maxProb);
        }

        // Second pass: accumulate exponentials. Subtracting the maximum keeps every exponent
        // at or below zero, so Exp cannot overflow.
        var sumProb = 0.0f;
        for (var cls = 0; cls < branchSize; ++cls)
        {
            sumProb += Mathf.Exp(logProbs.data[batch, cls + channelOffset] - maxProb);
            m_CdfBuffer[cls] = sumProb;
        }
    }
}
|
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Copies per-agent rows of an output tensor into a dictionary of memory vectors keyed by
/// agent id.
/// </summary>
internal class MemoryOutputApplier : TensorApplier.IApplier
{
    // Only assigned in the constructor; the dictionary itself is mutated by Apply.
    readonly Dictionary<int, List<float>> m_Memories;

    /// <summary>
    /// Creates the applier.
    /// </summary>
    /// <param name="memories">Dictionary that receives the memory vector of each agent. The
    /// reference is stored, so entries written by <see cref="Apply"/> are visible to the
    /// caller.</param>
    public MemoryOutputApplier(
        Dictionary<int, List<float>> memories)
    {
        m_Memories = memories;
    }

    /// <summary>
    /// Stores, for every id in <paramref name="actionIds"/>, the first <c>width</c> values of
    /// the matching tensor row into that agent's memory list.
    /// </summary>
    /// <param name="tensorProxy">Tensor to read from; <c>data.width</c> is the number of values
    /// copied per agent.</param>
    /// <param name="actionIds">Agent ids; the position of an id in this list is the tensor row
    /// read for that agent.</param>
    /// <param name="lastActions">Not used by this applier.</param>
    public void Apply(TensorProxy tensorProxy, IList<int> actionIds, Dictionary<int, ActionBuffers> lastActions)
    {
        var memorySize = tensorProxy.data.width;

        for (var agentIndex = 0; agentIndex < actionIds.Count; agentIndex++)
        {
            var agentId = actionIds[agentIndex];

            // Reuse the stored list only when it exists and can hold memorySize values.
            // The null check covers a key that is present but mapped to null, which would
            // otherwise throw on memory.Count.
            List<float> memory;
            if (!m_Memories.TryGetValue(agentId, out memory)
                || memory == null
                || memory.Count < memorySize)
            {
                // Presize so that AddRange does not have to grow the list repeatedly.
                memory = new List<float>(memorySize);
                memory.AddRange(Enumerable.Repeat(0f, memorySize));
            }

            for (var j = 0; j < memorySize; j++)
            {
                memory[j] = tensorProxy.data[agentIndex, 0, j, 0];
            }

            m_Memories[agentId] = memory;
        }
    }
}
|
} |
|
|