File size: 4,253 Bytes
05c9ac2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
using System.Collections.Generic;
using System;
using System.Collections;
using Unity.MLAgents.Actuators;
using Unity.MLAgents.Sensors;
namespace Unity.MLAgents.Policies
{
/// <summary>
/// The Heuristic Policy uses a hard-coded Heuristic method
/// to take decisions each time the RequestDecision method is
/// called.
/// </summary>
internal class HeuristicPolicy : IPolicy
{
// Actuator manager whose ApplyHeuristic is invoked from DecideAction.
ActuatorManager m_ActuatorManager;
// Action storage allocated once in the constructor and returned by reference from DecideAction.
ActionBuffers m_ActionBuffers;
// Copy of AgentInfo.done taken in the most recent RequestDecision call.
bool m_Done;
// Set to true by RequestDecision and reset to false by DecideAction.
bool m_DecisionRequested;
// Writer handed to ISensor.Write in StepSensors; it is always targeted at m_NullList.
ObservationWriter m_ObservationWriter = new ObservationWriter();
// Sink list used as the writer target in StepSensors; values written to it are discarded.
NullList m_NullList = new NullList();
/// <summary>
/// Creates a heuristic policy bound to the given actuator manager and
/// allocates zero-initialized action storage sized from the action spec.
/// </summary>
/// <param name="actuatorManager">Manager whose heuristic is applied in DecideAction.</param>
/// <param name="actionSpec">Supplies the number of continuous and discrete actions.</param>
public HeuristicPolicy(ActuatorManager actuatorManager, ActionSpec actionSpec)
{
    m_ActuatorManager = actuatorManager;

    // One backing array per action kind; each segment spans its whole array.
    var continuousCount = actionSpec.NumContinuousActions;
    var continuousActions = new ActionSegment<float>(new float[continuousCount], 0, continuousCount);

    var discreteCount = actionSpec.NumDiscreteActions;
    var discreteActions = new ActionSegment<int>(new int[discreteCount], 0, discreteCount);

    m_ActionBuffers = new ActionBuffers(continuousActions, discreteActions);
}
/// <inheritdoc />
public void RequestDecision(AgentInfo info, List<ISensor> sensors)
{
    // Step the sensors before touching any state, so a failing sensor
    // leaves the pending-decision flags as they were.
    StepSensors(sensors);

    // Record the request; DecideAction reads both flags.
    m_DecisionRequested = true;
    m_Done = info.done;
}
/// <inheritdoc />
public ref readonly ActionBuffers DecideAction()
{
    // The heuristic only runs for a pending request on an agent that is not done;
    // otherwise the buffers are returned exactly as the previous call left them.
    var heuristicIsDue = m_DecisionRequested && !m_Done;
    if (heuristicIsDue)
    {
        m_ActionBuffers.Clear();
        m_ActuatorManager.ApplyHeuristic(m_ActionBuffers);
    }

    // The request is consumed whether or not the heuristic ran.
    m_DecisionRequested = false;
    return ref m_ActionBuffers;
}
/// <summary>
/// Intentionally empty; this policy performs no cleanup.
/// </summary>
public void Dispose()
{
    // Nothing to do.
}
/// <summary>
/// Trivial implementation of the IList interface that does nothing.
/// This is only used for "writing" observations that we will discard.
/// Every write is dropped, every query reports an empty list, and the
/// indexer always reads back zero.
/// </summary>
internal class NullList : IList<float>
{
    /// <summary>Always 0; nothing is ever stored.</summary>
    public int Count
    {
        get { return 0; }
    }

    /// <summary>Always false; writes are accepted and then discarded.</summary>
    public bool IsReadOnly
    {
        get { return false; }
    }

    /// <summary>Reads always return 0; assignments are ignored.</summary>
    /// <param name="index">Ignored; no bounds checking is performed.</param>
    public float this[int index]
    {
        get { return 0.0f; }
        set { }
    }

    /// <summary>Returns an enumerator over no elements.</summary>
    /// <returns>An enumerator whose first MoveNext call returns false.</returns>
    public IEnumerator<float> GetEnumerator()
    {
        // Consistent with Count == 0: enumerate nothing rather than throw,
        // so foreach and other IEnumerable consumers can use the sink safely.
        yield break;
    }

    /// <summary>Non-generic view of the same empty enumeration.</summary>
    IEnumerator IEnumerable.GetEnumerator()
    {
        return GetEnumerator();
    }

    /// <summary>Discards the item.</summary>
    public void Add(float item)
    {
    }

    /// <summary>No-op; the list never holds anything.</summary>
    public void Clear()
    {
    }

    /// <summary>Always false; no item is ever stored.</summary>
    public bool Contains(float item)
    {
        return false;
    }

    /// <summary>
    /// Copies nothing, since the list has no elements. The destination
    /// array is left untouched and the arguments are not inspected.
    /// </summary>
    public void CopyTo(float[] array, int arrayIndex)
    {
    }

    /// <summary>Always false; there is never anything to remove.</summary>
    public bool Remove(float item)
    {
        return false;
    }

    /// <summary>Always -1; no item is ever found.</summary>
    public int IndexOf(float item)
    {
        return -1;
    }

    /// <summary>Discards the item; the index is ignored.</summary>
    public void Insert(int index, float item)
    {
    }

    /// <summary>No-op; the index is ignored.</summary>
    public void RemoveAt(int index)
    {
    }
}
/// <summary>
/// Exercises every sensor once. A sensor whose compression type is None
/// writes through the observation writer, which is targeted at the null
/// list; any other sensor is asked for its compressed observation.
/// The results are ignored; the calls are made so that sensor usage stays
/// consistent between training and inference.
/// </summary>
/// <param name="sensors">Sensors to step, in list order.</param>
void StepSensors(List<ISensor> sensors)
{
    foreach (var current in sensors)
    {
        var compression = current.GetCompressionSpec().SensorCompressionType;
        if (compression != SensorCompressionType.None)
        {
            // Compressed path: the returned observation is deliberately dropped.
            current.GetCompressedObservation();
            continue;
        }

        // Uncompressed path: retarget the shared writer at the sink, then let the sensor write.
        m_ObservationWriter.SetTarget(m_NullList, current.GetObservationSpec(), 0);
        current.Write(m_ObservationWriter);
    }
}
}
}
|