Skip to content

Commit 698d4a4

Browse files
committed
Refine Gymnasium task
1 parent 36ffaa7 commit 698d4a4

File tree

7 files changed

+89
-74
lines changed

7 files changed

+89
-74
lines changed

src/SharpNeat.Tasks.Windows/Gymnasium/GymnasiumControl.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ private void InitializeComponent()
8686
/// </summary>
8787
private void SimulationThread()
8888
{
89-
// Wait to be signalled to start the next trial run.
89+
// Wait to be signaled to start the next trial run.
9090
_simStartEvent.WaitOne();
9191

9292
IBlackBox<double> agent = _agent;
@@ -96,7 +96,7 @@ private void SimulationThread()
9696

9797
while (true)
9898
{
99-
// Check if we have been signalled to terminate before starting a simulation run.
99+
// Check if we have been signaled to terminate before starting a simulation run.
100100
if(_terminateSimThread)
101101
break;
102102

src/SharpNeat.Tasks/Gymnasium/GymnasiumEpisode.cs

Lines changed: 33 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,16 @@ namespace SharpNeat.Tasks.Gymnasium;
77

88
public sealed class GymnasiumEpisode
99
{
10-
readonly int _inputCount;
11-
readonly int _outputCount;
12-
readonly bool _isContinious;
13-
readonly bool _test;
10+
private readonly int _inputCount;
11+
private readonly int _outputCount;
12+
private readonly bool _isContinuous;
13+
private readonly bool _test;
1414

15-
public GymnasiumEpisode(int inputCount, int outputCount, bool isContinious, bool test)
15+
public GymnasiumEpisode(int inputCount, int outputCount, bool isContinuous, bool test)
1616
{
1717
_inputCount = inputCount;
1818
_outputCount = outputCount;
19-
_isContinious = isContinious;
19+
_isContinuous = isContinuous;
2020
_test = test;
2121
}
2222

@@ -28,12 +28,12 @@ public FitnessInfo Evaluate(IBlackBox<double> phenome)
2828
{
2929
FileName = @"pythonw.exe",
3030
WorkingDirectory = @"./",
31-
Arguments = string.Format(CultureInfo.InvariantCulture, @"gymnasium/main.py -uuid {0} -render {1}", uuid.ToString(), _test),
31+
Arguments = string.Format(CultureInfo.InvariantCulture, @"gymnasium/main.py -uuid {0} -render {1} -test False", uuid.ToString(), _test),
3232
UseShellExecute = false,
3333
RedirectStandardOutput = false
3434
};
3535

36-
var process = Process.Start(start) ?? throw new InvalidOperationException("No proccess resource is started");
36+
var process = Process.Start(start) ?? throw new InvalidOperationException("No process resource is started");
3737
var totalReward = 0.0;
3838

3939
try
@@ -52,10 +52,9 @@ public FitnessInfo Evaluate(IBlackBox<double> phenome)
5252
var inputs = phenome.Inputs.Span;
5353
inputs.Clear();
5454

55-
var observationTuple = ReadObservation(namedPipeClientStream, _inputCount);
56-
var observation = observationTuple.observation;
57-
totalReward = observationTuple.reward[0];
58-
var done = observationTuple.done[0];
55+
var (observation, rewardArray, doneArray) = ReadObservation(namedPipeClientStream, _inputCount);
56+
totalReward = rewardArray[0];
57+
var done = doneArray[0];
5958

6059
if (done == 1)
6160
{
@@ -66,7 +65,7 @@ public FitnessInfo Evaluate(IBlackBox<double> phenome)
6665
phenome.Activate();
6766

6867
// var clampedOutputs = outputs.Select(output => Math.Clamp(output, -1.0, 1.0)).ToArray();
69-
if (_isContinious)
68+
if (_isContinuous)
7069
{
7170
var outputBuffer = new byte[_outputCount * sizeof(float)];
7271
var outputs = new double[_outputCount];
@@ -76,7 +75,7 @@ public FitnessInfo Evaluate(IBlackBox<double> phenome)
7675
}
7776
else
7877
{
79-
int maxSigIndex = ReadMaxSigIndex(phenome);
78+
var maxSigIndex = ReadMaxSigIndex(phenome);
8079
var outputBuffer = new byte[sizeof(int)];
8180
Buffer.BlockCopy(new int[] { maxSigIndex }, 0, outputBuffer, 0, outputBuffer.Length);
8281
namedPipeClientStream.Write(outputBuffer, 0, outputBuffer.Length);
@@ -101,16 +100,16 @@ public FitnessInfo Evaluate(IBlackBox<double> phenome)
101100
return new FitnessInfo(maskedReward);
102101
}
103102

104-
static (double[] observation, double[] reward, int[] done) ReadObservation(NamedPipeClientStream namedPipeClientStream, int count)
103+
private static (double[] observation, double[] reward, int[] done) ReadObservation(Stream namedPipeClientStream, int count)
105104
{
106105
var count0 = count * sizeof(double);
107-
var count1 = sizeof(double);
108-
var count2 = sizeof(int);
106+
const int count1 = sizeof(double);
107+
const int count2 = sizeof(int);
109108
var inputBuffer = new byte[count0 + count1 + count2];
110109
namedPipeClientStream.Read(inputBuffer, 0, inputBuffer.Length);
111-
double[] observation = new double[count];
112-
double[] reward = new double[1];
113-
int[] done = new int[1];
110+
var observation = new double[count];
111+
var reward = new double[1];
112+
var done = new int[1];
114113
var offset1 = count0;
115114
var offset2 = count0 + count1;
116115
Buffer.BlockCopy(inputBuffer, 0, observation, 0, count0);
@@ -119,46 +118,44 @@ public FitnessInfo Evaluate(IBlackBox<double> phenome)
119118
return (observation, reward, done);
120119
}
121120

122-
static double[] ReadDoubleArray(NamedPipeClientStream namedPipeClientStream, int count)
121+
private static double[] ReadDoubleArray(Stream namedPipeClientStream, int count)
123122
{
124123
var inputBuffer = new byte[count * sizeof(double)];
125124
namedPipeClientStream.Read(inputBuffer, 0, inputBuffer.Length);
126-
double[] values = new double[inputBuffer.Length / sizeof(double)];
125+
var values = new double[inputBuffer.Length / sizeof(double)];
127126
Buffer.BlockCopy(inputBuffer, 0, values, 0, values.Length * sizeof(double));
128127
return values;
129128
}
130129

131-
static float[] ReadFloatArray(NamedPipeClientStream namedPipeClientStream, int count)
130+
private static float[] ReadFloatArray(Stream namedPipeClientStream, int count)
132131
{
133132
var inputBuffer = new byte[count * sizeof(float)];
134133
namedPipeClientStream.Read(inputBuffer, 0, inputBuffer.Length);
135-
float[] values = new float[inputBuffer.Length / sizeof(float)];
134+
var values = new float[inputBuffer.Length / sizeof(float)];
136135
Buffer.BlockCopy(inputBuffer, 0, values, 0, values.Length * sizeof(float));
137136
return values;
138137
}
139138

140-
static int[] ReadIntArray(NamedPipeClientStream namedPipeClientStream, int count)
139+
private static int[] ReadIntArray(Stream namedPipeClientStream, int count)
141140
{
142141
var inputBuffer = new byte[count * sizeof(int)];
143142
namedPipeClientStream.Read(inputBuffer, 0, inputBuffer.Length);
144-
int[] values = new int[inputBuffer.Length / sizeof(int)];
143+
var values = new int[inputBuffer.Length / sizeof(int)];
145144
Buffer.BlockCopy(inputBuffer, 0, values, 0, values.Length * sizeof(int));
146145
return values;
147146
}
148147

149-
int ReadMaxSigIndex(IBlackBox<double> phenome)
148+
private int ReadMaxSigIndex(IBlackBox<double> phenome)
150149
{
151-
double maxSig = phenome.Outputs.Span[0];
152-
int maxSigIdx = 0;
150+
var maxSig = phenome.Outputs.Span[0];
151+
var maxSigIdx = 0;
153152

154-
for (int i = 1; i < _outputCount; i++)
153+
for (var i = 1; i < _outputCount; i++)
155154
{
156-
double v = phenome.Outputs.Span[i];
157-
if (v > maxSig)
158-
{
159-
maxSig = v;
160-
maxSigIdx = i;
161-
}
155+
var v = phenome.Outputs.Span[i];
156+
if (!(v > maxSig)) continue;
157+
maxSig = v;
158+
maxSigIdx = i;
162159
}
163160

164161
return maxSigIdx;

src/SharpNeat.Tasks/Gymnasium/GymnasiumEvaluationScheme.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ internal class GymnasiumEvaluationScheme : IBlackBoxEvaluationScheme<double>
1111
public int OutputCount => 4;
1212

1313
/// <inheritdoc/>
14-
public bool IsDeterministic => true;
14+
public bool IsDeterministic => false;
1515

1616
/// <inheritdoc/>
1717
public IComparer<FitnessInfo> FitnessComparer => PrimaryFitnessInfoComparer.Singleton;

src/SharpNeat.Tasks/Gymnasium/GymnasiumEvaluator.cs

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,29 +4,29 @@ namespace SharpNeat.Tasks.Gymnasium;
44

55
public sealed class GymnasiumEvaluator : IPhenomeEvaluator<IBlackBox<double>>
66
{
7-
readonly int _inputCount;
8-
readonly int _outputCount;
9-
readonly bool _isContinious;
10-
readonly bool _test;
11-
readonly int _trialsPerEvaluation = 1;
7+
private const int TrialsPerEvaluation = 1;
8+
private readonly int _inputCount;
9+
private readonly int _outputCount;
10+
private readonly bool _isContinuous;
11+
private readonly bool _test;
1212

13-
public GymnasiumEvaluator(int inputCount, int outputCount, bool isContinious, bool test)
13+
public GymnasiumEvaluator(int inputCount, int outputCount, bool isContinuous, bool test)
1414
{
1515
_inputCount = inputCount;
1616
_outputCount = outputCount;
17-
_isContinious = isContinious;
17+
_isContinuous = isContinuous;
1818
_test = test;
1919
}
2020

2121
public FitnessInfo Evaluate(IBlackBox<double> phenome)
2222
{
2323
var finesses = new List<FitnessInfo>();
24-
for (int i = 0; i < _trialsPerEvaluation; i++)
24+
for (var i = 0; i < TrialsPerEvaluation; i++)
2525
{
26-
var episode = new GymnasiumEpisode(_inputCount, _outputCount, _isContinious, _test);
26+
var episode = new GymnasiumEpisode(_inputCount, _outputCount, _isContinuous, _test);
2727
finesses.Add(episode.Evaluate(phenome));
2828
}
2929

30-
return new FitnessInfo(finesses.Average(finesses => finesses.PrimaryFitness));
30+
return new FitnessInfo(finesses.Average(fitness => fitness.PrimaryFitness));
3131
}
3232
}

src/SharpNeat.Tasks/Gymnasium/GymnasiumExperimentFactory.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ namespace SharpNeat.Tasks.Gymnasium;
88
internal class GymnasiumExperimentFactory : INeatExperimentFactory
99
{
1010
/// <inheritdoc/>
11-
public string Id => "gmnasium";
11+
public string Id => "gymnasium";
1212

1313
/// <inheritdoc/>
1414
public INeatExperiment<double> CreateExperiment(Stream jsonConfigStream)

src/SharpNeat.Windows.App/config/experiments-config/gymnasium.config.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "Gymnasium",
3-
"isAcyclic": true,
3+
"isAcyclic": false,
44
"cyclesPerActivation": 1,
55
"activationFnName": "LeakyReLU",
66
"evolutionAlgorithm": {

src/SharpNeat.Windows.App/gymnasium/main.py

Lines changed: 41 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import collections
12
import struct
23
import time
34
import traceback
@@ -11,29 +12,31 @@
1112
import numpy as np
1213
import logging
1314

14-
1515
logging.basicConfig(filename='debug.log', encoding='utf-8', level=logging.FATAL)
1616
logging.debug("start")
1717

1818
parser = ArgumentParser()
19-
parser.add_argument("-uuid", dest="uuid")
20-
parser.add_argument("-render", dest="render")
19+
parser.add_argument("-uuid", dest="uuid", default="test")
20+
parser.add_argument("-render", dest="render", default="False")
21+
parser.add_argument("-test", dest="test", default="True")
2122
args = parser.parse_args()
2223
render = args.render == "True"
24+
test = args.test == "True"
2325

24-
pipe = win32pipe.CreateNamedPipe("\\\\.\\pipe\\gymnasium_pipe_" + args.uuid,
25-
win32pipe.PIPE_ACCESS_DUPLEX,
26-
win32pipe.PIPE_TYPE_MESSAGE | win32pipe.PIPE_READMODE_MESSAGE | win32pipe.PIPE_WAIT,
27-
1, 1024, 1024, 0, None)
28-
logging.debug("Connecting pipe...")
29-
win32pipe.ConnectNamedPipe(pipe, None)
30-
logging.debug("Pipe connected")
26+
if not test:
27+
pipe = win32pipe.CreateNamedPipe("\\\\.\\pipe\\gymnasium_pipe_" + args.uuid,
28+
win32pipe.PIPE_ACCESS_DUPLEX,
29+
win32pipe.PIPE_TYPE_MESSAGE | win32pipe.PIPE_READMODE_MESSAGE | win32pipe.PIPE_WAIT,
30+
1, 1024, 1024, 0, None)
31+
logging.debug("Connecting pipe...")
32+
win32pipe.ConnectNamedPipe(pipe, None)
33+
logging.debug("Pipe connected")
3134

3235
# env = gym.make("BipedalWalker-v3", hardcore=False, render_mode="human" if render else None)
3336
# env = gym.make("LunarLander-v2", render_mode="human" if render else None)
3437
try:
3538
# env = gym.make("LunarLander-v2", enable_wind=True, render_mode="human" if render else None)
36-
env = gym.make("BipedalWalker-v3", hardcore=True, render_mode="human" if render else None)
39+
env = gym.make("BipedalWalker-v3", hardcore=False, render_mode="human" if render else None)
3740
env = ClipAction(env)
3841

3942
logging.debug("Environment created")
@@ -48,17 +51,23 @@
4851
def run_episode():
4952
observation, info = env.reset()
5053

51-
logging.debug("Initial observation:", observation)
52-
send_observation(observation, 0, False)
53-
logging.debug("Initial observation sent")
54+
if not test:
55+
logging.debug("Initial observation:", observation)
56+
send_observation(observation, 0, False)
57+
logging.debug("Initial observation sent")
5458

59+
max_reward_history_len = 100
5560
total_reward = 0
5661
total_timesteps = 0
62+
latest_rewards = collections.deque(maxlen=max_reward_history_len)
5763

5864
while 1:
5965
logging.debug("Starting step")
60-
a = read_action(env.action_space)
61-
logging.debug("Action read:", a)
66+
67+
if not test:
68+
a = read_action(env.action_space)
69+
else:
70+
a = env.action_space.sample()
6271

6372
total_timesteps += 1
6473

@@ -71,20 +80,28 @@ def run_episode():
7180
# print("reward %0.3f" % reward)
7281

7382
total_reward += reward
83+
latest_rewards.append(float(reward))
7484

7585
masked_done = done
7686

77-
# if render:
78-
# masked_done = False
87+
if total_timesteps >= max_reward_history_len:
88+
low_performing = True
89+
for historical_reward in latest_rewards:
90+
if historical_reward > 0:
91+
low_performing = False
92+
break
93+
if low_performing:
94+
masked_done = True
7995

80-
send_observation(observation, float(total_reward), masked_done)
81-
logging.debug("Observation sent")
96+
if not test:
97+
send_observation(observation, float(total_reward), masked_done)
98+
logging.debug("Observation sent")
8299

83100
if render:
84101
env.render()
85-
time.sleep(0.02)
102+
time.sleep(0.01)
86103

87-
if done:
104+
if masked_done:
88105
logging.debug("Terminated")
89106
if not render:
90107
# pipe.close()
@@ -99,7 +116,8 @@ def run_episode():
99116

100117

101118
def send_observation(observation: np.array, reward: float, done: bool):
102-
win32file.WriteFile(pipe, bytes(observation.astype(float)) + bytes(np.array([reward]).astype(float)) + bytes(np.array([int(done)])))
119+
win32file.WriteFile(pipe, bytes(observation.astype(float)) + bytes(np.array([reward]).astype(float)) + bytes(
120+
np.array([int(done)])))
103121

104122

105123
def read_action(space: spaces.Space):

0 commit comments

Comments
 (0)