Skip to content

Commit 2aa1b6c

Browse files
committed
Refine Gymnasium task
1 parent 36ffaa7 commit 2aa1b6c

File tree

4 files changed

+86
-62
lines changed

4 files changed

+86
-62
lines changed

src/SharpNeat.Tasks/Gymnasium/GymnasiumEpisode.cs

Lines changed: 43 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -7,33 +7,42 @@ namespace SharpNeat.Tasks.Gymnasium;
77

88
public sealed class GymnasiumEpisode
99
{
10-
readonly int _inputCount;
11-
readonly int _outputCount;
12-
readonly bool _isContinious;
13-
readonly bool _test;
10+
private readonly int _inputCount;
11+
private readonly int _outputCount;
12+
private readonly bool _isContinuous;
13+
private readonly bool _test;
1414

15-
public GymnasiumEpisode(int inputCount, int outputCount, bool isContinious, bool test)
15+
public GymnasiumEpisode(int inputCount, int outputCount, bool isContinuous, bool test)
1616
{
1717
_inputCount = inputCount;
1818
_outputCount = outputCount;
19-
_isContinious = isContinious;
19+
_isContinuous = isContinuous;
2020
_test = test;
2121
}
2222

2323
public FitnessInfo Evaluate(IBlackBox<double> phenome)
2424
{
2525
var uuid = Guid.NewGuid();
2626

27+
// var start = new ProcessStartInfo
28+
// {
29+
// FileName = @"pythonw.exe",
30+
// WorkingDirectory = @"./",
31+
// Arguments = string.Format(CultureInfo.InvariantCulture, @"gymnasium/main.py -uuid {0} -render {1} -test False", uuid.ToString(), _test),
32+
// UseShellExecute = false,
33+
// RedirectStandardOutput = false
34+
// };
35+
2736
var start = new ProcessStartInfo
2837
{
29-
FileName = @"pythonw.exe",
38+
FileName = @"D:\projects\sharpneat-fork\src\SharpNeat.Windows.App\gymnasium\main.exe",
3039
WorkingDirectory = @"./",
31-
Arguments = string.Format(CultureInfo.InvariantCulture, @"gymnasium/main.py -uuid {0} -render {1}", uuid.ToString(), _test),
40+
Arguments = string.Format(CultureInfo.InvariantCulture, @"-uuid {0} -render {1} -test False", uuid.ToString(), _test),
3241
UseShellExecute = false,
3342
RedirectStandardOutput = false
3443
};
3544

36-
var process = Process.Start(start) ?? throw new InvalidOperationException("No proccess resource is started");
45+
var process = Process.Start(start) ?? throw new InvalidOperationException("No process resource is started");
3746
var totalReward = 0.0;
3847

3948
try
@@ -52,10 +61,9 @@ public FitnessInfo Evaluate(IBlackBox<double> phenome)
5261
var inputs = phenome.Inputs.Span;
5362
inputs.Clear();
5463

55-
var observationTuple = ReadObservation(namedPipeClientStream, _inputCount);
56-
var observation = observationTuple.observation;
57-
totalReward = observationTuple.reward[0];
58-
var done = observationTuple.done[0];
64+
var (observation, rewardArray, doneArray) = ReadObservation(namedPipeClientStream, _inputCount);
65+
totalReward = rewardArray[0];
66+
var done = doneArray[0];
5967

6068
if (done == 1)
6169
{
@@ -66,7 +74,7 @@ public FitnessInfo Evaluate(IBlackBox<double> phenome)
6674
phenome.Activate();
6775

6876
// var clampedOutputs = outputs.Select(output => Math.Clamp(output, -1.0, 1.0)).ToArray();
69-
if (_isContinious)
77+
if (_isContinuous)
7078
{
7179
var outputBuffer = new byte[_outputCount * sizeof(float)];
7280
var outputs = new double[_outputCount];
@@ -76,7 +84,7 @@ public FitnessInfo Evaluate(IBlackBox<double> phenome)
7684
}
7785
else
7886
{
79-
int maxSigIndex = ReadMaxSigIndex(phenome);
87+
var maxSigIndex = ReadMaxSigIndex(phenome);
8088
var outputBuffer = new byte[sizeof(int)];
8189
Buffer.BlockCopy(new int[] { maxSigIndex }, 0, outputBuffer, 0, outputBuffer.Length);
8290
namedPipeClientStream.Write(outputBuffer, 0, outputBuffer.Length);
@@ -101,16 +109,16 @@ public FitnessInfo Evaluate(IBlackBox<double> phenome)
101109
return new FitnessInfo(maskedReward);
102110
}
103111

104-
static (double[] observation, double[] reward, int[] done) ReadObservation(NamedPipeClientStream namedPipeClientStream, int count)
112+
private static (double[] observation, double[] reward, int[] done) ReadObservation(NamedPipeClientStream namedPipeClientStream, int count)
105113
{
106114
var count0 = count * sizeof(double);
107-
var count1 = sizeof(double);
108-
var count2 = sizeof(int);
115+
const int count1 = sizeof(double);
116+
const int count2 = sizeof(int);
109117
var inputBuffer = new byte[count0 + count1 + count2];
110118
namedPipeClientStream.Read(inputBuffer, 0, inputBuffer.Length);
111-
double[] observation = new double[count];
112-
double[] reward = new double[1];
113-
int[] done = new int[1];
119+
var observation = new double[count];
120+
var reward = new double[1];
121+
var done = new int[1];
114122
var offset1 = count0;
115123
var offset2 = count0 + count1;
116124
Buffer.BlockCopy(inputBuffer, 0, observation, 0, count0);
@@ -119,46 +127,44 @@ public FitnessInfo Evaluate(IBlackBox<double> phenome)
119127
return (observation, reward, done);
120128
}
121129

122-
static double[] ReadDoubleArray(NamedPipeClientStream namedPipeClientStream, int count)
130+
private static double[] ReadDoubleArray(NamedPipeClientStream namedPipeClientStream, int count)
123131
{
124132
var inputBuffer = new byte[count * sizeof(double)];
125133
namedPipeClientStream.Read(inputBuffer, 0, inputBuffer.Length);
126-
double[] values = new double[inputBuffer.Length / sizeof(double)];
134+
var values = new double[inputBuffer.Length / sizeof(double)];
127135
Buffer.BlockCopy(inputBuffer, 0, values, 0, values.Length * sizeof(double));
128136
return values;
129137
}
130138

131-
static float[] ReadFloatArray(NamedPipeClientStream namedPipeClientStream, int count)
139+
private static float[] ReadFloatArray(NamedPipeClientStream namedPipeClientStream, int count)
132140
{
133141
var inputBuffer = new byte[count * sizeof(float)];
134142
namedPipeClientStream.Read(inputBuffer, 0, inputBuffer.Length);
135-
float[] values = new float[inputBuffer.Length / sizeof(float)];
143+
var values = new float[inputBuffer.Length / sizeof(float)];
136144
Buffer.BlockCopy(inputBuffer, 0, values, 0, values.Length * sizeof(float));
137145
return values;
138146
}
139147

140-
static int[] ReadIntArray(NamedPipeClientStream namedPipeClientStream, int count)
148+
private static int[] ReadIntArray(NamedPipeClientStream namedPipeClientStream, int count)
141149
{
142150
var inputBuffer = new byte[count * sizeof(int)];
143151
namedPipeClientStream.Read(inputBuffer, 0, inputBuffer.Length);
144-
int[] values = new int[inputBuffer.Length / sizeof(int)];
152+
var values = new int[inputBuffer.Length / sizeof(int)];
145153
Buffer.BlockCopy(inputBuffer, 0, values, 0, values.Length * sizeof(int));
146154
return values;
147155
}
148156

149-
int ReadMaxSigIndex(IBlackBox<double> phenome)
157+
private int ReadMaxSigIndex(IBlackBox<double> phenome)
150158
{
151-
double maxSig = phenome.Outputs.Span[0];
152-
int maxSigIdx = 0;
159+
var maxSig = phenome.Outputs.Span[0];
160+
var maxSigIdx = 0;
153161

154-
for (int i = 1; i < _outputCount; i++)
162+
for (var i = 1; i < _outputCount; i++)
155163
{
156-
double v = phenome.Outputs.Span[i];
157-
if (v > maxSig)
158-
{
159-
maxSig = v;
160-
maxSigIdx = i;
161-
}
164+
var v = phenome.Outputs.Span[i];
165+
if (!(v > maxSig)) continue;
166+
maxSig = v;
167+
maxSigIdx = i;
162168
}
163169

164170
return maxSigIdx;

src/SharpNeat.Tasks/Gymnasium/GymnasiumEvaluationScheme.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ internal class GymnasiumEvaluationScheme : IBlackBoxEvaluationScheme<double>
1111
public int OutputCount => 4;
1212

1313
/// <inheritdoc/>
14-
public bool IsDeterministic => true;
14+
public bool IsDeterministic => false;
1515

1616
/// <inheritdoc/>
1717
public IComparer<FitnessInfo> FitnessComparer => PrimaryFitnessInfoComparer.Singleton;

src/SharpNeat.Windows.App/config/experiments-config/gymnasium.config.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "Gymnasium",
3-
"isAcyclic": true,
3+
"isAcyclic": false,
44
"cyclesPerActivation": 1,
55
"activationFnName": "LeakyReLU",
66
"evolutionAlgorithm": {

src/SharpNeat.Windows.App/gymnasium/main.py

Lines changed: 41 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import collections
12
import struct
23
import time
34
import traceback
@@ -11,29 +12,31 @@
1112
import numpy as np
1213
import logging
1314

14-
1515
logging.basicConfig(filename='debug.log', encoding='utf-8', level=logging.FATAL)
1616
logging.debug("start")
1717

1818
parser = ArgumentParser()
19-
parser.add_argument("-uuid", dest="uuid")
20-
parser.add_argument("-render", dest="render")
19+
parser.add_argument("-uuid", dest="uuid", default="test")
20+
parser.add_argument("-render", dest="render", default="False")
21+
parser.add_argument("-test", dest="test", default="True")
2122
args = parser.parse_args()
2223
render = args.render == "True"
24+
test = args.test == "True"
2325

24-
pipe = win32pipe.CreateNamedPipe("\\\\.\\pipe\\gymnasium_pipe_" + args.uuid,
25-
win32pipe.PIPE_ACCESS_DUPLEX,
26-
win32pipe.PIPE_TYPE_MESSAGE | win32pipe.PIPE_READMODE_MESSAGE | win32pipe.PIPE_WAIT,
27-
1, 1024, 1024, 0, None)
28-
logging.debug("Connecting pipe...")
29-
win32pipe.ConnectNamedPipe(pipe, None)
30-
logging.debug("Pipe connected")
26+
if not test:
27+
pipe = win32pipe.CreateNamedPipe("\\\\.\\pipe\\gymnasium_pipe_" + args.uuid,
28+
win32pipe.PIPE_ACCESS_DUPLEX,
29+
win32pipe.PIPE_TYPE_MESSAGE | win32pipe.PIPE_READMODE_MESSAGE | win32pipe.PIPE_WAIT,
30+
1, 1024, 1024, 0, None)
31+
logging.debug("Connecting pipe...")
32+
win32pipe.ConnectNamedPipe(pipe, None)
33+
logging.debug("Pipe connected")
3134

3235
# env = gym.make("BipedalWalker-v3", hardcore=False, render_mode="human" if render else None)
3336
# env = gym.make("LunarLander-v2", render_mode="human" if render else None)
3437
try:
3538
# env = gym.make("LunarLander-v2", enable_wind=True, render_mode="human" if render else None)
36-
env = gym.make("BipedalWalker-v3", hardcore=True, render_mode="human" if render else None)
39+
env = gym.make("BipedalWalker-v3", hardcore=False, render_mode="human" if render else None)
3740
env = ClipAction(env)
3841

3942
logging.debug("Environment created")
@@ -48,17 +51,23 @@
4851
def run_episode():
4952
observation, info = env.reset()
5053

51-
logging.debug("Initial observation:", observation)
52-
send_observation(observation, 0, False)
53-
logging.debug("Initial observation sent")
54+
if not test:
55+
logging.debug("Initial observation:", observation)
56+
send_observation(observation, 0, False)
57+
logging.debug("Initial observation sent")
5458

59+
max_reward_history_len = 100
5560
total_reward = 0
5661
total_timesteps = 0
62+
latest_rewards = collections.deque(maxlen=max_reward_history_len)
5763

5864
while 1:
5965
logging.debug("Starting step")
60-
a = read_action(env.action_space)
61-
logging.debug("Action read:", a)
66+
67+
if not test:
68+
a = read_action(env.action_space)
69+
else:
70+
a = env.action_space.sample()
6271

6372
total_timesteps += 1
6473

@@ -71,20 +80,28 @@ def run_episode():
7180
# print("reward %0.3f" % reward)
7281

7382
total_reward += reward
83+
latest_rewards.append(float(reward))
7484

7585
masked_done = done
7686

77-
# if render:
78-
# masked_done = False
87+
if total_timesteps >= max_reward_history_len:
88+
low_performing = True
89+
for historical_reward in latest_rewards:
90+
if historical_reward > 0:
91+
low_performing = False
92+
break
93+
if low_performing:
94+
masked_done = True
7995

80-
send_observation(observation, float(total_reward), masked_done)
81-
logging.debug("Observation sent")
96+
if not test:
97+
send_observation(observation, float(total_reward), masked_done)
98+
logging.debug("Observation sent")
8299

83100
if render:
84101
env.render()
85-
time.sleep(0.02)
102+
time.sleep(0.01)
86103

87-
if done:
104+
if masked_done:
88105
logging.debug("Terminated")
89106
if not render:
90107
# pipe.close()
@@ -99,7 +116,8 @@ def run_episode():
99116

100117

101118
def send_observation(observation: np.array, reward: float, done: bool):
102-
win32file.WriteFile(pipe, bytes(observation.astype(float)) + bytes(np.array([reward]).astype(float)) + bytes(np.array([int(done)])))
119+
win32file.WriteFile(pipe, bytes(observation.astype(float)) + bytes(np.array([reward]).astype(float)) + bytes(
120+
np.array([int(done)])))
103121

104122

105123
def read_action(space: spaces.Space):

0 commit comments

Comments
 (0)