Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 12 additions & 13 deletions eng/skill-validator/src/Evaluate/AgentRunner.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
using System.Text.Json;
using System.Text.Json.Nodes;
using SkillValidator.Shared;
using GitHub.Copilot.SDK;
using GitHub.Copilot;
using GitHub.Copilot.Rpc;
using AgentInfo = SkillValidator.Shared.AgentInfo;

namespace SkillValidator.Evaluate;

Expand Down Expand Up @@ -68,14 +70,14 @@ public static async Task<CopilotClient> GetPluginClient(

var options = new CopilotClientOptions
{
LogLevel = verbose ? "info" : "none",
LogLevel = verbose ? CopilotLogLevel.Info : CopilotLogLevel.None,
SessionFs = new SessionFsConfig
{
InitialCwd = Environment.CurrentDirectory,
InitialWorkingDirectory = Environment.CurrentDirectory,
SessionStatePath = "session-state",
Conventions = OperatingSystem.IsWindows()
? GitHub.Copilot.SDK.Rpc.SessionFsSetProviderConventions.Windows
: GitHub.Copilot.SDK.Rpc.SessionFsSetProviderConventions.Posix,
? GitHub.Copilot.Rpc.SessionFsSetProviderConventions.Windows
: GitHub.Copilot.Rpc.SessionFsSetProviderConventions.Posix,
},
};

Expand Down Expand Up @@ -443,22 +445,19 @@ internal static async Task<SessionConfig> BuildSessionConfig(
Streaming = true,
WorkingDirectory = workDir,
SkillDirectories = [..skillDirs, ..noiseDirs],
ConfigDir = configDir,
ConfigDirectory = configDir,
McpServers = sdkMcp,
CustomAgents = customAgents,
InfiniteSessions = new InfiniteSessionConfig { Enabled = false },
// SDK 0.3.0 requires a SessionFsProvider (abstract base class).
// SDK 1.0.0 requires a SessionFsProvider (abstract base class).
// Without this, events.jsonl files are never written and
// session replay data is lost.
CreateSessionFsHandler = _ => new LocalSessionFsHandler(configDir),
CreateSessionFsProvider = _ => new LocalSessionFsHandler(configDir),
OnPermissionRequest = (request, _) =>
{
// SDK 0.2.0: PermissionRequest only has Kind, no path data.
// Permission sandboxing is handled via Hooks.OnPreToolUse instead.
return Task.FromResult(new PermissionRequestResult
{
Kind = PermissionRequestResultKind.Approved,
});
return Task.FromResult(PermissionDecision.ApproveOnce());
},
Hooks = new SessionHooks
{
Expand Down Expand Up @@ -580,7 +579,7 @@ await BuildSessionConfig(options.Skill, options.PluginRoot, options.Model, workD

// Register event handler BEFORE SelectAsync so SubagentSelectedEvent
// from the agent selection is captured in the events list.
session.On(evt =>
session.On<SessionEvent>(evt =>
{
var agentEvent = new AgentEvent(
evt.Type,
Expand Down
7 changes: 2 additions & 5 deletions eng/skill-validator/src/Evaluate/Judge.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
using System.Text.Json;
using System.Text.Json.Nodes;
using SkillValidator.Shared;
using GitHub.Copilot.SDK;
using GitHub.Copilot.Rpc;

namespace SkillValidator.Evaluate;

Expand Down Expand Up @@ -47,10 +47,7 @@ public static class Judge
{
// Judge sessions: deny all tool permissions. Judging should be a
// pure LLM task — no file access or tool execution needed.
return Task.FromResult(new PermissionRequestResult
{
Kind = PermissionRequestResultKind.UserNotAvailable,
});
return Task.FromResult(PermissionDecision.UserNotAvailable());
},
cancellationToken: cancellationToken);

Expand Down
14 changes: 6 additions & 8 deletions eng/skill-validator/src/Evaluate/LlmSession.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using GitHub.Copilot.SDK;
using GitHub.Copilot;
using GitHub.Copilot.Rpc;

namespace SkillValidator.Evaluate;

Expand Down Expand Up @@ -30,7 +31,7 @@ internal static async Task<LlmResponse> SendAsync(
int timeoutMs,
bool verbose,
string timeoutLabel = "LLM",
PermissionRequestHandler? onPermissionRequest = null,
Func<PermissionRequest, PermissionInvocation, Task<PermissionDecision>>? onPermissionRequest = null,
CancellationToken cancellationToken = default)
{
var client = await AgentRunner.GetSharedClient(verbose);
Expand All @@ -52,11 +53,8 @@ internal static async Task<LlmResponse> SendAsync(
Content = systemPrompt,
},
InfiniteSessions = new InfiniteSessionConfig { Enabled = false },
CreateSessionFsHandler = _ => new LocalSessionFsHandler(tempConfigDir),
OnPermissionRequest = onPermissionRequest ?? ((_, _) => Task.FromResult(new PermissionRequestResult
{
Kind = PermissionRequestResultKind.UserNotAvailable,
})),
CreateSessionFsProvider = _ => new LocalSessionFsHandler(tempConfigDir),
OnPermissionRequest = onPermissionRequest ?? ((_, _) => Task.FromResult(PermissionDecision.UserNotAvailable())),
});

using var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
Expand All @@ -70,7 +68,7 @@ internal static async Task<LlmResponse> SendAsync(
string responseContent = "";
int inputTokens = 0, outputTokens = 0, cacheReadTokens = 0, cacheWriteTokens = 0;

session.On(evt =>
session.On<SessionEvent>(evt =>
{
switch (evt)
{
Expand Down
12 changes: 6 additions & 6 deletions eng/skill-validator/src/Evaluate/LocalSessionFsHandler.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
using System.Collections.Concurrent;
using GitHub.Copilot.SDK;
using GitHub.Copilot.SDK.Rpc;
using GitHub.Copilot;
using GitHub.Copilot.Rpc;

namespace SkillValidator.Evaluate;

Expand Down Expand Up @@ -117,14 +117,14 @@ protected override Task<SessionFsStatResult> StatAsync(string path, Cancellation
throw new FileNotFoundException($"Not found: {path}");
}

protected override Task MkdirAsync(string path, bool recursive, int? mode, CancellationToken cancellationToken)
protected override Task MakeDirectoryAsync(string path, bool recursive, int? mode, CancellationToken cancellationToken)
{
var resolved = ResolvePath(path);
Directory.CreateDirectory(resolved);
return Task.CompletedTask;
}

protected override Task<IList<string>> ReaddirAsync(string path, CancellationToken cancellationToken)
protected override Task<IList<string>> ReadDirectoryAsync(string path, CancellationToken cancellationToken)
{
var resolved = ResolvePath(path);
var entries = new List<string>();
Expand All @@ -136,7 +136,7 @@ protected override Task<IList<string>> ReaddirAsync(string path, CancellationTok
return Task.FromResult<IList<string>>(entries);
}

protected override Task<IList<SessionFsReaddirWithTypesEntry>> ReaddirWithTypesAsync(string path, CancellationToken cancellationToken)
protected override Task<IList<SessionFsReaddirWithTypesEntry>> ReadDirectoryWithTypesAsync(string path, CancellationToken cancellationToken)
{
var resolved = ResolvePath(path);
var entries = new List<SessionFsReaddirWithTypesEntry>();
Expand All @@ -154,7 +154,7 @@ protected override Task<IList<SessionFsReaddirWithTypesEntry>> ReaddirWithTypesA
return Task.FromResult<IList<SessionFsReaddirWithTypesEntry>>(entries);
}

protected override Task RmAsync(string path, bool recursive, bool force, CancellationToken cancellationToken)
protected override Task RemoveAsync(string path, bool recursive, bool force, CancellationToken cancellationToken)
{
var resolved = ResolvePath(path);
if (File.Exists(resolved))
Expand Down
7 changes: 2 additions & 5 deletions eng/skill-validator/src/Evaluate/PairwiseJudge.cs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
using System.Text.Json;
using System.Text.Json.Nodes;
using SkillValidator.Shared;
using GitHub.Copilot.SDK;
using GitHub.Copilot.Rpc;

namespace SkillValidator.Evaluate;

Expand Down Expand Up @@ -87,10 +87,7 @@ public static class PairwiseJudge
{
// Pairwise judge sessions: deny all tool permissions. The judge
// should operate purely on the provided text — no tool execution.
return Task.FromResult(new PermissionRequestResult
{
Kind = PermissionRequestResultKind.UserNotAvailable,
});
return Task.FromResult(PermissionDecision.UserNotAvailable());
},
cancellationToken: cancellationToken);

Expand Down
4 changes: 3 additions & 1 deletion eng/skill-validator/src/SkillValidator.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
<EnforceCodeStyleInBuild>true</EnforceCodeStyleInBuild>
<GenerateDocumentationFile>true</GenerateDocumentationFile>
<NoWarn>$(NoWarn);CS1591</NoWarn>
<!-- Suppress evaluation-only warnings from the SDK for types we intentionally use -->
<NoWarn>$(NoWarn);GHCP001</NoWarn>
<!-- YamlDotNet 17.x is not fully trim/AOT-compatible; suppress the
assembly-level trim analysis warning so native publish succeeds. -->
<NoWarn>$(NoWarn);IL2104</NoWarn>
Expand Down Expand Up @@ -42,7 +44,7 @@
<PackageReference Include="Microsoft.ML.Tokenizers.Data.Cl100kBase" Version="2.0.0" />
<PackageReference Include="System.CommandLine" Version="2.0.7" />
<!-- external -->
<PackageReference Include="GitHub.Copilot.SDK" Version="0.3.0" />
<PackageReference Include="GitHub.Copilot.SDK" Version="1.0.0" />
<PackageReference Include="YamlDotNet" Version="18.0.0" />
<PackageReference Include="Vecc.YamlDotNet.Analyzers.StaticGenerator" Version="18.0.0" />

Expand Down
22 changes: 13 additions & 9 deletions eng/skill-validator/tests/Evaluate/RunnerTests.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using System.Diagnostics;
using System.Text.Json;
using GitHub.Copilot.SDK;
using GitHub.Copilot;
using GitHub.Copilot.Rpc;
using SkillValidator.Evaluate;
using SkillValidator.Shared;

Expand Down Expand Up @@ -213,25 +214,25 @@ public async Task SetsWorkingDirectoryToWorkDir()
public async Task SetsConfigDirToUniqueTempDirForSkillIsolation()
{
var config = await AgentRunner.BuildSessionConfig(MockSkill, null, "gpt-4.1", "C:\\tmp\\work");
Assert.NotEqual("C:\\tmp\\work", config.ConfigDir);
Assert.StartsWith(Path.GetTempPath(), config.ConfigDir);
Assert.True(Directory.Exists(config.ConfigDir));
Assert.NotEqual("C:\\tmp\\work", config.ConfigDirectory);
Assert.StartsWith(Path.GetTempPath(), config.ConfigDirectory);
Assert.True(Directory.Exists(config.ConfigDirectory));
}

[Fact]
public async Task SetsConfigDirToUniqueTempDirEvenWithoutSkill()
{
var config = await AgentRunner.BuildSessionConfig(null, null, "gpt-4.1", "C:\\tmp\\work");
Assert.NotEqual("C:\\tmp\\work", config.ConfigDir);
Assert.StartsWith(Path.GetTempPath(), config.ConfigDir);
Assert.NotEqual("C:\\tmp\\work", config.ConfigDirectory);
Assert.StartsWith(Path.GetTempPath(), config.ConfigDirectory);
}

[Fact]
public async Task EachCallGetsUniqueConfigDir()
{
var config1 = await AgentRunner.BuildSessionConfig(null, null, "gpt-4.1", "C:\\tmp\\work");
var config2 = await AgentRunner.BuildSessionConfig(null, null, "gpt-4.1", "C:\\tmp\\work");
Assert.NotEqual(config1.ConfigDir, config2.ConfigDir);
Assert.NotEqual(config1.ConfigDirectory, config2.ConfigDirectory);
}

[Fact]
Expand Down Expand Up @@ -356,7 +357,10 @@ public async Task DropsMcpCwd()
var config = await AgentRunner.BuildSessionConfig(MockSkill, null, "gpt-4.1", "C:\\tmp\\work", mcpServers);
Assert.NotNull(config.McpServers);
var entry = (McpStdioServerConfig)config.McpServers["ok"];
Assert.Null(entry.Cwd);
// Cwd property no longer exists in SDK 1.0.0 — custom cwd is not
// configurable, satisfying the security requirement that MCP servers
// cannot be pointed at attacker-chosen directories.
Assert.Equal("node", entry.Command);
}

[Fact]
Expand Down Expand Up @@ -451,7 +455,7 @@ public async Task PluginRootNullPreservesSkillDirectories()

public class ExtractPathFromToolArgsTests
{
private static PreToolUseHookInput MakeInput(object? toolArgs) =>
private static PreToolUseHookInput MakeInput(JsonElement? toolArgs) =>
new() { ToolArgs = toolArgs };

[Fact]
Expand Down
Loading