Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,5 @@ deps.zig
zig-cache
.zig-cache
zig-out
.zig-cache/
zig-pkg/
69 changes: 36 additions & 33 deletions build.zig
Original file line number Diff line number Diff line change
@@ -1,64 +1,67 @@
const std = @import("std");
const SpecTest = @import("test/spec.zig");

/// Build-script entry point. From what is visible here it registers the `yaml`
/// module (rooted at `src/lib.zig`), an example executable with a `run` step,
/// and a `test` step that aggregates the library tests, the end-to-end tests,
/// the comprehensive tests and, when `-Denable-spec-tests` is set, the
/// generated YAML Test Suite tests.
///
/// NOTE(review): this span is a rendered diff ("36 additions & 33 deletions")
/// in which removed and added lines appear interleaved without +/- markers, so
/// several statements occur in both their old and new form (for example the
/// two `addImport("yaml", ...)` lines for the end-to-end tests). Confirm
/// against the real file which lines are live.
pub fn build(b: *std.Build) void {
// Target and optimization mode come from the standard build options.
const target = b.standardTargetOptions(.{});
const optimize = b.standardOptimizeOption(.{});

// `-Dlog` build option; defaults to false when not given.
const enable_logging = b.option(bool, "log", "Whether to enable logging") orelse false;
_ = enable_logging;
// Public `yaml` module, imported below by the example and by each test module.
const yaml_module = b.addModule("yaml", .{
.root_source_file = b.path("src/lib.zig"),
});

// Library unit tests rooted at `src/lib.zig`.
const yaml_tests = b.addTest(.{
.root_source_file = b.path("src/lib.zig"),
.target = target,
.optimize = optimize,
});

// Example executable named "yaml".
const example = b.addExecutable(.{
.name = "yaml",
.root_source_file = b.path("examples/yaml.zig"),
.target = target,
.optimize = optimize,
.root_module = b.createModule(.{
.root_source_file = b.path("src/lib.zig"),
.target = target,
.optimize = optimize,
}),
});
example.root_module.addImport("yaml", yaml_module);

// Exposes `enable_logging` to the example as `build_options.enable_logging`.
const example_opts = b.addOptions();
example.root_module.addOptions("build_options", example_opts);
example_opts.addOption(bool, "enable_logging", enable_logging);

b.installArtifact(example);

// `zig build run -- <args>` runs the installed example and forwards any extra args.
const run_cmd = b.addRunArtifact(example);
run_cmd.step.dependOn(b.getInstallStep());
if (b.args) |args| {
run_cmd.addArgs(args);
}

const run_step = b.step("run", "Run example program parser");
run_step.dependOn(&run_cmd.step);

// `zig build test` runs every test artifact attached to `test_step` below.
const test_step = b.step("test", "Run library tests");
test_step.dependOn(&b.addRunArtifact(yaml_tests).step);

var e2e_tests = b.addTest(.{
const e2e_test_module = b.createModule(.{
.root_source_file = b.path("test/test.zig"),
.target = target,
.optimize = optimize,
});
e2e_tests.root_module.addImport("yaml", yaml_module);
e2e_test_module.addImport("yaml", yaml_module);

const e2e_tests = b.addTest(.{
.root_module = e2e_test_module,
});
test_step.dependOn(&b.addRunArtifact(e2e_tests).step);

// Comprehensive unit tests
const comprehensive_test_module = b.createModule(.{
.root_source_file = b.path("test/comprehensive_test.zig"),
.target = target,
.optimize = optimize,
});
comprehensive_test_module.addImport("yaml", yaml_module);

const comprehensive_tests = b.addTest(.{
.root_module = comprehensive_test_module,
});
test_step.dependOn(&b.addRunArtifact(comprehensive_tests).step);

// YAML Test Suite spec tests
// Opt-in via `-Denable-spec-tests`; the test root file is produced by `SpecTest.create(b)`.
const enable_spec_tests = b.option(bool, "enable-spec-tests", "Enable YAML Test Suite") orelse false;
if (enable_spec_tests) {
const gen = SpecTest.create(b);
var spec_tests = b.addTest(.{
.root_source_file = gen.path(),
const SpecTest = @import("test/spec.zig");
const spec_test = SpecTest.create(b);

const spec_test_module = b.createModule(.{
.root_source_file = spec_test.path(),
.target = target,
.optimize = optimize,
});
spec_tests.root_module.addImport("yaml", yaml_module);
spec_test_module.addImport("yaml", yaml_module);

const spec_tests = b.addTest(.{
.root_module = spec_test_module,
});
test_step.dependOn(&b.addRunArtifact(spec_tests).step);
}
}
177 changes: 155 additions & 22 deletions src/Parser.zig
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,11 @@ fn value(self: *Parser, gpa: Allocator) ParseError!Node.OptionalIndex {
self.token_it.seekBy(-1);
return self.listBracketed(gpa);
},
.flow_map_start => {
// map
self.token_it.seekBy(-1);
return self.mapBracketed(gpa);
},
else => return .none,
}
}
Expand Down Expand Up @@ -208,10 +213,6 @@ fn doc(self: *Parser, gpa: Allocator) ParseError!Node.Index {
// Parse footer
const node_end: Token.Index = footer: {
if (self.eatToken(.doc_end, &.{})) |pos| {
if (!is_explicit) {
self.token_it.seekBy(-1);
return self.fail(gpa, self.token_it.pos, "missing explicit document open marker '---'", .{});
}
if (self.getCol(pos) > 0) return error.MalformedYaml;
break :footer pos;
}
Expand Down Expand Up @@ -347,6 +348,88 @@ fn map(self: *Parser, gpa: Allocator) ParseError!Node.OptionalIndex {
return @as(Node.Index, @enumFromInt(node_index)).toOptional();
}

/// Parses a flow-style (curly-bracketed) mapping such as `{ a: b, c: d }`.
///
/// The token iterator must be positioned on the `.flow_map_start` token; on
/// success every token up to and including the closing `.flow_map_end` has
/// been consumed. Values are parsed through `self.value`, so nested
/// collections are handled by recursion.
///
/// Returns the index of the newly added node: `.map_single` when exactly one
/// entry was parsed, `.map_many` otherwise (this includes the empty map `{ }`,
/// stored with `map_len == 0`).
///
/// Errors: `error.UnexpectedEof` when the tokens run out before the map is
/// closed; a reported parse failure (via `self.fail`) when a key is not a
/// literal or the `:` separator is missing; any error propagated from
/// `self.value` or from allocation.
fn mapBracketed(self: *Parser, gpa: Allocator) ParseError!Node.OptionalIndex {
    // Reserve the node slot first so this map precedes its children in `nodes`.
    const node_index = try self.nodes.addOne(gpa);
    const node_start = self.token_it.pos;

    // Entries are collected here and copied into `nodes`/`extra` once the
    // closing bracket has been seen.
    var entries: std.ArrayListUnmanaged(Map.Entry) = .empty;
    defer entries.deinit(gpa);

    log.debug("(map) begin {s}@{d}", .{ @tagName(self.token(node_start).id), node_start });

    _ = try self.expectToken(.flow_map_start, &.{});

    const node_end: Token.Index = while (true) {
        self.eatCommentsAndSpace(&.{.comment});

        // Closing bracket ends the map; this also covers the empty map.
        if (self.eatToken(.flow_map_end, &.{.comment})) |pos|
            break pos;

        // Entry separator. The result is discarded, so the comma is optional
        // here: a leading or missing comma is tolerated.
        _ = self.eatToken(.comma, &.{.comment});

        self.eatCommentsAndSpace(&.{.comment});

        // Parse key
        const key_pos = self.token_it.pos;
        const key = self.token_it.next() orelse return error.UnexpectedEof;
        switch (key.id) {
            .literal => {},
            // Report at `key_pos`: `next()` has already advanced the iterator
            // past the offending token, so `self.token_it.pos` would point at
            // the following token (or past the last one).
            else => return self.fail(gpa, key_pos, "unexpected token for 'key': {}", .{key}),
        }

        log.debug("(map) key {s}@{d}", .{ self.rawString(key_pos, key_pos), key_pos });

        // Separator
        _ = self.expectToken(.map_value_ind, &.{ .new_line, .comment }) catch
            return self.fail(gpa, self.token_it.pos, "expected map separator ':'", .{});

        // Parse value
        const value_index = try self.value(gpa);

        try entries.append(gpa, .{
            .key = key_pos,
            .maybe_node = value_index,
        });
    };

    log.debug("(map) end {s}@{d}", .{ @tagName(self.token(node_end).id), node_end });

    const scope: Node.Scope = .{
        .start = node_start,
        .end = node_end,
    };

    if (entries.items.len == 1) {
        // A single entry is stored inline in the node.
        const entry = entries.items[0];

        self.nodes.set(node_index, .{
            .tag = .map_single,
            .scope = scope,
            .data = .{ .map = .{
                .key = entry.key,
                .maybe_node = entry.maybe_node,
            } },
        });
    } else {
        // Zero or several entries go to `extra`: one `Map` header followed by
        // the entries (two fields each, hence `len * 2 + 1` slots).
        try self.extra.ensureUnusedCapacity(gpa, entries.items.len * 2 + 1);
        const extra_index: u32 = @intCast(self.extra.items.len);

        _ = self.addExtraAssumeCapacity(Map{ .map_len = @intCast(entries.items.len) });

        for (entries.items) |entry| {
            _ = self.addExtraAssumeCapacity(entry);
        }

        self.nodes.set(node_index, .{
            .tag = .map_many,
            .scope = scope,
            .data = .{ .extra = @enumFromInt(extra_index) },
        });
    }

    return @as(Node.Index, @enumFromInt(node_index)).toOptional();
}

fn list(self: *Parser, gpa: Allocator) ParseError!Node.OptionalIndex {
const node_index: Node.Index = @enumFromInt(try self.nodes.addOne(gpa));
const node_start = self.token_it.pos;
Expand Down Expand Up @@ -660,6 +743,8 @@ fn parseDoubleQuoted(self: *Parser, gpa: Allocator, raw: []const u8) ParseError!
assert(raw[0] == '"' and raw[raw.len - 1] == '"');
const raw_no_quotes = raw[1 .. raw.len - 1];

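// Reserve one byte of output per byte of input up front. This is not a strict
// upper bound: `\L` and `\P` are 2 input bytes but encode to 3 UTF-8 bytes, so
// the escape branch below reserves again before appending a multi-byte sequence.
// NOTE(review): plain bytes appended afterwards with appendAssumeCapacity rely on
// this initial reservation still being sufficient — confirm.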
try self.string_bytes.ensureUnusedCapacity(gpa, raw_no_quotes.len);
var string: String = .{
.index = @enumFromInt(@as(u32, @intCast(self.string_bytes.items.len))),
Expand All @@ -685,23 +770,71 @@ fn parseDoubleQuoted(self: *Parser, gpa: Allocator, raw: []const u8) ParseError!
string.len += 1;
},
},
.escape => switch (c) {
'n' => {
state = .start;
self.string_bytes.appendAssumeCapacity('\n');
string.len += 1;
},
't' => {
state = .start;
self.string_bytes.appendAssumeCapacity('\t');
.escape => {
state = .start;
// YAML 1.2 escape sequences (spec section 5.7)
const replacement: ?u8 = switch (c) {
'0' => 0x00, // null
'a' => 0x07, // bell
'b' => 0x08, // backspace
't', 0x09 => 0x09, // tab
'n' => 0x0A, // line feed
'v' => 0x0B, // vertical tab
'f' => 0x0C, // form feed
'r' => 0x0D, // carriage return
'e' => 0x1B, // escape
' ' => 0x20, // space
'"' => '"',
'/' => '/',
'\\' => '\\',
'N' => null, // next line (U+0085) - handle as unicode below
'_' => null, // non-breaking space (U+00A0) - handle as unicode below
'L' => null, // line separator (U+2028) - handle as unicode below
'P' => null, // paragraph separator (U+2029) - handle as unicode below
'x' => null, // \xNN
'u' => null, // \uNNNN
'U' => null, // \UNNNNNNNN
else => return error.InvalidEscapeSequence,
};

if (replacement) |byte| {
self.string_bytes.appendAssumeCapacity(byte);
string.len += 1;
},
'"' => {
state = .start;
self.string_bytes.appendAssumeCapacity('"');
string.len += 1;
},
else => return error.InvalidEscapeSequence,
} else {
// Unicode escapes and special Unicode chars
const codepoint: u21 = switch (c) {
'N' => 0x0085, // next line
'_' => 0x00A0, // non-breaking space
'L' => 0x2028, // line separator
'P' => 0x2029, // paragraph separator
'x' => blk: {
if (index + 2 >= raw_no_quotes.len) return error.InvalidEscapeSequence;
const hex = raw_no_quotes[index + 1 ..][0..2];
index += 2;
break :blk std.fmt.parseInt(u21, hex, 16) catch return error.InvalidEscapeSequence;
},
'u' => blk: {
if (index + 4 >= raw_no_quotes.len) return error.InvalidEscapeSequence;
const hex = raw_no_quotes[index + 1 ..][0..4];
index += 4;
break :blk std.fmt.parseInt(u21, hex, 16) catch return error.InvalidEscapeSequence;
},
'U' => blk: {
if (index + 8 >= raw_no_quotes.len) return error.InvalidEscapeSequence;
const hex = raw_no_quotes[index + 1 ..][0..8];
index += 8;
break :blk std.fmt.parseInt(u21, hex, 16) catch return error.InvalidEscapeSequence;
},
else => unreachable,
};
// Encode as UTF-8
var buf: [4]u8 = undefined;
const utf8_len = std.unicode.utf8Encode(codepoint, &buf) catch return error.InvalidEscapeSequence;
// May need more capacity for multi-byte sequences
try self.string_bytes.ensureUnusedCapacity(gpa, utf8_len);
self.string_bytes.appendSliceAssumeCapacity(buf[0..utf8_len]);
string.len += @intCast(utf8_len);
}
},
}
}
Expand Down Expand Up @@ -762,11 +895,11 @@ fn getLineInfo(source: []const u8, line_col: LineCol) struct {
};

const span_start: u32 = span_start: {
const trimmed = mem.trimLeft(u8, line, " ");
const trimmed = mem.trimStart(u8, line, " ");
break :span_start @intCast(mem.indexOf(u8, line, trimmed).?);
};

const span_end: u32 = @intCast(mem.trimRight(u8, line, " \r\n").len);
const span_end: u32 = @intCast(mem.trimEnd(u8, line, " \r\n").len);

return .{
.line = line,
Expand Down
26 changes: 17 additions & 9 deletions src/Parser/test.zig
Original file line number Diff line number Diff line change
Expand Up @@ -628,9 +628,10 @@ fn parseError2(source: []const u8, comptime format: []const u8, args: anytype) !
defer bundle.deinit(testing.allocator);
try testing.expect(bundle.errorMessageCount() > 0);

var given: std.ArrayListUnmanaged(u8) = .empty;
var aw: std.Io.Writer.Allocating = .init(testing.allocator);
try bundle.renderToWriter(.{}, &aw.writer);
var given = aw.toArrayList();
defer given.deinit(testing.allocator);
try bundle.renderToWriter(.{ .ttyconf = .no_color }, given.writer(testing.allocator));

const expected = try std.fmt.allocPrint(testing.allocator, format, args);
defer testing.allocator.free(expected);
Expand Down Expand Up @@ -666,17 +667,12 @@ test "correct doc start with tag" {
}

test "doc close without explicit doc open" {
try parseError2(
try parseSuccess(
\\
\\
\\# something cool
\\...
,
\\(memory):4:1: error: missing explicit document open marker '---'
\\...
\\^~~
\\
, .{});
);
}

test "doc open and close are ok" {
Expand Down Expand Up @@ -857,6 +853,18 @@ test "weirdly nested map of maps of lists" {
);
}

test "curly brackets denote a flow map" {
    // Single-line flow mapping with two comma-separated entries.
    const source =
        \\{ a: b, c: d }
    ;
    try parseSuccess(source);
}

test "empty flow map" {
    // Opening and closing bracket with no entries in between.
    const source =
        \\{ }
    ;
    try parseSuccess(source);
}

test "square brackets denote a list" {
try parseSuccess(
\\[ a,
Expand Down
Loading