Skip to content

Commit e791d56

Browse files
authored
Merge pull request #8414 from roc-lang/propagate_is_eq
Fix constraints for `==` and `!=`
2 parents 4b811d2 + 4b0ada1 commit e791d56

File tree

73 files changed

+2952
-1139
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

73 files changed

+2952
-1139
lines changed

src/PROFILING/bench_repeated_check.roc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Minimal test file - original benchmark temporarily disabled
22
# See bench_repeated_check_ORIGINAL.roc for the full version
3-
# TODO: Re-enable once `is_ne` is supported for all numeric types
3+
# TODO: Re-enable once `!=` (which desugars to is_eq().not()) is fully working
44

55
app [main] { pf: platform "../../test/str/platform/main.roc" }
66

src/base/Ident.zig

Lines changed: 180 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
//! in constant time. Storing IDs in each IR instead of strings also uses less memory in the IRs.
77

88
const std = @import("std");
9+
const builtin = @import("builtin");
910
const serialization = @import("serialization");
1011
const collections = @import("collections");
1112

@@ -16,6 +17,10 @@ const CompactWriter = collections.CompactWriter;
1617

1718
const Ident = @This();
1819

20+
/// Whether to enable debug store tracking. This adds runtime checks to verify
21+
/// that Idx values are only looked up in the store that created them.
22+
const enable_store_tracking = builtin.mode == .Debug;
23+
1924
/// Method name for parsing integers from digit lists - used by numeric literal type checking
2025
pub const FROM_INT_DIGITS_METHOD_NAME = "from_int_digits";
2126
/// Method name for parsing decimals from digit lists - used by numeric literal type checking
@@ -105,12 +110,159 @@ pub const Attributes = packed struct(u3) {
105110
}
106111
};
107112

113+
/// Debug-only info for store provenance tracking.
114+
const StoreDebugInfo = struct {
115+
store_id: []const u8,
116+
known_idxs: std.AutoHashMapUnmanaged(u32, void),
117+
};
118+
119+
/// Global counter for generating unique store IDs.
120+
/// This counter survives struct copies because the ID is stored in the Store struct itself.
121+
/// Using u32 for cross-platform compatibility (wasm32 doesn't support 64-bit atomics).
122+
var debug_store_id_counter: if (enable_store_tracking) std.atomic.Value(u32) else void =
123+
if (enable_store_tracking) std.atomic.Value(u32).init(1) else {};
124+
125+
/// Global map from Store's unique debug_id to debug info.
126+
/// Protected by a mutex for thread safety.
127+
var debug_store_map: if (enable_store_tracking) std.AutoHashMapUnmanaged(u32, StoreDebugInfo) else void = if (enable_store_tracking) .{} else {};
128+
129+
/// Mutex protecting the debug_store_map.
130+
var debug_store_mutex: if (enable_store_tracking) std.Thread.Mutex else void = if (enable_store_tracking) .{} else {};
131+
108132
/// An interner for identifier names.
109133
pub const Store = struct {
110134
interner: SmallStringInterner,
111135
attributes: collections.SafeList(Attributes) = .{},
112136
next_unique_name: u32 = 0,
113137

138+
/// Debug-only: unique ID for this store instance.
139+
/// This ID is assigned on first insert and survives struct copies.
140+
/// 0 means unassigned.
141+
debug_id: if (enable_store_tracking) u32 else void = if (enable_store_tracking) 0 else {},
142+
143+
/// Debug-only: get or assign a unique ID for this store.
144+
fn getOrAssignDebugId(self: *Store, src: std.builtin.SourceLocation) u32 {
145+
if (enable_store_tracking) {
146+
if (self.debug_id == 0) {
147+
// If this store already has idents (e.g., deserialized), we can't
148+
// fully track it because existing idents weren't registered.
149+
// Keep debug_id at 0 to skip verification for this store.
150+
if (self.interner.entry_count > 0) {
151+
return 0;
152+
}
153+
154+
// Assign a new unique ID
155+
self.debug_id = debug_store_id_counter.fetchAdd(1, .monotonic);
156+
157+
// Register in the global map with source location info
158+
const store_id = std.fmt.allocPrint(std.heap.page_allocator, "{s}:{d}:{d}", .{
159+
src.file,
160+
src.line,
161+
src.column,
162+
}) catch "unknown";
163+
164+
debug_store_map.put(std.heap.page_allocator, self.debug_id, .{
165+
.store_id = store_id,
166+
.known_idxs = .{},
167+
}) catch {};
168+
}
169+
return self.debug_id;
170+
} else {
171+
return 0;
172+
}
173+
}
174+
175+
/// Debug-only: unregister this store from the global debug map.
176+
fn unregisterFromTracking(self: *Store) void {
177+
if (enable_store_tracking) {
178+
if (self.debug_id == 0) return; // Never registered
179+
180+
debug_store_mutex.lock();
181+
defer debug_store_mutex.unlock();
182+
183+
if (debug_store_map.fetchRemove(self.debug_id)) |entry| {
184+
// Free the heap-allocated store_id (if it's not the static "unknown" string)
185+
if (entry.value.store_id.ptr != @as([*]const u8, "unknown".ptr)) {
186+
std.heap.page_allocator.free(entry.value.store_id);
187+
}
188+
// Copy the known_idxs to make it mutable for deinit
189+
var known_idxs = entry.value.known_idxs;
190+
known_idxs.deinit(std.heap.page_allocator);
191+
}
192+
}
193+
}
194+
195+
/// Debug-only: track an Idx as belonging to this store.
196+
fn trackIdx(self: *Store, idx: Idx, src: std.builtin.SourceLocation) void {
197+
if (enable_store_tracking) {
198+
debug_store_mutex.lock();
199+
defer debug_store_mutex.unlock();
200+
201+
const debug_id = self.getOrAssignDebugId(src);
202+
if (debug_store_map.getPtr(debug_id)) |info| {
203+
// We don't fail on OOM in debug tracking - just skip tracking
204+
info.known_idxs.put(std.heap.page_allocator, @bitCast(idx), {}) catch {};
205+
}
206+
}
207+
}
208+
209+
/// Debug-only: verify an Idx belongs to this store.
210+
fn verifyIdx(self: *const Store, idx: Idx) void {
211+
if (enable_store_tracking) {
212+
if (self.debug_id == 0) {
213+
// Store was never registered (e.g., deserialized store).
214+
// Skip verification.
215+
return;
216+
}
217+
218+
debug_store_mutex.lock();
219+
defer debug_store_mutex.unlock();
220+
221+
const info = debug_store_map.get(self.debug_id) orelse {
222+
// Store not in map (shouldn't happen if debug_id != 0)
223+
return;
224+
};
225+
226+
const idx_bits: u32 = @bitCast(idx);
227+
if (!info.known_idxs.contains(idx_bits)) {
228+
std.debug.panic(
229+
"Ident.Idx lookup in wrong store: Idx {d} (0x{x}) not found in store '{s}' (debug_id={d}). " ++
230+
"This Idx was created by a different store.",
231+
.{ idx.idx, idx_bits, info.store_id, self.debug_id },
232+
);
233+
}
234+
}
235+
}
236+
237+
/// Check if an Idx was created by this store.
238+
/// In debug builds with store tracking enabled, this checks the known_idxs set.
239+
/// In release builds or when tracking is disabled, this returns true (assumes valid).
240+
/// Use this to determine which store to use for lookups when idents may come from
241+
/// multiple sources (e.g., during type unification with builtins).
242+
pub fn containsIdx(self: *const Store, idx: Idx) bool {
243+
if (enable_store_tracking) {
244+
if (self.debug_id == 0) {
245+
// Store was never registered (e.g., deserialized store).
246+
// Can't verify, assume true.
247+
return true;
248+
}
249+
250+
debug_store_mutex.lock();
251+
defer debug_store_mutex.unlock();
252+
253+
const info = debug_store_map.get(self.debug_id) orelse {
254+
// Store not in map
255+
return true;
256+
};
257+
258+
const idx_bits: u32 = @bitCast(idx);
259+
return info.known_idxs.contains(idx_bits);
260+
} else {
261+
// No tracking, can't determine - assume true
262+
return true;
263+
}
264+
}
265+
114266
/// Serialized representation of an Ident.Store
115267
/// Uses extern struct to guarantee consistent field layout across optimization levels.
116268
pub const Serialized = extern struct {
@@ -142,11 +294,14 @@ pub const Store = struct {
142294
.next_unique_name = self.next_unique_name,
143295
};
144296

297+
// Note: We don't register deserialized stores for debug tracking.
298+
// This is fine because the debug tracking is meant to catch bugs during fresh compilation.
299+
145300
return store;
146301
}
147302
};
148303

149-
/// Initialize the memory for an `Ident.Store` with a specific capaicty.
304+
/// Initialize the memory for an `Ident.Store` with a specific capacity.
150305
pub fn initCapacity(gpa: std.mem.Allocator, capacity: usize) std.mem.Allocator.Error!Store {
151306
return .{
152307
.interner = try SmallStringInterner.initCapacity(gpa, capacity),
@@ -157,12 +312,30 @@ pub const Store = struct {
157312
pub fn deinit(self: *Store, gpa: std.mem.Allocator) void {
158313
self.interner.deinit(gpa);
159314
self.attributes.deinit(gpa);
315+
self.unregisterFromTracking();
160316
}
161317

162318
/// Insert a new identifier into the store.
163319
pub fn insert(self: *Store, gpa: std.mem.Allocator, ident: Ident) std.mem.Allocator.Error!Idx {
164320
const idx = try self.interner.insert(gpa, ident.raw_text);
165321

322+
const result = Idx{
323+
.attributes = ident.attributes,
324+
.idx = @as(u29, @intCast(@intFromEnum(idx))),
325+
};
326+
327+
self.trackIdx(result, @src());
328+
329+
return result;
330+
}
331+
332+
/// Look up an identifier in the store without inserting.
333+
/// Returns the index if found, null if not found.
334+
/// Unlike insert, this never modifies the store (no resize, no insertion).
335+
/// Useful for deserialized stores that cannot be grown.
336+
pub fn lookup(self: *const Store, ident: Ident) ?Idx {
337+
const idx = self.interner.lookup(ident.raw_text) orelse return null;
338+
166339
return Idx{
167340
.attributes = ident.attributes,
168341
.idx = @as(u29, @intCast(@intFromEnum(idx))),
@@ -211,14 +384,19 @@ pub const Store = struct {
211384

212385
_ = try self.attributes.append(gpa, attributes);
213386

214-
return Idx{
387+
const result = Idx{
215388
.attributes = attributes,
216389
.idx = @truncate(@intFromEnum(idx)),
217390
};
391+
392+
self.trackIdx(result, @src());
393+
394+
return result;
218395
}
219396

220397
/// Get the text for an identifier.
221398
pub fn getText(self: *const Store, idx: Idx) []u8 {
399+
self.verifyIdx(idx);
222400
return self.interner.getText(@enumFromInt(@as(u32, idx.idx)));
223401
}
224402

src/base/SmallStringInterner.zig

Lines changed: 54 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ bytes: collections.SafeList(u8) = .{},
2525
hash_table: collections.SafeList(Idx) = .{},
2626
/// The current number of entries in the hash table.
2727
entry_count: u32 = 0,
28+
/// Debug-only flag to catch invalid inserts into deserialized interners.
29+
/// Deserialized interners should never have inserts - if they do, it's a bug.
30+
supports_inserts: if (std.debug.runtime_safety) bool else void = if (std.debug.runtime_safety) true else {},
2831

2932
/// A unique index for a deduped string in this interner.
3033
pub const Idx = enum(u32) {
@@ -65,7 +68,13 @@ pub fn initCapacity(gpa: std.mem.Allocator, capacity: usize) std.mem.Allocator.E
6568

6669
/// Free all memory consumed by this interner.
6770
/// Will invalidate all slices referencing the interner.
71+
/// NOTE: Do NOT call deinit on deserialized interners - their memory is owned by the deserialization buffer.
6872
pub fn deinit(self: *SmallStringInterner, gpa: std.mem.Allocator) void {
73+
if (std.debug.runtime_safety) {
74+
if (!self.supports_inserts) {
75+
@panic("deinit called on deserialized interner - memory is owned by deserialization buffer");
76+
}
77+
}
6978
self.bytes.deinit(gpa);
7079
self.hash_table.deinit(gpa);
7180
}
@@ -131,30 +140,46 @@ fn resizeHashTable(self: *SmallStringInterner, gpa: std.mem.Allocator) std.mem.A
131140

132141
/// Add a string to this interner, returning a unique, serial index.
133142
pub fn insert(self: *SmallStringInterner, gpa: std.mem.Allocator, string: []const u8) std.mem.Allocator.Error!Idx {
134-
// Check if we need to resize the hash table (when 80% full = entry_count * 5 >= hash_table.len() * 4)
135-
if (self.entry_count * 5 >= self.hash_table.len() * 4) {
136-
try self.resizeHashTable(gpa);
137-
}
138-
139-
// Find the string or the slot where it should be inserted
143+
// First check if string exists
140144
const result = self.findStringOrSlot(string);
141145

142146
if (result.idx) |existing_idx| {
143147
// String already exists
144148
return existing_idx;
145-
} else {
146-
// String doesn't exist, add it to bytes
147-
const new_offset: Idx = @enumFromInt(self.bytes.len());
148-
149-
_ = try self.bytes.appendSlice(gpa, string);
150-
_ = try self.bytes.append(gpa, 0);
149+
}
151150

152-
// Add to hash table
153-
self.hash_table.items.items[@intCast(result.slot)] = new_offset;
154-
self.entry_count += 1;
151+
// Debug assertion: deserialized interners should never need new inserts.
152+
// If this fires, it's a bug - all idents should already exist in the interner.
153+
if (std.debug.runtime_safety) {
154+
if (!self.supports_inserts) {
155+
@panic("insert called on deserialized interner - this is a bug, ident should already exist");
156+
}
157+
}
155158

156-
return new_offset;
159+
// Check if resize needed.
160+
if (self.entry_count * 5 >= self.hash_table.len() * 4) {
161+
try self.resizeHashTable(gpa);
162+
// After resize, need to find the slot again
163+
const new_result = self.findStringOrSlot(string);
164+
return self.insertAt(gpa, string, new_result.slot);
157165
}
166+
167+
// No resize needed, insert at the found slot
168+
return self.insertAt(gpa, string, result.slot);
169+
}
170+
171+
/// Insert a string at a specific slot (internal helper).
172+
fn insertAt(self: *SmallStringInterner, gpa: std.mem.Allocator, string: []const u8, slot: u64) std.mem.Allocator.Error!Idx {
173+
const new_offset: Idx = @enumFromInt(self.bytes.len());
174+
175+
_ = try self.bytes.appendSlice(gpa, string);
176+
_ = try self.bytes.append(gpa, 0);
177+
178+
// Add to hash table
179+
self.hash_table.items.items[@intCast(slot)] = new_offset;
180+
self.entry_count += 1;
181+
182+
return new_offset;
158183
}
159184

160185
/// Check if a string is already interned in this interner, used for generating unique names.
@@ -163,6 +188,14 @@ pub fn contains(self: *const SmallStringInterner, string: []const u8) bool {
163188
return result.idx != null;
164189
}
165190

191+
/// Look up a string in this interner and return its index if found.
192+
/// Unlike insert, this never modifies the interner (no resize, no insertion).
193+
/// Useful for deserialized interners that cannot be grown.
194+
pub fn lookup(self: *const SmallStringInterner, string: []const u8) ?Idx {
195+
const result = self.findStringOrSlot(string);
196+
return result.idx;
197+
}
198+
166199
/// Get a reference to the text for an interned string.
167200
pub fn getText(self: *const SmallStringInterner, idx: Idx) []u8 {
168201
const bytes_slice = self.bytes.items.items;
@@ -209,6 +242,8 @@ pub const Serialized = extern struct {
209242
bytes: collections.SafeList(u8).Serialized,
210243
hash_table: collections.SafeList(Idx).Serialized,
211244
entry_count: u32,
245+
/// Padding to maintain struct alignment
246+
_padding: u32 = 0,
212247

213248
/// Serialize a SmallStringInterner into this Serialized struct, appending data to the writer
214249
pub fn serialize(
@@ -223,6 +258,7 @@ pub const Serialized = extern struct {
223258
try self.hash_table.serialize(&interner.hash_table, allocator, writer);
224259
// Copy simple values directly
225260
self.entry_count = interner.entry_count;
261+
self._padding = 0;
226262
}
227263

228264
/// Deserialize this Serialized struct into a SmallStringInterner
@@ -234,6 +270,8 @@ pub const Serialized = extern struct {
234270
.bytes = self.bytes.deserialize(offset).*,
235271
.hash_table = self.hash_table.deserialize(offset).*,
236272
.entry_count = self.entry_count,
273+
// Debug-only: mark as not supporting inserts - deserialized interners should never need new idents
274+
.supports_inserts = if (std.debug.runtime_safety) false else {},
237275
};
238276

239277
return interner;

0 commit comments

Comments
 (0)