From 19d57d6dc6a52f5248e779544f10e13609068540 Mon Sep 17 00:00:00 2001 From: Ryan Hitchman Date: Sat, 25 Oct 2025 21:25:19 -0600 Subject: [PATCH 1/8] add slope-refinement based occlusion culling to cull ~25% more chunks --- .../client/render/chunk/RenderSection.java | 73 +++++++++++++++++++ .../chunk/occlusion/OcclusionCuller.java | 56 +++++++------- 2 files changed, 102 insertions(+), 27 deletions(-) diff --git a/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/RenderSection.java b/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/RenderSection.java index c16c371249..e83b2257bc 100644 --- a/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/RenderSection.java +++ b/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/RenderSection.java @@ -14,6 +14,7 @@ import net.minecraft.world.level.block.entity.BlockEntity; import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; +import org.joml.Vector3f; /** * The render state object for a chunk section. This contains all the graphics state for each render pass along with @@ -42,6 +43,13 @@ public class RenderSection { adjacentWest, adjacentEast; + private final static boolean DISABLE_SLOPES = false; + private long lastOriginPos = -1; + final private Vector3f + baseMinSlope = new Vector3f(), + baseMaxSlope = new Vector3f(), + curMinSlope = new Vector3f(), + curMaxSlope = new Vector3f(); // Rendering State private boolean built = false; // merge with the flags? 
@@ -313,6 +321,71 @@ public void setIncomingDirections(int directions) { this.incomingDirections = directions; } + private static float slope(float rise, float run) { + if (run <= 0) { + return Float.POSITIVE_INFINITY; + } + return Math.max(0, rise) / run; + } + + private void updateBaseSlopes(SectionPos origin) { + var originPos = origin.asLong(); + if (originPos == this.lastOriginPos) { + return; + } + lastOriginPos = originPos; + var dx = Math.abs(origin.x() - this.getChunkX()); + var dy = Math.abs(origin.y() - this.getChunkY()); + var dz = Math.abs(origin.z() - this.getChunkZ()); + + this.baseMinSlope.set( + slope(dy - 1, dx+1), // xy plane + slope(dz - 1, dx+1), // xz + slope(dz - 1, dy+1)); // yz + this.baseMaxSlope.set( + slope(dy+1, dx-1), // xy plane + slope(dz+1, dx-1), // xz + slope(dz+1, dy-1)); // yz + } + + public boolean intersectSlopes(SectionPos origin, RenderSection other, int frame) { + // Slope refinement tracking is based on the idea that by passing through a given cell, + // the minimum and maximum angle of any cell that can be visited afterward is constrained. + // A 2D visualization of this is here: https://mod.ifies.com/f/251025_raycast_vis_v4.html + // We perform the same thing across the XY, XZ, and YZ planes separately to avoid the more + // complex 3D frustum tracking math. It misses some things that could be pruned, but is quite fast. 
+ if (DISABLE_SLOPES) + return true; + if (this.lastVisibleFrame != frame) { + this.updateBaseSlopes(origin); + this.curMinSlope.set(Float.POSITIVE_INFINITY); + this.curMaxSlope.set(0); + } + // maybe I should have a temp object for garbage + float propMinX = Math.max(other.curMinSlope.x, this.baseMinSlope.x); + float propMaxX = Math.min(other.curMaxSlope.x, this.baseMaxSlope.x); + float propMinY = Math.max(other.curMinSlope.y, this.baseMinSlope.y); + float propMaxY = Math.min(other.curMaxSlope.y, this.baseMaxSlope.y); + float propMinZ = Math.max(other.curMinSlope.z, this.baseMinSlope.z); + float propMaxZ = Math.min(other.curMaxSlope.z, this.baseMaxSlope.z); + if (propMinX >= propMaxX || propMinY >= propMaxY || propMinZ >= propMaxZ) { + return false; + } + this.curMinSlope.x = Math.min(this.curMinSlope.x, propMinX); + this.curMinSlope.y = Math.min(this.curMinSlope.y, propMinY); + this.curMinSlope.z = Math.min(this.curMinSlope.z, propMinZ); + + this.curMaxSlope.x = Math.max(this.curMaxSlope.x, propMaxX); + this.curMaxSlope.y = Math.max(this.curMaxSlope.y, propMaxY); + this.curMaxSlope.z = Math.max(this.curMaxSlope.z, propMaxZ); + return true; + } + + public void setOriginSlopes() { + this.curMinSlope.set(0); + this.curMaxSlope.set(Float.POSITIVE_INFINITY); + } + /** * Returns a bitfield containing the {@link RenderSectionFlags} for this built section. 
*/ diff --git a/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/occlusion/OcclusionCuller.java b/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/occlusion/OcclusionCuller.java index cd9b5cb661..2cbc1ca67f 100644 --- a/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/occlusion/OcclusionCuller.java +++ b/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/occlusion/OcclusionCuller.java @@ -51,6 +51,7 @@ private static void processQueue(RenderSectionVisitor visitor, WriteQueue writeQueue) { RenderSection section; + SectionPos origin = viewport.getChunkCoord(); while ((section = readQueue.dequeue()) != null) { if (!isSectionVisible(section, viewport, searchDistance)) { @@ -80,10 +81,10 @@ private static void processQueue(RenderSectionVisitor visitor, // We can only traverse *outwards* from the center of the graph search, so mask off any invalid // directions. - connections &= getOutwardDirections(viewport.getChunkCoord(), section); + connections &= getOutwardDirections(origin, section); } - visitNeighbors(writeQueue, section, connections, frame); + visitNeighbors(writeQueue, origin, section, connections, frame); } } @@ -115,7 +116,7 @@ private static boolean isSectionVisible(RenderSection section, Viewport viewport return isWithinRenderDistance(viewport.getTransform(), section, maxDistance) && isWithinFrustum(viewport, section); } - private static void visitNeighbors(final WriteQueue queue, RenderSection section, int outgoing, int frame) { + private static void visitNeighbors(final WriteQueue queue, SectionPos origin, RenderSection section, int outgoing, int frame) { // Only traverse into neighbors which are actually present. // This avoids a null-check on each invocation to enqueue, and since the compiler will see that a null // is never encountered (after profiling), it will optimize it away. 
@@ -129,32 +130,32 @@ private static void visitNeighbors(final WriteQueue queue, Render // This helps the compiler move the checks for some invariants upwards. queue.ensureCapacity(6); - if (GraphDirectionSet.contains(outgoing, GraphDirection.DOWN)) { - visitNode(queue, section.adjacentDown, GraphDirectionSet.of(GraphDirection.UP), frame); + if (GraphDirectionSet.contains(outgoing, GraphDirection.DOWN) && section.adjacentDown.intersectSlopes(origin, section, frame)) { + visitNode(queue, origin, section.adjacentDown, GraphDirectionSet.of(GraphDirection.UP), frame); } - if (GraphDirectionSet.contains(outgoing, GraphDirection.UP)) { - visitNode(queue, section.adjacentUp, GraphDirectionSet.of(GraphDirection.DOWN), frame); + if (GraphDirectionSet.contains(outgoing, GraphDirection.UP) && section.adjacentUp.intersectSlopes(origin, section, frame)) { + visitNode(queue, origin, section.adjacentUp, GraphDirectionSet.of(GraphDirection.DOWN), frame); } - if (GraphDirectionSet.contains(outgoing, GraphDirection.NORTH)) { - visitNode(queue, section.adjacentNorth, GraphDirectionSet.of(GraphDirection.SOUTH), frame); + if (GraphDirectionSet.contains(outgoing, GraphDirection.NORTH) && section.adjacentNorth.intersectSlopes(origin, section, frame)) { + visitNode(queue, origin, section.adjacentNorth, GraphDirectionSet.of(GraphDirection.SOUTH), frame); } - if (GraphDirectionSet.contains(outgoing, GraphDirection.SOUTH)) { - visitNode(queue, section.adjacentSouth, GraphDirectionSet.of(GraphDirection.NORTH), frame); + if (GraphDirectionSet.contains(outgoing, GraphDirection.SOUTH) && section.adjacentSouth.intersectSlopes(origin, section, frame)) { + visitNode(queue, origin, section.adjacentSouth, GraphDirectionSet.of(GraphDirection.NORTH), frame); } - if (GraphDirectionSet.contains(outgoing, GraphDirection.WEST)) { - visitNode(queue, section.adjacentWest, GraphDirectionSet.of(GraphDirection.EAST), frame); + if (GraphDirectionSet.contains(outgoing, GraphDirection.WEST) && 
section.adjacentWest.intersectSlopes(origin, section, frame)) { + visitNode(queue, origin, section.adjacentWest, GraphDirectionSet.of(GraphDirection.EAST), frame); } - if (GraphDirectionSet.contains(outgoing, GraphDirection.EAST)) { - visitNode(queue, section.adjacentEast, GraphDirectionSet.of(GraphDirection.WEST), frame); + if (GraphDirectionSet.contains(outgoing, GraphDirection.EAST) && section.adjacentEast.intersectSlopes(origin, section, frame)) { + visitNode(queue, origin, section.adjacentEast, GraphDirectionSet.of(GraphDirection.WEST), frame); } } - private static void visitNode(final WriteQueue queue, @NotNull RenderSection render, int incoming, int frame) { + private static void visitNode(final WriteQueue queue, @NotNull SectionPos origin, @NotNull RenderSection render, int incoming, int frame) { if (render.getLastVisibleFrame() != frame) { // This is the first time we are visiting this section during the given frame, so we must // reset the state. @@ -291,6 +292,7 @@ private void initWithinWorld(RenderSectionVisitor visitor, WriteQueue queue, var radius = Mth.floor(searchDistance / 16.0f); // Layer 0 - this.tryVisitNode(queue, origin.getX(), height, origin.getZ(), direction, frame, viewport); + this.tryVisitNode(queue, origin, origin.getX(), height, origin.getZ(), direction, frame, viewport); // Complete layers, excluding layer 0 for (int layer = 1; layer <= radius; layer++) { for (int z = -layer; z < layer; z++) { int x = Math.abs(z) - layer; - this.tryVisitNode(queue, origin.getX() + x, height, origin.getZ() + z, direction, frame, viewport); + this.tryVisitNode(queue, origin, origin.getX() + x, height, origin.getZ() + z, direction, frame, viewport); } for (int z = layer; z > -layer; z--) { int x = layer - Math.abs(z); - this.tryVisitNode(queue, origin.getX() + x, height, origin.getZ() + z, direction, frame, viewport); + this.tryVisitNode(queue, origin, origin.getX() + x, height, origin.getZ() + z, direction, frame, viewport); } } @@ -345,34 +347,34 @@ 
private void initOutsideWorldHeight(WriteQueue queue, for (int z = -radius; z <= -l; z++) { int x = -z - layer; - this.tryVisitNode(queue, origin.getX() + x, height, origin.getZ() + z, direction, frame, viewport); + this.tryVisitNode(queue, origin, origin.getX() + x, height, origin.getZ() + z, direction, frame, viewport); } for (int z = l; z <= radius; z++) { int x = z - layer; - this.tryVisitNode(queue, origin.getX() + x, height, origin.getZ() + z, direction, frame, viewport); + this.tryVisitNode(queue, origin,origin.getX() + x, height, origin.getZ() + z, direction, frame, viewport); } for (int z = radius; z >= l; z--) { int x = layer - z; - this.tryVisitNode(queue, origin.getX() + x, height, origin.getZ() + z, direction, frame, viewport); + this.tryVisitNode(queue, origin, origin.getX() + x, height, origin.getZ() + z, direction, frame, viewport); } for (int z = -l; z >= -radius; z--) { int x = layer + z; - this.tryVisitNode(queue, origin.getX() + x, height, origin.getZ() + z, direction, frame, viewport); + this.tryVisitNode(queue, origin, origin.getX() + x, height, origin.getZ() + z, direction, frame, viewport); } } } - private void tryVisitNode(WriteQueue queue, int x, int y, int z, int direction, int frame, Viewport viewport) { - RenderSection section = this.getRenderSection(x, y, z); + private void tryVisitNode(WriteQueue queue, SectionPos origin, int x, int y, int z, int direction, int frame, Viewport viewport) { + RenderSection section = this.getRenderSection(origin.getX(), origin.getY(), origin.getZ()); if (section == null || !isWithinFrustum(viewport, section)) { return; } - visitNode(queue, section, GraphDirectionSet.of(direction), frame); + visitNode(queue, origin, section, GraphDirectionSet.of(direction), frame); } private RenderSection getRenderSection(int x, int y, int z) { From 31546a89f487c8aa804243da5d1aeaf48879cc29 Mon Sep 17 00:00:00 2001 From: Ryan Hitchman Date: Sat, 25 Oct 2025 22:56:51 -0600 Subject: [PATCH 2/8] rewrite slope code to use all 
integer ops and no divides --- .../client/render/chunk/RenderSection.java | 144 ++++++++++++------ 1 file changed, 99 insertions(+), 45 deletions(-) diff --git a/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/RenderSection.java b/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/RenderSection.java index e83b2257bc..d730b71490 100644 --- a/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/RenderSection.java +++ b/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/RenderSection.java @@ -44,12 +44,8 @@ public class RenderSection { adjacentEast; private final static boolean DISABLE_SLOPES = false; - private long lastOriginPos = -1; - final private Vector3f - baseMinSlope = new Vector3f(), - baseMaxSlope = new Vector3f(), - curMinSlope = new Vector3f(), - curMaxSlope = new Vector3f(); + private long curMinSlopes; + private long curMaxSlopes; // Rendering State private boolean built = false; // merge with the flags? @@ -321,33 +317,58 @@ public void setIncomingDirections(int directions) { this.incomingDirections = directions; } - private static float slope(float rise, float run) { + private static final int SLOPE_ZERO = 1 << 8; + private static final int SLOPE_INFINITY = 1; + + private static int packSlope(int rise, int run) { if (run <= 0) { - return Float.POSITIVE_INFINITY; + return SLOPE_INFINITY; } - return Math.max(0, rise) / run; + return ((run & 0xFF) << 8) | (Math.max(0, rise) & 0xFF); } - private void updateBaseSlopes(SectionPos origin) { - var originPos = origin.asLong(); - if (originPos == this.lastOriginPos) { - return; - } - lastOriginPos = originPos; - var dx = Math.abs(origin.x() - this.getChunkX()); - var dy = Math.abs(origin.y() - this.getChunkY()); - var dz = Math.abs(origin.z() - this.getChunkZ()); + private static boolean isSlopeLess(int slope1, int slope2) { + int x1 = slope1 >> 8; + int y1 = slope1 & 0xFF; + int x2 = slope2 >> 8; + int y2 = slope2 & 0xFF; + + if (x1 == 0) 
return false; + if (x2 == 0) return true; + + // this algebraic rearrangement avoids a division + return y1 * x2 < y2 * x1; + } + + private static int slopeMax(int slope1, int slope2) { + return isSlopeLess(slope1, slope2) ? slope2 : slope1; + } - this.baseMinSlope.set( - slope(dy - 1, dx+1), // xy plane - slope(dz - 1, dx+1), // xz - slope(dz - 1, dy+1)); // yz - this.baseMaxSlope.set( - slope(dy+1, dx-1), // xy plane - slope(dz+1, dx-1), // xz - slope(dz+1, dy-1)); // yz + private static int slopeMin(int slope1, int slope2) { + return isSlopeLess(slope1, slope2) ? slope1 : slope2; } + private static long packAllSlopes(int slopeXY, int slopeXZ, int slopeYZ) { + return (slopeXY & 0xFFFFL) | + ((slopeXZ & 0xFFFFL) << 16) | + ((slopeYZ & 0xFFFFL) << 32); + } + + private static int getSlopeXY(long packedSlopes) { + return (int)(packedSlopes & 0xFFFFL); + } + + private static int getSlopeXZ(long packedSlopes) { + return (int)((packedSlopes >> 16) & 0xFFFFL); + } + + private static int getSlopeYZ(long packedSlopes) { + return (int)((packedSlopes >> 32) & 0xFFFFL); + } + + private static final long ALL_SLOPES_ZERO = packAllSlopes(SLOPE_ZERO, SLOPE_ZERO, SLOPE_ZERO); + private static final long ALL_SLOPES_INFINITY = packAllSlopes(SLOPE_INFINITY, SLOPE_INFINITY, SLOPE_INFINITY); + public boolean intersectSlopes(SectionPos origin, RenderSection other, int frame) { // Slope refinement tracking is based on the idea that by passing through a given cell, // the minimum and maximum angle of any cell that can be visited afterward is constrained. @@ -356,34 +377,67 @@ public boolean intersectSlopes(SectionPos origin, RenderSection other, int frame // complex 3D frustum tracking math. It misses some things that could be pruned, but is quite fast. 
if (DISABLE_SLOPES) return true; + if (this.lastVisibleFrame != frame) { - this.updateBaseSlopes(origin); - this.curMinSlope.set(Float.POSITIVE_INFINITY); - this.curMaxSlope.set(0); + this.curMinSlopes = ALL_SLOPES_INFINITY; + this.curMaxSlopes = ALL_SLOPES_ZERO; } - // maybe I should have a temp object for garbage - float propMinX = Math.max(other.curMinSlope.x, this.baseMinSlope.x); - float propMaxX = Math.min(other.curMaxSlope.x, this.baseMaxSlope.x); - float propMinY = Math.max(other.curMinSlope.y, this.baseMinSlope.y); - float propMaxY = Math.min(other.curMaxSlope.y, this.baseMaxSlope.y); - float propMinZ = Math.max(other.curMinSlope.z, this.baseMinSlope.z); - float propMaxZ = Math.min(other.curMaxSlope.z, this.baseMaxSlope.z); - if (propMinX >= propMaxX || propMinY >= propMaxY || propMinZ >= propMaxZ) { + + var dx = Math.abs(origin.x() - this.getChunkX()); + var dy = Math.abs(origin.y() - this.getChunkY()); + var dz = Math.abs(origin.z() - this.getChunkZ()); + + int thisBaseMinXY = packSlope(dy - 1, dx + 1); + int thisBaseMinXZ = packSlope(dz - 1, dx + 1); + int thisBaseMinYZ = packSlope(dz - 1, dy + 1); + + int thisBaseMaxXY = packSlope(dy + 1, dx - 1); + int thisBaseMaxXZ = packSlope(dz + 1, dx - 1); + int thisBaseMaxYZ = packSlope(dz + 1, dy - 1); + + int otherMinXY = getSlopeXY(other.curMinSlopes); + int otherMinXZ = getSlopeXZ(other.curMinSlopes); + int otherMinYZ = getSlopeYZ(other.curMinSlopes); + + int otherMaxXY = getSlopeXY(other.curMaxSlopes); + int otherMaxXZ = getSlopeXZ(other.curMaxSlopes); + int otherMaxYZ = getSlopeYZ(other.curMaxSlopes); + + int propMinXY = slopeMax(otherMinXY, thisBaseMinXY); + int propMaxXY = slopeMin(otherMaxXY, thisBaseMaxXY); + if (!isSlopeLess(propMinXY, propMaxXY)) { return false; } - this.curMinSlope.x = Math.min(this.curMinSlope.x, propMinX); - this.curMinSlope.y = Math.min(this.curMinSlope.y, propMinY); - this.curMinSlope.z = Math.min(this.curMinSlope.z, propMinZ); - this.curMaxSlope.x = Math.max(this.curMaxSlope.x, 
propMaxX); - this.curMaxSlope.y = Math.max(this.curMaxSlope.y, propMaxY); - this.curMaxSlope.z = Math.max(this.curMaxSlope.z, propMaxZ); + int propMinXZ = slopeMax(otherMinXZ, thisBaseMinXZ); + int propMaxXZ = slopeMin(otherMaxXZ, thisBaseMaxXZ); + if (!isSlopeLess(propMinXZ, propMaxXZ)) { + return false; + } + + int propMinYZ = slopeMax(otherMinYZ, thisBaseMinYZ); + int propMaxYZ = slopeMin(otherMaxYZ, thisBaseMaxYZ); + if (!isSlopeLess(propMinYZ, propMaxYZ)) { + return false; + } + + int newMinXY = slopeMin(getSlopeXY(this.curMinSlopes), propMinXY); + int newMinXZ = slopeMin(getSlopeXZ(this.curMinSlopes), propMinXZ); + int newMinYZ = slopeMin(getSlopeYZ(this.curMinSlopes), propMinYZ); + + int newMaxXY = slopeMax(getSlopeXY(this.curMaxSlopes), propMaxXY); + int newMaxXZ = slopeMax(getSlopeXZ(this.curMaxSlopes), propMaxXZ); + int newMaxYZ = slopeMax(getSlopeYZ(this.curMaxSlopes), propMaxYZ); + + this.curMinSlopes = packAllSlopes(newMinXY, newMinXZ, newMinYZ); + this.curMaxSlopes = packAllSlopes(newMaxXY, newMaxXZ, newMaxYZ); + return true; } public void setOriginSlopes() { - this.curMinSlope.set(0); - this.curMaxSlope.set(Float.POSITIVE_INFINITY); + this.curMinSlopes = ALL_SLOPES_ZERO; + this.curMaxSlopes = ALL_SLOPES_INFINITY; } /** From 33865f57ca71b9c391962efc8e89d24638a7da87 Mon Sep 17 00:00:00 2001 From: Ryan Hitchman Date: Sat, 25 Oct 2025 23:33:31 -0600 Subject: [PATCH 3/8] trim some branches --- .../mods/sodium/client/render/chunk/RenderSection.java | 4 ---- 1 file changed, 4 deletions(-) diff --git a/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/RenderSection.java b/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/RenderSection.java index d730b71490..5b1855b602 100644 --- a/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/RenderSection.java +++ b/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/RenderSection.java @@ -14,7 +14,6 @@ import 
net.minecraft.world.level.block.entity.BlockEntity; import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; -import org.joml.Vector3f; /** * The render state object for a chunk section. This contains all the graphics state for each render pass along with @@ -333,9 +332,6 @@ private static boolean isSlopeLess(int slope1, int slope2) { int x2 = slope2 >> 8; int y2 = slope2 & 0xFF; - if (x1 == 0) return false; - if (x2 == 0) return true; - // this algebraic rearrangement avoids a division return y1 * x2 < y2 * x1; } From 51930f28d39f84bb9438f50ebaf1eb574de43874 Mon Sep 17 00:00:00 2001 From: Ryan Hitchman Date: Sun, 26 Oct 2025 17:02:47 -0600 Subject: [PATCH 4/8] squeeze a bit more micro-optimization out of intersectSlopes --- .../client/render/chunk/RenderSection.java | 111 ++++++++---------- 1 file changed, 51 insertions(+), 60 deletions(-) diff --git a/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/RenderSection.java b/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/RenderSection.java index 5b1855b602..1075c72bcf 100644 --- a/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/RenderSection.java +++ b/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/RenderSection.java @@ -33,6 +33,9 @@ public class RenderSection { private int incomingDirections; private int lastVisibleFrame = -1; + private long curMinSlopes; + private long curMaxSlopes; + private int adjacentMask; public RenderSection adjacentDown, @@ -42,10 +45,6 @@ public class RenderSection { adjacentWest, adjacentEast; - private final static boolean DISABLE_SLOPES = false; - private long curMinSlopes; - private long curMaxSlopes; - // Rendering State private boolean built = false; // merge with the flags? 
private int flags = RenderSectionFlags.NONE; @@ -320,28 +319,41 @@ public void setIncomingDirections(int directions) { private static final int SLOPE_INFINITY = 1; private static int packSlope(int rise, int run) { - if (run <= 0) { + return ((Math.max(run, 0) & 0xFF) << 8) | (Math.max(0, rise) & 0xFF); + } + + private static int packSlopeRisePos(int rise, int run) { + if (run < 0) { return SLOPE_INFINITY; } - return ((run & 0xFF) << 8) | (Math.max(0, rise) & 0xFF); + return ((run & 0xFF) << 8) | (rise & 0xFF); + } + + private static int packSlopeRunPos(int rise, int run) { + if (rise < 0) { + return SLOPE_ZERO; + } + return ((run & 0xFF) << 8) | (rise & 0xFF); } - private static boolean isSlopeLess(int slope1, int slope2) { + private static int isSlopeLess(int slope1, int slope2) { int x1 = slope1 >> 8; int y1 = slope1 & 0xFF; int x2 = slope2 >> 8; int y2 = slope2 & 0xFF; // this algebraic rearrangement avoids a division - return y1 * x2 < y2 * x1; + return (y1 * x2 - y2 * x1) >> 31; } private static int slopeMax(int slope1, int slope2) { - return isSlopeLess(slope1, slope2) ? slope2 : slope1; + int mask = isSlopeLess(slope1, slope2); + return (slope1 & ~mask) | (slope2 & mask); } private static int slopeMin(int slope1, int slope2) { - return isSlopeLess(slope1, slope2) ? slope1 : slope2; + int mask = isSlopeLess(slope1, slope2); + return (slope1 & mask) | (slope2 & ~mask); } private static long packAllSlopes(int slopeXY, int slopeXZ, int slopeYZ) { @@ -371,62 +383,41 @@ public boolean intersectSlopes(SectionPos origin, RenderSection other, int frame // A 2D visualization of this is here: https://mod.ifies.com/f/251025_raycast_vis_v4.html // We perform the same thing across the XY, XZ, and YZ planes separately to avoid the more // complex 3D frustum tracking math. It misses some things that could be pruned, but is quite fast. 
- if (DISABLE_SLOPES) - return true; - - if (this.lastVisibleFrame != frame) { - this.curMinSlopes = ALL_SLOPES_INFINITY; - this.curMaxSlopes = ALL_SLOPES_ZERO; - } - - var dx = Math.abs(origin.x() - this.getChunkX()); - var dy = Math.abs(origin.y() - this.getChunkY()); - var dz = Math.abs(origin.z() - this.getChunkZ()); - - int thisBaseMinXY = packSlope(dy - 1, dx + 1); - int thisBaseMinXZ = packSlope(dz - 1, dx + 1); - int thisBaseMinYZ = packSlope(dz - 1, dy + 1); - - int thisBaseMaxXY = packSlope(dy + 1, dx - 1); - int thisBaseMaxXZ = packSlope(dz + 1, dx - 1); - int thisBaseMaxYZ = packSlope(dz + 1, dy - 1); - - int otherMinXY = getSlopeXY(other.curMinSlopes); - int otherMinXZ = getSlopeXZ(other.curMinSlopes); - int otherMinYZ = getSlopeYZ(other.curMinSlopes); - - int otherMaxXY = getSlopeXY(other.curMaxSlopes); - int otherMaxXZ = getSlopeXZ(other.curMaxSlopes); - int otherMaxYZ = getSlopeYZ(other.curMaxSlopes); - - int propMinXY = slopeMax(otherMinXY, thisBaseMinXY); - int propMaxXY = slopeMin(otherMaxXY, thisBaseMaxXY); - if (!isSlopeLess(propMinXY, propMaxXY)) { + var dx = Math.abs(origin.getX() - this.getChunkX()); + var dy = Math.abs(origin.getY() - this.getChunkY()); + var dz = Math.abs(origin.getZ() - this.getChunkZ()); + + int baseMinXY = packSlopeRunPos(dy - 1, dx + 1); + int baseMaxXY = packSlopeRisePos(dy + 1, dx - 1); + int baseMinXZ = packSlopeRunPos(dz - 1, dx + 1); + int baseMaxXZ = packSlopeRisePos(dz + 1, dx - 1); + int baseMinYZ = packSlopeRunPos(dz - 1, dy + 1); + int baseMaxYZ = packSlopeRisePos(dz + 1, dy - 1); + + int minXY = slopeMax(getSlopeXY(other.curMinSlopes), baseMinXY); + int maxXY = slopeMin(getSlopeXY(other.curMaxSlopes), baseMaxXY); + int minXZ = slopeMax(getSlopeXZ(other.curMinSlopes), baseMinXZ); + int maxXZ = slopeMin(getSlopeXZ(other.curMaxSlopes), baseMaxXZ); + int minYZ = slopeMax(getSlopeYZ(other.curMinSlopes), baseMinYZ); + int maxYZ = slopeMin(getSlopeYZ(other.curMaxSlopes), baseMaxYZ); + + // if max >= min for any of 
the planes, there is no angle left to explore from that section + if ((isSlopeLess(minXY, maxXY) & isSlopeLess(minXZ, maxXZ) & isSlopeLess(minYZ, maxYZ)) >>> 31 == 0) { return false; } - int propMinXZ = slopeMax(otherMinXZ, thisBaseMinXZ); - int propMaxXZ = slopeMin(otherMaxXZ, thisBaseMaxXZ); - if (!isSlopeLess(propMinXZ, propMaxXZ)) { - return false; - } + if (this.lastVisibleFrame == frame) { + minXY = slopeMin(getSlopeXY(this.curMinSlopes), minXY); + minXZ = slopeMin(getSlopeXZ(this.curMinSlopes), minXZ); + minYZ = slopeMin(getSlopeYZ(this.curMinSlopes), minYZ); - int propMinYZ = slopeMax(otherMinYZ, thisBaseMinYZ); - int propMaxYZ = slopeMin(otherMaxYZ, thisBaseMaxYZ); - if (!isSlopeLess(propMinYZ, propMaxYZ)) { - return false; + maxXY = slopeMax(getSlopeXY(this.curMaxSlopes), maxXY); + maxXZ = slopeMax(getSlopeXZ(this.curMaxSlopes), maxXZ); + maxYZ = slopeMax(getSlopeYZ(this.curMaxSlopes), maxYZ); } - int newMinXY = slopeMin(getSlopeXY(this.curMinSlopes), propMinXY); - int newMinXZ = slopeMin(getSlopeXZ(this.curMinSlopes), propMinXZ); - int newMinYZ = slopeMin(getSlopeYZ(this.curMinSlopes), propMinYZ); - - int newMaxXY = slopeMax(getSlopeXY(this.curMaxSlopes), propMaxXY); - int newMaxXZ = slopeMax(getSlopeXZ(this.curMaxSlopes), propMaxXZ); - int newMaxYZ = slopeMax(getSlopeYZ(this.curMaxSlopes), propMaxYZ); - - this.curMinSlopes = packAllSlopes(newMinXY, newMinXZ, newMinYZ); - this.curMaxSlopes = packAllSlopes(newMaxXY, newMaxXZ, newMaxYZ); + this.curMinSlopes = packAllSlopes(minXY, minXZ, minYZ); + this.curMaxSlopes = packAllSlopes(maxXY, maxXZ, maxYZ); return true; } From 47f1d682535f3ee76230570b8c369e829ececfab Mon Sep 17 00:00:00 2001 From: Ryan Hitchman Date: Mon, 27 Oct 2025 18:08:23 -0600 Subject: [PATCH 5/8] attempt to track occlusions using a bitset for angles and a LUT it's terrible, only culling 3%, but it's fast, only taking 60ns --- .../client/render/chunk/RenderSection.java | 168 +++++++++--------- 1 file changed, 80 insertions(+), 88 
deletions(-) diff --git a/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/RenderSection.java b/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/RenderSection.java index 1075c72bcf..09c2098602 100644 --- a/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/RenderSection.java +++ b/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/RenderSection.java @@ -33,8 +33,7 @@ public class RenderSection { private int incomingDirections; private int lastVisibleFrame = -1; - private long curMinSlopes; - private long curMaxSlopes; + private long allowedAngles; private int adjacentMask; public RenderSection @@ -315,116 +314,109 @@ public void setIncomingDirections(int directions) { this.incomingDirections = directions; } - private static final int SLOPE_ZERO = 1 << 8; - private static final int SLOPE_INFINITY = 1; + private static final int BITS_PER_PLANE = 21; + private static final long PLANE_MASK = (1L << BITS_PER_PLANE) - 1L; // 0x1FFFFFL - private static int packSlope(int rise, int run) { - return ((Math.max(run, 0) & 0xFF) << 8) | (Math.max(0, rise) & 0xFF); + public void setOriginSlopes() { + this.allowedAngles = -1L; } - private static int packSlopeRisePos(int rise, int run) { - if (run < 0) { - return SLOPE_INFINITY; - } - return ((run & 0xFF) << 8) | (rise & 0xFF); - } + /** + * Precomputed Lookup Table for base angle bitsets. + * This is a 32x32 LUT, indexed by [run + (rise << 5)]. + */ + private static final int LUT_DIM_BITS = 5; // 2^5 = 32 + private static final int LUT_SIZE = 1 << (LUT_DIM_BITS * 2); // 32*32 = 1024 + private static final int LUT_MAX_IDX = (1 << LUT_DIM_BITS) - 1; // 31 + private static final int[] ANGLE_BITSET_LUT = new int[LUT_SIZE]; - private static int packSlopeRunPos(int rise, int run) { - if (rise < 0) { - return SLOPE_ZERO; + /** + * Calculates the 21-bit angle bitset for a given rise/run. + * This converts the min/max slope cone into a bitset. 
+ * + * @param rise The rise (dy) + * @param run The run (dx) + * @return A 21-bit integer bitset. + */ + private static int generateAngleBits(int rise, int run) { + int minRise = rise - 1; + int minRun = run + 1; + int maxRise = rise + 1; + int maxRun = run - 1; + + // Convert packed slopes to angles (in radians, 0 to PI/2) + double minAngle = Math.atan2(minRise, minRun); + double maxAngle = Math.atan2(maxRise, maxRun); + + final double ANGLE_PER_BIT = (Math.PI / 2.0) / BITS_PER_PLANE; // 90 degrees / 21 bits in radians + + int bits = 0; + for (int i = 0; i < BITS_PER_PLANE; i++) { + double bitStartAngle = i * ANGLE_PER_BIT; + double bitEndAngle = (i + 1) * ANGLE_PER_BIT; + if (bitEndAngle > minAngle && bitStartAngle < maxAngle) { + bits |= 1 << i; + } } - return ((run & 0xFF) << 8) | (rise & 0xFF); - } - - private static int isSlopeLess(int slope1, int slope2) { - int x1 = slope1 >> 8; - int y1 = slope1 & 0xFF; - int x2 = slope2 >> 8; - int y2 = slope2 & 0xFF; - - // this algebraic rearrangement avoids a division - return (y1 * x2 - y2 * x1) >> 31; - } - - private static int slopeMax(int slope1, int slope2) { - int mask = isSlopeLess(slope1, slope2); - return (slope1 & ~mask) | (slope2 & mask); - } - - private static int slopeMin(int slope1, int slope2) { - int mask = isSlopeLess(slope1, slope2); - return (slope1 & mask) | (slope2 & ~mask); - } - - private static long packAllSlopes(int slopeXY, int slopeXZ, int slopeYZ) { - return (slopeXY & 0xFFFFL) | - ((slopeXZ & 0xFFFFL) << 16) | - ((slopeYZ & 0xFFFFL) << 32); - } - - private static int getSlopeXY(long packedSlopes) { - return (int)(packedSlopes & 0xFFFFL); + return bits; } - private static int getSlopeXZ(long packedSlopes) { - return (int)((packedSlopes >> 16) & 0xFFFFL); - } - private static int getSlopeYZ(long packedSlopes) { - return (int)((packedSlopes >> 32) & 0xFFFFL); + static { + for (int i = 0; i < LUT_SIZE; i++) { + int run = i & LUT_MAX_IDX; + int rise = (i >> LUT_DIM_BITS); + ANGLE_BITSET_LUT[i] 
= generateAngleBits(rise, run); + } } - private static final long ALL_SLOPES_ZERO = packAllSlopes(SLOPE_ZERO, SLOPE_ZERO, SLOPE_ZERO); - private static final long ALL_SLOPES_INFINITY = packAllSlopes(SLOPE_INFINITY, SLOPE_INFINITY, SLOPE_INFINITY); - + /** + * Intersects the allowed angles from the 'other' section with the base angles + * subtended by this section. + * + * @param origin The origin of the visibility check. + * @param other The parent/previous section from which visibility is being propagated. + * @param frame The current frame number. + * @return false if this section is guaranteed not visible, true otherwise. + */ public boolean intersectSlopes(SectionPos origin, RenderSection other, int frame) { - // Slope refinement tracking is based on the idea that by passing through a given cell, - // the minimum and maximum angle of any cell that can be visited afterward is constrained. - // A 2D visualization of this is here: https://mod.ifies.com/f/251025_raycast_vis_v4.html - // We perform the same thing across the XY, XZ, and YZ planes separately to avoid the more - // complex 3D frustum tracking math. It misses some things that could be pruned, but is quite fast. 
var dx = Math.abs(origin.getX() - this.getChunkX()); var dy = Math.abs(origin.getY() - this.getChunkY()); var dz = Math.abs(origin.getZ() - this.getChunkZ()); - int baseMinXY = packSlopeRunPos(dy - 1, dx + 1); - int baseMaxXY = packSlopeRisePos(dy + 1, dx - 1); - int baseMinXZ = packSlopeRunPos(dz - 1, dx + 1); - int baseMaxXZ = packSlopeRisePos(dz + 1, dx - 1); - int baseMinYZ = packSlopeRunPos(dz - 1, dy + 1); - int baseMaxYZ = packSlopeRisePos(dz + 1, dy - 1); - - int minXY = slopeMax(getSlopeXY(other.curMinSlopes), baseMinXY); - int maxXY = slopeMin(getSlopeXY(other.curMaxSlopes), baseMaxXY); - int minXZ = slopeMax(getSlopeXZ(other.curMinSlopes), baseMinXZ); - int maxXZ = slopeMin(getSlopeXZ(other.curMaxSlopes), baseMaxXZ); - int minYZ = slopeMax(getSlopeYZ(other.curMinSlopes), baseMinYZ); - int maxYZ = slopeMin(getSlopeYZ(other.curMaxSlopes), baseMaxYZ); - - // if max >= min for any of the planes, there is no angle left to explore from that section - if ((isSlopeLess(minXY, maxXY) & isSlopeLess(minXZ, maxXZ) & isSlopeLess(minYZ, maxYZ)) >>> 31 == 0) { - return false; + while ((dx|dy|dz) >= 32) { + // This is only true for the outermost edge of sections that have a distance + // of 32, so we don't use more complex 32-Integer.numberOfLeadingZeros and per-plane + // shifting. 
+ dx >>= 1; dy >>= 1; dz >>= 1; } - if (this.lastVisibleFrame == frame) { - minXY = slopeMin(getSlopeXY(this.curMinSlopes), minXY); - minXZ = slopeMin(getSlopeXZ(this.curMinSlopes), minXZ); - minYZ = slopeMin(getSlopeYZ(this.curMinSlopes), minYZ); + long baseAngles = ANGLE_BITSET_LUT[dx + (dy << LUT_DIM_BITS)] | + ((long)ANGLE_BITSET_LUT[dx + (dz << LUT_DIM_BITS)] << BITS_PER_PLANE) | + ((long)ANGLE_BITSET_LUT[dy + (dz << LUT_DIM_BITS)] << (BITS_PER_PLANE * 2)); - maxXY = slopeMax(getSlopeXY(this.curMaxSlopes), maxXY); - maxXZ = slopeMax(getSlopeXZ(this.curMaxSlopes), maxXZ); - maxYZ = slopeMax(getSlopeYZ(this.curMaxSlopes), maxYZ); + long pathAngles = baseAngles & other.allowedAngles; + + // If the intersection is empty for *any* plane, this path is occluded. + if (anyPlaneHasEmptyBitset(pathAngles)) { + return false; } - this.curMinSlopes = packAllSlopes(minXY, minXZ, minYZ); - this.curMaxSlopes = packAllSlopes(maxXY, maxXZ, maxYZ); + if (this.lastVisibleFrame == frame) { + // This section has been visited before *this frame* from another path. + // The new allowed angles are the *union* of the old paths and this new path. 
+ pathAngles |= this.allowedAngles; + } + this.allowedAngles = pathAngles; return true; } - public void setOriginSlopes() { - this.curMinSlopes = ALL_SLOPES_ZERO; - this.curMaxSlopes = ALL_SLOPES_INFINITY; + private static boolean anyPlaneHasEmptyBitset(long angles) { + final long SUB_MASK = 1L | (1L << BITS_PER_PLANE) | (1L << (BITS_PER_PLANE * 2)); + final long MSB_MASK = (1L << (BITS_PER_PLANE - 1)) | (1L << (BITS_PER_PLANE * 2 - 1)) | (1L << (BITS_PER_PLANE * 3 - 1)); + long borrows = (angles - SUB_MASK) & ~angles; + return (borrows & MSB_MASK) != 0; } /** From 7efc279f6e4fa6cef045e0ebe614f86555602e9c Mon Sep 17 00:00:00 2001 From: Ryan Hitchman Date: Mon, 27 Oct 2025 20:49:00 -0600 Subject: [PATCH 6/8] track 10-bit quantized min/max angles over 3 planes packed in a long ~60% more cycles during BFS (~0.5ms, 225->355ns per section) in exchange for 5-25% more chunks culled. --- .../client/render/chunk/RenderSection.java | 182 +++++++++++------- .../chunk/occlusion/OcclusionCuller.java | 2 +- 2 files changed, 117 insertions(+), 67 deletions(-) diff --git a/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/RenderSection.java b/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/RenderSection.java index 09c2098602..06f0a30591 100644 --- a/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/RenderSection.java +++ b/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/RenderSection.java @@ -32,8 +32,7 @@ public class RenderSection { private int incomingDirections; private int lastVisibleFrame = -1; - - private long allowedAngles; + private long allowedAngles; // 60-bit packed quantized min/max allowed angles, 0-9 minXY, 10-19 maxXY, etc. 
private int adjacentMask; public RenderSection @@ -314,60 +313,42 @@ public void setIncomingDirections(int directions) { this.incomingDirections = directions; } - private static final int BITS_PER_PLANE = 21; - private static final long PLANE_MASK = (1L << BITS_PER_PLANE) - 1L; // 0x1FFFFFL - - public void setOriginSlopes() { - this.allowedAngles = -1L; - } + private static final int ANGLE_BITS = 10; + private static final int ANGLE_MASK = (1 << ANGLE_BITS) - 1; + private static final long ANGLES_MIN_MASK = + (long)ANGLE_MASK * (1 | (1L << (ANGLE_BITS * 2)) | (1L << (ANGLE_BITS * 4))); + private static final long ANGLES_MAX_MASK = + (long)ANGLE_MASK * ((1L << ANGLE_BITS) | (1L << (ANGLE_BITS * 3)) | (1L << (ANGLE_BITS * 5))); + private static final int LUT_DIM = 32; + private static final int LUT_SHIFT = 5; // (1 << 5) = 32 /** - * Precomputed Lookup Table for base angle bitsets. - * This is a 32x32 LUT, indexed by [run + (rise << 5)]. + * Lookup table for 20-bit packed (maxAngle(10)<<10) | minAngle(10). + * Indexed by [rise + run * 32], where rise/run are integers in [0, 31]. */ - private static final int LUT_DIM_BITS = 5; // 2^5 = 32 - private static final int LUT_SIZE = 1 << (LUT_DIM_BITS * 2); // 32*32 = 1024 - private static final int LUT_MAX_IDX = (1 << LUT_DIM_BITS) - 1; // 31 - private static final int[] ANGLE_BITSET_LUT = new int[LUT_SIZE]; + private static final int[] ANGLE_LUT = new int[LUT_DIM * LUT_DIM]; - /** - * Calculates the 21-bit angle bitset for a given rise/run. - * This converts the min/max slope cone into a bitset. - * - * @param rise The rise (dy) - * @param run The run (dx) - * @return A 21-bit integer bitset. 
- */ - private static int generateAngleBits(int rise, int run) { - int minRise = rise - 1; - int minRun = run + 1; - int maxRise = rise + 1; - int maxRun = run - 1; - - // Convert packed slopes to angles (in radians, 0 to PI/2) - double minAngle = Math.atan2(minRise, minRun); - double maxAngle = Math.atan2(maxRise, maxRun); - - final double ANGLE_PER_BIT = (Math.PI / 2.0) / BITS_PER_PLANE; // 90 degrees / 21 bits in radians - - int bits = 0; - for (int i = 0; i < BITS_PER_PLANE; i++) { - double bitStartAngle = i * ANGLE_PER_BIT; - double bitEndAngle = (i + 1) * ANGLE_PER_BIT; - if (bitEndAngle > minAngle && bitStartAngle < maxAngle) { - bits |= 1 << i; + static { + for (int run = 0; run < LUT_DIM; run++) { + for (int rise = 0; rise < LUT_DIM; rise++) { + ANGLE_LUT[rise + run * LUT_DIM] = generateAngles(rise, run); } } - return bits; } + private static int generateAngles(int rise, int run) { + double minAngle = Math.atan2(rise - 1, run + 1); + double maxAngle = Math.atan2(rise + 1, run - 1); - static { - for (int i = 0; i < LUT_SIZE; i++) { - int run = i & LUT_MAX_IDX; - int rise = (i >> LUT_DIM_BITS); - ANGLE_BITSET_LUT[i] = generateAngleBits(rise, run); - } + // Quantize angles to 10-bit range [0, 1023] + int minQuant = (int) (Math.max(0.0, minAngle) * (ANGLE_MASK / (Math.PI / 2.0))); + int maxQuant = (int) (Math.min(Math.PI / 2.0, maxAngle) * (ANGLE_MASK / (Math.PI / 2.0))); + + return (minQuant & ANGLE_MASK) | ((maxQuant & ANGLE_MASK) << ANGLE_BITS); + } + + public void setOriginAngles() { + this.allowedAngles = ANGLES_MAX_MASK; } /** @@ -384,39 +365,108 @@ public boolean intersectSlopes(SectionPos origin, RenderSection other, int frame var dy = Math.abs(origin.getY() - this.getChunkY()); var dz = Math.abs(origin.getZ() - this.getChunkZ()); - while ((dx|dy|dz) >= 32) { - // This is only true for the outermost edge of sections that have a distance - // of 32, so we don't use more complex 32-Integer.numberOfLeadingZeros and per-plane - // shifting. 
- dx >>= 1; dy >>= 1; dz >>= 1; + // Shift to [0, 31] for LUT lookup + while ((dx | dy | dz) >= 32) { + // This is only true for the outermost rings of sections that have a distance + // of 32 when 32 chunks are visible, so we don't use more complex + // 32-Integer.numberOfLeadingZeros and per-plane shifting. + dx >>= 1; + dy >>= 1; + dz >>= 1; } - long baseAngles = ANGLE_BITSET_LUT[dx + (dy << LUT_DIM_BITS)] | - ((long)ANGLE_BITSET_LUT[dx + (dz << LUT_DIM_BITS)] << BITS_PER_PLANE) | - ((long)ANGLE_BITSET_LUT[dy + (dz << LUT_DIM_BITS)] << (BITS_PER_PLANE * 2)); + long baseAngles = ANGLE_LUT[dx + (dy << LUT_SHIFT)] | + ((long) ANGLE_LUT[dz + (dx << LUT_SHIFT)] << (2 * ANGLE_BITS)) | + ((long) ANGLE_LUT[dy + (dz << LUT_SHIFT)] << (4 * ANGLE_BITS)); - long pathAngles = baseAngles & other.allowedAngles; + long pathAngles = parallel_unsigned_max_min(other.allowedAngles, baseAngles); - // If the intersection is empty for *any* plane, this path is occluded. - if (anyPlaneHasEmptyBitset(pathAngles)) { + // Check if max < min for any plane, which means the path is occluded. + long borrows = parallel_unsigned_lt_msbs((pathAngles & ANGLES_MAX_MASK) >> ANGLE_BITS, pathAngles & ANGLES_MIN_MASK); + if (borrows != 0) { return false; } if (this.lastVisibleFrame == frame) { - // This section has been visited before *this frame* from another path. - // The new allowed angles are the *union* of the old paths and this new path. - pathAngles |= this.allowedAngles; + // This section has been visited before *this frame*. 
+ // Union the angles: [min(oldMin, newMin), max(oldMax, newMax)] + pathAngles = parallel_unsigned_min_max(pathAngles, this.allowedAngles); } this.allowedAngles = pathAngles; return true; } - private static boolean anyPlaneHasEmptyBitset(long angles) { - final long SUB_MASK = 1L | (1L << BITS_PER_PLANE) | (1L << (BITS_PER_PLANE * 2)); - final long MSB_MASK = (1L << (BITS_PER_PLANE - 1)) | (1L << (BITS_PER_PLANE * 2 - 1)) | (1L << (BITS_PER_PLANE * 3 - 1)); - long borrows = (angles - SUB_MASK) & ~angles; - return (borrows & MSB_MASK) != 0; + /** + * Performs a parallel unsigned less-than comparison (a < b) for 6 10-bit lanes. + * + * @param a 6 packed 10-bit values + * @param b 6 packed 10-bit values + * @return A long with the MSB of each lane (bit 9, 19, 29, ...) set if a_k < b_k. + *

+ * Based on `vhaddu8(~a, b)` (LTU_VARIANT 0) from + * a Stack Overflow answer + * citing Peter L. Montgomery's observation + * (comp.arch, 2000/02/11): + * (A+B)/2 = (A AND B) + (A XOR B)/2. + * The MSB of (A+B)/2 is the same as the carry-out of (A+B), + * and `vhaddu(~a, b)` calculates `(~a+b)/2`, which sets the MSB if `b > a`. + */ + private static long parallel_unsigned_lt_msbs(long a, long b) { + // MSB (sign bit) for each 10-bit lane + final long LANE_MSB = 1L << (ANGLE_BITS - 1); + final long LANE_MSB_MASK = (LANE_MSB << (ANGLE_BITS * 0)) | + (LANE_MSB << (ANGLE_BITS * 1)) | + (LANE_MSB << (ANGLE_BITS * 2)) | + (LANE_MSB << (ANGLE_BITS * 3)) | + (LANE_MSB << (ANGLE_BITS * 4)) | + (LANE_MSB << (ANGLE_BITS * 5)); + // All bits *except* the MSB for each 10-bit lane + final long LANE_NON_MSB_MASK = ((1L << (ANGLE_BITS * 6)) - 1) ^ LANE_MSB_MASK; + + long vhaddu_result = (~a & b) + (((~a ^ b) >>> 1) & LANE_NON_MSB_MASK); + // Return just the MSBs, which are set if a_k < b_k + return vhaddu_result & LANE_MSB_MASK; + } + + /** + * Creates a 60-bit mask (six 10-bit fields, 0x3FF each) where a field is all 1s + * if a_k < b_k, and 0 otherwise. + */ + private static long parallel_unsigned_borrow_mask(long a, long b) { + // 'msbs' has bits 9, 19, 29, ... set if a_k < b_k + long msbs = parallel_unsigned_lt_msbs(a, b); + + // Implements sign_to_mask for 10-bit lanes. + // (a + a - (a >> 9)) adapted from 8-bit (a + a - (a >> 7)) + // This expands the MSB of each lane to fill the entire lane (0x200 -> 0x3FF) + return msbs + msbs - (msbs >>> 9); + } + + /** + * Performs 6 parallel 10-bit *unsigned* min/max operations. + * + * @param a 6 packed 10-bit values + * @param b 6 packed 10-bit values + * @return 6 packed 10-bit values containing min(a_0, b_0), max(a_1, b_1), min(a_2, b_2) ..
+ */ + private static long parallel_unsigned_min_max(long a, long b) { + long mask = parallel_unsigned_borrow_mask(a, b); // all bits set where a < b + mask ^= ANGLES_MAX_MASK; // flip the mask in the max-angle lanes so they select the max instead of the min + return (a & mask) | (b & ~mask); // select based on mask + } + + /** + * Performs 6 parallel 10-bit *unsigned* max/min operations. + * + * @param a 6 packed 10-bit values + * @param b 6 packed 10-bit values + * @return 6 packed 10-bit values containing max(a_0, b_0), min(a_1, b_1), max(a_2, b_2) .. + */ + private static long parallel_unsigned_max_min(long a, long b) { + long mask = parallel_unsigned_borrow_mask(a, b); // all bits set where a < b + mask ^= ANGLES_MIN_MASK; // flip the mask in the min-angle lanes so they select the max instead of the min + return (a & mask) | (b & ~mask); // select based on mask + } /** diff --git a/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/occlusion/OcclusionCuller.java b/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/occlusion/OcclusionCuller.java index 2cbc1ca67f..dbb0bde754 100644 --- a/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/occlusion/OcclusionCuller.java +++ b/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/occlusion/OcclusionCuller.java @@ -292,7 +292,7 @@ private void initWithinWorld(RenderSectionVisitor visitor, WriteQueue Date: Mon, 27 Oct 2025 20:50:48 -0600 Subject: [PATCH 7/8] microoptimization: ensuring capacity on every visit costs ~25ns --- .../sodium/client/render/chunk/occlusion/OcclusionCuller.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/occlusion/OcclusionCuller.java b/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/occlusion/OcclusionCuller.java index dbb0bde754..7ee9b92d84 100644 --- a/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/occlusion/OcclusionCuller.java +++
b/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/occlusion/OcclusionCuller.java @@ -127,9 +127,6 @@ private static void visitNeighbors(final WriteQueue queue, Sectio return; } - // This helps the compiler move the checks for some invariants upwards. - queue.ensureCapacity(6); - if (GraphDirectionSet.contains(outgoing, GraphDirection.DOWN) && section.adjacentDown.intersectSlopes(origin, section, frame)) { visitNode(queue, origin, section.adjacentDown, GraphDirectionSet.of(GraphDirection.UP), frame); } From c74da685c9beae9566bbbe47350917439cba6901 Mon Sep 17 00:00:00 2001 From: Ryan Hitchman Date: Mon, 27 Oct 2025 21:18:21 -0600 Subject: [PATCH 8/8] fix out-of-world rendering to properly disable new angle technique --- .../chunk/occlusion/OcclusionCuller.java | 74 ++++++++++++++----- 1 file changed, 55 insertions(+), 19 deletions(-) diff --git a/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/occlusion/OcclusionCuller.java b/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/occlusion/OcclusionCuller.java index 7ee9b92d84..a47156bde0 100644 --- a/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/occlusion/OcclusionCuller.java +++ b/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/occlusion/OcclusionCuller.java @@ -35,8 +35,14 @@ public void findVisible(RenderSectionVisitor visitor, this.init(visitor, queues.write(), viewport, searchDistance, useOcclusionCulling, frame); + SectionPos origin = viewport.getChunkCoord(); + if (this.getRenderSection(origin.getX(), origin.getY(), origin.getZ()) == null) { + // origin outside of world + origin = null; + } + while (queues.flip()) { - processQueue(visitor, viewport, searchDistance, useOcclusionCulling, frame, queues.read(), queues.write()); + processQueue(visitor, viewport, origin, searchDistance, useOcclusionCulling, frame, queues.read(), queues.write()); } this.addNearbySections(visitor, viewport, searchDistance, frame); @@ -44,6 
+50,7 @@ public void findVisible(RenderSectionVisitor visitor, private static void processQueue(RenderSectionVisitor visitor, Viewport viewport, + SectionPos inBoundsOrigin, float searchDistance, boolean useOcclusionCulling, int frame, @@ -84,7 +91,7 @@ private static void processQueue(RenderSectionVisitor visitor, connections &= getOutwardDirections(origin, section); } - visitNeighbors(writeQueue, origin, section, connections, frame); + visitNeighbors(writeQueue, inBoundsOrigin, section, connections, frame); } } @@ -127,32 +134,61 @@ private static void visitNeighbors(final WriteQueue queue, Sectio return; } + if (origin == null) { + // the viewpoint is outside the world, so the angle computations relying on propagating angle information + // from the origin section to the others won't work. + if (GraphDirectionSet.contains(outgoing, GraphDirection.DOWN)) { + visitNode(queue, section.adjacentDown, GraphDirectionSet.of(GraphDirection.UP), frame); + } + + if (GraphDirectionSet.contains(outgoing, GraphDirection.UP)) { + visitNode(queue, section.adjacentUp, GraphDirectionSet.of(GraphDirection.DOWN), frame); + } + + if (GraphDirectionSet.contains(outgoing, GraphDirection.NORTH)) { + visitNode(queue, section.adjacentNorth, GraphDirectionSet.of(GraphDirection.SOUTH), frame); + } + + if (GraphDirectionSet.contains(outgoing, GraphDirection.SOUTH)) { + visitNode(queue, section.adjacentSouth, GraphDirectionSet.of(GraphDirection.NORTH), frame); + } + + if (GraphDirectionSet.contains(outgoing, GraphDirection.WEST)) { + visitNode(queue, section.adjacentWest, GraphDirectionSet.of(GraphDirection.EAST), frame); + } + + if (GraphDirectionSet.contains(outgoing, GraphDirection.EAST)) { + visitNode(queue, section.adjacentEast, GraphDirectionSet.of(GraphDirection.WEST), frame); + } + return; + } + if (GraphDirectionSet.contains(outgoing, GraphDirection.DOWN) && section.adjacentDown.intersectSlopes(origin, section, frame)) { - visitNode(queue, origin, section.adjacentDown, 
GraphDirectionSet.of(GraphDirection.UP), frame); + visitNode(queue, section.adjacentDown, GraphDirectionSet.of(GraphDirection.UP), frame); } if (GraphDirectionSet.contains(outgoing, GraphDirection.UP) && section.adjacentUp.intersectSlopes(origin, section, frame)) { - visitNode(queue, origin, section.adjacentUp, GraphDirectionSet.of(GraphDirection.DOWN), frame); + visitNode(queue, section.adjacentUp, GraphDirectionSet.of(GraphDirection.DOWN), frame); } if (GraphDirectionSet.contains(outgoing, GraphDirection.NORTH) && section.adjacentNorth.intersectSlopes(origin, section, frame)) { - visitNode(queue, origin, section.adjacentNorth, GraphDirectionSet.of(GraphDirection.SOUTH), frame); + visitNode(queue, section.adjacentNorth, GraphDirectionSet.of(GraphDirection.SOUTH), frame); } if (GraphDirectionSet.contains(outgoing, GraphDirection.SOUTH) && section.adjacentSouth.intersectSlopes(origin, section, frame)) { - visitNode(queue, origin, section.adjacentSouth, GraphDirectionSet.of(GraphDirection.NORTH), frame); + visitNode(queue, section.adjacentSouth, GraphDirectionSet.of(GraphDirection.NORTH), frame); } if (GraphDirectionSet.contains(outgoing, GraphDirection.WEST) && section.adjacentWest.intersectSlopes(origin, section, frame)) { - visitNode(queue, origin, section.adjacentWest, GraphDirectionSet.of(GraphDirection.EAST), frame); + visitNode(queue, section.adjacentWest, GraphDirectionSet.of(GraphDirection.EAST), frame); } if (GraphDirectionSet.contains(outgoing, GraphDirection.EAST) && section.adjacentEast.intersectSlopes(origin, section, frame)) { - visitNode(queue, origin, section.adjacentEast, GraphDirectionSet.of(GraphDirection.WEST), frame); + visitNode(queue, section.adjacentEast, GraphDirectionSet.of(GraphDirection.WEST), frame); } } - private static void visitNode(final WriteQueue queue, @NotNull SectionPos origin, @NotNull RenderSection render, int incoming, int frame) { + private static void visitNode(final WriteQueue queue, @NotNull RenderSection render, int 
incoming, int frame) { if (render.getLastVisibleFrame() != frame) { // This is the first time we are visiting this section during the given frame, so we must // reset the state. @@ -323,18 +359,18 @@ private void initOutsideWorldHeight(WriteQueue queue, var radius = Mth.floor(searchDistance / 16.0f); // Layer 0 - this.tryVisitNode(queue, origin, origin.getX(), height, origin.getZ(), direction, frame, viewport); + this.tryVisitNode(queue, origin.getX(), height, origin.getZ(), direction, frame, viewport); // Complete layers, excluding layer 0 for (int layer = 1; layer <= radius; layer++) { for (int z = -layer; z < layer; z++) { int x = Math.abs(z) - layer; - this.tryVisitNode(queue, origin, origin.getX() + x, height, origin.getZ() + z, direction, frame, viewport); + this.tryVisitNode(queue, origin.getX() + x, height, origin.getZ() + z, direction, frame, viewport); } for (int z = layer; z > -layer; z--) { int x = layer - Math.abs(z); - this.tryVisitNode(queue, origin, origin.getX() + x, height, origin.getZ() + z, direction, frame, viewport); + this.tryVisitNode(queue, origin.getX() + x, height, origin.getZ() + z, direction, frame, viewport); } } @@ -344,34 +380,34 @@ private void initOutsideWorldHeight(WriteQueue queue, for (int z = -radius; z <= -l; z++) { int x = -z - layer; - this.tryVisitNode(queue, origin, origin.getX() + x, height, origin.getZ() + z, direction, frame, viewport); + this.tryVisitNode(queue, origin.getX() + x, height, origin.getZ() + z, direction, frame, viewport); } for (int z = l; z <= radius; z++) { int x = z - layer; - this.tryVisitNode(queue, origin,origin.getX() + x, height, origin.getZ() + z, direction, frame, viewport); + this.tryVisitNode(queue, origin.getX() + x, height, origin.getZ() + z, direction, frame, viewport); } for (int z = radius; z >= l; z--) { int x = layer - z; - this.tryVisitNode(queue, origin, origin.getX() + x, height, origin.getZ() + z, direction, frame, viewport); + this.tryVisitNode(queue, origin.getX() + x, height, 
origin.getZ() + z, direction, frame, viewport); } for (int z = -l; z >= -radius; z--) { int x = layer + z; - this.tryVisitNode(queue, origin, origin.getX() + x, height, origin.getZ() + z, direction, frame, viewport); + this.tryVisitNode(queue, origin.getX() + x, height, origin.getZ() + z, direction, frame, viewport); } } } - private void tryVisitNode(WriteQueue queue, SectionPos origin, int x, int y, int z, int direction, int frame, Viewport viewport) { - RenderSection section = this.getRenderSection(origin.getX(), origin.getY(), origin.getZ()); + private void tryVisitNode(WriteQueue queue, int x, int y, int z, int direction, int frame, Viewport viewport) { + RenderSection section = this.getRenderSection(x, y, z); if (section == null || !isWithinFrustum(viewport, section)) { return; } - visitNode(queue, origin, section, GraphDirectionSet.of(direction), frame); + visitNode(queue, section, GraphDirectionSet.of(direction), frame); } private RenderSection getRenderSection(int x, int y, int z) {