diff --git a/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/RenderSection.java b/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/RenderSection.java index c16c371249..06f0a30591 100644 --- a/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/RenderSection.java +++ b/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/RenderSection.java @@ -32,6 +32,7 @@ public class RenderSection { private int incomingDirections; private int lastVisibleFrame = -1; + private long allowedAngles; // 60-bit packed quantized min/max allowed angles, 0-9 minXY, 10-19 maxXY, etc. private int adjacentMask; public RenderSection @@ -42,7 +43,6 @@ public class RenderSection { adjacentWest, adjacentEast; - // Rendering State private boolean built = false; // merge with the flags? private int flags = RenderSectionFlags.NONE; @@ -313,6 +313,162 @@ public void setIncomingDirections(int directions) { this.incomingDirections = directions; }
+    private static final int ANGLE_BITS = 10; // width in bits of one quantized angle lane
+    private static final int ANGLE_MASK = (1 << ANGLE_BITS) - 1; // 0x3FF: all bits of a single lane
+    private static final long ANGLES_MIN_MASK =
+            (long)ANGLE_MASK * (1 | (1L << (ANGLE_BITS * 2)) | (1L << (ANGLE_BITS * 4))); // lanes 0, 2, 4 (bits 0-9, 20-29, 40-49)
+    private static final long ANGLES_MAX_MASK =
+            (long)ANGLE_MASK * ((1L << ANGLE_BITS) | (1L << (ANGLE_BITS * 3)) | (1L << (ANGLE_BITS * 5))); // lanes 1, 3, 5 (bits 10-19, 30-39, 50-59)
+    private static final int LUT_DIM = 32; // ANGLE_LUT holds LUT_DIM * LUT_DIM entries
+    private static final int LUT_SHIFT = 5; // log2(LUT_DIM): (1 << 5) = 32
+
+    /**
+     * Lookup table of 20-bit packed angle pairs: (maxAngle(10)<<10) | minAngle(10).
+     * Indexed by [rise + run * 32], where rise/run are integers in [0, 31].
+     */
+    private static final int[] ANGLE_LUT = new int[LUT_DIM * LUT_DIM];
+
+    static {
+        for (int run = 0; run < LUT_DIM; run++) {
+            for (int rise = 0; rise < LUT_DIM; rise++) {
+                ANGLE_LUT[rise + run * LUT_DIM] = generateAngles(rise, run); // same [rise + run * 32] layout the lookups use
+            }
+        }
+    }
+
+    private static int generateAngles(int rise, int run) {
+        double minAngle = Math.atan2(rise - 1, run + 1); // lower bound: rise reduced by 1, run increased by 1
+        double maxAngle = Math.atan2(rise + 1, run - 1); // upper bound: rise increased by 1, run reduced by 1
+
+        // Clamp to [0, PI/2], then quantize to the 10-bit range [0, 1023]; the (int) cast truncates toward zero
+        int minQuant = (int) (Math.max(0.0, minAngle) * (ANGLE_MASK / (Math.PI / 2.0)));
+        int maxQuant = (int) (Math.min(Math.PI / 2.0, maxAngle) * (ANGLE_MASK / (Math.PI / 2.0)));
+
+        return (minQuant & ANGLE_MASK) | ((maxQuant & ANGLE_MASK) << ANGLE_BITS); // min in bits 0-9, max in bits 10-19
+    }
+
+    public void setOriginAngles() {
+        this.allowedAngles = ANGLES_MAX_MASK; // every min lane = 0, every max lane = 1023: nothing is excluded
+    }
+
+    /**
+     * Intersects the allowed angles from the 'other' section with the base angles
+     * subtended by this section, and stores the result in this section's allowedAngles.
+     *
+     * @param origin The origin of the visibility check.
+     * @param other The parent/previous section from which visibility is being propagated.
+     * @param frame The current frame number.
+     * @return false if this section is guaranteed not visible (allowedAngles is then left untouched), true otherwise.
+     */
+    public boolean intersectSlopes(SectionPos origin, RenderSection other, int frame) {
+        var dx = Math.abs(origin.getX() - this.getChunkX());
+        var dy = Math.abs(origin.getY() - this.getChunkY());
+        var dz = Math.abs(origin.getZ() - this.getChunkZ());
+
+        // Halve all three deltas together until each one fits the LUT index range [0, 31]
+        while ((dx | dy | dz) >= 32) {
+            // This is only true for the outermost rings of sections that have a distance
+            // of 32 when 32 chunks are visible, so we don't use more complex
+            // 32-Integer.numberOfLeadingZeros and per-plane shifting.
+            dx >>= 1;
+            dy >>= 1;
+            dz >>= 1;
+        }
+
+        long baseAngles = ANGLE_LUT[dx + (dy << LUT_SHIFT)] | // lanes 0-1: rise = dx, run = dy
+                ((long) ANGLE_LUT[dz + (dx << LUT_SHIFT)] << (2 * ANGLE_BITS)) | // lanes 2-3: rise = dz, run = dx
+                ((long) ANGLE_LUT[dy + (dz << LUT_SHIFT)] << (4 * ANGLE_BITS)); // lanes 4-5: rise = dy, run = dz
+
+        long pathAngles = parallel_unsigned_max_min(other.allowedAngles, baseAngles); // larger of the mins, smaller of the maxes
+
+        // Check if max < min for any plane, which means the path is occluded.
+        long borrows = parallel_unsigned_lt_msbs((pathAngles & ANGLES_MAX_MASK) >> ANGLE_BITS, pathAngles & ANGLES_MIN_MASK);
+        if (borrows != 0) {
+            return false;
+        }
+
+        if (this.lastVisibleFrame == frame) {
+            // This section has been visited before *this frame*.
+            // Union the angles: [min(oldMin, newMin), max(oldMax, newMax)]
+            pathAngles = parallel_unsigned_min_max(pathAngles, this.allowedAngles);
+        }
+        this.allowedAngles = pathAngles;
+
+        return true;
+    }
+
+    /**
+     * Performs a parallel unsigned less-than comparison (a < b) for 6 10-bit lanes.
+     *
+     * @param a 6 packed 10-bit values
+     * @param b 6 packed 10-bit values
+     * @return A long with the MSB of each lane (bit 9, 19, 29, ...) set if a_k < b_k.
+     *
+     * Based on `vhaddu8(~a, b)` (LTU_VARIANT 0) from:
+     * Stackoverflow
+     * Citing Peter L. Montgomery's observation
+     * comp.arch, 2000/02/11:
+     * (A+B)/2 = (A AND B) + (A XOR B)/2.
+     * The MSB of (A+B)/2 is the same as the carry-out of (A+B),
+     * and `vhaddu(~a, b)` calculates `(~a+b)/2`, which sets the MSB if `b > a`.
+     */
+    private static long parallel_unsigned_lt_msbs(long a, long b) {
+        // MSB (sign bit) for each 10-bit lane
+        final long LANE_MSB = 1L << (ANGLE_BITS - 1);
+        final long LANE_MSB_MASK = (LANE_MSB << (ANGLE_BITS * 0)) |
+                (LANE_MSB << (ANGLE_BITS * 1)) |
+                (LANE_MSB << (ANGLE_BITS * 2)) |
+                (LANE_MSB << (ANGLE_BITS * 3)) |
+                (LANE_MSB << (ANGLE_BITS * 4)) |
+                (LANE_MSB << (ANGLE_BITS * 5));
+        // All bits *except* the MSB for each 10-bit lane (limited to the low 60 bits)
+        final long LANE_NON_MSB_MASK = ((1L << (ANGLE_BITS * 6)) - 1) ^ LANE_MSB_MASK;
+
+        long vhaddu_result = (~a & b) + (((~a ^ b) >>> 1) & LANE_NON_MSB_MASK); // the mask clears the bit shifted in from the next lane
+        // Return just the MSBs, which are set if a_k < b_k
+        return vhaddu_result & LANE_MSB_MASK;
+    }
+
+    /**
+     * Creates a 60-bit mask (six fields of 0x3FF) where a field is all 1s
+     * if a_k < b_k, and 0 otherwise.
+     */
+    private static long parallel_unsigned_borrow_mask(long a, long b) {
+        // 'msbs' has bits 9, 19, 29, ... set if a_k < b_k
+        long msbs = parallel_unsigned_lt_msbs(a, b);
+
+        // Implements sign_to_mask for 10-bit lanes.
+        // (a + a - (a >> 9)) adapted from 8-bit (a + a - (a >> 7))
+        // This expands the MSB of each lane to fill the entire lane (0x200 -> 0x400 - 0x001 = 0x3FF)
+        return msbs + msbs - (msbs >>> 9);
+    }
+
+    /**
+     * Performs 6 parallel 10-bit *unsigned* min/max operations.
+     *
+     * @param a 6 packed 10-bit values
+     * @param b 6 packed 10-bit values
+     * @return 6 packed 10-bit values containing min(a_0, b_0), max(a_1, b_1), min(a_2, b_2) ..
+     */
+    private static long parallel_unsigned_min_max(long a, long b) {
+        long mask = parallel_unsigned_borrow_mask(a, b); // all bits set where a < b
+        mask ^= ANGLES_MAX_MASK; // invert the max-angle lanes so they keep the larger value; min-angle lanes keep the smaller
+        return (a & mask) | (b & ~mask); // per lane: take a where the mask is set, otherwise b
+    }
+
+    /**
+     * Performs 6 parallel 10-bit *unsigned* max/min operations (the lane-wise opposite of parallel_unsigned_min_max).
+     *
+     * @param a 6 packed 10-bit values
+     * @param b 6 packed 10-bit values
+     * @return 6 packed 10-bit values containing max(a_0, b_0), min(a_1, b_1), max(a_2, b_2) ..
+     */
+    private static long parallel_unsigned_max_min(long a, long b) {
+        long mask = parallel_unsigned_borrow_mask(a, b); // all bits set where a < b
+        mask ^= ANGLES_MIN_MASK; // invert the min-angle lanes so they keep the larger value; max-angle lanes keep the smaller
+        return (a & mask) | (b & ~mask); // per lane: take a where the mask is set, otherwise b
+    }
+
 /** * Returns a bitfield containing the {@link RenderSectionFlags} for this built section. */ diff --git a/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/occlusion/OcclusionCuller.java b/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/occlusion/OcclusionCuller.java index cd9b5cb661..a47156bde0 100644 --- a/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/occlusion/OcclusionCuller.java +++ b/common/src/main/java/net/caffeinemc/mods/sodium/client/render/chunk/occlusion/OcclusionCuller.java @@ -35,8 +35,14 @@ public void findVisible(RenderSectionVisitor visitor, this.init(visitor, queues.write(), viewport, searchDistance, useOcclusionCulling, frame); + SectionPos origin = viewport.getChunkCoord(); + if (this.getRenderSection(origin.getX(), origin.getY(), origin.getZ()) == null) { + // origin outside of world + origin = null; + } + while (queues.flip()) { - processQueue(visitor, viewport, searchDistance, useOcclusionCulling, frame, queues.read(), queues.write()); + processQueue(visitor, viewport, origin, searchDistance, useOcclusionCulling, frame, queues.read(), queues.write()); } 
this.addNearbySections(visitor, viewport, searchDistance, frame); @@ -44,6 +50,7 @@ public void findVisible(RenderSectionVisitor visitor, private static void processQueue(RenderSectionVisitor visitor, Viewport viewport, + SectionPos inBoundsOrigin, float searchDistance, boolean useOcclusionCulling, int frame, @@ -51,6 +58,7 @@ private static void processQueue(RenderSectionVisitor visitor, WriteQueue writeQueue) { RenderSection section; + SectionPos origin = viewport.getChunkCoord(); while ((section = readQueue.dequeue()) != null) { if (!isSectionVisible(section, viewport, searchDistance)) { @@ -80,10 +88,10 @@ private static void processQueue(RenderSectionVisitor visitor, // We can only traverse *outwards* from the center of the graph search, so mask off any invalid // directions. - connections &= getOutwardDirections(viewport.getChunkCoord(), section); + connections &= getOutwardDirections(origin, section); } - visitNeighbors(writeQueue, section, connections, frame); + visitNeighbors(writeQueue, inBoundsOrigin, section, connections, frame); } } @@ -115,7 +123,7 @@ private static boolean isSectionVisible(RenderSection section, Viewport viewport return isWithinRenderDistance(viewport.getTransform(), section, maxDistance) && isWithinFrustum(viewport, section); } - private static void visitNeighbors(final WriteQueue queue, RenderSection section, int outgoing, int frame) { + private static void visitNeighbors(final WriteQueue queue, SectionPos origin, RenderSection section, int outgoing, int frame) { // Only traverse into neighbors which are actually present. // This avoids a null-check on each invocation to enqueue, and since the compiler will see that a null // is never encountered (after profiling), it will optimize it away. @@ -126,30 +134,56 @@ private static void visitNeighbors(final WriteQueue queue, Render return; } - // This helps the compiler move the checks for some invariants upwards. 
- queue.ensureCapacity(6); + if (origin == null) { + // the viewpoint is outside the world, so the angle computations relying on propagating angle information + // from the origin section to the others won't work. + if (GraphDirectionSet.contains(outgoing, GraphDirection.DOWN)) { + visitNode(queue, section.adjacentDown, GraphDirectionSet.of(GraphDirection.UP), frame); + } + + if (GraphDirectionSet.contains(outgoing, GraphDirection.UP)) { + visitNode(queue, section.adjacentUp, GraphDirectionSet.of(GraphDirection.DOWN), frame); + } + + if (GraphDirectionSet.contains(outgoing, GraphDirection.NORTH)) { + visitNode(queue, section.adjacentNorth, GraphDirectionSet.of(GraphDirection.SOUTH), frame); + } + + if (GraphDirectionSet.contains(outgoing, GraphDirection.SOUTH)) { + visitNode(queue, section.adjacentSouth, GraphDirectionSet.of(GraphDirection.NORTH), frame); + } + + if (GraphDirectionSet.contains(outgoing, GraphDirection.WEST)) { + visitNode(queue, section.adjacentWest, GraphDirectionSet.of(GraphDirection.EAST), frame); + } + + if (GraphDirectionSet.contains(outgoing, GraphDirection.EAST)) { + visitNode(queue, section.adjacentEast, GraphDirectionSet.of(GraphDirection.WEST), frame); + } + return; + } - if (GraphDirectionSet.contains(outgoing, GraphDirection.DOWN)) { + if (GraphDirectionSet.contains(outgoing, GraphDirection.DOWN) && section.adjacentDown.intersectSlopes(origin, section, frame)) { visitNode(queue, section.adjacentDown, GraphDirectionSet.of(GraphDirection.UP), frame); } - if (GraphDirectionSet.contains(outgoing, GraphDirection.UP)) { + if (GraphDirectionSet.contains(outgoing, GraphDirection.UP) && section.adjacentUp.intersectSlopes(origin, section, frame)) { visitNode(queue, section.adjacentUp, GraphDirectionSet.of(GraphDirection.DOWN), frame); } - if (GraphDirectionSet.contains(outgoing, GraphDirection.NORTH)) { + if (GraphDirectionSet.contains(outgoing, GraphDirection.NORTH) && section.adjacentNorth.intersectSlopes(origin, section, frame)) { 
visitNode(queue, section.adjacentNorth, GraphDirectionSet.of(GraphDirection.SOUTH), frame); } - if (GraphDirectionSet.contains(outgoing, GraphDirection.SOUTH)) { + if (GraphDirectionSet.contains(outgoing, GraphDirection.SOUTH) && section.adjacentSouth.intersectSlopes(origin, section, frame)) { visitNode(queue, section.adjacentSouth, GraphDirectionSet.of(GraphDirection.NORTH), frame); } - if (GraphDirectionSet.contains(outgoing, GraphDirection.WEST)) { + if (GraphDirectionSet.contains(outgoing, GraphDirection.WEST) && section.adjacentWest.intersectSlopes(origin, section, frame)) { visitNode(queue, section.adjacentWest, GraphDirectionSet.of(GraphDirection.EAST), frame); } - if (GraphDirectionSet.contains(outgoing, GraphDirection.EAST)) { + if (GraphDirectionSet.contains(outgoing, GraphDirection.EAST) && section.adjacentEast.intersectSlopes(origin, section, frame)) { visitNode(queue, section.adjacentEast, GraphDirectionSet.of(GraphDirection.WEST), frame); } } @@ -291,6 +325,7 @@ private void initWithinWorld(RenderSectionVisitor visitor, WriteQueue