12
12
#include " forward.h"
13
13
#include " grid_sample.h"
14
14
#include " auxiliary.h"
15
+ #include " stopthepop_2DGS/stopthepop_common.cuh"
16
+ #include " stopthepop_2DGS/resorted_render.cuh"
15
17
#include < cooperative_groups.h>
16
18
#include < cooperative_groups/reduce.h>
17
19
namespace cg = cooperative_groups;
@@ -181,6 +183,7 @@ __global__ void preprocessCUDA(int P, int D, int M,
181
183
const float tan_fovx, const float tan_fovy,
182
184
const float focal_x, const float focal_y,
183
185
int * radii,
186
+ float2 * rects,
184
187
float2 * points_xy_image,
185
188
float * depths,
186
189
float * transMats,
@@ -233,9 +236,18 @@ __global__ void preprocessCUDA(int P, int D, int M,
233
236
float radius = ceil (truncated_R * max (max (extent.x , extent.y ), FilterSize));
234
237
235
238
uint2 rect_min, rect_max;
236
- getRect (center, radius, rect_min, rect_max, grid);
237
- if ((rect_max.x - rect_min.x ) * (rect_max.y - rect_min.y ) == 0 )
238
- return ;
239
+ #if FAST_INFERENCE
240
+ if (radius > MAX_BILLBOARD_SIZE)
241
+ getRectOld (center, radius, rect_min, rect_max, grid);
242
+ else
243
+ getRect (center, extent, rect_min, rect_max, grid);
244
+ #else
245
+ getRectOld (center, radius, rect_min, rect_max, grid);
246
+ #endif
247
+
248
+ if ((rect_max.x - rect_min.x ) * (rect_max.y - rect_min.y ) == 0 ) {
249
+ return ;
250
+ }
239
251
240
252
// compute colors
241
253
if (colors_precomp == nullptr ) {
@@ -246,6 +258,7 @@ __global__ void preprocessCUDA(int P, int D, int M,
246
258
}
247
259
248
260
depths[idx] = p_view.z ;
261
+ rects[idx] = extent;
249
262
radii[idx] = (int )radius;
250
263
points_xy_image[idx] = center;
251
264
// store them in float4
@@ -299,7 +312,6 @@ renderCUDA(
299
312
300
313
// Allocate storage for batches of collectively fetched data.
301
314
__shared__ int collected_id[BLOCK_SIZE];
302
- __shared__ float2 collected_xy[BLOCK_SIZE];
303
315
__shared__ float3 collected_normal[BLOCK_SIZE];
304
316
__shared__ float3 collected_Tu[BLOCK_SIZE];
305
317
__shared__ float3 collected_Tv[BLOCK_SIZE];
@@ -319,7 +331,7 @@ renderCUDA(
319
331
float dist1 = {0 };
320
332
float dist2 = {0 };
321
333
float distortion = {0 };
322
- float median_depth = {0 };
334
+ float median_depth = {100 };
323
335
float median_weight = {0 };
324
336
float median_contributor = {-1 };
325
337
@@ -339,7 +351,6 @@ renderCUDA(
339
351
{
340
352
int coll_id = point_list[range.x + progress];
341
353
collected_id[block.thread_rank ()] = coll_id;
342
- collected_xy[block.thread_rank ()] = points_xy_image[coll_id];
343
354
collected_normal[block.thread_rank ()] = normal_array[coll_id];
344
355
collected_Tu[block.thread_rank ()] = {transMats[9 * coll_id+0 ], transMats[9 * coll_id+1 ], transMats[9 * coll_id+2 ]};
345
356
collected_Tv[block.thread_rank ()] = {transMats[9 * coll_id+3 ], transMats[9 * coll_id+4 ], transMats[9 * coll_id+5 ]};
@@ -409,7 +420,7 @@ renderCUDA(
409
420
float error = mapped_depth * mapped_depth * A + dist2 - 2 * mapped_depth * dist1;
410
421
distortion += error * alpha * T;
411
422
412
- if (T > 0.5 ) {
423
+ if (T > 0.5 && alpha > 0.05 ) {
413
424
median_depth = depth;
414
425
median_weight = alpha * T;
415
426
median_contributor = contributor;
@@ -484,25 +495,48 @@ void FORWARD::render(
484
495
float * out_others,
485
496
float * impact)
486
497
{
487
- renderCUDA<NUM_CHANNELS> << <grid, block >> > (
488
- ranges,
489
- point_list,
490
- W, H,
491
- focal_x, focal_y,
492
- means2D,
493
- colors,
494
- texture_alpha,
495
- texture_color,
496
- texture_size,
497
- transMats,
498
- depths,
499
- normal_array,
500
- final_T,
501
- n_contrib,
502
- bg_color,
503
- out_color,
504
- out_others,
505
- impact);
498
+
499
+ #if PIXEL_RESORTING
500
+ renderBufferCUDA<NUM_CHANNELS> << <grid, block >> > (
501
+ ranges,
502
+ point_list,
503
+ W, H,
504
+ focal_x, focal_y,
505
+ means2D,
506
+ colors,
507
+ texture_alpha,
508
+ texture_color,
509
+ texture_size,
510
+ transMats,
511
+ depths,
512
+ normal_array,
513
+ final_T,
514
+ n_contrib,
515
+ bg_color,
516
+ out_color,
517
+ out_others,
518
+ impact);
519
+ #else
520
+ renderCUDA<NUM_CHANNELS> << <grid, block >> > (
521
+ ranges,
522
+ point_list,
523
+ W, H,
524
+ focal_x, focal_y,
525
+ means2D,
526
+ colors,
527
+ texture_alpha,
528
+ texture_color,
529
+ texture_size,
530
+ transMats,
531
+ depths,
532
+ normal_array,
533
+ final_T,
534
+ n_contrib,
535
+ bg_color,
536
+ out_color,
537
+ out_others,
538
+ impact);
539
+ #endif
506
540
}
507
541
508
542
void FORWARD::preprocess (int P, int D, int M,
@@ -521,6 +555,7 @@ void FORWARD::preprocess(int P, int D, int M,
521
555
const float focal_x, const float focal_y,
522
556
const float tan_fovx, const float tan_fovy,
523
557
int * radii,
558
+ float2 * rects,
524
559
float2 * means2D,
525
560
float * depths,
526
561
float * transMats,
@@ -547,6 +582,7 @@ void FORWARD::preprocess(int P, int D, int M,
547
582
tan_fovx, tan_fovy,
548
583
focal_x, focal_y,
549
584
radii,
585
+ rects,
550
586
means2D,
551
587
depths,
552
588
transMats,
@@ -557,3 +593,34 @@ void FORWARD::preprocess(int P, int D, int M,
557
593
prefiltered
558
594
);
559
595
}
596
+
597
// Host-side launcher for the duplicateWithKeys_extended kernel.
//
// Forwards every argument unchanged to the kernel and launches it on a 1D grid
// of ceil(P / 256) blocks with 256 threads each, i.e. at least P threads in
// total. The launch is asynchronous on the default stream; no error checking
// or synchronization is performed here.
//
// Parameters (all pointers are passed straight through to the kernel):
//   P                         element count used to size the launch.
//   W, H                      forwarded as-is (presumably image width/height
//                             in pixels -- confirm against the kernel).
//   focal_x, focal_y          forwarded as-is.
//   means2D, depths, scales,
//   view2gaussians, offsets,
//   radii, rects              read-only inputs for the kernel.
//   gaussian_keys_unsorted,
//   gaussian_values_unsorted  output buffers written by the kernel.
//   grid                      forwarded as a kernel argument; it is NOT the
//                             launch configuration of this kernel.
//
// NOTE(review): the meaning of the <false, true> template arguments is defined
// with duplicateWithKeys_extended (not visible here) -- confirm before changing.
void FORWARD::duplicate(
	int P,
	int W, int H,
	const float focal_x, const float focal_y,
	const float2* means2D,
	const float* depths,
	const float2* scales,
	const float* view2gaussians,
	const uint32_t* offsets,
	const int* radii,
	const float2* rects,
	uint64_t* gaussian_keys_unsorted,
	uint32_t* gaussian_values_unsorted,
	dim3 grid)
{
	// Nothing to do for an empty input. Without this guard the ceil-division
	// below yields a zero-sized grid, which CUDA rejects with an
	// invalid-configuration launch error that would linger until the next
	// cudaGetLastError() call; a negative P could even wrap to a huge grid.
	if (P <= 0)
		return;

	constexpr int kThreadsPerBlock = 256;
	// Ceil-division so that a partially filled last block is still launched.
	const int numBlocks = (P + kThreadsPerBlock - 1) / kThreadsPerBlock;

	duplicateWithKeys_extended<false, true><<<numBlocks, kThreadsPerBlock>>>(
		P, W, H, focal_x, focal_y,
		means2D,
		depths,
		scales,
		view2gaussians,
		offsets,
		radii,
		rects,
		gaussian_keys_unsorted,
		gaussian_values_unsorted,
		grid
	);
}
0 commit comments