@@ -48,6 +48,7 @@ struct bpf_arena {
 	u64 user_vm_end;
 	struct vm_struct *kern_vm;
 	struct range_tree rt;
+	struct range_tree rt_guard;
 	struct list_head vma_list;
 	struct mutex lock;
 };
@@ -143,6 +144,20 @@ static struct bpf_map *arena_map_alloc(union bpf_attr *attr)
 		bpf_map_area_free(arena);
 		goto err;
 	}
+
+	/*
+	 * Use the same semantics as the main range tree to reuse
+	 * its methods: Present ranges are all unguarded, while
+	 * absent ones are guarded.
+	 */
+	range_tree_init(&arena->rt_guard);
+	err = range_tree_set(&arena->rt_guard, 0, attr->max_entries);
+	if (err) {
+		range_tree_destroy(&arena->rt);
+		bpf_map_area_free(arena);
+		goto err;
+	}
+
 	mutex_init(&arena->lock);
 
 	return &arena->map;
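The comment above pins down the encoding: in both trees a set range means "available" (free in rt, unguarded in rt_guard) and a cleared range means "taken" (allocated, guarded). As a minimal sketch of that invariant, here is a hypothetical helper that is not part of the patch, relying only on the is_range_tree_set() behavior already visible in this file (it returns 0 only when the whole range is present):

/*
 * Hypothetical helper, not in the patch: a page may be handed out by the
 * allocator only when it is present in both trees, i.e. free (rt) and
 * unguarded (rt_guard).
 */
static bool page_is_available(struct bpf_arena *arena, long pgoff)
{
	return !is_range_tree_set(&arena->rt, pgoff, 1) &&
	       !is_range_tree_set(&arena->rt_guard, pgoff, 1);
}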
@@ -193,6 +208,7 @@ static void arena_map_free(struct bpf_map *map)
 	apply_to_existing_page_range(&init_mm, bpf_arena_get_kern_vm_start(arena),
 				     KERN_VM_SZ - GUARD_SZ, existing_page_cb, NULL);
 	free_vm_area(arena->kern_vm);
+	range_tree_destroy(&arena->rt_guard);
 	range_tree_destroy(&arena->rt);
 	bpf_map_area_free(arena);
 }
@@ -282,6 +298,11 @@ static vm_fault_t arena_vm_fault(struct vm_fault *vmf)
 		/* User space requested to segfault when page is not allocated by bpf prog */
 		return VM_FAULT_SIGSEGV;
 
+	/* Make sure the page is not guarded. */
+	ret = is_range_tree_set(&arena->rt_guard, vmf->pgoff, 1);
+	if (ret)
+		return VM_FAULT_SIGSEGV;
+
 	ret = range_tree_clear(&arena->rt, vmf->pgoff, 1);
 	if (ret)
 		return VM_FAULT_SIGSEGV;
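For context, a hedged user-space sketch (not part of the patch; arena_map_fd, arena_sz and guarded_off are illustrative names): with the check above, touching a guarded page through the arena mapping is expected to deliver SIGSEGV instead of faulting a fresh page in on demand.

#include <stdio.h>
#include <sys/mman.h>

static int touch_arena_page(int arena_map_fd, size_t arena_sz, size_t guarded_off)
{
	char *base;

	/* BPF arena maps created with BPF_F_MMAPABLE can be mmap()ed via the map fd. */
	base = mmap(NULL, arena_sz, PROT_READ | PROT_WRITE, MAP_SHARED,
		    arena_map_fd, 0);
	if (base == MAP_FAILED) {
		perror("mmap");
		return -1;
	}

	/*
	 * If a BPF program previously guarded the page at guarded_off with
	 * bpf_arena_guard_pages(), this store is expected to raise SIGSEGV
	 * (the fault handler above returns VM_FAULT_SIGSEGV).
	 */
	base[guarded_off] = 1;
	return 0;
}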
@@ -456,12 +477,17 @@ static long arena_alloc_pages(struct bpf_arena *arena, long uaddr, long page_cnt
 		ret = is_range_tree_set(&arena->rt, pgoff, page_cnt);
 		if (ret)
 			goto out_free_pages;
-		ret = range_tree_clear(&arena->rt, pgoff, page_cnt);
 	} else {
 		ret = pgoff = range_tree_find(&arena->rt, page_cnt);
-		if (pgoff >= 0)
-			ret = range_tree_clear(&arena->rt, pgoff, page_cnt);
+		if (pgoff < 0)
+			goto out_free_pages;
 	}
+
+	ret = is_range_tree_set(&arena->rt_guard, pgoff, page_cnt);
+	if (ret)
+		goto out_free_pages;
+
+	ret = range_tree_clear(&arena->rt, pgoff, page_cnt);
 	if (ret)
 		goto out_free_pages;
 
@@ -512,6 +538,7 @@ static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt)
 	u64 full_uaddr, uaddr_end;
 	long kaddr, pgoff, i;
 	struct page *page;
+	int ret;
 
 	/* only aligned lower 32-bit are relevant */
 	uaddr = (u32)uaddr;
@@ -525,7 +552,14 @@ static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt)
 
 	guard(mutex)(&arena->lock);
 
+
 	pgoff = compute_pgoff(arena, uaddr);
+
+	/* Do not free regions that include guarded pages. */
+	ret = is_range_tree_set(&arena->rt_guard, pgoff, page_cnt);
+	if (ret)
+		return;
+
 	/* clear range */
 	range_tree_set(&arena->rt, pgoff, page_cnt);
 
@@ -550,6 +584,46 @@ static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt)
 	}
 }
 
+static int arena_guard_pages(struct bpf_arena *arena, long uaddr, u32 page_cnt)
+{
+	long page_cnt_max = (arena->user_vm_end - arena->user_vm_start) >> PAGE_SHIFT;
+	long pgoff;
+	int ret;
+
+	if (uaddr & ~PAGE_MASK)
+		return 0;
+
+	pgoff = compute_pgoff(arena, uaddr);
+	if (pgoff + page_cnt > page_cnt_max)
+		return -EINVAL;
+
+	guard(mutex)(&arena->lock);
+
+	/* Make sure we have not already guarded the pages. */
+	ret = is_range_tree_set(&arena->rt_guard, pgoff, page_cnt);
+	if (ret)
+		return -EALREADY;
+
+	/* Cannot guard already allocated pages. */
+	ret = is_range_tree_set(&arena->rt, pgoff, page_cnt);
+	if (ret)
+		return -EINVAL;
+
+	/* Reserve the region. */
+	ret = range_tree_clear(&arena->rt_guard, pgoff, page_cnt);
+	if (ret)
+		return ret;
+
+	/* Also "allocate" the region to prevent it from being allocated. */
+	ret = range_tree_clear(&arena->rt, pgoff, page_cnt);
+	if (ret) {
+		range_tree_set(&arena->rt_guard, pgoff, page_cnt);
+		return ret;
+	}
+
+	return 0;
+}
+
 __bpf_kfunc_start_defs();
 
 __bpf_kfunc void *bpf_arena_alloc_pages(void *p__map, void *addr__ign, u32 page_cnt,
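A minimal BPF-side sketch of how the new kfunc could be exercised, assuming the arena selftest conventions (bpf_arena_common.h providing __arena and the bpf_arena_alloc_pages() declaration), 4K pages, and that the page right after a fresh allocation is still free; program and symbol names here are illustrative, not part of the patch.

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include "bpf_arena_common.h"

#define ARENA_PAGE_SZ 4096 /* assumes 4K pages */

struct {
	__uint(type, BPF_MAP_TYPE_ARENA);
	__uint(map_flags, BPF_F_MMAPABLE);
	__uint(max_entries, 10); /* arena size in pages */
} arena SEC(".maps");

/* Declaration for the kfunc added by this patch. */
int bpf_arena_guard_pages(void *map, void __arena *ptr, __u32 page_cnt) __ksym __weak;

SEC("syscall")
int guard_demo(void *ctx)
{
	char __arena *page, *guard_at;

	/* Allocate one page so we have a known arena address to work from. */
	page = bpf_arena_alloc_pages(&arena, NULL, 1, -1 /* NUMA_NO_NODE */, 0);
	if (!page)
		return 1;

	/* Guard the (presumably still unallocated) page right after it. */
	guard_at = page + ARENA_PAGE_SZ;
	if (bpf_arena_guard_pages(&arena, guard_at, 1))
		return 2;

	/* Guarding the same range twice should fail (-EALREADY in the kernel). */
	if (!bpf_arena_guard_pages(&arena, guard_at, 1))
		return 3;

	/* Allocating at a guarded address should fail and return NULL. */
	if (bpf_arena_alloc_pages(&arena, guard_at, 1, -1, 0))
		return 4;

	return 0;
}

char _license[] SEC("license") = "GPL";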
@@ -573,11 +647,26 @@ __bpf_kfunc void bpf_arena_free_pages(void *p__map, void *ptr__ign, u32 page_cnt)
 		return;
 	arena_free_pages(arena, (long)ptr__ign, page_cnt);
 }
+
+__bpf_kfunc int bpf_arena_guard_pages(void *p__map, void *ptr__ign, u32 page_cnt)
+{
+	struct bpf_map *map = p__map;
+	struct bpf_arena *arena = container_of(map, struct bpf_arena, map);
+
+	if (map->map_type != BPF_MAP_TYPE_ARENA)
+		return -EINVAL;
+
+	if (!page_cnt)
+		return 0;
+
+	return arena_guard_pages(arena, (long)ptr__ign, page_cnt);
+}
 __bpf_kfunc_end_defs();
 
 BTF_KFUNCS_START(arena_kfuncs)
 BTF_ID_FLAGS(func, bpf_arena_alloc_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE | KF_ARENA_RET | KF_ARENA_ARG2)
 BTF_ID_FLAGS(func, bpf_arena_free_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE | KF_ARENA_ARG2)
+BTF_ID_FLAGS(func, bpf_arena_guard_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE | KF_ARENA_ARG2)
 BTF_KFUNCS_END(arena_kfuncs)
 
 static const struct btf_kfunc_id_set common_kfunc_set = {