diff --git a/src/libbpfilter/cgen/program.c b/src/libbpfilter/cgen/program.c
index f8228eed1..b473e77c5 100644
--- a/src/libbpfilter/cgen/program.c
+++ b/src/libbpfilter/cgen/program.c
@@ -22,6 +22,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -902,6 +903,32 @@ static int _bf_program_load_log_map(struct bf_program *program)
     return 0;
 }
 
+/**
+ * @brief Hash callback for the dedup hashset.
+ *
+ * @param data Element bytes to hash.
+ * @param ctx Pointer to a `size_t` holding the element size, in bytes.
+ * @return Hash of the first `*ctx` bytes of `data`, as computed by
+ *         `bf_fnv1a()`.
+ */
+static uint64_t _bf_dedup_hash(const void *data, void *ctx)
+{
+    return bf_fnv1a(data, *(const size_t *)ctx, bf_fnv1a_init());
+}
+
+/**
+ * @brief Equality callback for the dedup hashset.
+ *
+ * @param lhs First element.
+ * @param rhs Second element.
+ * @param ctx Pointer to a `size_t` holding the element size, in bytes.
+ * @return True if the first `*ctx` bytes of `lhs` and `rhs` are identical.
+ */
+static bool _bf_dedup_equal(const void *lhs, const void *rhs, void *ctx)
+{
+    return memcmp(lhs, rhs, *(const size_t *)ctx) == 0;
+}
+
 /**
  * @brief Load set maps, one BPF map per `bf_set_group`.
  *
@@ -911,6 +938,9 @@ static int _bf_program_load_log_map(struct bf_program *program)
  * value is a bitmask: bit `i` of byte `i / CHAR_BIT` identifies the `i`-th
  * set in the group.
  *
+ * Per-group keys and bitmask values are prepared in user space so a single
+ * `bf_bpf_map_update_batch()` call populates the map on the kernel side.
+ *
  * Group ownership of the created maps is transferred to `handle->sets`;
  * the `bf_set_group::map` pointer is a non-owning back-reference used by
  * `_bf_program_fixup()` when resolving `BF_FIXUP_TYPE_SET_MAP_FD` fixups.
@@ -928,48 +958,94 @@ static int _bf_program_load_sets_maps(struct bf_program *new_prog)
         const struct bf_set *key_set =
             bf_list_node_get_data(bf_list_get_head(&group->sets));
         size_t n_sets = bf_list_size(&group->sets);
+        size_t i = 0;
+        _cleanup_free_ uint8_t *keys = NULL;
+        size_t key_size = key_set->elem_size;
+        _cleanup_free_ uint8_t *values = NULL;
         size_t value_size = (n_sets + CHAR_BIT - 1) / CHAR_BIT;
-        size_t total_elems = 0;
-        size_t bit_idx = 0;
+        const bf_hashset_ops dedup_ops = {
+            .hash = _bf_dedup_hash,
+            .equal = _bf_dedup_equal,
+            // The dedup hashset borrows elements from `bf_set`s.
+            .free = NULL,
+        };
+        _clean_bf_hashset_ bf_hashset unique_elements =
+            bf_hashset_default(&dedup_ops, &key_size);
+        size_t n_total_elems = 0;
+        size_t n_unique_elems;
+        size_t n_alloc;
         _free_bf_map_ struct bf_map *new_map = NULL;
-        _cleanup_free_ uint8_t *value = NULL;
         struct bf_map *map_ref;
 
-        /* Keys present in multiple sets will share a single map entry, so
-         * the actual entry count may be smaller, but we can err on the
-         * safe side. */
+        // Upper-bound the set capacity to avoid incremental rehashing.
         bf_list_foreach (&group->sets, set_node) {
             const struct bf_set *set = bf_list_node_get_data(set_node);
-            total_elems += bf_hashset_size(&set->elems);
-        }
 
-        (void)snprintf(name, BPF_OBJ_NAME_LEN, _BF_SET_MAP_PREFIX "%04x",
-                       (uint16_t)map_idx++);
+            n_total_elems += bf_hashset_size(&set->elems);
+        }
 
-        r = bf_map_new_from_set(&new_map, name, key_set, total_elems,
-                                value_size);
+        r = bf_hashset_reserve(&unique_elements, n_total_elems);
         if (r)
-            return r;
-
-        value = malloc(value_size);
-        if (!value)
-            return -ENOMEM;
+            return bf_err_r(r, "failed to reserve dedup hashset capacity");
 
+        // Find all unique elements across all sets in this group.
         bf_list_foreach (&group->sets, set_node) {
             const struct bf_set *set = bf_list_node_get_data(set_node);
 
             bf_hashset_foreach (&set->elems, elem) {
-                memset(value, 0, value_size);
-                (void)bf_bpf_map_lookup_elem(new_map->fd, elem->data, value);
-                value[bit_idx / CHAR_BIT] |=
-                    (uint8_t)(1U << (bit_idx % CHAR_BIT));
-                r = bf_map_set_elem(new_map, elem->data, value);
-                if (r)
-                    return bf_err_r(r, "failed to add set element to the map");
+                void *to_add = elem->data;
+                r = bf_hashset_add(&unique_elements, &to_add);
+                if (r && r != -EEXIST)
+                    return bf_err_r(r, "failed to dedup element");
+            }
+        }
+        n_unique_elems = bf_hashset_size(&unique_elements);
+
+        // Compute bf_map keys and values for batch insertion. calloc() may
+        // return NULL for a zero-sized request, so allocate room for at
+        // least one entry: a group of empty sets is not an allocation failure.
+        n_alloc = n_unique_elems ? n_unique_elems : 1;
+        keys = calloc(n_alloc, key_size);
+        if (!keys)
+            return bf_err_r(-ENOMEM, "failed to allocate map keys");
+
+        values = calloc(n_alloc, value_size);
+        if (!values)
+            return bf_err_r(-ENOMEM, "failed to allocate map values");
+
+        bf_hashset_foreach (&unique_elements, hentry) {
+            size_t bit_idx = 0;
+
+            // Compute the key.
+            memcpy(keys + (i * key_size), hentry->data, key_size);
+
+            // Compute the value (bitmask).
+            bf_list_foreach (&group->sets, set_node) {
+                const struct bf_set *set = bf_list_node_get_data(set_node);
+
+                if (bf_hashset_contains(&set->elems, hentry->data)) {
+                    values[(i * value_size) + (bit_idx / CHAR_BIT)] |=
+                        (uint8_t)(1U << (bit_idx % CHAR_BIT));
+                }
+                ++bit_idx;
             }
-            ++bit_idx;
+            ++i;
         }
 
+        // Create the BPF map from the computed keys and values.
+        (void)snprintf(name, BPF_OBJ_NAME_LEN, _BF_SET_MAP_PREFIX "%04x",
+                       (uint16_t)map_idx++);
+
+        r = bf_map_new_from_set(&new_map, name, key_set, n_unique_elems,
+                                value_size);
+        if (r)
+            return r;
+
+        r = bf_bpf_map_update_batch(new_map->fd, keys, values, n_unique_elems,
+                                    BPF_ANY);
+        if (r)
+            return bf_err_r(r, "failed to add set elements to the map");
+
         map_ref = new_map;
         r = bf_list_push(&new_prog->handle->sets, (void **)&new_map);
         if (r)
diff --git a/tests/e2e/rules/set_grouping.sh b/tests/e2e/rules/set_grouping.sh
index 6d6e51607..eeb9de861 100755
--- a/tests/e2e/rules/set_grouping.sh
+++ b/tests/e2e/rules/set_grouping.sh
@@ -33,3 +33,28 @@ ${FROM_NS} ${BFCLI} chain set --from-str "chain isolation BF_HOOK_XDP{ifindex=${
     rule (ip4.saddr) in a counter DROP"
 ping -c 1 -W 1 ${NS_IP_ADDR} || { echo "ERROR: ping should have succeeded"; exit 1; }
 ${FROM_NS} ${BFCLI} chain flush --name isolation
+
+# Multibyte bitmask: more than 8 same-key sets force a 2-byte value
+# (1 byte per 8 sets). The host's address sits in s8 only (bit_index 8 ->
+# byte 1, bit 0). Rule 0 references s0 (byte 0, bit 0) and must miss;
+# rule 1 references s8 and must drop.
+${FROM_NS} ${BFCLI} chain set --from-str "chain multibyte BF_HOOK_XDP{ifindex=${NS_IFINDEX}} ACCEPT
+    set s0 (ip4.saddr) in { 192.0.2.10 }
+    set s1 (ip4.saddr) in { 192.0.2.11 }
+    set s2 (ip4.saddr) in { 192.0.2.12 }
+    set s3 (ip4.saddr) in { 192.0.2.13 }
+    set s4 (ip4.saddr) in { 192.0.2.14 }
+    set s5 (ip4.saddr) in { 192.0.2.15 }
+    set s6 (ip4.saddr) in { 192.0.2.16 }
+    set s7 (ip4.saddr) in { 192.0.2.17 }
+    set s8 (ip4.saddr) in { ${HOST_IP_ADDR} }
+    rule (ip4.saddr) in s0 counter DROP
+    rule (ip4.saddr) in s8 counter DROP"
+(! ping -c 1 -W 1 ${NS_IP_ADDR}) || { echo "ERROR: ping should have been dropped via s8 (bit 8)"; exit 1; }
+count=$(${FROM_NS} find ${WORKDIR}/bpf/bpfilter/multibyte/ -name 'bf_set_*' | wc -l)
+[ "${count}" -eq 1 ] || { echo "ERROR: expected 1 map for 9 same-key sets, got ${count}"; exit 1; }
+miss=$(${FROM_NS} bpftool map dump pinned ${WORKDIR}/bpf/bpfilter/multibyte/bf_cmap | jq '.[0].value.count')
+[ "${miss}" = "0" ] || { echo "ERROR: s0 rule (byte 0) should not match, got ${miss}"; exit 1; }
+hit=$(${FROM_NS} bpftool map dump pinned ${WORKDIR}/bpf/bpfilter/multibyte/bf_cmap | jq '.[1].value.count')
+[ "${hit}" -ge 1 ] || { echo "ERROR: s8 rule (byte 1) should match at least once, got ${hit}"; exit 1; }
+${FROM_NS} ${BFCLI} chain flush --name multibyte