Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions cmd/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -98,15 +98,17 @@ endif


if USING_PYTHON
bin_SCRIPTS += arc_summary arcstat dbufstat zilstat
CLEANFILES += arc_summary arcstat dbufstat zilstat
dist_noinst_DATA += %D%/arc_summary %D%/arcstat.in %D%/dbufstat.in %D%/zilstat.in
bin_SCRIPTS += arc_summary arcstat dbufstat zilstat zleak
CLEANFILES += arc_summary arcstat dbufstat zilstat zleak
dist_noinst_DATA += %D%/arc_summary %D%/arcstat.in %D%/dbufstat.in %D%/zilstat.in %D%/zleak

$(call SUBST,arcstat,%D%/)
$(call SUBST,dbufstat,%D%/)
$(call SUBST,zilstat,%D%/)
arc_summary: %D%/arc_summary
$(AM_V_at)cp $< $@
zleak: %D%/zleak
$(AM_V_at)cp $< $@
endif


Expand Down
45 changes: 39 additions & 6 deletions cmd/zdb/zdb.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,9 @@ extern uint_t zfs_reconstruct_indirect_combinations_max;
extern uint_t zfs_btree_verify_intensity;

static const char cmdname[] = "zdb";
uint8_t dump_opt[256];
uint8_t dump_opt[512];

#define ALLOCATABLE_OPT 256

typedef void object_viewer_t(objset_t *, uint64_t, void *data, size_t size);

Expand Down Expand Up @@ -1650,6 +1652,16 @@ dump_metaslab_stats(metaslab_t *msp)
dump_histogram(rt->rt_histogram, ZFS_RANGE_TREE_HISTOGRAM_SIZE, 0);
}

/*
 * Range-tree walk callback used to print the holes between walked segments.
 *
 * arg points to a uint64_t cursor holding the offset at which the previously
 * visited segment ended (the caller seeds it before the walk).  If the
 * segment passed in does not begin exactly at the cursor, the range between
 * the cursor and 'start' is printed as an "ALLOC: <offset> <length>" line.
 * The cursor is then moved to the end of this segment.
 */
static void
dump_allocated(void *arg, uint64_t start, uint64_t size)
{
	uint64_t *cursor = arg;
	uint64_t prev_end = *cursor;

	if (prev_end != start) {
		(void) printf("ALLOC: %"PRIu64" %"PRIu64"\n", prev_end,
		    start - prev_end);
	}

	/* The next hole, if any, begins where this segment stops. */
	*cursor = start + size;
}

static void
dump_metaslab(metaslab_t *msp)
{
Expand All @@ -1666,13 +1678,24 @@ dump_metaslab(metaslab_t *msp)
(u_longlong_t)msp->ms_id, (u_longlong_t)msp->ms_start,
(u_longlong_t)space_map_object(sm), freebuf);

if (dump_opt['m'] > 2 && !dump_opt['L']) {
if (dump_opt[ALLOCATABLE_OPT] ||
(dump_opt['m'] > 2 && !dump_opt['L'])) {
mutex_enter(&msp->ms_lock);
VERIFY0(metaslab_load(msp));
}

if (dump_opt['m'] > 2 && !dump_opt['L']) {
zfs_range_tree_stat_verify(msp->ms_allocatable);
dump_metaslab_stats(msp);
metaslab_unload(msp);
mutex_exit(&msp->ms_lock);
}

if (dump_opt[ALLOCATABLE_OPT]) {
	/*
	 * Walk ms_allocatable starting from the metaslab's first offset;
	 * dump_allocated() prints every hole between consecutive segments
	 * as an "ALLOC:" line and leaves 'off' at the end of the last
	 * segment visited.
	 */
	uint64_t off = msp->ms_start;
	uint64_t ms_end = msp->ms_start + msp->ms_size;

	zfs_range_tree_walk(msp->ms_allocatable, dump_allocated,
	    &off);

	/*
	 * Report the hole between the last segment and the end of the
	 * metaslab.  'off' is an absolute offset (it was seeded with
	 * ms_start), so the length must be measured against the absolute
	 * end of the metaslab, not against ms_size alone.
	 */
	if (off != ms_end)
		(void) printf("ALLOC: %"PRIu64" %"PRIu64"\n", off,
		    ms_end - off);
}

if (dump_opt['m'] > 1 && sm != NULL &&
Expand All @@ -1687,6 +1710,12 @@ dump_metaslab(metaslab_t *msp)
SPACE_MAP_HISTOGRAM_SIZE, sm->sm_shift);
}

if (dump_opt[ALLOCATABLE_OPT] ||
(dump_opt['m'] > 2 && !dump_opt['L'])) {
metaslab_unload(msp);
mutex_exit(&msp->ms_lock);
}

if (vd->vdev_ops == &vdev_draid_ops)
ASSERT3U(msp->ms_size, <=, 1ULL << vd->vdev_ms_shift);
else
Expand Down Expand Up @@ -1723,8 +1752,9 @@ print_vdev_metaslab_header(vdev_t *vd)
}
}

(void) printf("\tvdev %10llu %s",
(u_longlong_t)vd->vdev_id, bias_str);
(void) printf("\tvdev %10llu\t%s metaslab shift %4lld",
(u_longlong_t)vd->vdev_id, bias_str,
(u_longlong_t)vd->vdev_ms_shift);

if (ms_flush_data_obj != 0) {
(void) printf(" ms_unflushed_phys object %llu",
Expand Down Expand Up @@ -9315,6 +9345,8 @@ main(int argc, char **argv)
{"all-reconstruction", no_argument, NULL, 'Y'},
{"livelist", no_argument, NULL, 'y'},
{"zstd-headers", no_argument, NULL, 'Z'},
{"allocatable-map", no_argument, NULL,
ALLOCATABLE_OPT},
{0, 0, 0, 0}
};

Expand Down Expand Up @@ -9345,6 +9377,7 @@ main(int argc, char **argv)
case 'u':
case 'y':
case 'Z':
case ALLOCATABLE_OPT:
dump_opt[c]++;
dump_all = 0;
break;
Expand Down
2 changes: 1 addition & 1 deletion cmd/zdb/zdb.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,6 @@
#define _ZDB_H

void dump_intent_log(zilog_t *);
extern uint8_t dump_opt[256];
extern uint8_t dump_opt[512];

#endif /* _ZDB_H */
2 changes: 1 addition & 1 deletion cmd/zdb/zdb_il.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@

#include "zdb.h"

extern uint8_t dump_opt[256];
extern uint8_t dump_opt[512];

static char tab_prefix[4] = "\t\t\t";

Expand Down
85 changes: 85 additions & 0 deletions cmd/zleak
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
#!/usr/bin/env python3
# SPDX-License-Identifier: CDDL-1.0

#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2025 by Klara, Inc.
#

import argparse, fileinput, libzfs_core, sys, errno

def perform_raw_alloc(pool, ms_shift, ms_count, vdev_id, allocs, force,
                      verbose):
    """
    Submit one batch of raw allocations to the pool via libzfs_core.

    :param bytes pool: name of the pool to allocate in
    :param int ms_shift: log2 of the metaslab size; passed on as
        ``1 << ms_shift``
    :param int ms_count: number of metaslabs in the vdev
    :param int vdev_id: id of the vdev the allocations belong to
    :param allocs: non-empty list of (offset, size) pairs
    :param bool force: forwarded to lzc_raw_alloc(); also makes an E2BIG
        failure end the program with exit status 0
    :param int verbose: 1 prints a short summary, 2 or more prints the
        full argument set

    Exits the process with status 1 when the call fails with EINVAL, and
    with status 0 when it fails with E2BIG while ``force`` is set.  Any
    other failure is re-raised to the caller.
    """
    # Use the batch itself rather than module-level state for the count.
    count = len(allocs)
    if verbose == 1:
        print(f"Raw alloc: vdev {vdev_id}, {count} starting with offset "
              f"{allocs[0][0]}")
    if verbose >= 2:
        print(f"Raw alloc: {pool} {ms_shift} {ms_count} {vdev_id} {count}")
    try:
        libzfs_core.lzc_raw_alloc(pool, 1 << ms_shift, ms_count, vdev_id,
                                  allocs, force)
    except libzfs_core.exceptions.ZFSGenericError as e:
        if e.errno == errno.EINVAL:
            print("Invalid map for provided pool")
            sys.exit(1)
        if e.errno == errno.E2BIG and force:
            sys.exit(0)
        # Anything else is unexpected.  Re-raise instead of relying on
        # assert, which is stripped when Python runs with -O and would
        # then let the failure fall through to a successful exit.
        raise

# Command-line interface: a mandatory pool name, a repeatable -v flag and
# an optional -f flag.
parser = argparse.ArgumentParser(
    description='facility to replicate memory fragmentation in ZFS',
    prog='zleak',
)
parser.add_argument('poolname')
parser.add_argument('-v', '--verbose', default=0, action='count')
parser.add_argument('-f', '--force', default=False, action='store_true')
args = parser.parse_args()

# The pool name is handed on to libzfs_core as bytes.
pool = args.poolname.encode('utf-8')

# Pending (offset, size) pairs and the number collected so far.
allocs = []
count = 0

# Largest batch handed to perform_raw_alloc() in one call.
MAX_ALLOCS_PER_CALL = 1000000

# Geometry of the vdev whose ALLOC lines are currently being collected.
# vdev_id stays None until the first "\tvdev" header has been parsed.
vdev_id = None
ms_shift = None
ms_count = 0

# Read the map from standard input.  Only three kinds of line matter:
#   "\tvdev <id> ... metaslab shift <n>"  - starts a new vdev
#   "\tmetaslabs <count> ..."             - metaslab count of that vdev
#   "ALLOC: <offset> <size>"              - one region to allocate
for line in fileinput.input('-'):
    line = line.rstrip()
    if not line.startswith(("ALLOC: ", "\tvdev ", "\tmetaslabs ")):
        continue

    tokens = line.split()
    if line.startswith("\tvdev "):
        # A new vdev header closes the batch of the previous vdev, which
        # must be submitted with that vdev's geometry.
        if count != 0:
            perform_raw_alloc(pool, ms_shift, ms_count, vdev_id, allocs,
                              args.force, args.verbose)
            count = 0
            allocs = []
        vdev_id = int(tokens[1])
        # Locate the value by its "shift" label instead of a fixed
        # position: the header carries an optional bias string between
        # the vdev id and "metaslab shift", which moves the token.
        ms_shift = int(tokens[tokens.index("shift") + 1])
        ms_count = 0
    elif line.startswith("\tmetaslabs "):
        ms_count = int(tokens[1])
    else:
        if vdev_id is None:
            print("ALLOC line found before any vdev header",
                  file=sys.stderr)
            sys.exit(1)
        allocs.append((int(tokens[1]), int(tokens[2])))
        count = count + 1
        if count == MAX_ALLOCS_PER_CALL:
            perform_raw_alloc(pool, ms_shift, ms_count, vdev_id, allocs,
                              args.force, args.verbose)
            count = 0
            allocs = []


# Submit whatever is left once the input is exhausted.
if count > 0:
    perform_raw_alloc(pool, ms_shift, ms_count, vdev_id, allocs,
                      args.force, args.verbose)

1 change: 1 addition & 0 deletions contrib/debian/openzfs-zfsutils.install
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ usr/sbin/arc_summary
usr/sbin/arcstat
usr/sbin/dbufstat
usr/sbin/zilstat
usr/sbin/zleak
usr/share/zfs/compatibility.d/
usr/share/bash-completion/completions
usr/share/man/man1/arcstat.1
Expand Down
1 change: 1 addition & 0 deletions contrib/debian/rules.in
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ override_dh_auto_install:
mv '$(CURDIR)/debian/tmp/usr/bin/arcstat' '$(CURDIR)/debian/tmp/usr/sbin/arcstat'
mv '$(CURDIR)/debian/tmp/usr/bin/dbufstat' '$(CURDIR)/debian/tmp/usr/sbin/dbufstat'
mv '$(CURDIR)/debian/tmp/usr/bin/zilstat' '$(CURDIR)/debian/tmp/usr/sbin/zilstat'
mv '$(CURDIR)/debian/tmp/usr/bin/zleak' '$(CURDIR)/debian/tmp/usr/sbin/zleak'

@# Zed has dependencies outside of the system root.
mv '$(CURDIR)/debian/tmp/sbin/zed' '$(CURDIR)/debian/tmp/usr/sbin/zed'
Expand Down
2 changes: 2 additions & 0 deletions contrib/pyzfs/libzfs_core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@
lzc_set_props,
lzc_list_children,
lzc_list_snaps,
lzc_raw_alloc,
receive_header,
)

Expand Down Expand Up @@ -151,6 +152,7 @@
'lzc_set_props',
'lzc_list_children',
'lzc_list_snaps',
'lzc_raw_alloc',
'receive_header',
]

Expand Down
6 changes: 6 additions & 0 deletions contrib/pyzfs/libzfs_core/_error_translation.py
Original file line number Diff line number Diff line change
Expand Up @@ -696,6 +696,12 @@ def lzc_list_translate_error(ret, name, opts):
raise _generic_exception(ret, name, "Error obtaining a list")


def lzc_raw_alloc_translate_errors(ret, name):
    '''
    Turn the return code of a raw allocation request into an exception.

    :param int ret: the return code; zero means success and nothing is
        raised
    :param name: the name passed on to the generated exception
    '''
    if ret != 0:
        raise _generic_exception(ret, name,
                                 "Error performing raw allocations")


def _handle_err_list(ret, errlist, names, exception, mapper):
'''
Convert one or more errors from an operation into the requested exception.
Expand Down
32 changes: 32 additions & 0 deletions contrib/pyzfs/libzfs_core/_libzfs_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2056,6 +2056,38 @@ def lzc_list_snaps(name):
return iter(snaps)


def lzc_raw_alloc(poolname, metaslab_size, metaslab_count, vdev_id,
                  allocations, force):
    '''
    Allocate regions of the provided vdev directly; useful primarily for
    performance analysis of fragmented pools. Results in space leakage that it
    is not currently possible to reclaim.

    :param bytes poolname: the name of the pool to allocate in
    :param int metaslab_size: the size of a metaslab in this pool (for
        validation)
    :param int metaslab_count: the number of metaslabs in this top level
        vdev (for validation)
    :param int vdev_id: the id of the top-level vdev to perform allocations
        from
    :param allocations: pairs of offset and size to allocate
    :type allocations: list of (int, int)
    :param bool force: passed through unchanged to the underlying
        ``lzc_raw_alloc`` library call

    :raises TooManyArguments: if more than 1000000 allocations are passed in

    Any non-zero return code from the library call is converted into an
    exception by ``lzc_raw_alloc_translate_errors``.
    '''
    if len(allocations) > 1000000:
        raise exceptions.TooManyArguments()
    # The C interface takes one flat array laid out as
    # [offset0, size0, offset1, size1, ...] plus its total element count.
    allocs = _ffi.new(f"uint64_t[{2 * len(allocations)}]")
    for i, (offset, size) in enumerate(allocations):
        allocs[2 * i] = offset
        allocs[2 * i + 1] = size
    ret = _lib.lzc_raw_alloc(poolname, uint64_t(metaslab_size),
                             uint64_t(metaslab_count), uint64_t(vdev_id),
                             allocs, 2 * len(allocations), force)
    errors.lzc_raw_alloc_translate_errors(ret, poolname)


# TODO: a better way to init and uninit the library
def _initialize():
class LazyInit(object):
Expand Down
3 changes: 3 additions & 0 deletions contrib/pyzfs/libzfs_core/bindings/libzfs_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,9 @@
int lzc_inherit(const char *fsname, const char *name, nvlist_t *);
int lzc_set_props(const char *, nvlist_t *, nvlist_t *, nvlist_t *);
int lzc_list (const char *, nvlist_t *);

int lzc_raw_alloc(const char *, uint64_t, uint64_t, uint64_t,
uint64_t *, uint_t, boolean_t);
"""

SOURCE = """
Expand Down
4 changes: 4 additions & 0 deletions contrib/pyzfs/libzfs_core/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -605,4 +605,8 @@ class RaidzExpansionRunning(ZFSError):
message = "A raidz device is currently expanding"


class TooManyArguments(ZFSError):
    # Human-readable description attached to this exception type.
    message = "Too many arguments provided"
    # errno value this exception type reports.
    error = errno.EOVERFLOW

# vim: softtabstop=4 tabstop=4 expandtab shiftwidth=4
2 changes: 2 additions & 0 deletions include/libzfs_core.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,8 @@ _LIBZFS_CORE_H int lzc_scrub(zfs_ioc_t, const char *, nvlist_t *, nvlist_t **);

_LIBZFS_CORE_H int lzc_ddt_prune(const char *, zpool_ddt_prune_unit_t,
uint64_t);
_LIBZFS_CORE_H int lzc_raw_alloc(const char *, uint64_t, uint64_t, uint64_t,
uint64_t *, uint_t, boolean_t);

#ifdef __cplusplus
}
Expand Down
3 changes: 2 additions & 1 deletion include/sys/fs/zfs.h
Original file line number Diff line number Diff line change
Expand Up @@ -1464,7 +1464,7 @@ typedef enum {
*/
typedef enum zfs_ioc {
/*
* Core features - 89/128 numbers reserved.
* Core features - 90/128 numbers reserved.
*/
#ifdef __FreeBSD__
ZFS_IOC_FIRST = 0,
Expand Down Expand Up @@ -1562,6 +1562,7 @@ typedef enum zfs_ioc {
ZFS_IOC_POOL_SCRUB, /* 0x5a57 */
ZFS_IOC_POOL_PREFETCH, /* 0x5a58 */
ZFS_IOC_DDT_PRUNE, /* 0x5a59 */
ZFS_IOC_RAW_ALLOC, /* 0x5a5a */

/*
* Per-platform (Optional) - 8/128 numbers reserved.
Expand Down
3 changes: 3 additions & 0 deletions include/sys/metaslab.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,9 @@ extern int metaslab_debug_load;
zfs_range_seg_type_t metaslab_calculate_range_tree_type(vdev_t *vdev,
metaslab_t *msp, uint64_t *start, uint64_t *shift);

void metaslab_force_alloc(metaslab_t *msp, uint64_t start, uint64_t size,
dmu_tx_t *tx);

#ifdef __cplusplus
}
#endif
Expand Down
3 changes: 3 additions & 0 deletions include/sys/vdev.h
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,9 @@ extern int vdev_label_init(vdev_t *vd, uint64_t txg, vdev_labeltype_t reason);
extern int vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl);
extern int vdev_prop_get(vdev_t *vd, nvlist_t *nvprops, nvlist_t *outnvl);

extern int vdev_raw_alloc(vdev_t *vd, uint64_t *allocations,
uint_t alloc_count);

#ifdef __cplusplus
}
#endif
Expand Down
Loading