Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,13 @@ def pytest_addoption(parser):
"4KiB blocksize to be formatted and used in storage tests. "
"Set it to 'auto' to let the fixtures auto-detect available disks."
)
parser.addoption(
"--expansion-sr-disk",
action="append",
default=[],
help="Name of an available disk (sdc) or partition device (sdc2) to be formatted and used in storage tests. "
"Set it to 'auto' to let the fixtures auto-detect available disks."
)

def pytest_configure(config):
global_config.ignore_ssh_banner = config.getoption('--ignore-ssh-banner')
Expand Down
17 changes: 16 additions & 1 deletion lib/host.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,7 +516,22 @@ def disks(self):
disks.sort()
return disks

def disk_is_available(self, disk):
def raw_disk_is_available(self, disk: str) -> bool:
    """
    Tell whether *disk* carries no identifiable filesystem or partition label.

    blkid exits with status 2 when it finds nothing recognizable on the
    device, which suggests the disk is "raw", likely unformatted, and
    therefore available for use.
    """
    probe = self.ssh_with_result(['blkid', '/dev/' + disk])
    return probe.returncode == 2

def disk_is_available(self, disk: str) -> bool:
    """
    Check whether a disk is unmounted and thus appears available for use.

    The disk may or may not carry an identifiable filesystem or partition
    label; only mountpoints are examined — an empty lsblk MOUNTPOINT column
    is taken to mean the disk is not in use.

    Warn: This may misclassify LVM_member disks (e.g. in XOSTOR, RAID, ZFS)
    as "available": such disks can have no mountpoints yet still be in use.
    """
    mountpoints = self.ssh(['lsblk', '-n', '-o', 'MOUNTPOINT', '/dev/' + disk])
    return not mountpoints.strip()

def available_disks(self, blocksize=512):
Expand Down
120 changes: 119 additions & 1 deletion tests/storage/linstor/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,10 @@ def lvm_disks(host, sr_disks_for_all_hosts, provisioning_type):
yield devices

for host in hosts:
devices = host.ssh('vgs ' + GROUP_NAME + ' -o pv_name --no-headings').split("\n")
host.ssh(['vgremove', '-f', GROUP_NAME])
for device in devices:
host.ssh(['pvremove', device])
host.ssh(['pvremove', '-ff', '-y', device.strip()])

@pytest.fixture(scope="package")
def storage_pool_name(provisioning_type):
Expand Down Expand Up @@ -108,3 +109,120 @@ def vm_on_linstor_sr(host, linstor_sr, vm_ref):
yield vm
logging.info("<< Destroy VM")
vm.destroy(verify=True)

@pytest.fixture(scope='module')
def prepare_linstor_packages(hostB1):
    """
    Ensure the LINSTOR packages are present on hostB1 for the module.

    Installs the release and main packages only when missing, then restarts
    multipathd and the toolstack so the new sm driver is picked up.
    Teardown removes the main LINSTOR package again.
    """
    if not hostB1.is_package_installed(LINSTOR_PACKAGE):
        logging.info("Installing %s on host %s", LINSTOR_PACKAGE, hostB1)
        hostB1.yum_install([LINSTOR_RELEASE_PACKAGE])
        hostB1.yum_install([LINSTOR_PACKAGE], enablerepo="xcp-ng-linstor-testing")
        # Needed because the linstor driver is not in the xapi sm-plugins list
        # before installing the LINSTOR packages.
        hostB1.ssh(["systemctl", "restart", "multipathd"])
        hostB1.restart_toolstack(verify=True)
    yield
    hostB1.yum_remove([LINSTOR_PACKAGE]) # Package cleanup

@pytest.fixture(scope='module')
def setup_lvm_on_host(hostB1):
    """
    Create PVs on every raw disk of hostB1 and gather them into GROUP_NAME.

    Yields (vg_name, devices) for the tests, then tears the VG and PV labels
    back down so the disks are raw again for subsequent runs.
    """
    # Ensure that the host has disks available to use, we do not care about disks symmetry across pool
    # We need the disk to be "raw" (non LVM_member etc) to use
    disks = [d for d in hostB1.available_disks() if hostB1.raw_disk_is_available(d)]
    assert disks, "hostB1 requires at least one raw disk"
    devices = [f"/dev/{d}" for d in disks]

    for disk in devices:
        logging.info("Found Disk %s", disk)
        hostB1.ssh(['pvcreate', disk])
    hostB1.ssh(['vgcreate', GROUP_NAME] + devices)

    # Yield GROUP_NAME (not a duplicated literal) so the VG name stays
    # consistent with the vgcreate above.
    yield GROUP_NAME, devices

    # Teardown: drop the VG and wipe the PV labels, mirroring the cleanup
    # done by the lvm_disks fixture, so nothing leaks between modules.
    hostB1.ssh(['vgremove', '-f', GROUP_NAME])
    for device in devices:
        hostB1.ssh(['pvremove', '-ff', '-y', device])

@pytest.fixture(scope='module')
def join_host_to_pool(host, hostB1):
    """
    Temporarily join hostB1 (which must be a standalone single-host pool)
    to host's pool; eject it and restore its original pool on teardown.
    """
    saved_pool = hostB1.pool
    assert len(saved_pool.hosts) == 1, "This test requires second host to be a single host"
    logging.info("Joining host %s to pool %s", hostB1, host)
    hostB1.join_pool(host.pool)
    yield
    host.pool.eject_host(hostB1)
    hostB1.pool = saved_pool

@pytest.fixture(scope='module')
def vm_with_reboot_check(vm_on_linstor_sr):
    """
    Boot the VM on the LINSTOR SR before the test, and afterwards verify
    the SR is still functional by running a full stop/start/stop cycle.
    """
    vm = vm_on_linstor_sr

    def _boot():
        vm.start()
        vm.wait_for_os_booted()

    _boot()
    yield vm
    vm.shutdown(verify=True)
    # Ensure the VM is still able to start and shut down on the modified SR.
    _boot()
    vm.shutdown(verify=True)

@pytest.fixture(scope='module')
def evacuate_host_and_prepare_removal(host, hostA2, vm_with_reboot_check):
    """
    Evacuate hostA2 so it can be removed from the pool.

    If live evacuation fails because the VM lacks migration support, the VM
    is shut down (forcefully if needed) and the evacuation retried; the VM
    is then restarted on another pool member so the SR stays in active use.
    """
    assert len(host.pool.hosts) >= 3, "This test requires the pool to have at least 3 hosts"

    vm = vm_with_reboot_check
    try:
        host.ssh(f'xe host-evacuate uuid={hostA2.uuid}')
    except Exception as e:
        logging.warning("Host evacuation failed: %s", e)
        # `stdout` may be missing OR present-but-None depending on the
        # exception type; `getattr`'s default only covers the missing case,
        # so normalize None to "" before searching.
        if "lacks the feature" in (getattr(e, "stdout", "") or ""):
            vm.shutdown(verify=True, force_if_fails=True)
            host.ssh(f'xe host-evacuate uuid={hostA2.uuid}')
    # Restart the VM on any remaining host so the SR keeps an active user.
    available_hosts = [h.uuid for h in host.pool.hosts if h.uuid != hostA2.uuid]
    if available_hosts:
        vm.start(on=available_hosts[0])
    yield

@pytest.fixture(scope='module')
def remove_host_from_linstor(host, hostA2, linstor_sr, evacuate_host_and_prepare_removal):
    """
    Delete hostA2 from the LINSTOR cluster and eject it from the pool.

    Order matters here: controller services are stopped first if hostA2 is
    running them, then the satellite, then the SR's PBD is unplugged, then
    the LINSTOR node is deleted, and only then is the host ejected.
    Teardown rejoins hostA2 to the pool and stops its leftover services.
    """
    import time
    # Find the host that IS currently running the LINSTOR controller
    # (returncode 0 means something listens on port 3370).
    linstor_controller_host = None
    for h in host.pool.hosts:
        if h.ssh_with_result(["ss -tuln | grep :3370"]).returncode == 0:
            linstor_controller_host = h
            break

    # If the controller is running on the host to be ejected (hostA2), stop the services first
    if linstor_controller_host and linstor_controller_host.uuid == hostA2.uuid:
        logging.info("Ejecting host is running LINSTOR controller, stopping services first.")
        hostA2.ssh("systemctl stop linstor-controller.service")
        hostA2.ssh("systemctl stop drbd-reactor.service")
        hostA2.ssh("systemctl stop drbd-graceful-shutdown.service")
        time.sleep(30)  # Give time for services to stop
        # NOTE(review): after this, the controller is expected to fail over
        # to another pool member — confirm drbd-reactor handles this.

    # LINSTOR node names match the XAPI host name-label.
    ejecting_host = hostA2.xe('host-param-get', {'uuid': hostA2.uuid, 'param-name': 'name-label'})
    # Pass every pool member so the client finds the controller wherever it now runs.
    controller_option = "--controllers=" + ",".join([m.hostname_or_ip for m in host.pool.hosts])

    hostA2.ssh("systemctl stop linstor-satellite.service")

    # Unplug the SR's PBD on hostA2 before removing the node from LINSTOR.
    pbd = host.xe('pbd-list', {'sr-uuid': linstor_sr.uuid, 'host-uuid': hostA2.uuid}, minimal=True)
    host.xe('pbd-unplug', {'uuid': pbd})

    logging.info(host.ssh_with_result(["linstor", controller_option, "node", "delete", ejecting_host]).stdout)
    host.pool.eject_host(hostA2)

    yield

    logging.info("Rejoining hostA2 to the pool after test")
    hostA2.join_pool(host.pool)
    # We dont want linstor services to be running on a deleted node
    hostA2.ssh("systemctl stop linstor-satellite.service")
    hostA2.ssh("systemctl stop drbd-graceful-shutdown.service")
    # TODO: Package list is not retained in teardown
    # hostA2.saved_packages_list = hostA2.packages()
    # hostA2.saved_rollback_id = hostA2.get_last_yum_history_tid()

@pytest.fixture(scope='module')
def get_sr_size(linstor_sr):
    """
    Record the SR's physical-size before the test and assert on teardown
    that the size changed during the test.
    """
    def _physical_size(sr):
        return int(sr.pool.master.xe('sr-param-get', {'uuid': sr.uuid, 'param-name': 'physical-size'}))

    size_before = _physical_size(linstor_sr)
    logging.info("SR Size: %s", size_before)
    yield
    size_after = _physical_size(linstor_sr)
    logging.info("New SR Size vs Old SR Size: %s vs %s", size_after, size_before)
    assert size_after != size_before, "SR size did not change"
188 changes: 187 additions & 1 deletion tests/storage/linstor/test_linstor_sr.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import pytest
import time

from .conftest import LINSTOR_PACKAGE
from .conftest import GROUP_NAME, LINSTOR_PACKAGE
from lib.commands import SSHCommandFailed
from lib.common import wait_for, vm_image
from tests.storage import vdi_is_open
Expand Down Expand Up @@ -86,6 +86,158 @@ def test_snapshot(self, vm_on_linstor_sr):
finally:
vm.shutdown(verify=True)

@pytest.mark.small_vm
def test_linstor_sr_expand_disk(self, linstor_sr, provisioning_type, storage_pool_name,
                                pytestconfig, vm_with_reboot_check):
    """
    This test demonstrates online expansion of a LINSTOR SR while a VM is actively running on it.

    It identifies hosts within the same pool, detects free raw disks, and expands the LVM to grow the SR.
    A VM is started before the expansion, and its functionality is verified through a shutdown and restart
    after the expansion completes successfully.
    """
    sr = linstor_sr
    size_before = sr.pool.master.xe('sr-param-get', {'uuid': sr.uuid, 'param-name': 'physical-size'})

    resized = _expand_lvm_on_hosts(sr, provisioning_type, storage_pool_name, pytestconfig)

    # Need to ensure that linstor is healthy/up-to-date before moving ahead.
    time.sleep(30)  # Wait time for Linstor node communications to restore.
    sr.scan()
    size_after = sr.pool.master.xe('sr-param-get', {'uuid': sr.uuid, 'param-name': 'physical-size'})
    assert resized is True and int(size_after) > int(size_before), \
        f"Expected SR size to increase but got old size: {size_before}, new size: {size_after}"
    logging.info("SR expansion completed")

@pytest.mark.small_vm
def test_linstor_sr_expand_host(self, linstor_sr, vm_with_reboot_check, prepare_linstor_packages,
                                join_host_to_pool, setup_lvm_on_host, host, hostB1, storage_pool_name,
                                provisioning_type):
    """
    This test validates expansion of a LINSTOR SR by dynamically adding a new host with local storage to the pool.
    A VM is started on the SR before expansion begins to ensure the SR is in active use during the process.

    It performs the following steps:
    - Installs LINSTOR packages on the new host (if missing).
    - Detects and prepares raw disks using LVM commands.
    - Joins the host (hostB1) to the existing pool and registers it with LINSTOR as a node.
    - Creates a new LINSTOR storage pool on the added host (LVM or LVM-thin, based on provisioning type).
    - Confirms SR expansion by verifying increased physical size.
    - Ensures SR functionality by rebooting the VM running on the SR.

    Finally, the test cleans up by deleting the LINSTOR node, ejecting the host from the pool,
    and removing packages and LVM metadata.
    """
    sr = linstor_sr
    sr_size = sr.pool.master.xe('sr-param-get', {'uuid': sr.uuid, 'param-name': 'physical-size'})
    resized = False

    # TODO: This section could be moved into a separate fixture for modularity.
    # However, capturing the SR size before expansion is critical to the test logic,
    # so it's intentionally kept inline to preserve control over the measurement point.

    # LINSTOR storage-pool names are prefixed this way by the sm driver.
    sr_group_name = "xcp-sr-" + storage_pool_name.replace("/", "_")
    # LINSTOR node names match the XAPI host name-label.
    hostname = hostB1.xe('host-param-get', {'uuid': hostB1.uuid, 'param-name': 'name-label'})
    # Pass every pool member so the client finds the controller wherever it runs.
    controller_option = "--controllers=" + ",".join([m.hostname_or_ip for m in host.pool.hosts])

    logging.info("Current list of linstor nodes:")
    logging.info(host.ssh_with_result(["linstor", controller_option, "node", "list"]).stdout)

    logging.info("Creating linstor node")
    host.ssh(["linstor", controller_option, "node", "create", "--node-type", "combined",
              "--communication-type", "plain", hostname, hostB1.hostname_or_ip])
    hostB1.ssh(['systemctl', 'restart', 'linstor-satellite.service'])
    # NOTE(review): 45s presumably lets the satellite register with the
    # controller before the storage pool is created — confirm this delay.
    time.sleep(45)

    logging.info("New list of linstor nodes:")
    logging.info(host.ssh_with_result(["linstor", controller_option, "node", "list"]).stdout)
    logging.info("Expanding with linstor node")

    if provisioning_type == "thin":
        # Thin provisioning needs a thin LV spanning the free space first.
        hostB1.ssh(['lvcreate', '-l', '+100%FREE', '-T', storage_pool_name])
        host.ssh(["linstor", controller_option, "storage-pool", "create", "lvmthin",
                  hostname, sr_group_name, storage_pool_name])
    else:
        host.ssh(["linstor", controller_option, "storage-pool", "create", "lvm",
                  hostname, sr_group_name, storage_pool_name])

    sr.scan()
    resized = True
    new_sr_size = sr.pool.master.xe('sr-param-get', {'uuid': sr.uuid, 'param-name': 'physical-size'})
    assert int(new_sr_size) > int(sr_size) and resized is True, \
        f"Expected SR size to increase but got old size: {sr_size}, new size: {new_sr_size}"
    logging.info("SR expansion completed from size %s to %s", sr_size, new_sr_size)

    # Cleanup
    host.ssh(["linstor", controller_option, "node", "delete", hostname])

@pytest.mark.small_vm
def test_linstor_sr_reduce_disk(self, linstor_sr, vm_with_reboot_check, provisioning_type):
    """
    Identify hosts within the same pool, detect used disks, modify LVM, and rescan LINSTOR SR.

    On each host, the last PV of GROUP_NAME is emptied with pvmove, removed
    from the VG, and wiped; the SR's physical size must shrink as a result.
    Not supported for thin provisioning, so the test returns early there.
    """
    if provisioning_type == "thin":
        logging.info(f"* SR reduction by removing device is not supported for {provisioning_type} type *")
        return
    sr = linstor_sr
    sr_size = int(sr.pool.master.xe('sr-param-get', {'uuid': sr.uuid, 'param-name': 'physical-size'}))
    resized = False

    for h in sr.pool.hosts:
        logging.info("Working on %s", h.hostname_or_ip)
        devices = h.ssh('vgs ' + GROUP_NAME + ' -o pv_name --no-headings').split("\n")
        # The f-prefix was missing here, so "{GROUP_NAME}" was reported literally.
        assert len(devices) > 1, f"This test requires {GROUP_NAME} to have more than 1 disk or partition"
        eject_device = devices[-1].strip()  # Choosing last device from list, assuming it's least filled
        logging.info("Attempting to remove device: %s", eject_device)
        try:
            h.ssh(['pvmove', eject_device])
            h.ssh(['vgreduce', GROUP_NAME, eject_device])
            h.ssh(['pvremove', eject_device])
        except SSHCommandFailed as e:
            # pvmove fails when the PV holds no allocated extents; the device
            # can then be dropped from the VG directly.
            if "No data to move for" in e.stdout:
                h.ssh(['vgreduce', GROUP_NAME, eject_device])
                h.ssh(['pvremove', eject_device])
            else:
                pytest.fail("Failed to empty device")
        h.ssh('systemctl restart linstor-satellite.service')
        resized = True

    # Need to ensure that linstor is healthy/up-to-date before moving ahead.
    time.sleep(30)  # Wait time for Linstor node communications to restore after service restart.

    sr.scan()

    new_sr_size = int(sr.pool.master.xe('sr-param-get', {'uuid': sr.uuid, 'param-name': 'physical-size'}))
    assert new_sr_size < sr_size and resized, \
        f"Expected SR size to decrease but got old size: {sr_size}, new size: {new_sr_size}"
    logging.info("SR reduction by removing disk is completed from %s to %s", sr_size, new_sr_size)

@pytest.mark.small_vm
def test_linstor_sr_reduce_host(self, linstor_sr, get_sr_size, vm_with_reboot_check, host, hostA2,
                                remove_host_from_linstor):
    """
    Remove non master host from the same pool Linstor SR.

    The remove_host_from_linstor fixture has already deleted hostA2 from
    LINSTOR and ejected it from the pool; this test verifies the SR's
    physical size shrank accordingly.
    TODO: Do we measure the time taken by the system to rebalance after host
    removal? Should the host be gracefully emptied or force-removed?
    """
    sr = linstor_sr
    sr_size = int(sr.pool.master.xe('sr-param-get', {'uuid': sr.uuid, 'param-name': 'physical-size'}))
    # NOTE: a leftover hard-coded sr_size override was removed here; it
    # defeated the real before/after measurement.

    # Restart satellite services for clean state. This can be optional.
    for h in host.pool.hosts:
        h.ssh(['systemctl', 'restart', 'linstor-satellite.service'])

    time.sleep(30)  # Wait till all services become normal

    sr.scan()
    new_sr_size = int(sr.pool.master.xe('sr-param-get', {'uuid': sr.uuid, 'param-name': 'physical-size'}))
    assert new_sr_size < sr_size, \
        f"Expected SR size to decrease but got old size: {sr_size}, new size: {new_sr_size}"
    logging.info("SR reduction by removing host is completed from %s to %s", sr_size, new_sr_size)

# *** tests with reboots (longer tests).

@pytest.mark.reboot
Expand Down Expand Up @@ -133,6 +285,40 @@ def test_linstor_missing(self, linstor_sr, host):

# *** End of tests with reboots

def _expand_lvm_on_hosts(sr, provisioning_type, storage_pool_name, pytestconfig):
    """
    Extend the SR's volume group on every pool host that has a free raw disk.

    Disk selection honours the --expansion-sr-disk option: one explicit
    device name, 'auto' (use every raw disk found), or unset (same as 'auto').

    Returns True if at least one device was added to the VG on any host.
    Raises RuntimeError if an LVM command fails on a host.
    """
    # SSHCommandFailed is already imported at the top of this file; the
    # redundant function-local import was removed.
    resized = False
    for h in sr.pool.hosts:
        logging.info("Checking for available disks on host: %s", h.hostname_or_ip)
        available_disks = [d for d in h.available_disks() if h.raw_disk_is_available(d)]

        expansion_sr_disk = pytestconfig.getoption("expansion_sr_disk")
        if expansion_sr_disk:
            assert len(expansion_sr_disk) == 1, "Only one --expansion-sr-disk should be provided"
            if expansion_sr_disk[0] == "auto":
                disks = available_disks
            else:
                assert expansion_sr_disk[0] in available_disks, "The specified expansion disk is unavailable"
                disks = expansion_sr_disk
        else:
            disks = available_disks

        for disk in disks:
            device = f"/dev/{disk}"
            try:
                h.ssh(['pvcreate', device])
                h.ssh(['vgextend', GROUP_NAME, device])
                if provisioning_type == "thin":
                    # Thin pools need the thin LV grown over the new extents.
                    h.ssh(['lvextend', '-l', '+100%FREE', storage_pool_name])
                else:
                    h.ssh(['systemctl', 'restart', 'linstor-satellite.service'])
                resized = True
                logging.info("LVM extended on host %s using device %s", h.hostname_or_ip, device)
            except SSHCommandFailed as e:
                # Chain the original failure so the SSH error stays visible.
                raise RuntimeError(f"Disk expansion failed on {h.hostname_or_ip}: {e}") from e
    return resized

# --- Test diskless resources --------------------------------------------------

def _get_diskful_hosts(host, controller_option, volume_name):
Expand Down