diff --git a/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs b/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs index e03df5fe5..7fdd83b8f 100644 --- a/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs +++ b/src/hyperlight_host/src/hypervisor/hyperlight_vm.rs @@ -19,8 +19,10 @@ use std::collections::HashMap; #[cfg(crashdump)] use std::path::Path; #[cfg(any(kvm, mshv3))] +use std::sync::atomic::AtomicBool; +use std::sync::atomic::AtomicU8; +#[cfg(any(kvm, mshv3))] use std::sync::atomic::AtomicU64; -use std::sync::atomic::{AtomicBool, AtomicU8}; use std::sync::{Arc, Mutex}; use log::LevelFilter; @@ -28,11 +30,11 @@ use tracing::{Span, instrument}; #[cfg(feature = "trace_guest")] use tracing_opentelemetry::OpenTelemetrySpanExt; -#[cfg(target_os = "windows")] -use super::WindowsInterruptHandle; #[cfg(gdb)] use super::gdb::{DebugCommChannel, DebugMsg, DebugResponse, DebuggableVm, VcpuStopReason, arch}; use super::regs::{CommonFpu, CommonRegisters}; +#[cfg(target_os = "windows")] +use super::{PartitionState, WindowsInterruptHandle}; use crate::HyperlightError::{ExecutionCanceledByHost, NoHypervisorFound}; #[cfg(not(gdb))] use crate::hypervisor::Hypervisor; @@ -168,8 +170,10 @@ impl HyperlightVm { #[cfg(target_os = "windows")] let interrupt_handle: Arc = Arc::new(WindowsInterruptHandle { state: AtomicU8::new(0), - partition_handle: vm.partition_handle(), - dropped: AtomicBool::new(false), + partition_state: std::sync::RwLock::new(PartitionState { + handle: vm.partition_handle(), + dropped: false, + }), }); #[cfg_attr(not(gdb), allow(unused_mut))] diff --git a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs index 76f6696bb..cc6876a5b 100644 --- a/src/hyperlight_host/src/hypervisor/hyperv_windows.rs +++ b/src/hyperlight_host/src/hypervisor/hyperv_windows.rs @@ -669,6 +669,11 @@ impl DebuggableVm for WhpVm { impl Drop for WhpVm { fn drop(&mut self) { + // HyperlightVm::drop() calls set_dropped() before this 
runs. + // set_dropped() ensures no WHvCancelRunVirtualProcessor calls are in progress + // or will be made in the future, so it's safe to delete the partition. + // (HyperlightVm::drop() runs before its fields are dropped, so + // set_dropped() completes before this Drop impl runs.) if let Err(e) = unsafe { WHvDeletePartition(self.partition) } { log::error!("Failed to delete partition: {}", e); } diff --git a/src/hyperlight_host/src/hypervisor/mod.rs b/src/hyperlight_host/src/hypervisor/mod.rs index 573e2acb8..5cdd74b48 100644 --- a/src/hyperlight_host/src/hypervisor/mod.rs +++ b/src/hyperlight_host/src/hypervisor/mod.rs @@ -57,7 +57,7 @@ use std::str::FromStr; #[cfg(any(kvm, mshv3))] use std::sync::atomic::{AtomicBool, AtomicU8, AtomicU64, Ordering}; #[cfg(target_os = "windows")] -use std::sync::atomic::{AtomicBool, AtomicU8, Ordering}; +use std::sync::atomic::{AtomicU8, Ordering}; #[cfg(any(kvm, mshv3))] use std::time::Duration; @@ -413,8 +413,34 @@ pub(super) struct WindowsInterruptHandle { /// (e.g., during host function calls), but is cleared at the start of each new `VirtualCPU::run()` call. state: AtomicU8, - partition_handle: windows::Win32::System::Hypervisor::WHV_PARTITION_HANDLE, - dropped: AtomicBool, + /// RwLock protecting the partition handle and dropped state. + /// + /// This lock prevents a race condition between `kill()` calling `WHvCancelRunVirtualProcessor` + /// and `WhpVm::drop()` calling `WHvDeletePartition`. These two Windows Hypervisor Platform APIs + /// must not execute concurrently - if `WHvDeletePartition` frees the partition while + /// `WHvCancelRunVirtualProcessor` is still accessing it, the result is a use-after-free + /// causing STATUS_ACCESS_VIOLATION or STATUS_HEAP_CORRUPTION. 
+ /// + /// The synchronization works as follows: + /// - `kill()` takes a read lock before calling `WHvCancelRunVirtualProcessor` + /// - `set_dropped()` takes a write lock, which blocks until all in-flight `kill()` calls complete, + /// then sets `dropped = true`. This is called from `HyperlightVm::drop()` before `WhpVm::drop()` + /// runs, ensuring no `kill()` is accessing the partition when `WHvDeletePartition` is called. + partition_state: std::sync::RwLock<PartitionState>, +} + +/// State protected by the RwLock in `WindowsInterruptHandle`. +/// +/// Contains a copy of the partition handle from `WhpVm` (not an owning reference). +/// The RwLock and `dropped` flag ensure this handle is never used after `WhpVm` +/// deletes the partition. +#[cfg(target_os = "windows")] +#[derive(Debug)] +pub(super) struct PartitionState { + /// Copy of partition handle from `WhpVm`. Only valid while `dropped` is false. + pub(super) handle: windows::Win32::System::Hypervisor::WHV_PARTITION_HANDLE, + /// Set true before partition deletion; prevents further use of `handle`. + pub(super) dropped: bool, +} #[cfg(target_os = "windows")] @@ -468,9 +494,20 @@ impl InterruptHandleImpl for WindowsInterruptHandle { } fn set_dropped(&self) { - // Release ordering to ensure all VM cleanup operations are visible - // to any thread that checks dropped() via Acquire - self.dropped.store(true, Ordering::Release); + // Take write lock to: + // 1. Wait for any in-flight kill() calls (holding read locks) to complete + // 2. Block new kill() calls from starting while we hold the write lock + // 3. Set dropped=true so no future kill() calls will use the handle + // After this returns, no WHvCancelRunVirtualProcessor calls are in progress + // or will ever be made, so WHvDeletePartition can safely be called. 
+ match self.partition_state.write() { + Ok(mut guard) => { + guard.dropped = true; + } + Err(e) => { + log::error!("Failed to acquire partition_state write lock: {}", e); + } + } } } @@ -486,11 +523,26 @@ impl InterruptHandle for WindowsInterruptHandle { // Acquire ordering to synchronize with the Release in set_running() // This ensures we see the running state set by the vcpu thread let state = self.state.load(Ordering::Acquire); - if state & Self::RUNNING_BIT != 0 { - unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() } - } else { - false + if state & Self::RUNNING_BIT == 0 { + return false; } + + // Take read lock to prevent race with WHvDeletePartition in set_dropped(). + // Multiple kill() calls can proceed concurrently (read locks don't block each other), + // but set_dropped() will wait for all kill() calls to complete before proceeding. + let guard = match self.partition_state.read() { + Ok(guard) => guard, + Err(e) => { + log::error!("Failed to acquire partition_state read lock: {}", e); + return false; + } + }; + + if guard.dropped { + return false; + } + + unsafe { WHvCancelRunVirtualProcessor(guard.handle, 0, 0).is_ok() } } #[cfg(gdb)] fn kill_from_debugger(&self) -> bool { @@ -498,19 +550,38 @@ impl InterruptHandle for WindowsInterruptHandle { self.state .fetch_or(Self::DEBUG_INTERRUPT_BIT, Ordering::Release); + // Acquire ordering to synchronize with the Release in set_running() let state = self.state.load(Ordering::Acquire); - if state & Self::RUNNING_BIT != 0 { - unsafe { WHvCancelRunVirtualProcessor(self.partition_handle, 0, 0).is_ok() } - } else { - false + if state & Self::RUNNING_BIT == 0 { + return false; } + + // Take read lock to prevent race with WHvDeletePartition in set_dropped() + let guard = match self.partition_state.read() { + Ok(guard) => guard, + Err(e) => { + log::error!("Failed to acquire partition_state read lock: {}", e); + return false; + } + }; + + if guard.dropped { + return false; + } + + unsafe { 
WHvCancelRunVirtualProcessor(guard.handle, 0, 0).is_ok() } + } fn dropped(&self) -> bool { - // Acquire ordering to synchronize with the Release in set_dropped() - // This ensures we see all VM cleanup operations that happened before drop - self.dropped.load(Ordering::Acquire) + // Take read lock to check dropped state consistently + match self.partition_state.read() { + Ok(guard) => guard.dropped, + Err(e) => { + log::error!("Failed to acquire partition_state read lock: {}", e); + true // Assume dropped if we can't acquire lock + } + } } } diff --git a/src/hyperlight_host/tests/integration_test.rs b/src/hyperlight_host/tests/integration_test.rs index 86a2b0f4f..2827acc60 100644 --- a/src/hyperlight_host/tests/integration_test.rs +++ b/src/hyperlight_host/tests/integration_test.rs @@ -1664,3 +1664,58 @@ fn exception_handler_installation_and_validation() { let count: i32 = sandbox.call("GetExceptionHandlerCallCount", ()).unwrap(); assert_eq!(count, 2, "Handler should have been called twice"); } + +/// This test is "likely" to catch a race condition where WHvCancelRunVirtualProcessor runs halfway, then the partition is deleted (by drop calling WHvDeletePartition), +/// and WHvCancelRunVirtualProcessor continues, and tries to access freed memory. +/// +/// Based on local observations, "likely" means that if the bug exists, running this test 5 times will catch it at least once. 
+#[test] +#[cfg(target_os = "windows")] +fn interrupt_cancel_delete_race() { + const NUM_THREADS: usize = 8; + const NUM_KILL_THREADS: usize = 4; + const ITERATIONS_PER_THREAD: usize = 1000; + + let mut handles = vec![]; + + for _ in 0..NUM_THREADS { + handles.push(thread::spawn(move || { + for _ in 0..ITERATIONS_PER_THREAD { + let mut sandbox: MultiUseSandbox = new_uninit().unwrap().evolve().unwrap(); + let interrupt_handle = sandbox.interrupt_handle(); + + let stop_flag = Arc::new(AtomicBool::new(false)); + + let kill_handles: Vec<_> = (0..NUM_KILL_THREADS) + .map(|_| { + let handle = interrupt_handle.clone(); + let stop = stop_flag.clone(); + thread::spawn(move || { + while !stop.load(Ordering::Relaxed) { + handle.kill(); + } + }) + }) + .collect(); + + // Makes sure RUNNING_BIT is set when kill() is called + let _ = sandbox.call::<String>("Echo", "test".to_string()); + + // Drop the sandbox while kill threads are spamming + drop(sandbox); + + // Signal kill threads to stop + stop_flag.store(true, Ordering::Relaxed); + + // Wait for kill threads + for kill_handle in kill_handles { + kill_handle.join().expect("Kill thread panicked!"); + } + } + })); + } + + for handle in handles { + handle.join().unwrap(); + } +}