@@ -14,14 +14,16 @@ use core::{
     hash::{Hash, Hasher},
     ops::Range,
     sync::atomic::{
-        AtomicUsize,
-        Ordering::{AcqRel, Acquire},
+        AtomicBool, AtomicUsize,
+        Ordering::{AcqRel, Acquire, Release},
     },
 };
 use hashbrown::{HashMap, HashSet};
 use wasmtime_environ::{
-    iter_entity_range, EngineOrModuleTypeIndex, ModuleInternedTypeIndex, ModuleTypes, PrimaryMap,
-    SecondaryMap, TypeTrace, VMSharedTypeIndex, WasmRecGroup, WasmSubType,
+    iter_entity_range,
+    packed_option::{PackedOption, ReservedValue},
+    EngineOrModuleTypeIndex, ModuleInternedTypeIndex, ModuleTypes, PrimaryMap, SecondaryMap,
+    TypeTrace, VMSharedTypeIndex, WasmRecGroup, WasmSubType,
 };
 use wasmtime_slab::{Id as SlabId, Slab};
 
@@ -154,12 +156,15 @@ impl Drop for TypeCollection {
 
 #[inline]
 fn shared_type_index_to_slab_id(index: VMSharedTypeIndex) -> SlabId {
+    assert!(!index.is_reserved_value());
     SlabId::from_raw(index.bits())
 }
 
 #[inline]
 fn slab_id_to_shared_type_index(id: SlabId) -> VMSharedTypeIndex {
-    VMSharedTypeIndex::new(id.into_raw())
+    let index = VMSharedTypeIndex::new(id.into_raw());
+    assert!(!index.is_reserved_value());
+    index
 }
 
 /// A Wasm type that has been registered in the engine's `TypeRegistry`.
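// An illustrative, self-contained sketch of the reserved-value niche that the
// new `is_reserved_value` assertions guard against; `DemoIndex` and
// `DemoPackedOption` are simplified stand-ins for `VMSharedTypeIndex` and
// `wasmtime_environ`'s `PackedOption`/`ReservedValue`.

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
struct DemoIndex(u32);

impl DemoIndex {
    // The all-ones bit pattern is reserved so that an `Option`-like wrapper
    // can encode `None` without taking any extra space.
    const RESERVED: u32 = u32::MAX;

    fn is_reserved_value(self) -> bool {
        self.0 == Self::RESERVED
    }
}

// A packed optional index: `None` is stored as the reserved bit pattern, so
// the whole value stays 4 bytes instead of 8.
struct DemoPackedOption(u32);

impl DemoPackedOption {
    fn none() -> Self {
        DemoPackedOption(DemoIndex::RESERVED)
    }

    fn some(idx: DemoIndex) -> Self {
        // A live index that happened to use the reserved pattern would be
        // indistinguishable from `None`; asserting at every slab-id
        // conversion rules that confusion out early.
        assert!(!idx.is_reserved_value());
        DemoPackedOption(idx.0)
    }

    fn expand(self) -> Option<DemoIndex> {
        if self.0 == DemoIndex::RESERVED {
            None
        } else {
            Some(DemoIndex(self.0))
        }
    }
}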
@@ -368,8 +373,28 @@ impl Debug for RecGroupEntry {
 struct RecGroupEntryInner {
     /// The Wasm rec group, canonicalized for hash consing.
     hash_consing_key: WasmRecGroup,
+
+    /// The shared type indices for each type in this rec group.
     shared_type_indices: Box<[VMSharedTypeIndex]>,
+
+    /// The number of times that this entry has been registered in the
+    /// `TypeRegistryInner`.
+    ///
+    /// This is an atomic counter so that cloning a `RegisteredType`, and
+    /// temporarily keeping a type registered, doesn't require locking the full
+    /// registry.
     registrations: AtomicUsize,
+
+    /// Whether this entry has already been unregistered from the
+    /// `TypeRegistryInner`.
+    ///
+    /// This flag exists to detect and avoid double-unregistration bugs that
+    /// could otherwise occur in rare cases. See the comments in
+    /// `TypeRegistryInner::unregister_type` for details.
+    ///
+    /// To maintain consistency with the rest of this entry's state, this flag
+    /// should only be accessed while holding the type registry lock.
+    unregistered: AtomicBool,
 }
 
 impl PartialEq for RecGroupEntry {
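// An illustrative sketch of the lock-free registration counting that the
// `registrations` field enables; `DemoEntry` and `DemoRegistered` are
// simplified stand-ins for `RecGroupEntryInner` and `RegisteredType`.

use std::sync::{
    atomic::{AtomicUsize, Ordering::AcqRel},
    Arc,
};

struct DemoEntry {
    registrations: AtomicUsize,
}

struct DemoRegistered(Arc<DemoEntry>);

impl Clone for DemoRegistered {
    fn clone(&self) -> Self {
        // Cloning only bumps the atomic counter; the registry lock is never
        // taken on this hot path.
        self.0.registrations.fetch_add(1, AcqRel);
        DemoRegistered(Arc::clone(&self.0))
    }
}

impl Drop for DemoRegistered {
    fn drop(&mut self) {
        // Only the drop that takes the count to zero goes on to lock the
        // registry and attempt the real unregistration; the gap between this
        // decrement and acquiring that lock is the window exploited by the
        // races documented in `unregister_entry` below.
        if self.0.registrations.fetch_sub(1, AcqRel) == 1 {
            // ... acquire the registry lock, then unregister the entry ...
        }
    }
}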
@@ -531,6 +556,7 @@ impl TypeRegistryInner {
 
         // If we've already registered this rec group before, reuse it.
         if let Some(entry) = self.hash_consing_map.get(&hash_consing_key) {
+            assert_eq!(entry.0.unregistered.load(Acquire), false);
             entry.incref(
                 "hash consed to already-registered type in `TypeRegistryInner::register_rec_group`",
             );
@@ -542,8 +568,9 @@ impl TypeRegistryInner {
         // while this rec group is still alive.
         hash_consing_key
             .trace_engine_indices::<_, ()>(&mut |index| {
-                let entry = &self.type_to_rec_group[index].as_ref().unwrap();
-                entry.incref(
+                let other_entry = &self.type_to_rec_group[index].as_ref().unwrap();
+                assert_eq!(other_entry.0.unregistered.load(Acquire), false);
+                other_entry.incref(
                     "new cross-group type reference to existing type in `register_rec_group`",
                 );
                 Ok(())
@@ -565,7 +592,10 @@ impl TypeRegistryInner {
                     map[idx]
                 } else {
                     let rec_group_offset = idx.as_u32() - module_rec_group_start.as_u32();
-                    VMSharedTypeIndex::from_u32(engine_rec_group_start + rec_group_offset)
+                    let index =
+                        VMSharedTypeIndex::from_u32(engine_rec_group_start + rec_group_offset);
+                    assert!(!index.is_reserved_value());
+                    index
                 }
             });
             self.insert_one_type_from_rec_group(module_index, ty)
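// A tiny worked example of the offset arithmetic above, with made-up numbers:
// if a module rec group starts at module type index 5 and its first type was
// assigned engine index 100, then module index 6 maps to engine index 101.
fn demo_offset_mapping() -> u32 {
    let module_rec_group_start = 5u32;
    let engine_rec_group_start = 100u32;
    let idx = 6u32;
    let rec_group_offset = idx - module_rec_group_start; // 1
    engine_rec_group_start + rec_group_offset // 101
}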
@@ -576,6 +606,7 @@ impl TypeRegistryInner {
             hash_consing_key,
             shared_type_indices,
             registrations: AtomicUsize::new(1),
+            unregistered: AtomicBool::new(false),
         }));
         log::trace!("create new entry {entry:?} (registrations -> 1)");
 
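// An illustrative sketch of the register path across the hunks above: a
// duplicate rec group hash-conses to the existing entry and increments its
// count, while a fresh group gets a new entry born with one registration and
// `unregistered == false`. `DemoEntry` and the `String` key are simplified
// stand-ins for `RecGroupEntry` and the canonicalized `WasmRecGroup` key.

use std::collections::HashMap;
use std::sync::{
    atomic::{
        AtomicBool, AtomicUsize,
        Ordering::{AcqRel, Acquire},
    },
    Arc,
};

struct DemoEntry {
    registrations: AtomicUsize,
    unregistered: AtomicBool,
}

fn demo_register(map: &mut HashMap<String, Arc<DemoEntry>>, key: String) -> Arc<DemoEntry> {
    if let Some(entry) = map.get(&key) {
        // Anything still reachable through the hash-consing map must not have
        // been unregistered yet, mirroring the patch's new assertions.
        assert!(!entry.unregistered.load(Acquire));
        entry.registrations.fetch_add(1, AcqRel);
        return Arc::clone(entry);
    }
    let entry = Arc::new(DemoEntry {
        registrations: AtomicUsize::new(1),
        unregistered: AtomicBool::new(false),
    });
    map.insert(key, Arc::clone(&entry));
    entry
}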
@@ -680,29 +711,133 @@ impl TypeRegistryInner {
     /// zero remaining registrations.
     fn unregister_entry(&mut self, entry: RecGroupEntry) {
         debug_assert!(self.drop_stack.is_empty());
+
+        // There are two races to guard against before we can unregister the
+        // entry, even though it was on the drop stack:
+        //
+        // 1. Although an entry has to reach zero registrations before it is
+        //    enqueued in the drop stack, we need to double check whether the
+        //    entry is *still* at zero registrations. This is because someone
+        //    else can resurrect the entry in between when the
+        //    zero-registrations count was first observed and when we actually
+        //    acquire the lock to unregister it. In this example, we have
+        //    threads A and B, an existing rec group entry E, and a rec group
+        //    entry E' that is a duplicate of E:
+        //
+        //    Thread A                        | Thread B
+        //    --------------------------------+-----------------------------
+        //    acquire(type registry lock)     |
+        //                                    |
+        //                                    | decref(E) --> 0
+        //                                    |
+        //                                    | block_on(type registry lock)
+        //                                    |
+        //    register(E') == incref(E) --> 1 |
+        //                                    |
+        //    release(type registry lock)     |
+        //                                    |
+        //                                    | acquire(type registry lock)
+        //                                    |
+        //                                    | unregister(E)          !!!!!!
+        //
+        //    If we aren't careful, we can unregister a type while it is still
+        //    in use!
+        //
+        //    The fix in this case is that we skip unregistering the entry if
+        //    its reference count is non-zero, since that means it was
+        //    concurrently resurrected and is now in use again.
+        //
+        // 2. In a slightly more convoluted version of (1), where an entry is
+        //    resurrected but then dropped *again*, someone might attempt to
+        //    unregister an entry a second time:
+        //
+        //    Thread A                        | Thread B
+        //    --------------------------------|-----------------------------
+        //    acquire(type registry lock)     |
+        //                                    |
+        //                                    | decref(E) --> 0
+        //                                    |
+        //                                    | block_on(type registry lock)
+        //                                    |
+        //    register(E') == incref(E) --> 1 |
+        //                                    |
+        //    release(type registry lock)     |
+        //                                    |
+        //    decref(E) --> 0                 |
+        //                                    |
+        //    acquire(type registry lock)     |
+        //                                    |
+        //    unregister(E)                   |
+        //                                    |
+        //    release(type registry lock)     |
+        //                                    |
+        //                                    | acquire(type registry lock)
+        //                                    |
+        //                                    | unregister(E)          !!!!!!
+        //
+        //    If we aren't careful, we can unregister a type twice, which leads
+        //    to panics and registry corruption!
+        //
+        //    To detect this scenario and avoid the double-unregistration bug,
+        //    we maintain an `unregistered` flag on entries. We set this flag
+        //    once an entry is unregistered and therefore, even if it is
+        //    enqueued in the drop stack multiple times, we only actually
+        //    unregister the entry the first time.
+        //
+        // A final note: we don't need to worry about any concurrent
+        // modifications during the middle of this function's execution, only
+        // between (a) when we first observed a zero-registrations count and
+        // decided to unregister the type, and (b) when we acquired the type
+        // registry's lock so that we could perform that unregistration. This is
+        // because this method has exclusive access to `&mut self` -- that is,
+        // we have a write lock on the whole type registry -- and therefore no
+        // one else can create new references to this zero-registration entry
+        // and bring it back to life (which would require finding it in
+        // `self.hash_consing_map`, which no one else has access to, because we
+        // now have an exclusive lock on `self`).
+
+        // Handle scenario (1) from above.
+        let registrations = entry.0.registrations.load(Acquire);
+        if registrations != 0 {
+            log::trace!(
+                "{entry:?} was concurrently resurrected and no longer has \
+                 zero registrations (registrations -> {registrations})",
+            );
+            assert_eq!(entry.0.unregistered.load(Acquire), false);
+            return;
+        }
+
+        // Handle scenario (2) from above.
+        if entry.0.unregistered.load(Acquire) {
+            log::trace!(
+                "{entry:?} was concurrently resurrected, dropped again, \
+                 and already unregistered"
+            );
+            return;
+        }
+
+        // Okay, we are really going to unregister this entry. Enqueue it on the
+        // drop stack.
         self.drop_stack.push(entry);
 
+        // Keep unregistering entries until the drop stack is empty. This is
+        // logically a recursive process where if we unregister a type that was
+        // the only thing keeping another type alive, we then recursively
+        // unregister that other type as well. However, we use this explicit
+        // drop stack to avoid recursion and the potential stack overflows that
+        // recursion implies.
         while let Some(entry) = self.drop_stack.pop() {
             log::trace!("Start unregistering {entry:?}");
 
-            // We need to double check whether the entry is still at zero
-            // registrations: Between the time that we observed a zero and
-            // acquired the lock to call this function, another thread could
-            // have registered the type and found the 0-registrations entry in
-            // `self.map` and incremented its count.
-            //
-            // We don't need to worry about any concurrent increments during
-            // this function's invocation after we check for zero because we
-            // have exclusive access to `&mut self` and therefore no one can
-            // create a new reference to this entry and bring it back to life.
-            let registrations = entry.0.registrations.load(Acquire);
-            if registrations != 0 {
-                log::trace!(
-                    "{entry:?} was concurrently resurrected and no longer has \
-                     zero registrations (registrations -> {registrations})",
-                );
-                continue;
-            }
+            // All entries on the drop stack should *really* be ready for
+            // unregistration, since no one can resurrect entries once we've
+            // locked the registry.
+            assert_eq!(entry.0.registrations.load(Acquire), 0);
+            assert_eq!(entry.0.unregistered.load(Acquire), false);
+
+            // We are taking responsibility for unregistering this entry, so
+            // prevent anyone else from attempting to do it again.
+            entry.0.unregistered.store(true, Release);
 
             // Decrement any other types that this type was shallowly
             // (i.e. non-transitively) referencing and keeping alive. If this
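// A condensed, illustrative sketch of the guarded unregistration above, with
// the same two early-outs; the map and `DemoEntry` are simplified stand-ins
// for the registry's hash-consing map and `RecGroupEntry` (the real code also
// drains a drop stack and frees slab entries).

use std::collections::HashMap;
use std::sync::{
    atomic::{
        AtomicBool, AtomicUsize,
        Ordering::{Acquire, Release},
    },
    Arc,
};

struct DemoEntry {
    key: String,
    registrations: AtomicUsize,
    unregistered: AtomicBool,
}

fn demo_unregister(map: &mut HashMap<String, Arc<DemoEntry>>, entry: Arc<DemoEntry>) {
    // Scenario (1): the entry was resurrected between the zero-count
    // observation and acquiring the lock; it is live again, so skip it.
    if entry.registrations.load(Acquire) != 0 {
        return;
    }
    // Scenario (2): a resurrect-then-drop cycle already unregistered this
    // entry; the flag keeps us from removing it from the map a second time.
    if entry.unregistered.load(Acquire) {
        return;
    }
    // Claim responsibility before touching the shared map.
    entry.unregistered.store(true, Release);
    map.remove(&entry.key);
}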