Project-HAMi · hami-robot · Mar 25, 2026 · Jan 29, 2026 · Feb 3, 2026 · Feb 4, 2026
diff --git a/src/libvgpu.c b/src/libvgpu.c
@@ -872,21 +872,30 @@ void preInit(){
 void postInit(){
     allocator_init();
     map_cuda_visible_devices();
-    int lock_ret = try_lock_unified_lock();
-    if (lock_ret != 0) {
-        LOG_WARN("try_lock_unified_lock failed, skipping set_task_pid");
-        pidfound=0;
+
+    // Use shared memory semaphore to serialize host PID detection
+    // Returns 1 if lock acquired, 0 if timeout (skip detection)
+    int lock_acquired = lock_postinit();
+    nvmlReturn_t res = NVML_SUCCESS;
+
+    if (lock_acquired) {
+        // Lock acquired - safe to call set_task_pid()
+        res = set_task_pid();
+        unlock_postinit();
     } else {
-        nvmlReturn_t res = set_task_pid();
-        try_unlock_unified_lock();
-        if (res != NVML_SUCCESS) {
-            LOG_WARN("SET_TASK_PID FAILED.");
-            pidfound = 0;
-        } else {
-            pidfound = 1;
-        }
+        // Timeout - another process likely crashed holding the lock
+        // Skip host PID detection for this process
+        LOG_WARN("Skipped host PID detection due to lock timeout");
+        res = NVML_ERROR_TIMEOUT;
     }
+
     LOG_MSG("Initialized");
+    if (res != NVML_SUCCESS) {
+        LOG_WARN("SET_TASK_PID FAILED - using container PID for accounting");
+        pidfound = 0;
+    } else {
+        pidfound = 1;
+    }
 
     //add_gpu_device_memory_usage(getpid(),0,context_size,0);
     env_utilization_switch = set_env_utilization_switch();