diff --git a/.github/actions/linux-uttest/action.yml b/.github/actions/linux-uttest/action.yml
index c6ac2eb2b..f1074aae7 100644
--- a/.github/actions/linux-uttest/action.yml
+++ b/.github/actions/linux-uttest/action.yml
@@ -161,3 +161,4 @@ runs:
         python run_distributed.py \
           2> ${{ github.workspace }}/ut_log/xpu_distributed/xpu_distributed_test_error.log | \
           tee ${{ github.workspace }}/ut_log/xpu_distributed/xpu_distributed_test.log
+        cp *.xml ${{ github.workspace }}/ut_log
diff --git a/.github/scripts/ut_result_check.sh b/.github/scripts/ut_result_check.sh
index cc3131cce..fd17147b7 100644
--- a/.github/scripts/ut_result_check.sh
+++ b/.github/scripts/ut_result_check.sh
@@ -225,7 +225,9 @@ if [[ "${ut_suite}" == 'torch_xpu' ]]; then
     fi
 fi
 if [[ "${ut_suite}" == 'xpu_distributed' ]]; then
-    grep -E "^FAILED" xpu_distributed_test.log | awk '{print $2}' > ./"${ut_suite}"_xpu_distributed_test_failed.log
+    grep -E "^FAILED" xpu_distributed_test.log | awk '{print $3}' > ./"${ut_suite}"_xpu_distributed_test_failed.log
+    grep -E "have failures" xpu_distributed_test.log | awk '{print $1}' >> ./"${ut_suite}"_xpu_distributed_test_failed.log
+    sed -i '/^[^.]\+/d' ./"${ut_suite}"_xpu_distributed_test_failed.log
     grep "PASSED" xpu_distributed_test.log | awk '{print $1}' > ./"${ut_suite}"_xpu_distributed_test_passed.log
     echo -e "========================================================================="
     echo -e "Show Failed cases in ${ut_suite} xpu distributed"
diff --git a/test/xpu/run_distributed.py b/test/xpu/run_distributed.py
index ddde5f8c8..724715ce8 100644
--- a/test/xpu/run_distributed.py
+++ b/test/xpu/run_distributed.py
@@ -11,7 +11,41 @@
 
 # libfabric WA to avoid hang issue
 os.environ["FI_PROVIDER"] = "tcp"
-# os.environ["ZE_AFFINITY_MASK"] = "0,1,2,3"
+# Limit the run to the largest group of GPUs that xpu-smi reports as linked
+# by Xe Link. The output is redirected to the log instead of being piped
+# through tee: with a pipe, os.system() returns tee's exit status, so a
+# failing xpu-smi went unnoticed and the error branch below never ran.
+ret = os.system("xpu-smi topology -m > topology.log 2>&1")
+# Still echo the table to stdout, as the tee pipeline used to.
+os.system("cat topology.log")
+if ret != 0:
+    print("xpu-smi topology failed")
+    sys.exit(255)
+
+# Map each GPU id to the device indices it reaches without crossing SYS.
+gpu_dict = {}
+with open("topology.log") as file:
+    for line in file:
+        # The header row starts with "GPU" too; it is the only row that
+        # names the "CPU Affinity" column.
+        if "CPU Affinity" in line:
+            continue
+        items = line.split()
+        if len(items) < 2 or items[0] != "GPU":
+            continue
+        # Fields after "GPU <id>" are read as one link type per device:
+        # keep XL* (Xe Link) and S (presumably the GPU itself), drop SYS.
+        gpu_dict[items[1].split("/")[0]] = [
+            str(j)
+            for j, item in enumerate(items[2:])
+            if "SYS" not in item and ("XL" in item or "S" in item)
+        ]
+
+# Rank groups by device count rather than by mask string length, so that
+# two-digit device indices cannot make a smaller group look larger.
+max_affinity = ",".join(max(gpu_dict.values(), key=len, default=[]))
+os.environ["ZE_AFFINITY_MASK"] = max_affinity
+print("ZE_AFFINITY_MASK=" + os.environ["ZE_AFFINITY_MASK"])
 
 
 # run python test
@@ -24,8 +58,16 @@ def run(test_command):
     return result.returncode
 
 
+# Run the XCCL c10d ops suite with both CCL_SEND and CCL_RECV set to "direct".
+os.environ["CCL_SEND"] = os.environ["CCL_RECV"] = "direct"
 test_command = ["python", "distributed/test_c10d_ops_xccl.py"]
 res += run(test_command)
+del os.environ["CCL_SEND"], os.environ["CCL_RECV"]
+for test_command in (
+    ["python", "../../../../test/distributed/pipelining/test_backward.py"],
+    ["python", "../../../../test/distributed/pipelining/test_microbatch.py"],
+):
+    res += run(test_command)
 
 # run pytest with skiplist
 for key in skip_dict:
diff --git a/test/xpu/skip_list_dist.py b/test/xpu/skip_list_dist.py
index 1210896ec..b380a3aaa 100644
--- a/test/xpu/skip_list_dist.py
+++ b/test/xpu/skip_list_dist.py
@@ -10,6 +10,7 @@
         "test_ddp_parity_xpu",
     ),
     "../../../../test/distributed/fsdp/test_fsdp_comm.py": None,
+    "../../../../test/distributed/fsdp/test_fsdp_comm_hooks.py": None,
     "../../../../test/distributed/fsdp/test_fsdp_core.py": (
         "test_delayed_optim_step_offload_false_no_shard_xpu",
         "test_delayed_optim_step_offload_false_none_xpu",
@@ -102,4 +103,12 @@
         # will bring back after oneccl upgrade to 2021.16.1
         "test_xccl_barrier",
     ),
+    "../../../../test/distributed/fsdp/test_fsdp_misc.py": None,
+    "../../../../test/distributed/test_functional_api.py": (
+        # depends on https://github.com/pytorch/pytorch/pull/159473
+        "test_tracing_with_fakepg_xpu",
+    ),
+    "../../../../test/distributed/_tools/test_fsdp2_mem_tracker.py": None,
+    "../../../../test/distributed/_tools/test_mem_tracker.py": None,
+    "../../../../test/distributed/_tools/test_memory_tracker.py": None,
 }