|
| 1 | +root@r-bobcat-02:~# dmidecode -t1 -t2 |
| 2 | +# dmidecode 3.4 |
| 3 | +Getting SMBIOS data from sysfs. |
| 4 | +SMBIOS 3.2.1 present. |
| 5 | + |
| 6 | +Handle 0x0001, DMI type 1, 27 bytes |
| 7 | +System Information |
| 8 | + Manufacturer: Nvidia |
| 9 | + Product Name: SN4280 |
| 10 | + Version: V0-C*GeGdFdRiRaEg-S*GbGbFcRaRaRaRaA0RaTcEiFcEi-D*TfGeGdFaRaRa-F*Tc |
| 11 | + Serial Number: MT2428XZ0JXV |
| 12 | + UUID: b908e90a-a121-11ef-8000-b0cf0e209200 |
| 13 | + Wake-up Type: Power Switch |
| 14 | + SKU Number: HI160 |
| 15 | + Family: Not Specified |
| 16 | + |
| 17 | +Handle 0x0002, DMI type 2, 15 bytes |
| 18 | +Base Board Information |
| 19 | + Manufacturer: Nvidia |
| 20 | + Product Name: VMOD0019 |
| 21 | + Version: A2 |
| 22 | + Serial Number: MT2428XZ0HXA |
| 23 | + Asset Tag: Not Specified |
| 24 | + Features: |
| 25 | + Board is a hosting board |
| 26 | + Board is removable |
| 27 | + Board is replaceable |
| 28 | + Location In Chassis: Not Specified |
| 29 | + Chassis Handle: 0x0003 |
| 30 | + Type: Motherboard |
| 31 | + Contained Object Handles: 0 |
| 32 | + |
| 33 | +root@r-bobcat-02:~# systemctl status hw-management-tc |
| 34 | +● hw-management-tc.service - Thermal control service (ver 2.0) of Mellanox systems |
| 35 | + Loaded: loaded (/lib/systemd/system/hw-management-tc.service; enabled; preset: enabled) |
| 36 | + Active: active (running) since Fri 2025-07-11 08:05:32 UTC; 42s ago |
| 37 | + Docs: man:hw-management-tc.service(8) |
| 38 | + Main PID: 19259 (hw_management_t) |
| 39 | + Tasks: 1 (limit: 153549) |
| 40 | + Memory: 17.4M |
| 41 | + CGroup: /system.slice/hw-management-tc.service |
| 42 | + └─19259 /usr/bin/python /usr/bin/hw_management_thermal_control.py |
| 43 | + |
| 44 | +Jul 11 08:05:32 r-bobcat-02 systemd[1]: Started hw-management-tc.service - Thermal control service (ver 2.0) of Mellanox systems. |
| 45 | +Jul 11 08:05:33 r-bobcat-02 hw-management-tc[19259]: NOTICE - Preinit thermal control ver 2.1.0 |
| 46 | +Jul 11 08:05:33 r-bobcat-02 hw-management-tc[19259]: NOTICE - Set FAN PWM 100 |
| 47 | +Jul 11 08:05:33 r-bobcat-02 hw-management-tc[19259]: NOTICE - Additional delay defined in ./config/thermal_delay (35 sec). |
| 48 | +Jul 11 08:05:33 r-bobcat-02 hw-management-tc[19259]: NOTICE - Mellanox thermal control is waiting for configuration (60 sec). |
| 49 | + |
| 50 | +root@r-bobcat-02:~# cat /var/run/hw-management/thermal/pwm1 |
| 51 | +255 |
| 52 | + |
| 53 | +root@r-gaur-01:/var/run/hw-management/thermal# dvs_start.sh --sdk_bridge_mode=HYBRID |
| 54 | + |
| 55 | +root@r-bobcat-02:/var/run/hw-management/thermal# cat asic |
| 56 | +64000 |
| 57 | + |
| 58 | +root@r-bobcat-02:/var/run/hw-management/thermal# cat pwm1 |
| 59 | +76 |
| 60 | + |
| 61 | +# TC: Simulate fan insertion and fault condition for Fan drawer 1 |
| 62 | +root@r-bobcat-02:/var/run/hw-management/thermal# unlink fan1_fault |
| 63 | +root@r-bobcat-02:/var/run/hw-management/thermal# unlink fan1_status |
| 64 | +root@r-bobcat-02:/var/run/hw-management/thermal# unlink fan1_speed_get |
| 65 | +root@r-bobcat-02:/var/run/hw-management/thermal# echo 0 > fan1_speed_get; echo 1 > fan1_fault; echo 0 > fan1_status |
| 66 | +# Wait for some time |
| 67 | +root@r-bobcat-02:/var/run/hw-management/thermal# echo 1 > fan1_status |
| 68 | +root@r-bobcat-02:/var/run/hw-management/thermal# cat pwm1 |
| 69 | +127 |
| 70 | + |
| 71 | + |
| 72 | +=======================Snippet from /var/log/tc_log====================================== |
| 73 | +2025-07-11 08:20:34,765 - INFO - drwr1:[1] tacho1=0 out of RPM range 3100:11000 |
| 74 | +2025-07-11 08:20:34,767 - WARNING - drwr1:[1] status 0. Set PWM 20 |
| 75 | +2025-07-11 08:20:34,767 - WARNING - drwr1:[1] incorrect rpm [0]. Set PWM 20 |
| 76 | +2025-07-11 08:20:39,804 - INFO - drwr1:[1] tacho1=0 out of RPM range 3100:11000 |
| 77 | +2025-07-11 08:20:39,805 - WARNING - drwr1:[1] status 0. Set PWM 20 |
| 78 | +2025-07-11 08:20:39,805 - WARNING - drwr1:[1] incorrect rpm [0]. Set PWM 20 |
| 79 | +2025-07-11 08:20:44,805 - INFO - drwr1:[1] tacho1=0 out of RPM range 3100:11000 |
| 80 | +2025-07-11 08:20:44,806 - WARNING - drwr1:[1] status 0. Set PWM 20 |
| 81 | +2025-07-11 08:20:44,807 - WARNING - drwr1:[1] incorrect rpm [0]. Set PWM 20 |
| 82 | +2025-07-11 08:20:49,808 - INFO - drwr1:[1] tacho1=0 out of RPM range 3100:11000 |
| 83 | +2025-07-11 08:20:49,809 - WARNING - drwr1:[1] incorrect rpm [0]. Set PWM 20 |
| 84 | +2025-07-11 08:20:54,823 - INFO - drwr1:[1] tacho1=0 out of RPM range 3100:11000 |
| 85 | +2025-07-11 08:20:54,824 - WARNING - drwr1:[1] incorrect rpm [0]. Set PWM 20 |
| 86 | +2025-07-11 08:20:55,825 - NOTICE - drwr1:[1] fan not started after insertion |
| 87 | +2025-07-11 08:20:55,825 - INFO - Attention fan insertion failed, trying to recover |
| 88 | +2025-07-11 08:20:55,825 - NOTICE - @syslog Attention fan not started after insertion: Setting pwm to 50% from 30% |
| 89 | +2025-07-11 08:20:55,825 - INFO - Update chassis FAN PWM 50 |
| 90 | +2025-07-11 08:20:55,825 - INFO - Write drwr1:[1] PWM 50 |
| 91 | +2025-07-11 08:20:55,825 - INFO - Write drwr2:[2] PWM 50 |
| 92 | +2025-07-11 08:20:55,826 - INFO - Write drwr3:[3] PWM 50 |
| 93 | +2025-07-11 08:20:55,826 - INFO - Write drwr4:[4] PWM 50 |
| 94 | +2025-07-11 08:20:55,826 - INFO - Waiting 10s for newly inserted fan to stabilize |
| 95 | +2025-07-11 08:21:05,827 - INFO - Resuming normal operation: Setting pwm back to 30% |
| 96 | +2025-07-11 08:21:05,827 - INFO - Update chassis FAN PWM 30 |
0 commit comments