diff --git a/src/bluetooth-fw/nimble/init.c b/src/bluetooth-fw/nimble/init.c index 1f8c8930b..6bd56afba 100644 --- a/src/bluetooth-fw/nimble/init.c +++ b/src/bluetooth-fw/nimble/init.c @@ -27,6 +27,10 @@ static const uint32_t s_bt_stack_start_stop_timeout_ms = 3000; extern void pebble_pairing_service_init(void); extern void nimble_discover_init(void); +#ifdef BT_CONTROLLER_SF32LB52 +extern void ble_transport_ll_deinit(void); +extern void ble_transport_ll_reinit(void); +#endif #if NIMBLE_CFG_CONTROLLER static TaskHandle_t s_ll_task_handle; @@ -115,6 +119,10 @@ bool bt_driver_start(BTDriverConfig *config) { gh3x2x_tuning_service_init(); #endif +#ifdef BT_CONTROLLER_SF32LB52 + ble_transport_ll_reinit(); +#endif + ble_hs_sched_start(); f_rc = xSemaphoreTake(s_host_started, milliseconds_to_ticks(s_bt_stack_start_stop_timeout_ms)); if (f_rc != pdTRUE) { @@ -141,6 +149,10 @@ void bt_driver_stop(void) { ble_gatts_reset(); nimble_store_unload(); + +#ifdef BT_CONTROLLER_SF32LB52 + ble_transport_ll_deinit(); +#endif } void bt_driver_power_down_controller_on_boot(void) {} diff --git a/src/fw/comm/ble/gap_le_advert.c b/src/fw/comm/ble/gap_le_advert.c index aa01b0aae..c85288fcc 100644 --- a/src/fw/comm/ble/gap_le_advert.c +++ b/src/fw/comm/ble/gap_le_advert.c @@ -7,6 +7,7 @@ #include #include "comm/ble/ble_log.h" +#include "services/common/bluetooth/bluetooth_ctl.h" #include "comm/bt_lock.h" #include "kernel/pbl_malloc.h" #include "services/common/analytics/analytics.h" @@ -15,6 +16,10 @@ #include "system/passert.h" #include "util/list.h" +//! Maximum consecutive advertising enable failures before triggering a full BLE +//! reset to recover from an unresponsive controller. +#define ADVERT_MAX_CONSECUTIVE_FAILURES 10 + //! CC2564 / HCI Advertising Limitation: //! ------------------------------ //! The Bluetooth chip can accept only one advertising payload, one @@ -97,6 +102,8 @@ static bool s_is_advertising; static bool s_is_connected; +static uint8_t s_consecutive_enable_failures; + //! Cache of the last advertising transmission power in dBm. A cache is kept in //! case the API call fails, for example because Bluetooth is disabled. //! 12 dBm is what the PAN1315 Bluetooth module reports. @@ -351,8 +358,19 @@ static void prv_perform_next_job(bool force_refresh) { bool result = bt_driver_advert_advertising_enable(min_interval_ms, max_interval_ms); if (result) { s_is_advertising = true; + s_consecutive_enable_failures = 0; prv_analytics_start_timer(interval); PBL_LOG_DBG("Airing advertising job: %s ", prv_string_for_debug_tag(next->tag)); + } else { + if (s_consecutive_enable_failures < ADVERT_MAX_CONSECUTIVE_FAILURES) { + ++s_consecutive_enable_failures; + } + if (s_consecutive_enable_failures >= ADVERT_MAX_CONSECUTIVE_FAILURES) { + PBL_LOG_ERR("Advertising enable failed %d times, resetting BLE", + ADVERT_MAX_CONSECUTIVE_FAILURES); + prv_timer_stop(); + bt_ctl_reset_bluetooth(); + } } } @@ -547,6 +565,7 @@ void gap_le_advert_init(void) { }; s_is_advertising = false; + s_consecutive_enable_failures = 0; s_gap_le_advert_is_initialized = true; } unlock: diff --git a/tests/fakes/fake_GAPAPI.c b/tests/fakes/fake_GAPAPI.c index 70cf4338c..2a1be4b59 100644 --- a/tests/fakes/fake_GAPAPI.c +++ b/tests/fakes/fake_GAPAPI.c @@ -11,6 +11,7 @@ #include "clar_asserts.h" static bool s_is_le_advertising_enabled; +static bool s_advert_enable_should_fail; static GAP_LE_Event_Callback_t s_le_adv_connection_event_callback; static unsigned long s_le_adv_connection_callback_param; @@ -45,8 +46,15 @@ int GAP_LE_Advertising_Enable(unsigned int BluetoothStackID, return 0; } +void fake_GAPAPI_set_advert_enable_fails(bool should_fail) { + s_advert_enable_should_fail = should_fail; +} + // bt_driver_advert fakes used by gap_le_advert.c bool bt_driver_advert_advertising_enable(uint32_t min_interval_ms, uint32_t max_interval_ms) { + if (s_advert_enable_should_fail) { + return false; + } s_is_le_advertising_enabled = true; s_min_advertising_interval_ms = min_interval_ms; s_max_advertising_interval_ms = max_interval_ms; @@ -367,6 +375,7 @@ Boolean_t GAP_LE_Resolve_Address(unsigned int BluetoothStackID, Encryption_Key_t void fake_GAPAPI_init(void) { memset(&s_encrypted_device, 0, sizeof(s_encrypted_device)); s_is_le_advertising_enabled = false; + s_advert_enable_should_fail = false; s_le_adv_connection_event_callback = NULL; s_le_adv_connection_callback_param = 0; s_min_advertising_interval_ms = 0; diff --git a/tests/fakes/fake_GAPAPI.h b/tests/fakes/fake_GAPAPI.h index 7d45895c3..a10388c4e 100644 --- a/tests/fakes/fake_GAPAPI.h +++ b/tests/fakes/fake_GAPAPI.h @@ -43,4 +43,6 @@ const BD_ADDR_t *fake_GAPAPI_get_bd_addr_resolving_to_fake_irk(void); const BTDeviceInternal *fake_GAPAPI_get_device_resolving_to_fake_irk(void); +void fake_GAPAPI_set_advert_enable_fails(bool should_fail); + void fake_GAPAPI_init(void); diff --git a/tests/fw/comm/test_gap_le_advert.c b/tests/fw/comm/test_gap_le_advert.c index 50184f501..fd9fb51b8 100644 --- a/tests/fw/comm/test_gap_le_advert.c +++ b/tests/fw/comm/test_gap_le_advert.c @@ -61,6 +61,12 @@ void launcher_task_add_callback(void (*callback)(void *data), void *data) { callback(data); } +static uint32_t s_bt_ctl_reset_count; + +void bt_ctl_reset_bluetooth(void) { + ++s_bt_ctl_reset_count; +} + static uint32_t s_unscheduled_cb_count; static void * s_unscheduled_cb_data = "Callback Data"; static GAPLEAdvertisingJobRef s_unscheduled_job; @@ -81,6 +87,7 @@ void test_gap_le_advert__initialize(void) { s_unscheduled_cb_count = 0; s_unscheduled_job = NULL; s_unscheduled_completed = false; + s_bt_ctl_reset_count = 0; // This bypasses the work-around for the CC2564 advertising bug, that pauses the round-robinning // through scheduled advertisment jobs: @@ -693,3 +700,39 @@ void test_gap_le_advert__unschedule_job_types(void) { free(ad); } + +void test_gap_le_advert__enable_failure_triggers_reset(void) { + BLEAdData *ad = create_ad("test", NULL); + + GAPLEAdvertisingJobTerm advert_term = { + .interval = GAPLEAdvertisingInterval_Short, + .duration_secs = GAPLE_ADVERTISING_DURATION_INFINITE, + }; + GAPLEAdvertisingJobRef job; + job = gap_le_advert_schedule(ad, &advert_term, 1, + unscheduled_callback, s_unscheduled_cb_data, 0); + cl_assert(job); + cl_assert(gap_le_is_advertising_enabled()); + cl_assert_equal_i(regular_timer_seconds_count(), 1); + + // Simulate a disconnect so s_is_advertising becomes false + gap_le_set_advertising_disabled(); + gap_le_advert_handle_connect_as_slave(); + + // Make advertising enable fail from now on + fake_GAPAPI_set_advert_enable_fails(true); + + // Fire timer — should not trigger reset yet (under threshold) + for (int i = 0; i < 9; i++) { + regular_timer_fire_seconds(1); + } + cl_assert_equal_i(s_bt_ctl_reset_count, 0); + cl_assert_equal_i(regular_timer_seconds_count(), 1); + + // 10th failure should trigger BLE reset and stop the timer + regular_timer_fire_seconds(1); + cl_assert_equal_i(s_bt_ctl_reset_count, 1); + cl_assert_equal_i(regular_timer_seconds_count(), 0); + + free(ad); +} diff --git a/third_party/nimble/transport/hci_sf32lb52.c b/third_party/nimble/transport/hci_sf32lb52.c index 1e929bf13..11f5b4df5 100644 --- a/third_party/nimble/transport/hci_sf32lb52.c +++ b/third_party/nimble/transport/hci_sf32lb52.c @@ -54,6 +54,7 @@ static uint8_t s_hci_buf[MAX_HCI_PKT_SIZE]; #endif extern void lcpu_power_on(void); +extern void lcpu_power_off(void); extern void lcpu_custom_nvds_config(void); #if defined(NIMBLE_HCI_SF32LB52_TRACE_LOG) @@ -214,6 +215,15 @@ static int prv_hci_frame_cb(uint8_t pkt_type, void *data) { return -1; } +// Maximum OOM retries before giving up on the current IPC read buffer. +// At 10ms per retry, 100 retries = ~1 second. If NimBLE can't free buffers +// in that time, something is fundamentally broken. Breaking out will cause +// H4 stream desync (since ipc_queue_read is destructive), but the BLE +// recovery mechanism in gap_le_advert.c will detect the resulting advertising +// failures and trigger a full LCPU power cycle via bt_ctl_reset_bluetooth(). +#define HCI_OOM_MAX_RETRIES 100 +#define HCI_OOM_RETRY_DELAY_TICKS 10 + static void prv_hci_task_main(void *unused) { uint8_t buf[64]; @@ -226,18 +236,25 @@ static void prv_hci_task_main(void *unused) { len = ipc_queue_read(s_ipc_port, buf, sizeof(buf)); if (len > 0U) { uint8_t *pbuf = buf; + int oom_retries = 0; while (len > 0U) { int consumed_bytes; consumed_bytes = hci_h4_sm_rx(&s_hci_h4sm, pbuf, len); if (consumed_bytes < 0) { + if (++oom_retries > HCI_OOM_MAX_RETRIES) { + PBL_LOG_ERR("hci_h4_sm_rx OOM, giving up after %d retries", + oom_retries); + break; + } PBL_LOG_D_WRN(LOG_DOMAIN_BT_STACK, "hci_h4_sm_rx OOM, retrying"); - vTaskDelay(1); + vTaskDelay(HCI_OOM_RETRY_DELAY_TICKS); continue; } else if (consumed_bytes == 0) { PBL_LOG_D_ERR(LOG_DOMAIN_BT_STACK, "hci_h4_sm_rx consumed 0 bytes"); break; } + oom_retries = 0; len -= consumed_bytes; pbuf += consumed_bytes; } @@ -278,6 +295,24 @@ void ble_transport_ll_init(void) { lcpu_power_on(); } +void ble_transport_ll_deinit(void) { + NVIC_DisableIRQ(LCPU2HCPU_IRQn); + ipc_queue_close(s_ipc_port); + ipc_queue_deinit(s_ipc_port); + s_ipc_port = IPC_QUEUE_INVALID_HANDLE; + lcpu_power_off(); +} + +void ble_transport_ll_reinit(void) { + hci_h4_sm_init(&s_hci_h4sm, &hci_h4_allocs_from_ll, prv_hci_frame_cb); + + int ret = prv_config_ipc(); + PBL_ASSERTN(ret == 0); + + lcpu_custom_nvds_config(); + lcpu_power_on(); +} + /* APIs to be implemented by HS/LL side of transports */ int ble_transport_to_ll_cmd_impl(void *buf) { struct ble_hci_cmd *cmd = buf;