diff --git a/components/nertc_wake_up/CMakeLists.txt b/components/nertc_wake_up/CMakeLists.txt
new file mode 100644
index 0000000..8523ddd
--- /dev/null
+++ b/components/nertc_wake_up/CMakeLists.txt
@@ -0,0 +1,7 @@
+idf_component_register(
+    INCLUDE_DIRS "include"
+)
+
+add_prebuilt_library(nertc_wake_up "${CMAKE_CURRENT_SOURCE_DIR}/libs/esp32s3/libnertc_wake_up.a"
+                     REQUIRES espressif__esp-sr)
+target_link_libraries(${COMPONENT_LIB} INTERFACE nertc_wake_up)
diff --git a/components/nertc_wake_up/idf_component.yml b/components/nertc_wake_up/idf_component.yml
new file mode 100644
index 0000000..3fb2996
--- /dev/null
+++ b/components/nertc_wake_up/idf_component.yml
@@ -0,0 +1,18 @@
+## IDF Component Manager Manifest File
+dependencies:
+  espressif/zlib: "^1.3.1"
+  espressif/esp-sr: ~2.1.1
+  ## Required IDF version
+  idf:
+    version: ">=5.4.0"
+  # # Put list of dependencies here
+  # # For components maintained by Espressif:
+  # component: "~1.0.0"
+  # # For 3rd party components:
+  # username/component: ">=1.0.0,<2.0.0"
+  # username2/component2:
+  #   version: "~1.0.0"
+  #   # For transient dependencies `public` flag can be set.
+  #   # `public` flag doesn't have an effect dependencies of the `main` component.
+  #   # All dependencies of `main` are public by default.
+  #   public: true
diff --git a/components/nertc_wake_up/include/nertc_wakeup_sdk.h b/components/nertc_wake_up/include/nertc_wakeup_sdk.h
new file mode 100644
index 0000000..ddc7a5c
--- /dev/null
+++ b/components/nertc_wake_up/include/nertc_wakeup_sdk.h
@@ -0,0 +1,52 @@
+#ifndef _NERTC_WAKE_UP_H_
+#define _NERTC_WAKE_UP_H_
+
+#include "nertc_wakeup_sdk_event.h"
+
+#include
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define NERTC_WAKE_UP_API __attribute__((visibility("default")))
+
+/**
+ * @brief Create a wakeup instance. This must be the first SDK function called.
+ * @param cfg Engine configuration
+ * @return The engine instance
+ */
+NERTC_WAKE_UP_API nertc_wakeup_sdk_t nertc_wakeup_create(const nertc_wakeup_sdk_config_t *cfg);
+
+/**
+ * @brief Destroy a wakeup instance
+ * @param wakeup Instance created by nertc_wakeup_create
+ */
+NERTC_WAKE_UP_API void nertc_wakeup_destory(nertc_wakeup_sdk_t wakeup);
+
+/**
+ * @brief Initialize a wakeup instance
+ * @note Must be the first function called after creating the instance; an instance can only be initialized once
+ * @param wakeup Instance created by nertc_wakeup_create that has not been initialized yet
+ * @return Call result:
+ *         - 0: success
+ *         - non-zero: failure
+ */
+NERTC_WAKE_UP_API int nertc_wakeup_init(nertc_wakeup_sdk_t wakeup, int input_channels, int reference_channels);
+
+NERTC_WAKE_UP_API int nertc_wakeup_detect(nertc_wakeup_sdk_t wakeup);
+
+NERTC_WAKE_UP_API int nertc_wakeup_stop_detect(nertc_wakeup_sdk_t wakeup);
+
+NERTC_WAKE_UP_API int nertc_wakeup_feed(nertc_wakeup_sdk_t wakeup, const int16_t* data, int data_length);
+
+NERTC_WAKE_UP_API int nertc_wakeup_get_feed_size(nertc_wakeup_sdk_t wakeup);
+
+NERTC_WAKE_UP_API int nertc_wakeup_get_feed_chunk_size(nertc_wakeup_sdk_t wakeup);
+
+NERTC_WAKE_UP_API int nertc_wakeup_get_fetch_chunk_size(nertc_wakeup_sdk_t wakeup);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
\ No newline at end of file
diff --git a/components/nertc_wake_up/include/nertc_wakeup_sdk_event.h b/components/nertc_wake_up/include/nertc_wakeup_sdk_event.h
new file mode 100644
index 0000000..e305ea4
--- /dev/null
+++ b/components/nertc_wake_up/include/nertc_wakeup_sdk_event.h
@@ -0,0 +1,41 @@
+#ifndef __NERTC_WAKE_UP_EVENT_H__
+#define __NERTC_WAKE_UP_EVENT_H__
+
+
+#include
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef void * nertc_wakeup_sdk_t;
+typedef void * nertc_wakeup_sdk_user_data_t;
+
+typedef struct nertc_wakeup_sdk_callback_context {
+    nertc_wakeup_sdk_t wakeup; /**< wakeup instance */
+    nertc_wakeup_sdk_user_data_t user_data; /**< user data */
+} nertc_wakeup_sdk_callback_context_t;
+
+typedef struct nertc_wakeup_sdk_event_handler {
+/**
+ * Callback invoked when a wake word is detected.
+ * This callback indicates that the SDK has detected a wake word,
+ * so the application can intervene or notify the user.
+ * @param ctx Callback context
+ * @param wake_word The wake word that was detected
+ */
+void (*on_wake_word_detected)(const nertc_wakeup_sdk_callback_context_t* ctx, const char *wake_word);
+} nertc_wakeup_sdk_event_handle_t;
+
+typedef struct nertc_wakeup_sdk_config {
+    nertc_wakeup_sdk_event_handle_t event_handler;
+    nertc_wakeup_sdk_user_data_t user_data;
+    const char* appkey;
+    const char* deviceId;
+} nertc_wakeup_sdk_config_t;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // __NERTC_WAKE_UP_EVENT_H__
\ No newline at end of file
diff --git a/components/nertc_wake_up/libs/esp32s3/libnertc_wake_up.a b/components/nertc_wake_up/libs/esp32s3/libnertc_wake_up.a
new file mode 100644
index 0000000..fa8eb07
Binary files /dev/null and b/components/nertc_wake_up/libs/esp32s3/libnertc_wake_up.a differ
diff --git a/main/CMakeLists.txt b/main/CMakeLists.txt
index ecfa667..7aea76d 100644
--- a/main/CMakeLists.txt
+++ b/main/CMakeLists.txt
@@ -210,6 +210,8 @@ else()
 endif()
 if(CONFIG_USE_AFE_WAKE_WORD)
     list(APPEND SOURCES "audio_processing/afe_wake_word.cc")
+elseif(CONFIG_USE_AFE_NERTC_WAKE_WORD)
+    list(APPEND SOURCES "audio_processing/nertc_afe_wake_word.cc")
 elseif(CONFIG_USE_ESP_WAKE_WORD)
     list(APPEND SOURCES "audio_processing/esp_wake_word.cc")
 else()
diff --git a/main/Kconfig.projbuild b/main/Kconfig.projbuild
index 980016e..903dd9e 100644
--- a/main/Kconfig.projbuild
+++ b/main/Kconfig.projbuild
@@ -398,6 +398,13 @@ config USE_AFE_WAKE_WORD
     help
         需要 ESP32 S3 与 PSRAM 支持
 
+config USE_AFE_NERTC_WAKE_WORD
+    bool "Enable nertc Wake Word Detection (AFE)"
+    default n
+    depends on IDF_TARGET_ESP32S3 && SPIRAM
+    help
+        需要 ESP32 S3 与 PSRAM 支持
+
 config USE_AUDIO_PROCESSOR
     bool "Enable Audio Noise Reduction"
     default y
diff --git a/main/application.cc b/main/application.cc
index abac713..a11876b 100644
--- a/main/application.cc
+++ b/main/application.cc
@@ -23,12 +23,15 @@
 
 #if CONFIG_USE_AFE_WAKE_WORD
 #include "afe_wake_word.h"
+#elif CONFIG_USE_AFE_NERTC_WAKE_WORD
+#include "nertc_afe_wake_word.h"
 #elif CONFIG_USE_ESP_WAKE_WORD
 #include "esp_wake_word.h"
 #else
 #include "no_wake_word.h"
 #endif
 
+
 #include
 #include
 #include
@@ -72,6 +75,8 @@ Application::Application() {
 
 #if CONFIG_USE_AFE_WAKE_WORD
     wake_word_ = std::make_unique();
+#elif CONFIG_USE_AFE_NERTC_WAKE_WORD
+    wake_word_ = std::make_unique();
 #elif CONFIG_USE_ESP_WAKE_WORD
     wake_word_ = std::make_unique();
 #else
diff --git a/main/audio_processing/nertc_afe_wake_word.cc b/main/audio_processing/nertc_afe_wake_word.cc
new file mode 100644
index 0000000..f741139
--- /dev/null
+++ b/main/audio_processing/nertc_afe_wake_word.cc
@@ -0,0 +1,218 @@
+#include "nertc_afe_wake_word.h"
+#include "application.h"
+
+#include
+#include
+#include
+#include
+
+#define DETECTION_RUNNING_EVENT 1
+
+#define TAG "NertcAfeWakeWord"
+
+// C callback registered with the SDK: forwards a detected wake word to the
+// NertcAfeWakeWord instance that Initialize() stored in ctx->user_data.
+void on_wake_word_detected_handle(const nertc_wakeup_sdk_callback_context_t* ctx, const char *wake_word)
+{
+    // Ignore malformed callbacks; constructing std::string from nullptr is undefined.
+    if (ctx == nullptr || ctx->user_data == nullptr || wake_word == nullptr) {
+        return;
+    }
+    auto* multinet = static_cast(ctx->user_data);
+    std::string call_back_wake_word = std::string(wake_word);
+    multinet->DoCallBack(call_back_wake_word);
+}
+
+NertcAfeWakeWord::NertcAfeWakeWord()
+    : wake_word_pcm_(),
+      wake_word_opus_() {
+    event_group_ = xEventGroupCreate();
+}
+
+// NOTE(review): the task spawned by Initialize() is never stopped, so it keeps
+// using this object after destruction - confirm instances live for the whole run.
+NertcAfeWakeWord::~NertcAfeWakeWord() {
+    if (wake_word_encode_task_stack_ != nullptr) {
+        heap_caps_free(wake_word_encode_task_stack_);
+    }
+    if(nertc_wake_word_)
+    {
+        nertc_wakeup_destory(nertc_wake_word_);
+    }
+
+    vEventGroupDelete(event_group_);
+}
+
+// Creates and initializes the SDK instance, using the board JSON's
+// "nertc_board_name" entry as device id, then spawns the detection task.
+// On any failure the SDK handle stays null, so Feed()/GetFeedSize() do nothing.
+void NertcAfeWakeWord::Initialize(AudioCodec* codec) {
+    codec_ = codec;
+    nertc_wakeup_sdk_config_t config = {};
+    nertc_wakeup_sdk_event_handle_t handle =
+    {
+        .on_wake_word_detected = on_wake_word_detected_handle
+    };
+    config.event_handler = handle;
+    config.user_data = this;
+    cJSON* root = cJSON_Parse(Board::GetInstance().GetJson().c_str());
+    if (!root) {
+        ESP_LOGE(TAG, "Failed to parse JSON board_info");
+        return;
+    }
+    cJSON* board_item = cJSON_GetObjectItem(root, "board");
+    if (!board_item) {
+        ESP_LOGE(TAG, "Invalid board info");
+        cJSON_Delete(root);
+        return;
+    }
+    cJSON* board_name_item = cJSON_GetObjectItem(board_item, "nertc_board_name");
+    if (!board_name_item || !cJSON_IsString(board_name_item)) {
+        ESP_LOGE(TAG, "Invalid board name in Board info JSON");
+        cJSON_Delete(root);
+        return;
+    }
+    // Copy the name before freeing the JSON tree that owns it.
+    std::string device_id = board_name_item->valuestring;
+    cJSON_Delete(root);
+    // NOTE(review): device_id dies when Initialize() returns - confirm the SDK copies deviceId.
+    config.deviceId = device_id.c_str();
+    config.appkey = CONFIG_NERTC_APPKEY;
+    nertc_wake_word_ = nertc_wakeup_create(&config);
+    if (nertc_wake_word_ == nullptr) {
+        ESP_LOGE(TAG, "Failed to create nertc wakeup instance");
+        return;
+    }
+    if (nertc_wakeup_init(nertc_wake_word_, codec_->input_channels(), codec_->input_reference()) != 0) {
+        ESP_LOGE(TAG, "Failed to initialize nertc wakeup instance");
+        nertc_wakeup_destory(nertc_wake_word_);
+        nertc_wake_word_ = nullptr;
+        return;
+    }
+    xTaskCreate([](void* arg) {
+        auto this_ = (NertcAfeWakeWord*)arg;
+        this_->AudioDetectionTask();
+        vTaskDelete(NULL);
+    }, "audio_detection", 4096, this, 3, nullptr);
+}
+
+// Registers the callback that DoCallBack() invokes with the detected wake word.
+void NertcAfeWakeWord::OnWakeWordDetected(std::function callback) {
+    wake_up_call_back_ = callback;
+}
+
+// Records the detected wake word and notifies the registered callback, if any.
+void NertcAfeWakeWord::DoCallBack(std::string& wake_word)
+{
+    last_detected_wake_word_ = wake_word;
+    if (wake_up_call_back_) {
+        wake_up_call_back_(last_detected_wake_word_);
+    }
+}
+
+// Sets the running bit that AudioDetectionTask() waits on.
+void NertcAfeWakeWord::StartDetection() {
+    xEventGroupSetBits(event_group_, DETECTION_RUNNING_EVENT);
+}
+
+// Clears the running bit and asks the SDK to stop detecting.
+void NertcAfeWakeWord::StopDetection() {
+    xEventGroupClearBits(event_group_, DETECTION_RUNNING_EVENT);
+    if (nertc_wake_word_ != nullptr) {
+        nertc_wakeup_stop_detect(nertc_wake_word_);
+    }
+}
+
+bool NertcAfeWakeWord::IsDetectionRunning() {
+    return xEventGroupGetBits(event_group_) & DETECTION_RUNNING_EVENT;
+}
+
+// Buffers the samples for later Opus encoding and passes them to the SDK.
+void NertcAfeWakeWord::Feed(const std::vector& data) {
+    if(nertc_wake_word_ == nullptr) {
+        return;
+    }
+    StoreWakeWordData(data.data(), data.size());
+
+    nertc_wakeup_feed(nertc_wake_word_, data.data(), data.size());
+}
+
+// Returns the SDK's feed size, or 0 when no SDK instance exists.
+size_t NertcAfeWakeWord::GetFeedSize() {
+    if (nertc_wake_word_ == nullptr) {
+        return 0;
+    }
+    return (size_t)nertc_wakeup_get_feed_size(nertc_wake_word_);
+}
+
+// Task body: whenever the running bit is set, calls nertc_wakeup_detect() in a loop.
+void NertcAfeWakeWord::AudioDetectionTask() {
+    auto fetch_size = nertc_wakeup_get_fetch_chunk_size(nertc_wake_word_);
+    auto feed_size = nertc_wakeup_get_feed_chunk_size(nertc_wake_word_);
+    ESP_LOGI(TAG, "Audio detection task started, feed size: %d fetch size: %d",
+        feed_size, fetch_size);
+
+    while (true)
+    {
+        xEventGroupWaitBits(event_group_, DETECTION_RUNNING_EVENT, pdFALSE, pdTRUE, portMAX_DELAY);
+        nertc_wakeup_detect(nertc_wake_word_);
+    }
+}
+
+void NertcAfeWakeWord::StoreWakeWordData(const int16_t* data, size_t samples) {
+    // store audio data to wake_word_pcm_
+    std::lock_guard lock(wake_word_pcm_mutex_);
+    wake_word_pcm_.emplace_back(std::vector(data, data + samples));
+    // keep about 2 seconds of data, detect duration is 30ms (sample_rate == 16000, chunksize == 512)
+    while (wake_word_pcm_.size() > 2000 / 30) {
+        wake_word_pcm_.pop_front();
+    }
+}
+
+// Encodes the buffered PCM chunks to Opus on a statically allocated task.
+// An empty packet is appended last so GetWakeWordOpus() can report the end.
+void NertcAfeWakeWord::EncodeWakeWordData() {
+    wake_word_opus_.clear();
+    if (wake_word_encode_task_stack_ == nullptr) {
+        wake_word_encode_task_stack_ = (StackType_t*)heap_caps_malloc(4096 * 8, MALLOC_CAP_SPIRAM);
+    }
+    wake_word_encode_task_ = xTaskCreateStatic([](void* arg) {
+        auto this_ = (NertcAfeWakeWord*)arg;
+        {
+            auto start_time = esp_timer_get_time();
+            auto encoder = std::make_unique(16000, 1, OPUS_FRAME_DURATION_MS);
+            encoder->SetComplexity(0); // 0 is the fastest
+
+            int packets = 0;
+            std::lock_guard lock_(this_->wake_word_pcm_mutex_);
+            for (auto& pcm: this_->wake_word_pcm_) {
+                encoder->Encode(std::move(pcm), [this_](std::vector&& opus) {
+                    std::lock_guard lock(this_->wake_word_mutex_);
+                    this_->wake_word_opus_.emplace_back(std::move(opus));
+                    this_->wake_word_cv_.notify_all();
+                });
+                packets++;
+            }
+            this_->wake_word_pcm_.clear();
+
+            auto end_time = esp_timer_get_time();
+            ESP_LOGI(TAG, "Encode wake word opus %d packets in %ld ms", packets, (long)((end_time - start_time) / 1000));
+
+            std::lock_guard lock(this_->wake_word_mutex_);
+            this_->wake_word_opus_.push_back(std::vector());
+            this_->wake_word_cv_.notify_all();
+        }
+        vTaskDelete(NULL);
+    }, "encode_detect_packets", 4096 * 8, this, 2, wake_word_encode_task_stack_, &wake_word_encode_task_buffer_);
+}
+
+// Blocks until a packet is queued; returns false for the empty end-of-stream packet.
+bool NertcAfeWakeWord::GetWakeWordOpus(std::vector& opus) {
+    std::unique_lock lock(wake_word_mutex_);
+    wake_word_cv_.wait(lock, [this]() {
+        return !wake_word_opus_.empty();
+    });
+    opus.swap(wake_word_opus_.front());
+    wake_word_opus_.pop_front();
+    return !opus.empty();
+}
diff --git a/main/audio_processing/nertc_afe_wake_word.h b/main/audio_processing/nertc_afe_wake_word.h
new file mode 100644
index 0000000..68a851f
--- /dev/null
+++ b/main/audio_processing/nertc_afe_wake_word.h
@@ -0,0 +1,63 @@
+#ifndef NERTC_AFE_WAKE_WORD_H
+#define NERTC_AFE_WAKE_WORD_H
+
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "audio_codec.h"
+#include "wake_word.h"
+#include "nertc_wakeup_sdk.h"
+
+// WakeWord implementation backed by the prebuilt NERTC wake-up SDK
+// (nertc_wakeup_sdk.h); also buffers recent PCM so it can be Opus-encoded.
+class NertcAfeWakeWord : public WakeWord {
+public:
+    NertcAfeWakeWord();
+    virtual ~NertcAfeWakeWord();
+
+    void Initialize(AudioCodec* codec);
+    void Feed(const std::vector& data);
+    void OnWakeWordDetected(std::function callback);
+    void DoCallBack(std::string& wake_word);
+    void StartDetection();
+    void StopDetection();
+    bool IsDetectionRunning();
+    size_t GetFeedSize();
+    void EncodeWakeWordData();
+    bool GetWakeWordOpus(std::vector& opus);
+    const std::string& GetLastDetectedWakeWord() const { return last_detected_wake_word_; }
+
+private:
+    nertc_wakeup_sdk_t nertc_wake_word_ = nullptr;
+    std::function wake_up_call_back_;
+    std::vector wake_words_; // NOTE(review): not referenced in nertc_afe_wake_word.cc
+    EventGroupHandle_t event_group_;
+    std::function wake_word_detected_callback_; // NOTE(review): not referenced in nertc_afe_wake_word.cc
+    AudioCodec* codec_ = nullptr;
+    std::string last_detected_wake_word_;
+
+    TaskHandle_t wake_word_encode_task_ = nullptr;
+    StaticTask_t wake_word_encode_task_buffer_;
+    StackType_t* wake_word_encode_task_stack_ = nullptr;
+    std::mutex wake_word_pcm_mutex_;
+    std::list> wake_word_pcm_;
+    std::list> wake_word_opus_;
+    std::mutex wake_word_mutex_;
+    std::condition_variable wake_word_cv_;
+
+    void StoreWakeWordData(const int16_t* data, size_t size);
+    void AudioDetectionTask();
+};
+
+#endif
diff --git a/partitions/v1/partitions_16m_support_nertc_wake.csv b/partitions/v1/partitions_16m_support_nertc_wake.csv
new file mode 100644
index 0000000..f0f53d7
--- /dev/null
+++ b/partitions/v1/partitions_16m_support_nertc_wake.csv
@@ -0,0 +1,9 @@
+# ESP-IDF Partition Table
+# Name, Type, SubType, Offset, Size, Flags
+nvs, data, nvs, 0x9000, 0x4000,
+otadata, data, ota, 0xd000, 0x2000,
+phy_init, data, phy, 0xf000, 0x1000,
+model, data, spiffs, , 5526K,
+custom, data, spiffs, 0x576000, 64K,
+ota_0, app, ota_0, , 5M,
+ota_1, app, ota_1, , 5M,