Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions components/nertc_wake_up/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Register a source-less component that only exports the SDK headers.
idf_component_register(
    INCLUDE_DIRS "include"
)

# The wake-up SDK is shipped as a precompiled static archive; import it and
# expose it to every component that depends on this one.
set(nertc_wake_up_archive "${CMAKE_CURRENT_SOURCE_DIR}/libs/esp32s3/libnertc_wake_up.a")
add_prebuilt_library(nertc_wake_up "${nertc_wake_up_archive}"
                     REQUIRES espressif__esp-sr)
target_link_libraries(${COMPONENT_LIB} INTERFACE nertc_wake_up)
18 changes: 18 additions & 0 deletions components/nertc_wake_up/idf_component.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
## IDF Component Manager Manifest File
dependencies:
espressif/zlib: "^1.3.1"
espressif/esp-sr: ~2.1.1
## Required IDF version
idf:
version: ">=5.4.0"
# # Put list of dependencies here
# # For components maintained by Espressif:
# component: "~1.0.0"
# # For 3rd party components:
# username/component: ">=1.0.0,<2.0.0"
# username2/component2:
# version: "~1.0.0"
# # For transient dependencies `public` flag can be set.
# # `public` flag doesn't have an effect on the dependencies of the `main` component.
# # All dependencies of `main` are public by default.
# public: true
52 changes: 52 additions & 0 deletions components/nertc_wake_up/include/nertc_wakeup_sdk.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#ifndef _NERTC_WAKE_UP_H_
#define _NERTC_WAKE_UP_H_

#include "nertc_wakeup_sdk_event.h"

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

#define NERTC_WAKE_UP_API __attribute__((visibility("default")))

/**
 * @brief Creates a wakeup instance. This is the first function of the SDK that must be called.
 * @param cfg Engine configuration.
 * @return The engine instance.
 * @note NOTE(review): the value returned on failure is not documented here
 *       (presumably NULL) - confirm against the SDK.
 */
NERTC_WAKE_UP_API nertc_wakeup_sdk_t nertc_wakeup_create(const nertc_wakeup_sdk_config_t *cfg);

/**
 * @brief Destroys a wakeup instance.
 * @param wakeup Instance created by nertc_wakeup_create.
 */
NERTC_WAKE_UP_API void nertc_wakeup_destory(nertc_wakeup_sdk_t wakeup);

/**
 * @brief Initializes a wakeup instance.
 * @note This is the first function to call after creating the engine instance;
 *       an instance can only be initialized once.
 * @param wakeup             Engine instance created by nertc_wakeup_create that has not
 *                           been initialized yet.
 * @param input_channels     presumably the number of audio input channels - TODO confirm.
 * @param reference_channels presumably the number of reference (loopback) channels
 *                           contained in the input - TODO confirm.
 * @return Result of the call:
 *         - 0: success
 *         - non-zero: failure
 */
NERTC_WAKE_UP_API int nertc_wakeup_init(nertc_wakeup_sdk_t wakeup, int input_channels, int reference_channels);

/**
 * @brief Runs wake word detection on the given instance.
 * @param wakeup Instance created by nertc_wakeup_create.
 * @return NOTE(review): return value semantics are undocumented (presumably 0 on
 *         success, as for nertc_wakeup_init) - confirm against the SDK.
 */
NERTC_WAKE_UP_API int nertc_wakeup_detect(nertc_wakeup_sdk_t wakeup);

/**
 * @brief Stops wake word detection on the given instance.
 * @param wakeup Instance created by nertc_wakeup_create.
 * @return NOTE(review): return value semantics are undocumented (presumably 0 on
 *         success) - confirm against the SDK.
 */
NERTC_WAKE_UP_API int nertc_wakeup_stop_detect(nertc_wakeup_sdk_t wakeup);

/**
 * @brief Feeds 16-bit audio data to the given instance.
 * @param wakeup      Instance created by nertc_wakeup_create.
 * @param data        Pointer to the int16_t audio samples.
 * @param data_length Length of @p data; presumably counted in int16_t samples rather
 *                    than bytes - TODO confirm against the SDK.
 * @return NOTE(review): return value semantics are undocumented - confirm.
 */
NERTC_WAKE_UP_API int nertc_wakeup_feed(nertc_wakeup_sdk_t wakeup, const int16_t* data, int data_length);

/**
 * @brief Queries the feed size of the given instance.
 * @param wakeup Instance created by nertc_wakeup_create.
 * @return The feed size; presumably the amount of data expected per
 *         nertc_wakeup_feed call - TODO confirm unit and error values.
 */
NERTC_WAKE_UP_API int nertc_wakeup_get_feed_size(nertc_wakeup_sdk_t wakeup);

/**
 * @brief Queries the feed chunk size of the given instance.
 * @param wakeup Instance created by nertc_wakeup_create.
 * @return The feed chunk size. NOTE(review): unit and relation to
 *         nertc_wakeup_get_feed_size are undocumented - confirm.
 */
NERTC_WAKE_UP_API int nertc_wakeup_get_feed_chunk_size(nertc_wakeup_sdk_t wakeup);

/**
 * @brief Queries the fetch chunk size of the given instance.
 * @param wakeup Instance created by nertc_wakeup_create.
 * @return The fetch chunk size. NOTE(review): unit is undocumented - confirm.
 */
NERTC_WAKE_UP_API int nertc_wakeup_get_fetch_chunk_size(nertc_wakeup_sdk_t wakeup);

#ifdef __cplusplus
}
#endif
#endif
42 changes: 42 additions & 0 deletions components/nertc_wake_up/include/nertc_wakeup_sdk_event.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#ifndef __NERTC_WAKE_UP_EVENT_H__
#define __NERTC_WAKE_UP_EVENT_H__


#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/** Opaque handle to a wakeup instance. */
typedef void * nertc_wakeup_sdk_t;
/** Opaque user pointer that is stored in the configuration and carried in the callback context. */
typedef void * nertc_wakeup_sdk_user_data_t;

/** Context passed to every event callback. */
typedef struct nertc_wakeup_sdk_callback_context {
    nertc_wakeup_sdk_t wakeup; /**< wakeup instance */
    nertc_wakeup_sdk_user_data_t user_data; /**< user data */
} nertc_wakeup_sdk_callback_context_t;

/** Table of callbacks through which the SDK reports events. */
typedef struct nertc_wakeup_sdk_event_handler {
    /**
     * Callback invoked when a wake word is detected.
     * <br>This callback indicates that the SDK has detected a wake word, so the
     * application can react to it or prompt the user.
     * @param ctx Callback context
     * @param wake_word The wake word that was detected
     */
    void (*on_wake_word_detected)(const nertc_wakeup_sdk_callback_context_t* ctx, const char *wake_word);
} nertc_wakeup_sdk_event_handle_t;

/** Configuration passed to nertc_wakeup_create. */
typedef struct nertc_wakeup_sdk_config {
    nertc_wakeup_sdk_event_handle_t event_handler; /**< event callbacks */
    nertc_wakeup_sdk_user_data_t user_data; /**< user data; presumably handed back via the callback context - confirm */
    const char* appkey; /**< application key string; NOTE(review): ownership/lifetime requirements are undocumented - confirm */
    const char* deviceId; /**< device identifier string; NOTE(review): ownership/lifetime requirements are undocumented - confirm */
} nertc_wakeup_sdk_config_t;

#ifdef __cplusplus
}
#endif

#endif // __NERTC_WAKE_UP_EVENT_H__
Binary file not shown.
2 changes: 2 additions & 0 deletions main/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,8 @@ else()
endif()
if(CONFIG_USE_AFE_WAKE_WORD)
list(APPEND SOURCES "audio_processing/afe_wake_word.cc")
elseif(CONFIG_USE_AFE_NERTC_WAKE_WORD)
list(APPEND SOURCES "audio_processing/nertc_afe_wake_word.cc")
elseif(CONFIG_USE_ESP_WAKE_WORD)
list(APPEND SOURCES "audio_processing/esp_wake_word.cc")
else()
Expand Down
7 changes: 7 additions & 0 deletions main/Kconfig.projbuild
Original file line number Diff line number Diff line change
Expand Up @@ -398,6 +398,13 @@ config USE_AFE_WAKE_WORD
help
需要 ESP32 S3 与 PSRAM 支持

config USE_AFE_NERTC_WAKE_WORD
    bool "Enable nertc Wake Word Detection (AFE)"
    default n
    # The nertc_wake_up component only links a prebuilt ESP32-S3 archive
    # (libs/esp32s3/libnertc_wake_up.a), so the option must not be offered on
    # other targets, where linking would fail.
    depends on IDF_TARGET_ESP32S3 && SPIRAM
    help
        需要 ESP32 S3 与 PSRAM 支持

config USE_AUDIO_PROCESSOR
bool "Enable Audio Noise Reduction"
default y
Expand Down
5 changes: 5 additions & 0 deletions main/application.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,15 @@

#if CONFIG_USE_AFE_WAKE_WORD
#include "afe_wake_word.h"
#elif CONFIG_USE_AFE_NERTC_WAKE_WORD
#include "nertc_afe_wake_word.h"
#elif CONFIG_USE_ESP_WAKE_WORD
#include "esp_wake_word.h"
#else
#include "no_wake_word.h"
#endif


#include <cstring>
#include <esp_log.h>
#include <cJSON.h>
Expand Down Expand Up @@ -72,6 +75,8 @@ Application::Application() {

#if CONFIG_USE_AFE_WAKE_WORD
wake_word_ = std::make_unique<AfeWakeWord>();
#elif CONFIG_USE_AFE_NERTC_WAKE_WORD
wake_word_ = std::make_unique<NertcAfeWakeWord>();
#elif CONFIG_USE_ESP_WAKE_WORD
wake_word_ = std::make_unique<EspWakeWord>();
#else
Expand Down
184 changes: 184 additions & 0 deletions main/audio_processing/nertc_afe_wake_word.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
#include "nertc_afe_wake_word.h"
#include "application.h"

#include <esp_log.h>
#include <model_path.h>
#include <arpa/inet.h>
#include <sstream>

// Event-group bit that is set by StartDetection() and cleared by StopDetection().
#define DETECTION_RUNNING_EVENT 1

// Tag used for this file's ESP_LOG* output.
#define TAG "NertcAfeWakeWord"

void on_wake_word_detected_handle(const nertc_wakeup_sdk_callback_context_t* ctx, const char *wake_word)
{
auto* multinet = static_cast<NertcAfeWakeWord*>(ctx->user_data);
//ESP_LOGI("test", "wake_word:%s", wake_word);
std::string call_back_wake_word = std::string(wake_word);
multinet->DoCallBack(call_back_wake_word);
}
NertcAfeWakeWord::NertcAfeWakeWord()
: wake_word_pcm_(),
wake_word_opus_() {
event_group_ = xEventGroupCreate();
}

/**
 * @brief Releases the encode-task stack, the SDK instance and the event group.
 *
 * NOTE(review): the detection task started by Initialize() is not stopped here
 * and keeps using this object - confirm that instances are never destroyed
 * while that task is alive.
 */
NertcAfeWakeWord::~NertcAfeWakeWord()
{
    // Stack buffer handed to the statically created encode task.
    if (wake_word_encode_task_stack_ != nullptr) {
        heap_caps_free(wake_word_encode_task_stack_);
    }

    // SDK instance; only present when creation succeeded.
    if (nertc_wake_word_) {
        nertc_wakeup_destory(nertc_wake_word_);
    }

    vEventGroupDelete(event_group_);
}

/**
 * @brief Creates and initializes the wake-up SDK instance and starts the
 *        detection task.
 *
 * The device id is read from the "nertc_board_name" string inside the "board"
 * object of the board JSON. On any failure the function logs an error and
 * returns with nertc_wake_word_ left as nullptr, so Feed() and GetFeedSize()
 * become no-ops and no detection task is started.
 *
 * @param codec Audio codec that provides the input channel layout.
 */
void NertcAfeWakeWord::Initialize(AudioCodec* codec) {
    // The SDK allows an instance to be initialized only once; a second call
    // would also leak the previous instance and start a second task.
    if (nertc_wake_word_ != nullptr) {
        ESP_LOGW(TAG, "Wake word engine already initialized");
        return;
    }

    codec_ = codec;

    cJSON* root = cJSON_Parse(Board::GetInstance().GetJson().c_str());
    if (!root) {
        ESP_LOGE(TAG, "Failed to parse JSON broad_info");
        return;
    }
    cJSON* board_item = cJSON_GetObjectItem(root, "board");
    if (!board_item) {
        ESP_LOGE(TAG, "Invalid board info");
        cJSON_Delete(root);
        return;
    }
    cJSON* board_name_item = cJSON_GetObjectItem(board_item, "nertc_board_name");
    if (!board_name_item || !cJSON_IsString(board_name_item) || board_name_item->valuestring == nullptr) {
        ESP_LOGE(TAG, "Invalid board name in Board info JSON");
        cJSON_Delete(root);
        return;
    }
    // Copy the value out before the JSON tree that owns it is released.
    std::string device_id = board_name_item->valuestring;
    cJSON_Delete(root);

    // Zero-initialize so no field is ever handed to the SDK uninitialized.
    nertc_wakeup_sdk_config_t config = {};
    config.event_handler.on_wake_word_detected = on_wake_word_detected_handle;
    config.user_data = this;
    // NOTE(review): deviceId points into a local std::string; this is only safe
    // if nertc_wakeup_create copies the string - confirm against the SDK.
    config.deviceId = device_id.c_str();
    config.appkey = CONFIG_NERTC_APPKEY;

    nertc_wake_word_ = nertc_wakeup_create(&config);
    if (nertc_wake_word_ == nullptr) {
        ESP_LOGE(TAG, "Failed to create nertc wake up instance");
        return;
    }

    const int init_result = nertc_wakeup_init(nertc_wake_word_, codec_->input_channels(), codec_->input_reference());
    if (init_result != 0) {
        ESP_LOGE(TAG, "Failed to initialize nertc wake up instance: %d", init_result);
        nertc_wakeup_destory(nertc_wake_word_);
        nertc_wake_word_ = nullptr;
        return;
    }

    const BaseType_t task_created = xTaskCreate([](void* arg) {
        auto* self = static_cast<NertcAfeWakeWord*>(arg);
        self->AudioDetectionTask();
        vTaskDelete(NULL);
    }, "audio_detection", 4096, this, 3, nullptr);
    if (task_created != pdPASS) {
        ESP_LOGE(TAG, "Failed to create audio detection task");
    }
}

/**
 * @brief Registers the callback that DoCallBack() invokes with the detected wake word.
 * @param callback Callable receiving the wake word; replaces any previous one.
 */
void NertcAfeWakeWord::OnWakeWordDetected(std::function<void(const std::string& wake_word)> callback) {
    // The parameter is taken by value, so move it into place instead of copying again.
    wake_up_call_back_ = std::move(callback);
}

void NertcAfeWakeWord::DoCallBack(std::string& wake_word)
{
last_detected_wake_word_ = wake_word;
wake_up_call_back_(last_detected_wake_word_);
}

/**
 * @brief Enables detection by raising the detection-running flag, which
 *        AudioDetectionTask() waits on.
 */
void NertcAfeWakeWord::StartDetection()
{
    (void)xEventGroupSetBits(event_group_, DETECTION_RUNNING_EVENT);
}

/**
 * @brief Disables detection: clears the detection-running flag and, when an SDK
 *        instance exists, asks it to stop detecting.
 */
void NertcAfeWakeWord::StopDetection()
{
    xEventGroupClearBits(event_group_, DETECTION_RUNNING_EVENT);

    if (nertc_wake_word_ == nullptr) {
        return;
    }
    nertc_wakeup_stop_detect(nertc_wake_word_);
}

/**
 * @brief Reports whether the detection-running flag is currently set.
 * @return true if StartDetection() was called more recently than StopDetection().
 */
bool NertcAfeWakeWord::IsDetectionRunning()
{
    const auto current_bits = xEventGroupGetBits(event_group_);
    return (current_bits & DETECTION_RUNNING_EVENT) != 0;
}

/**
 * @brief Buffers the samples for later wake word encoding and passes them to the SDK.
 *
 * Does nothing when no SDK instance exists.
 *
 * @param data 16-bit audio samples to process.
 */
void NertcAfeWakeWord::Feed(const std::vector<int16_t>& data)
{
    if (nertc_wake_word_ != nullptr) {
        const int16_t* samples = data.data();
        const size_t sample_count = data.size();

        // Keep a copy first so the audio is available to EncodeWakeWordData().
        StoreWakeWordData(samples, sample_count);
        nertc_wakeup_feed(nertc_wake_word_, samples, sample_count);
    }
}

/**
 * @brief Returns the feed size reported by the SDK.
 * @return The SDK's feed size, or 0 when no SDK instance exists or the SDK
 *         reports a non-positive value.
 */
size_t NertcAfeWakeWord::GetFeedSize() {
    if (nertc_wake_word_ == nullptr) {
        return 0;
    }

    const int feed_size = nertc_wakeup_get_feed_size(nertc_wake_word_);
    // The SDK returns a signed int; casting a negative value straight to size_t
    // would turn it into an enormous size, so clamp it to 0.
    return feed_size > 0 ? static_cast<size_t>(feed_size) : 0;
}

/**
 * @brief Body of the detection task: logs the SDK chunk sizes, then runs
 *        detection whenever the detection-running flag is set.
 *
 * The loop never exits; while the flag is cleared the task blocks in
 * xEventGroupWaitBits().
 */
void NertcAfeWakeWord::AudioDetectionTask() {
    // Each value comes from the getter of the same name (they used to be
    // swapped, which mislabelled the log line below).
    const int feed_size = nertc_wakeup_get_feed_chunk_size(nertc_wake_word_);
    const int fetch_size = nertc_wakeup_get_fetch_chunk_size(nertc_wake_word_);
    ESP_LOGI(TAG, "Audio detection task started, feed size: %d fetch size: %d",
        feed_size, fetch_size);

    while (true) {
        // pdFALSE: leave the flag set on wake-up so detection keeps running
        // until StopDetection() clears it.
        xEventGroupWaitBits(event_group_, DETECTION_RUNNING_EVENT, pdFALSE, pdTRUE, portMAX_DELAY);
        nertc_wakeup_detect(nertc_wake_word_);
    }
}

/**
 * @brief Appends one chunk of samples to the PCM history and drops the oldest
 *        chunks beyond the retention limit.
 *
 * @param data    Pointer to the samples to copy.
 * @param samples Number of int16_t samples at @p data.
 */
void NertcAfeWakeWord::StoreWakeWordData(const int16_t* data, size_t samples)
{
    // Roughly 2 seconds of history, assuming about 30 ms of audio per chunk
    // (sample_rate == 16000, chunksize == 512).
    constexpr size_t kMaxStoredChunks = 2000 / 30;

    std::lock_guard<std::mutex> guard(wake_word_pcm_mutex_);
    wake_word_pcm_.emplace_back(data, data + samples);
    while (wake_word_pcm_.size() > kMaxStoredChunks) {
        wake_word_pcm_.pop_front();
    }
}

void NertcAfeWakeWord::EncodeWakeWordData() {
wake_word_opus_.clear();
if (wake_word_encode_task_stack_ == nullptr) {
wake_word_encode_task_stack_ = (StackType_t*)heap_caps_malloc(4096 * 8, MALLOC_CAP_SPIRAM);
}
wake_word_encode_task_ = xTaskCreateStatic([](void* arg) {
auto this_ = (NertcAfeWakeWord*)arg;
{
auto start_time = esp_timer_get_time();
auto encoder = std::make_unique<OpusEncoderWrapper>(16000, 1, OPUS_FRAME_DURATION_MS);
encoder->SetComplexity(0); // 0 is the fastest

int packets = 0;
std::lock_guard<std::mutex> lock_(this_->wake_word_pcm_mutex_);
for (auto& pcm: this_->wake_word_pcm_) {
encoder->Encode(std::move(pcm), [this_](std::vector<uint8_t>&& opus) {
std::lock_guard<std::mutex> lock(this_->wake_word_mutex_);
this_->wake_word_opus_.emplace_back(std::move(opus));
this_->wake_word_cv_.notify_all();
});
packets++;
}
this_->wake_word_pcm_.clear();

auto end_time = esp_timer_get_time();
ESP_LOGI(TAG, "Encode wake word opus %d packets in %ld ms", packets, (long)((end_time - start_time) / 1000));

std::lock_guard<std::mutex> lock(this_->wake_word_mutex_);
this_->wake_word_opus_.push_back(std::vector<uint8_t>());
this_->wake_word_cv_.notify_all();
}
vTaskDelete(NULL);
}, "encode_detect_packets", 4096 * 8, this, 2, wake_word_encode_task_stack_, &wake_word_encode_task_buffer_);
}

/**
 * @brief Blocks until an encoded packet is available and hands it to the caller.
 *
 * @param opus Receives the oldest queued packet (its previous contents are discarded).
 * @return true if a non-empty packet was returned; false when the empty
 *         end-of-stream packet was dequeued.
 */
bool NertcAfeWakeWord::GetWakeWordOpus(std::vector<uint8_t>& opus)
{
    std::unique_lock<std::mutex> guard(wake_word_mutex_);
    // Re-check after every wake-up to tolerate spurious notifications.
    while (wake_word_opus_.empty()) {
        wake_word_cv_.wait(guard);
    }

    opus.swap(wake_word_opus_.front());
    wake_word_opus_.pop_front();

    const bool has_payload = !opus.empty();
    return has_payload;
}
Loading