From f49f73933358dd0bd8eae68b2cf416b5069a871d Mon Sep 17 00:00:00 2001 From: aks Date: Fri, 14 Jun 2024 07:06:23 +0400 Subject: [PATCH 1/6] hashtable memset issue fix --- src/switch_hashtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/switch_hashtable.c b/src/switch_hashtable.c index 66669acb63d..91edde925dc 100644 --- a/src/switch_hashtable.c +++ b/src/switch_hashtable.c @@ -126,7 +126,7 @@ hashtable_expand(switch_hashtable_t *h) realloc(h->table, newsize * sizeof(struct entry *)); if (NULL == newtable) { (h->primeindex)--; return 0; } h->table = newtable; - memset(newtable[h->tablelength], 0, newsize - h->tablelength); + memset(&newtable[h->tablelength], 0, ((newsize - h->tablelength) * sizeof(struct entry*))); for (i = 0; i < h->tablelength; i++) { for (pE = &(newtable[i]), e = *pE; e != NULL; e = *pE) { index = indexFor(newsize,e->h); From 187e40df5ddc58b96a3b9c4f73eb863a179b563c Mon Sep 17 00:00:00 2001 From: aks Date: Sat, 15 Jun 2024 04:53:54 +0400 Subject: [PATCH 2/6] extra parentheses deleted --- src/switch_hashtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/switch_hashtable.c b/src/switch_hashtable.c index 91edde925dc..107d539c4f4 100644 --- a/src/switch_hashtable.c +++ b/src/switch_hashtable.c @@ -126,7 +126,7 @@ hashtable_expand(switch_hashtable_t *h) realloc(h->table, newsize * sizeof(struct entry *)); if (NULL == newtable) { (h->primeindex)--; return 0; } h->table = newtable; - memset(&newtable[h->tablelength], 0, ((newsize - h->tablelength) * sizeof(struct entry*))); + memset(&newtable[h->tablelength], 0, (newsize - h->tablelength) * sizeof(struct entry*)); for (i = 0; i < h->tablelength; i++) { for (pE = &(newtable[i]), e = *pE; e != NULL; e = *pE) { index = indexFor(newsize,e->h); From 93d5d3158bcb70bb6f67a4a00b20f74c93473a5c Mon Sep 17 00:00:00 2001 From: aks Date: Thu, 17 Oct 2024 20:14:11 +0400 Subject: [PATCH 3/6] new tts modules --- build/modules.conf.in | 3 + configure.ac | 3 + 
src/mod/asr_tts/mod_google_tts/Makefile.am | 11 + .../conf/autoload_configs/google_tts.conf.xml | 29 + .../mod_google_tts/conf/dialplan/example.xml | 8 + .../asr_tts/mod_google_tts/mod_google_tts.c | 509 ++++++++++++++++++ .../asr_tts/mod_google_tts/mod_google_tts.h | 58 ++ src/mod/asr_tts/mod_google_tts/utils.c | 112 ++++ src/mod/asr_tts/mod_openai_tts/Makefile.am | 11 + .../conf/autoload_configs/openai_tts.conf.xml | 30 ++ .../mod_openai_tts/conf/dialplan/example.xml | 9 + .../asr_tts/mod_openai_tts/mod_openai_tts.c | 494 +++++++++++++++++ .../asr_tts/mod_openai_tts/mod_openai_tts.h | 59 ++ src/mod/asr_tts/mod_openai_tts/utils.c | 85 +++ src/mod/asr_tts/mod_piper_tts/Makefile.am | 11 + .../conf/autoload_configs/piper_tts.conf.xml | 22 + .../mod_piper_tts/conf/dialplan/example.xml | 9 + src/mod/asr_tts/mod_piper_tts/mod_piper_tts.c | 339 ++++++++++++ src/mod/asr_tts/mod_piper_tts/mod_piper_tts.h | 60 +++ 19 files changed, 1862 insertions(+) create mode 100644 src/mod/asr_tts/mod_google_tts/Makefile.am create mode 100644 src/mod/asr_tts/mod_google_tts/conf/autoload_configs/google_tts.conf.xml create mode 100644 src/mod/asr_tts/mod_google_tts/conf/dialplan/example.xml create mode 100644 src/mod/asr_tts/mod_google_tts/mod_google_tts.c create mode 100644 src/mod/asr_tts/mod_google_tts/mod_google_tts.h create mode 100644 src/mod/asr_tts/mod_google_tts/utils.c create mode 100644 src/mod/asr_tts/mod_openai_tts/Makefile.am create mode 100644 src/mod/asr_tts/mod_openai_tts/conf/autoload_configs/openai_tts.conf.xml create mode 100644 src/mod/asr_tts/mod_openai_tts/conf/dialplan/example.xml create mode 100644 src/mod/asr_tts/mod_openai_tts/mod_openai_tts.c create mode 100644 src/mod/asr_tts/mod_openai_tts/mod_openai_tts.h create mode 100644 src/mod/asr_tts/mod_openai_tts/utils.c create mode 100644 src/mod/asr_tts/mod_piper_tts/Makefile.am create mode 100644 src/mod/asr_tts/mod_piper_tts/conf/autoload_configs/piper_tts.conf.xml create mode 100644 
src/mod/asr_tts/mod_piper_tts/conf/dialplan/example.xml create mode 100644 src/mod/asr_tts/mod_piper_tts/mod_piper_tts.c create mode 100644 src/mod/asr_tts/mod_piper_tts/mod_piper_tts.h diff --git a/build/modules.conf.in b/build/modules.conf.in index 8453e290b70..0210972cc3b 100644 --- a/build/modules.conf.in +++ b/build/modules.conf.in @@ -60,6 +60,9 @@ applications/mod_voicemail #asr_tts/mod_flite #asr_tts/mod_pocketsphinx #asr_tts/mod_tts_commandline +#asr_tts/mod_google_tts +#asr_tts/mod_openai_tts +#asr_tts/mod_piper_tts codecs/mod_amr #codecs/mod_amrwb codecs/mod_b64 diff --git a/configure.ac b/configure.ac index 6991398ae7e..a7640174f4d 100644 --- a/configure.ac +++ b/configure.ac @@ -2155,6 +2155,9 @@ AC_CONFIG_FILES([Makefile src/mod/asr_tts/mod_flite/Makefile src/mod/asr_tts/mod_pocketsphinx/Makefile src/mod/asr_tts/mod_tts_commandline/Makefile + src/mod/asr_tts/mod_google_tts/Makefile + src/mod/asr_tts/mod_openai_tts/Makefile + src/mod/asr_tts/mod_piper_tts/Makefile src/mod/codecs/mod_amr/Makefile src/mod/codecs/mod_amrwb/Makefile src/mod/codecs/mod_b64/Makefile diff --git a/src/mod/asr_tts/mod_google_tts/Makefile.am b/src/mod/asr_tts/mod_google_tts/Makefile.am new file mode 100644 index 00000000000..a0acc9a7dc1 --- /dev/null +++ b/src/mod/asr_tts/mod_google_tts/Makefile.am @@ -0,0 +1,11 @@ + +include $(top_srcdir)/build/modmake.rulesam +MODNAME=mod_google_tts + +mod_LTLIBRARIES = mod_google_tts.la +mod_google_tts_la_SOURCES = mod_google_tts.c utils.c +mod_google_tts_la_CFLAGS = $(AM_CFLAGS) -I. 
+mod_google_tts_la_LIBADD = $(switch_builddir)/libfreeswitch.la +mod_google_tts_la_LDFLAGS = -avoid-version -module -no-undefined -shared + +$(am_mod_google_tts_la_OBJECTS): mod_google_tts.h diff --git a/src/mod/asr_tts/mod_google_tts/conf/autoload_configs/google_tts.conf.xml b/src/mod/asr_tts/mod_google_tts/conf/autoload_configs/google_tts.conf.xml new file mode 100644 index 00000000000..bc85f56ca0f --- /dev/null +++ b/src/mod/asr_tts/mod_google_tts/conf/autoload_configs/google_tts.conf.xml @@ -0,0 +1,29 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/mod/asr_tts/mod_google_tts/conf/dialplan/example.xml b/src/mod/asr_tts/mod_google_tts/conf/dialplan/example.xml new file mode 100644 index 00000000000..0d571e43a02 --- /dev/null +++ b/src/mod/asr_tts/mod_google_tts/conf/dialplan/example.xml @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/src/mod/asr_tts/mod_google_tts/mod_google_tts.c b/src/mod/asr_tts/mod_google_tts/mod_google_tts.c new file mode 100644 index 00000000000..01793278da6 --- /dev/null +++ b/src/mod/asr_tts/mod_google_tts/mod_google_tts.c @@ -0,0 +1,509 @@ +/* + * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application + * Copyright (C) 2005-2014, Anthony Minessale II + * + * Version: MPL 1.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. 
+ * + * Module Contributor(s): + * Konstantin Alexandrin + * + * + * Provides the ability to use Google TTS service in the Freeswitch + * https://cloud.google.com/text-to-speech/docs/reference/rest + * + * Development repository: + * https://github.com/akscf/mod_google_tts + * + */ +#include "mod_google_tts.h" + +static struct { + char *file_ext; + char *cache_path; + char *tmp_path; + char *opt_gender; + char *opt_encoding; + char *user_agent; + char *api_url; + char *api_key; + char *proxy; + char *proxy_credentials; + char *api_url_ep; + uint32_t file_size_max; + uint32_t request_timeout; // seconds + uint32_t connect_timeout; // seconds + uint8_t fl_voice_name_as_lang; + uint8_t fl_log_http_error; + uint8_t fl_cache_enabled; +} globals; + +SWITCH_MODULE_LOAD_FUNCTION(mod_google_tts_load); +SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_google_tts_shutdown); +SWITCH_MODULE_DEFINITION(mod_google_tts, mod_google_tts_load, mod_google_tts_shutdown, NULL); + + +static size_t curl_io_write_callback(char *buffer, size_t size, size_t nitems, void *user_data) { + tts_ctx_t *tts_ctx = (tts_ctx_t *)user_data; + size_t len = (size * nitems); + + if(len > 0 && tts_ctx->curl_recv_buffer) { + switch_buffer_write(tts_ctx->curl_recv_buffer, buffer, len); + } + + return len; +} + +static size_t curl_io_read_callback(char *buffer, size_t size, size_t nitems, void *user_data) { + tts_ctx_t *tts_ctx = (tts_ctx_t *)user_data; + size_t nmax = (size * nitems); + size_t ncur = (tts_ctx->curl_send_buffer_len > nmax) ? nmax : tts_ctx->curl_send_buffer_len; + + memmove(buffer, tts_ctx->curl_send_buffer_ref, ncur); + tts_ctx->curl_send_buffer_ref += ncur; + tts_ctx->curl_send_buffer_len -= ncur; + + return ncur; +} + +static switch_status_t curl_perform(tts_ctx_t *tts_ctx, char *text) { + switch_status_t status = SWITCH_STATUS_SUCCESS; + CURL *curl_handle = NULL; + switch_curl_slist_t *headers = NULL; + switch_CURLcode curl_ret = 0; + long http_resp = 0; + const char *xgender = (tts_ctx->gender ? 
tts_ctx->gender : globals.opt_gender); + const char *ygender = (!globals.fl_voice_name_as_lang && tts_ctx->voice_name) ? tts_ctx->voice_name : NULL; + char *pdata = NULL; + char *qtext = NULL; + + if(text) { + qtext = escape_squotes(text); + } + + pdata = switch_mprintf( + "{'input':{'text':'%s'},'voice':{'ssmlGender':'%s', 'languageCode':'%s'},'audioConfig':{'audioEncoding':'%s', 'sampleRateHertz':'%d'}}\n\n", + qtext ? qtext : "", + ygender ? ygender : xgender, + tts_ctx->lang_code, + globals.opt_encoding, + tts_ctx->samplerate + ); + +#ifdef GTTS_DEBUG + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "CURL: URL=[%s], PDATA=[%s]\n", globals.api_url_ep, pdata); +#endif + + tts_ctx->curl_send_buffer_len = strlen(pdata); + tts_ctx->curl_send_buffer_ref = pdata; + + curl_handle = switch_curl_easy_init(); + + headers = switch_curl_slist_append(headers, "Content-Type: application/json; charset=utf-8"); + headers = switch_curl_slist_append(headers, "Expect:"); + + switch_curl_easy_setopt(curl_handle, CURLOPT_HTTPHEADER, headers); + switch_curl_easy_setopt(curl_handle, CURLOPT_POST, 1); + switch_curl_easy_setopt(curl_handle, CURLOPT_NOSIGNAL, 1); + + switch_curl_easy_setopt(curl_handle, CURLOPT_POSTFIELDSIZE, tts_ctx->curl_send_buffer_len); + switch_curl_easy_setopt(curl_handle, CURLOPT_POSTFIELDS, (void *)pdata); + switch_curl_easy_setopt(curl_handle, CURLOPT_READFUNCTION, curl_io_read_callback); + switch_curl_easy_setopt(curl_handle, CURLOPT_READDATA, (void *)tts_ctx); + + switch_curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, curl_io_write_callback); + switch_curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)tts_ctx); + + if(globals.connect_timeout > 0) { + switch_curl_easy_setopt(curl_handle, CURLOPT_CONNECTTIMEOUT, globals.connect_timeout); + } + if(globals.request_timeout > 0) { + switch_curl_easy_setopt(curl_handle, CURLOPT_TIMEOUT, globals.request_timeout); + } + if(globals.user_agent) { + switch_curl_easy_setopt(curl_handle, 
CURLOPT_USERAGENT, globals.user_agent); + } + if(strncasecmp(globals.api_url_ep, "https", 5) == 0) { + switch_curl_easy_setopt(curl_handle, CURLOPT_SSL_VERIFYPEER, 0); + switch_curl_easy_setopt(curl_handle, CURLOPT_SSL_VERIFYHOST, 0); + } + if(globals.proxy) { + if(globals.proxy_credentials != NULL) { + switch_curl_easy_setopt(curl_handle, CURLOPT_PROXYAUTH, CURLAUTH_ANY); + switch_curl_easy_setopt(curl_handle, CURLOPT_PROXYUSERPWD, globals.proxy_credentials); + } + if(strncasecmp(globals.proxy, "https", 5) == 0) { + switch_curl_easy_setopt(curl_handle, CURLOPT_PROXY_SSL_VERIFYPEER, 0); + } + switch_curl_easy_setopt(curl_handle, CURLOPT_PROXY, globals.proxy); + } + + switch_curl_easy_setopt(curl_handle, CURLOPT_URL, globals.api_url_ep); + + curl_ret = switch_curl_easy_perform(curl_handle); + if(!curl_ret) { + switch_curl_easy_getinfo(curl_handle, CURLINFO_RESPONSE_CODE, &http_resp); + if(!http_resp) { switch_curl_easy_getinfo(curl_handle, CURLINFO_HTTP_CONNECTCODE, &http_resp); } + } else { + http_resp = curl_ret; + } + + if(http_resp != 200) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "http-error=[%ld] (%s)\n", http_resp, globals.api_url); + status = SWITCH_STATUS_FALSE; + } + + if(tts_ctx->curl_recv_buffer) { + if(switch_buffer_inuse(tts_ctx->curl_recv_buffer) > 0) { + switch_buffer_write(tts_ctx->curl_recv_buffer, "\0", 1); + } + } + + if(curl_handle) { switch_curl_easy_cleanup(curl_handle); } + if(headers) { switch_curl_slist_free_all(headers); } + + switch_safe_free(pdata); + switch_safe_free(qtext); + return status; +} + +static switch_status_t extract_audio(tts_ctx_t *tts_ctx, char *buf_in, uint32_t buf_len) { + switch_status_t status = SWITCH_STATUS_SUCCESS; + switch_memory_pool_t *pool = tts_ctx->pool; + switch_file_t *fd = NULL; + char *buf_out = NULL, *ptr = NULL; + size_t len = buf_len, dec_len = 0; + uint32_t ofs1 = 0, ofs2 = 0; + + if((ptr = strnstr(buf_in, "\"audioContent\"", len)) == NULL) { + switch_log_printf(SWITCH_CHANNEL_LOG, 
SWITCH_LOG_WARNING, "Malformed media content\n"); + switch_goto_status(SWITCH_STATUS_FALSE, out); + } + for(ofs1 = ((ptr - buf_in) + 14); ofs1 < len; ofs1++) { + if(buf_in[ofs1] == '"') { ofs1++; break; } + } + if(ofs1 >= len) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Malformed media content\n"); + switch_goto_status(SWITCH_STATUS_FALSE, out); + } + for(ofs2 = len - 1; ofs2 > ofs1; ofs2--) { /* start at the last valid index; buf_in[len] is past the received data */ + if(buf_in[ofs2] == '"') { buf_in[ofs2]='\0'; ofs2--; break; } + } + if(ofs2 <= ofs1) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Malformed media content\n"); + switch_goto_status(SWITCH_STATUS_FALSE, out); + } + ptr = (void *)(buf_in + ofs1); + len = (ofs2 - ofs1); + dec_len = BASE64_DEC_SZ(len); + + if(dec_len < 4 ) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Malformed media content\n"); + switch_goto_status(SWITCH_STATUS_FALSE, out); + } + + if((buf_out = switch_core_alloc(pool, dec_len)) == NULL) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_core_alloc() failed\n"); + switch_goto_status(SWITCH_STATUS_GENERR, out); + } + + len = switch_b64_decode(ptr, buf_out, dec_len); + if(len != dec_len) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "switch_b64_decode: (len != dec_len)\n"); + dec_len = len; + } + + status = switch_file_open(&fd, tts_ctx->dst_file, + (SWITCH_FOPEN_WRITE | SWITCH_FOPEN_CREATE | SWITCH_FOPEN_TRUNCATE | SWITCH_FOPEN_BINARY), + (SWITCH_FPROT_UREAD | SWITCH_FPROT_UWRITE), pool); + if(status != SWITCH_STATUS_SUCCESS) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Unable to create output file (%s)\n", tts_ctx->dst_file); + switch_goto_status(SWITCH_STATUS_FALSE, out); + } + + status = switch_file_write(fd, buf_out, &len); + if(status != SWITCH_STATUS_SUCCESS || len != dec_len) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Unable to write into file (%s)\n", tts_ctx->dst_file); + switch_goto_status(SWITCH_STATUS_FALSE, out); + } + +out: + 
if(fd) { + switch_file_close(fd); + } + return status; +} + +// --------------------------------------------------------------------------------------------------------------------------------------------- +// speech api +// --------------------------------------------------------------------------------------------------------------------------------------------- +static switch_status_t speech_open(switch_speech_handle_t *sh, const char *voice, int samplerate, int channels, switch_speech_flag_t *flags) { + switch_status_t status = SWITCH_STATUS_SUCCESS; + char name_uuid[SWITCH_UUID_FORMATTED_LENGTH + 1] = { 0 }; + tts_ctx_t *tts_ctx = NULL; + + tts_ctx = switch_core_alloc(sh->memory_pool, sizeof(tts_ctx_t)); + tts_ctx->pool = sh->memory_pool; + tts_ctx->fhnd = switch_core_alloc(tts_ctx->pool, sizeof(switch_file_handle_t)); + tts_ctx->voice_name = switch_core_strdup(tts_ctx->pool, voice); + tts_ctx->lang_code = (globals.fl_voice_name_as_lang && voice) ? switch_core_strdup(sh->memory_pool, lang2bcp47(voice)) : "en-gb"; + tts_ctx->channels = channels; + tts_ctx->samplerate = samplerate; + tts_ctx->dst_file = NULL; + + sh->private_info = tts_ctx; + + if((status = switch_buffer_create_dynamic(&tts_ctx->curl_recv_buffer, 1024, 8192, globals.file_size_max)) != SWITCH_STATUS_SUCCESS) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_buffer_create_dynamic()\n"); + goto out; + } + + if(!globals.fl_cache_enabled) { + switch_uuid_str((char *)name_uuid, sizeof(name_uuid)); + tts_ctx->dst_file = switch_core_sprintf(sh->memory_pool, "%s%sgoogle-%s.%s", + globals.tmp_path, + SWITCH_PATH_SEPARATOR, + name_uuid, + globals.file_ext + ); + } + +out: + return status; +} + +static switch_status_t speech_close(switch_speech_handle_t *sh, switch_speech_flag_t *flags) { + tts_ctx_t *tts_ctx = (tts_ctx_t *) sh->private_info; + assert(tts_ctx != NULL); + + if(switch_test_flag(tts_ctx->fhnd, SWITCH_FILE_OPEN)) { + switch_core_file_close(tts_ctx->fhnd); + } + + 
if(tts_ctx->curl_recv_buffer) { + switch_buffer_destroy(&tts_ctx->curl_recv_buffer); + } + + if(tts_ctx->dst_file && !globals.fl_cache_enabled) { + unlink(tts_ctx->dst_file); + } + + return SWITCH_STATUS_SUCCESS; +} + +static switch_status_t speech_feed_tts(switch_speech_handle_t *sh, char *text, switch_speech_flag_t *flags) { + tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info; + switch_status_t status = SWITCH_STATUS_SUCCESS; + char digest[SWITCH_MD5_DIGEST_STRING_SIZE + 1] = { 0 }; + const void *ptr = NULL; + uint32_t recv_len = 0; + + assert(tts_ctx != NULL); + + if(!tts_ctx->dst_file) { + switch_md5_string(digest, (void *) text, strlen(text)); + tts_ctx->dst_file = switch_core_sprintf(sh->memory_pool, "%s%s%s.%s", + globals.cache_path, + SWITCH_PATH_SEPARATOR, + digest, + globals.file_ext + ); + } + + if(switch_file_exists(tts_ctx->dst_file, tts_ctx->pool) == SWITCH_STATUS_SUCCESS) { + if((status = switch_core_file_open(tts_ctx->fhnd, tts_ctx->dst_file, tts_ctx->channels, tts_ctx->samplerate, + (SWITCH_FILE_FLAG_READ | SWITCH_FILE_DATA_SHORT), sh->memory_pool)) != SWITCH_STATUS_SUCCESS) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open file: %s\n", tts_ctx->dst_file); + switch_goto_status(SWITCH_STATUS_FALSE, out); + } + } else { + switch_buffer_zero(tts_ctx->curl_recv_buffer); + status = curl_perform(tts_ctx , text); + recv_len = switch_buffer_peek_zerocopy(tts_ctx->curl_recv_buffer, &ptr); + if(status == SWITCH_STATUS_SUCCESS) { + if((status = extract_audio(tts_ctx, (char *)ptr, recv_len)) == SWITCH_STATUS_SUCCESS) { + if((status = switch_core_file_open(tts_ctx->fhnd, tts_ctx->dst_file, tts_ctx->channels, tts_ctx->samplerate, + (SWITCH_FILE_FLAG_READ | SWITCH_FILE_DATA_SHORT), sh->memory_pool)) != SWITCH_STATUS_SUCCESS) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open file: %s\n", tts_ctx->dst_file); + switch_goto_status(SWITCH_STATUS_FALSE, out); + } + } else { + switch_log_printf(SWITCH_CHANNEL_LOG, 
SWITCH_LOG_ERROR, "Unable to extract media\n"); + status = SWITCH_STATUS_FALSE; + } + } else { + if(globals.fl_log_http_error && recv_len > 0) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Services response: %s\n", (char *)ptr); + } + } + } +out: + return status; +} + +static switch_status_t speech_read_tts(switch_speech_handle_t *sh, void *data, size_t *data_len, switch_speech_flag_t *flags) { + tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info; + size_t len = (*data_len / sizeof(int16_t)); + + assert(tts_ctx != NULL); + + if(tts_ctx->fhnd->file_interface == NULL) { + return SWITCH_STATUS_FALSE; + } + + if(switch_core_file_read(tts_ctx->fhnd, data, &len) != SWITCH_STATUS_SUCCESS) { + switch_core_file_close(tts_ctx->fhnd); + return SWITCH_STATUS_FALSE; + } + + *data_len = (len * sizeof(int16_t)); + if(!*data_len) { /* test the byte count just stored, not the pointer (already dereferenced above) */ + switch_core_file_close(tts_ctx->fhnd); + return SWITCH_STATUS_BREAK; + } + + return SWITCH_STATUS_SUCCESS; +} + +static void speech_flush_tts(switch_speech_handle_t *sh) { + tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info; + + assert(tts_ctx != NULL); + + if(tts_ctx->fhnd != NULL && tts_ctx->fhnd->file_interface != NULL) { + switch_core_file_close(tts_ctx->fhnd); + } +} + +static void speech_text_param_tts(switch_speech_handle_t *sh, char *param, const char *val) { + tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info; + + assert(tts_ctx != NULL); + + if(strcasecmp(param, "lang") == 0) { + if(val) tts_ctx->lang_code = switch_core_strdup(sh->memory_pool, lang2bcp47(val)); + } else if(strcasecmp(param, "gender") == 0) { + if(val) tts_ctx->gender = switch_core_strdup(sh->memory_pool, fmt_gender(val)); + } +} + +static void speech_numeric_param_tts(switch_speech_handle_t *sh, char *param, int val) { +} + +static void speech_float_param_tts(switch_speech_handle_t *sh, char *param, double val) { +} + +// 
--------------------------------------------------------------------------------------------------------------------------------------------- +// main +// --------------------------------------------------------------------------------------------------------------------------------------------- +SWITCH_MODULE_LOAD_FUNCTION(mod_google_tts_load) { + switch_status_t status = SWITCH_STATUS_SUCCESS; + switch_xml_t cfg, xml, settings, param; + switch_speech_interface_t *speech_interface; + + memset(&globals, 0, sizeof(globals)); + + if((xml = switch_xml_open_cfg(MOD_CONFIG_NAME, &cfg, NULL)) == NULL) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open configuration: %s\n", MOD_CONFIG_NAME); + switch_goto_status(SWITCH_STATUS_GENERR, out); + } + + if((settings = switch_xml_child(cfg, "settings"))) { + for (param = switch_xml_child(settings, "param"); param; param = param->next) { + char *var = (char *)switch_xml_attr_soft(param, "name"); + char *val = (char *)switch_xml_attr_soft(param, "value"); + + if(!strcasecmp(var, "api-url")) { + if(val) globals.api_url = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "api-key")) { + if(val) globals.api_key = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "cache-path")) { + if(val) globals.cache_path = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "gender")) { + if(val) globals.opt_gender = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "encoding")) { + if(val) globals.opt_encoding = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "user-agent")) { + if(val) globals.user_agent = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "request-timeout")) { + if(val) globals.request_timeout = atoi(val); + } else if(!strcasecmp(var, "connect-timeout")) { + if(val) globals.connect_timeout = atoi(val); + } else if(!strcasecmp(var, "voice-name-as-language")) { + if(val) globals.fl_voice_name_as_lang = switch_true(val); + } else if(!strcasecmp(var, 
"log-http-errors")) { + if(val) globals.fl_log_http_error = switch_true(val); + } else if(!strcasecmp(var, "cache-enable")) { + if(val) globals.fl_cache_enabled = switch_true(val); + } else if(!strcasecmp(var, "file-size-max")) { + if(val) globals.file_size_max = atoi(val); + } else if(!strcasecmp(var, "proxy")) { + if(val) globals.proxy = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "proxy-credentials")) { + if(val) globals.proxy_credentials = switch_core_strdup(pool, val); + } + } + } + + if(!globals.api_url) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing required parameter: api-url\n"); + switch_goto_status(SWITCH_STATUS_GENERR, out); + } + if(!globals.api_key) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing required parameter: api-key\n"); + switch_goto_status(SWITCH_STATUS_GENERR, out); + } + + globals.tmp_path = SWITCH_GLOBAL_dirs.temp_dir; + globals.api_url_ep = switch_string_replace(globals.api_url, "${api-key}", globals.api_key); + globals.cache_path = (globals.cache_path == NULL ? "/tmp/google-tts-cache" : globals.cache_path); + globals.opt_gender = fmt_gender(globals.opt_gender == NULL ? "female" : globals.opt_gender); + globals.opt_encoding = fmt_encode(globals.opt_encoding == NULL ? "mp3" : globals.opt_encoding); + globals.file_size_max = globals.file_size_max > 0 ? 
globals.file_size_max : FILE_SIZE_MAX; + globals.file_ext = fmt_enct2fext(globals.opt_encoding); + + if(!globals.api_url_ep) { + globals.api_url_ep = strdup(globals.api_url); /* fall back to the configured URL; the key alone is not an endpoint */ + } + + if(switch_directory_exists(globals.cache_path, NULL) != SWITCH_STATUS_SUCCESS) { + switch_dir_make(globals.cache_path, SWITCH_FPROT_OS_DEFAULT, NULL); + } + + *module_interface = switch_loadable_module_create_module_interface(pool, modname); + speech_interface = switch_loadable_module_create_interface(*module_interface, SWITCH_SPEECH_INTERFACE); + speech_interface->interface_name = "google"; + + speech_interface->speech_open = speech_open; + speech_interface->speech_close = speech_close; + speech_interface->speech_feed_tts = speech_feed_tts; + speech_interface->speech_read_tts = speech_read_tts; + speech_interface->speech_flush_tts = speech_flush_tts; + + speech_interface->speech_text_param_tts = speech_text_param_tts; + speech_interface->speech_numeric_param_tts = speech_numeric_param_tts; + speech_interface->speech_float_param_tts = speech_float_param_tts; + + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "GoogleTTS (%s)\n", MOD_VERSION); +out: + if(xml) { + switch_xml_free(xml); + } + return status; +} + +SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_google_tts_shutdown) { + + switch_safe_free(globals.api_url_ep); + + return SWITCH_STATUS_SUCCESS; +} diff --git a/src/mod/asr_tts/mod_google_tts/mod_google_tts.h b/src/mod/asr_tts/mod_google_tts/mod_google_tts.h new file mode 100644 index 00000000000..18090f9feed --- /dev/null +++ b/src/mod/asr_tts/mod_google_tts/mod_google_tts.h @@ -0,0 +1,58 @@ +/* + * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application + * Copyright (C) 2005-2014, Anthony Minessale II + * + * Version: MPL 1.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Module Contributor(s): + * Konstantin Alexandrin + * + * + */ +#ifndef MOD_GOOGLE_TTS_H +#define MOD_GOOGLE_TTS_H + +#include +#include + +#define MOD_VERSION "1.0_gcp_api_v1" +#define MOD_CONFIG_NAME "google_tts.conf" +#define FILE_SIZE_MAX (2*1024*1024) +#define BASE64_DEC_SZ(n) ((n*3)/4) +//#define GTTS_DEBUG + +typedef struct { + switch_memory_pool_t *pool; + switch_file_handle_t *fhnd; + switch_buffer_t *curl_recv_buffer; + char *curl_send_buffer_ref; + char *lang_code; + char *gender; + char *voice_name; + char *dst_file; + uint32_t samplerate; + uint32_t channels; + size_t curl_send_buffer_len; +} tts_ctx_t; + + +/* utils.c */ +char *lang2bcp47(const char *lng); +char *fmt_enct2fext(const char *fmt); +char *fmt_gender(const char *gender); +char *fmt_encode(const char *fmt); + +char *strnstr(const char *s, const char *find, size_t slen); +char *escape_squotes(const char *string); + +#endif diff --git a/src/mod/asr_tts/mod_google_tts/utils.c b/src/mod/asr_tts/mod_google_tts/utils.c new file mode 100644 index 00000000000..b83043199c9 --- /dev/null +++ b/src/mod/asr_tts/mod_google_tts/utils.c @@ -0,0 +1,112 @@ +/* + * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application + * Copyright (C) 2005-2014, Anthony Minessale II + * + * Version: MPL 1.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Module Contributor(s): + * Konstantin Alexandrin + * + * + */ +#include "mod_google_tts.h" + +char *lang2bcp47(const char *lng) { + if(strcasecmp(lng, "en") == 0) { return "en-gb"; } + if(strcasecmp(lng, "de") == 0) { return "de-de"; } + if(strcasecmp(lng, "es") == 0) { return "es-es"; } + if(strcasecmp(lng, "it") == 0) { return "it-it"; } + if(strcasecmp(lng, "ru") == 0) { return "ru-ru"; } + return (char *)lng; +} + +char *fmt_gender(const char *gender) { + if(strcasecmp(gender, "male") == 0) { return "MALE"; } + if(strcasecmp(gender, "female") == 0) { return "FEMALE"; } + return (char *)gender; +} + +char *fmt_encode(const char *fmt) { + if(strcasecmp(fmt, "mp3") == 0) { return "MP3"; } + if(strcasecmp(fmt, "wav") == 0) { return "LINEAR16"; } + if(strcasecmp(fmt, "ulaw") == 0) { return "MULAW"; } + if(strcasecmp(fmt, "alaw") == 0) { return "ALAW"; } + return (char *)fmt; +} + +char *fmt_enct2fext(const char *fmt) { + if(strcasecmp(fmt, "mp3") == 0) { return "mp3"; } + if(strcasecmp(fmt, "linear16") == 0) { return "wav"; } + if(strcasecmp(fmt, "mulaw") == 0) { return "ulaw"; } + if(strcasecmp(fmt, "alaw") == 0) { return "alaw"; } + return (char *)fmt; +} + +char *escape_squotes(const char *string) { + size_t string_len = strlen(string); + size_t i; + size_t n = 0; + size_t dest_len = 0; + char *dest; + + dest_len = strlen(string) + 1; + for (i = 0; i < string_len; i++) { + switch (string[i]) { + case '\'': dest_len += 1; break; + } + } + + dest = (char *) malloc(sizeof(char) * dest_len); + switch_assert(dest); + + for (i = 0; i < string_len; i++) { + switch (string[i]) { + case '\'': + dest[n++] = '\\'; + dest[n++] = '\''; + break; + default: 
+ dest[n++] = string[i]; + } + } + dest[n++] = '\0'; + + switch_assert(n == dest_len); + return dest; +} + +/*- + * Copyright (c) 2001 Mike Barcroft + * Copyright (c) 1990, 1993 + * The Regents of the University of California. All rights reserved. + */ +char *strnstr(const char *s, const char *find, size_t slen) { + char c, sc; + size_t len; + + if ((c = *find++) != '\0') { + len = strlen(find); + do { + do { + if (slen-- < 1 || (sc = *s++) == '\0') + return (NULL); + } while (sc != c); + if (len > slen) + return (NULL); + } while (strncmp(s, find, len) != 0); + s--; + } + + return ((char *)s); +} + diff --git a/src/mod/asr_tts/mod_openai_tts/Makefile.am b/src/mod/asr_tts/mod_openai_tts/Makefile.am new file mode 100644 index 00000000000..c15cee17586 --- /dev/null +++ b/src/mod/asr_tts/mod_openai_tts/Makefile.am @@ -0,0 +1,11 @@ + +include $(top_srcdir)/build/modmake.rulesam +MODNAME=mod_openai_tts + +mod_LTLIBRARIES = mod_openai_tts.la +mod_openai_tts_la_SOURCES = mod_openai_tts.c utils.c +mod_openai_tts_la_CFLAGS = $(AM_CFLAGS) -I. 
+mod_openai_tts_la_LIBADD = $(switch_builddir)/libfreeswitch.la +mod_openai_tts_la_LDFLAGS = -avoid-version -module -no-undefined -shared + +$(am_mod_openai_tts_la_OBJECTS): mod_openai_tts.h diff --git a/src/mod/asr_tts/mod_openai_tts/conf/autoload_configs/openai_tts.conf.xml b/src/mod/asr_tts/mod_openai_tts/conf/autoload_configs/openai_tts.conf.xml new file mode 100644 index 00000000000..5ebf5c5d588 --- /dev/null +++ b/src/mod/asr_tts/mod_openai_tts/conf/autoload_configs/openai_tts.conf.xml @@ -0,0 +1,30 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/mod/asr_tts/mod_openai_tts/conf/dialplan/example.xml b/src/mod/asr_tts/mod_openai_tts/conf/dialplan/example.xml new file mode 100644 index 00000000000..3829423d2cd --- /dev/null +++ b/src/mod/asr_tts/mod_openai_tts/conf/dialplan/example.xml @@ -0,0 +1,9 @@ + + + + + + + + + diff --git a/src/mod/asr_tts/mod_openai_tts/mod_openai_tts.c b/src/mod/asr_tts/mod_openai_tts/mod_openai_tts.c new file mode 100644 index 00000000000..acedadfca16 --- /dev/null +++ b/src/mod/asr_tts/mod_openai_tts/mod_openai_tts.c @@ -0,0 +1,494 @@ +/* + * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application + * Copyright (C) 2005-2014, Anthony Minessale II + * + * Version: MPL 1.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. 
+ * + * Module Contributor(s): + * Konstantin Alexandrin + * + * + * Provides the ability to use OpenAI TTS service in the Freeswitch + * https://platform.openai.com/docs/guides/text-to-speech + * + * Development repository: + * https://github.com/akscf/mod_openai_tts + * + */ +#include "mod_openai_tts.h" + +static struct { + switch_mutex_t *mutex; + switch_hash_t *models; + char *cache_path; + char *tmp_path; + char *opt_encoding; + char *user_agent; + char *api_url; + char *api_key; + char *proxy; + char *proxy_credentials; + uint32_t file_size_max; + uint32_t request_timeout; // seconds + uint32_t connect_timeout; // seconds + uint8_t fl_voice_name_as_language; + uint8_t fl_log_http_error; + uint8_t fl_cache_enabled; +} globals; + +SWITCH_MODULE_LOAD_FUNCTION(mod_openai_tts_load); +SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_openai_tts_shutdown); +SWITCH_MODULE_DEFINITION(mod_openai_tts, mod_openai_tts_load, mod_openai_tts_shutdown, NULL); + +static tts_model_info_t *tts_model_lookup(const char *lang) { + tts_model_info_t *model = NULL; + + if(!lang) { return NULL; } + + switch_mutex_lock(globals.mutex); + model = switch_core_hash_find(globals.models, lang); + switch_mutex_unlock(globals.mutex); + + return model; +} + +static size_t curl_io_write_callback(char *buffer, size_t size, size_t nitems, void *user_data) { + tts_ctx_t *tts_ctx = (tts_ctx_t *)user_data; + size_t len = (size * nitems); + + if(len > 0 && tts_ctx->curl_recv_buffer) { + switch_buffer_write(tts_ctx->curl_recv_buffer, buffer, len); + } + + return len; +} + +static size_t curl_io_read_callback(char *buffer, size_t size, size_t nitems, void *user_data) { + tts_ctx_t *tts_ctx = (tts_ctx_t *)user_data; + size_t nmax = (size * nitems); + size_t ncur = (tts_ctx->curl_send_buffer_len > nmax) ? 
nmax : tts_ctx->curl_send_buffer_len; + + memmove(buffer, tts_ctx->curl_send_buffer_ref, ncur); + tts_ctx->curl_send_buffer_ref += ncur; + tts_ctx->curl_send_buffer_len -= ncur; + + return ncur; +} + +static switch_status_t curl_perform(tts_ctx_t *tts_ctx, char *text) { + switch_status_t status = SWITCH_STATUS_SUCCESS; + CURL *curl_handle = NULL; + switch_curl_slist_t *headers = NULL; + switch_CURLcode curl_ret = 0; + long http_resp = 0; + const char *voice_local = (tts_ctx->alt_voice ? tts_ctx->alt_voice : tts_ctx->model_info->voice); + const char *model_local = (tts_ctx->alt_model ? tts_ctx->alt_model : tts_ctx->model_info->model); + char *pdata = NULL; + char *qtext = NULL; + + if(text) { + qtext = escape_dquotes(text); + } + pdata = switch_mprintf("{\"model\":\"%s\",\"voice\":\"%s\",\"input\":\"%s\"}\n", + model_local, + voice_local, + qtext ? qtext : "" + ); + +#ifdef OAITTS_DEBUG + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "CURL: URL=[%s], PDATA=[%s]\n", globals.api_url, pdata); +#endif + + tts_ctx->curl_send_buffer_len = strlen(pdata); + tts_ctx->curl_send_buffer_ref = pdata; + + curl_handle = switch_curl_easy_init(); + + headers = switch_curl_slist_append(headers, "Content-Type: application/json; charset=utf-8"); + headers = switch_curl_slist_append(headers, "Expect:"); + + switch_curl_easy_setopt(curl_handle, CURLOPT_HTTPHEADER, headers); + switch_curl_easy_setopt(curl_handle, CURLOPT_POST, 1); + switch_curl_easy_setopt(curl_handle, CURLOPT_NOSIGNAL, 1); + + switch_curl_easy_setopt(curl_handle, CURLOPT_POSTFIELDSIZE, tts_ctx->curl_send_buffer_len); + switch_curl_easy_setopt(curl_handle, CURLOPT_POSTFIELDS, (void *) pdata); + switch_curl_easy_setopt(curl_handle, CURLOPT_READFUNCTION, curl_io_read_callback); + switch_curl_easy_setopt(curl_handle, CURLOPT_READDATA, (void *) tts_ctx); + + switch_curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, curl_io_write_callback); + switch_curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *) 
tts_ctx); + + if(globals.connect_timeout > 0) { + switch_curl_easy_setopt(curl_handle, CURLOPT_CONNECTTIMEOUT, globals.connect_timeout); + } + if(globals.request_timeout > 0) { + switch_curl_easy_setopt(curl_handle, CURLOPT_TIMEOUT, globals.request_timeout); + } + if(globals.user_agent) { + switch_curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, globals.user_agent); + } + + if(strncasecmp(globals.api_url, "https", 5) == 0) { + switch_curl_easy_setopt(curl_handle, CURLOPT_SSL_VERIFYPEER, 0); + switch_curl_easy_setopt(curl_handle, CURLOPT_SSL_VERIFYHOST, 0); + } + if(globals.proxy) { + if(globals.proxy_credentials != NULL) { + switch_curl_easy_setopt(curl_handle, CURLOPT_PROXYAUTH, CURLAUTH_ANY); + switch_curl_easy_setopt(curl_handle, CURLOPT_PROXYUSERPWD, globals.proxy_credentials); + } + if(strncasecmp(globals.proxy, "https", 5) == 0) { + switch_curl_easy_setopt(curl_handle, CURLOPT_PROXY_SSL_VERIFYPEER, 0); + } + switch_curl_easy_setopt(curl_handle, CURLOPT_PROXY, globals.proxy); + } + + curl_easy_setopt(curl_handle, CURLOPT_XOAUTH2_BEARER, globals.api_key); + curl_easy_setopt(curl_handle, CURLOPT_HTTPAUTH, CURLAUTH_BEARER); + + switch_curl_easy_setopt(curl_handle, CURLOPT_URL, globals.api_url); + + curl_ret = switch_curl_easy_perform(curl_handle); + if(!curl_ret) { + switch_curl_easy_getinfo(curl_handle, CURLINFO_RESPONSE_CODE, &http_resp); + if(!http_resp) { switch_curl_easy_getinfo(curl_handle, CURLINFO_HTTP_CONNECTCODE, &http_resp); } + } else { + http_resp = curl_ret; + } + + if(http_resp != 200) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "http-error=[%ld] (%s)\n", http_resp, globals.api_url); + status = SWITCH_STATUS_FALSE; + } + + if(tts_ctx->curl_recv_buffer) { + if(switch_buffer_inuse(tts_ctx->curl_recv_buffer) > 0) { + switch_buffer_write(tts_ctx->curl_recv_buffer, "\0", 1); + } + } + + if(curl_handle) { switch_curl_easy_cleanup(curl_handle); } + if(headers) { switch_curl_slist_free_all(headers); } + + switch_safe_free(pdata); + 
switch_safe_free(qtext); + return status; +} + + +// --------------------------------------------------------------------------------------------------------------------------------------------- +// speech api +// --------------------------------------------------------------------------------------------------------------------------------------------- +static switch_status_t speech_open(switch_speech_handle_t *sh, const char *voice, int samplerate, int channels, switch_speech_flag_t *flags) { + switch_status_t status = SWITCH_STATUS_SUCCESS; + char name_uuid[SWITCH_UUID_FORMATTED_LENGTH + 1] = { 0 }; + tts_ctx_t *tts_ctx = NULL; + + tts_ctx = switch_core_alloc(sh->memory_pool, sizeof(tts_ctx_t)); + tts_ctx->pool = sh->memory_pool; + tts_ctx->fhnd = switch_core_alloc(tts_ctx->pool, sizeof(switch_file_handle_t)); + tts_ctx->language = (globals.fl_voice_name_as_language && voice) ? switch_core_strdup(sh->memory_pool, voice) : NULL; + tts_ctx->channels = channels; + tts_ctx->samplerate = samplerate; + tts_ctx->dst_file = NULL; + + sh->private_info = tts_ctx; + + if(tts_ctx->language) { + tts_ctx->model_info = tts_model_lookup(tts_ctx->language); + } + + if((status = switch_buffer_create_dynamic(&tts_ctx->curl_recv_buffer, 1024, 8192, globals.file_size_max)) != SWITCH_STATUS_SUCCESS) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_buffer_create_dynamic() fail\n"); + goto out; + } + + if(!globals.fl_cache_enabled) { + switch_uuid_str((char *)name_uuid, sizeof(name_uuid)); + tts_ctx->dst_file = switch_core_sprintf(sh->memory_pool, "%s%sopenai-%s.%s", + globals.tmp_path, + SWITCH_PATH_SEPARATOR, + name_uuid, + enc2ext(globals.opt_encoding) + ); + } + +out: + return status; +} + +static switch_status_t speech_close(switch_speech_handle_t *sh, switch_speech_flag_t *flags) { + tts_ctx_t *tts_ctx = (tts_ctx_t *) sh->private_info; + assert(tts_ctx != NULL); + + if(switch_test_flag(tts_ctx->fhnd, SWITCH_FILE_OPEN)) { + switch_core_file_close(tts_ctx->fhnd); 
+ } + + if(tts_ctx->curl_recv_buffer) { + switch_buffer_destroy(&tts_ctx->curl_recv_buffer); + } + + if(tts_ctx->dst_file && !globals.fl_cache_enabled) { + unlink(tts_ctx->dst_file); + } + + return SWITCH_STATUS_SUCCESS; +} + +static switch_status_t speech_feed_tts(switch_speech_handle_t *sh, char *text, switch_speech_flag_t *flags) { + tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info; + switch_status_t status = SWITCH_STATUS_SUCCESS; + char digest[SWITCH_MD5_DIGEST_STRING_SIZE + 1] = { 0 }; + const void *ptr = NULL; + uint32_t recv_len = 0; + + assert(tts_ctx != NULL); + + if(!tts_ctx->dst_file) { + switch_md5_string(digest, (void *)text, strlen(text)); + tts_ctx->dst_file = switch_core_sprintf(sh->memory_pool, "%s%s%s.%s", + globals.cache_path, + SWITCH_PATH_SEPARATOR, + digest, + enc2ext(globals.opt_encoding) + ); + } + + if(switch_file_exists(tts_ctx->dst_file, tts_ctx->pool) == SWITCH_STATUS_SUCCESS) { + if((status = switch_core_file_open(tts_ctx->fhnd, tts_ctx->dst_file, tts_ctx->channels, tts_ctx->samplerate, + (SWITCH_FILE_FLAG_READ | SWITCH_FILE_DATA_SHORT), sh->memory_pool)) != SWITCH_STATUS_SUCCESS) { + + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open file: %s\n", tts_ctx->dst_file); + status = SWITCH_STATUS_FALSE; + goto out; + } + } else { + if(tts_ctx->alt_voice == NULL && tts_ctx->model_info == NULL) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "voice not determined\n"); + status = SWITCH_STATUS_FALSE; goto out; + } + if(tts_ctx->alt_model == NULL && tts_ctx->model_info == NULL) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "model not determined\n"); + status = SWITCH_STATUS_FALSE; goto out; + } + + switch_buffer_zero(tts_ctx->curl_recv_buffer); + status = curl_perform(tts_ctx , text); + recv_len = switch_buffer_peek_zerocopy(tts_ctx->curl_recv_buffer, &ptr); + + if(status == SWITCH_STATUS_SUCCESS) { + if((status = write_file(tts_ctx->dst_file, (switch_byte_t *)ptr, recv_len)) == 
SWITCH_STATUS_SUCCESS) { + if((status = switch_core_file_open(tts_ctx->fhnd, tts_ctx->dst_file, tts_ctx->channels, tts_ctx->samplerate, + (SWITCH_FILE_FLAG_READ | SWITCH_FILE_DATA_SHORT), sh->memory_pool)) != SWITCH_STATUS_SUCCESS) { + + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open file: %s\n", tts_ctx->dst_file); + goto out; + } + } + } else { + if(globals.fl_log_http_error && recv_len > 0) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Services response: %s\n", (char *)ptr); + } + } + } +out: + return status; +} + +static switch_status_t speech_read_tts(switch_speech_handle_t *sh, void *data, size_t *data_len, switch_speech_flag_t *flags) { + tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info; + size_t len = (*data_len / sizeof(int16_t)); + + assert(tts_ctx != NULL); + + if(tts_ctx->fhnd->file_interface == NULL) { + return SWITCH_STATUS_FALSE; + } + + if(switch_core_file_read(tts_ctx->fhnd, data, &len) != SWITCH_STATUS_SUCCESS) { + switch_core_file_close(tts_ctx->fhnd); + return SWITCH_STATUS_FALSE; + } + + *data_len = (len * sizeof(int16_t)); + if(!data_len) { + switch_core_file_close(tts_ctx->fhnd); + return SWITCH_STATUS_BREAK; + } + + return SWITCH_STATUS_SUCCESS; +} + +static void speech_flush_tts(switch_speech_handle_t *sh) { + tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info; + + assert(tts_ctx != NULL); + + if(tts_ctx->fhnd != NULL && tts_ctx->fhnd->file_interface != NULL) { + switch_core_file_close(tts_ctx->fhnd); + } +} + +static void speech_text_param_tts(switch_speech_handle_t *sh, char *param, const char *val) { + tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info; + + assert(tts_ctx != NULL); + + if(strcasecmp(param, "voice") == 0) { + if(val) { tts_ctx->alt_voice = switch_core_strdup(sh->memory_pool, val); } + } else if(strcasecmp(param, "model") == 0) { + if(val) { tts_ctx->alt_model = switch_core_strdup(sh->memory_pool, val); } + } +} + +static void speech_numeric_param_tts(switch_speech_handle_t *sh, 
char *param, int val) { +} + +static void speech_float_param_tts(switch_speech_handle_t *sh, char *param, double val) { +} + +// --------------------------------------------------------------------------------------------------------------------------------------------- +// main +// --------------------------------------------------------------------------------------------------------------------------------------------- +SWITCH_MODULE_LOAD_FUNCTION(mod_openai_tts_load) { + switch_status_t status = SWITCH_STATUS_SUCCESS; + switch_xml_t cfg, xml, settings, param, xmodels, xmodel; + switch_speech_interface_t *speech_interface; + + memset(&globals, 0, sizeof(globals)); + switch_mutex_init(&globals.mutex, SWITCH_MUTEX_NESTED, pool); + switch_core_hash_init(&globals.models); + + if((xml = switch_xml_open_cfg(MOD_CONFIG_NAME, &cfg, NULL)) == NULL) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open configuration: %s\n", MOD_CONFIG_NAME); + switch_goto_status(SWITCH_STATUS_GENERR, out); + } + + if((settings = switch_xml_child(cfg, "settings"))) { + for (param = switch_xml_child(settings, "param"); param; param = param->next) { + char *var = (char *) switch_xml_attr_soft(param, "name"); + char *val = (char *) switch_xml_attr_soft(param, "value"); + + if(!strcasecmp(var, "api-url")) { + if(val) globals.api_url = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "api-key")) { + if(val) globals.api_key = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "cache-path")) { + if(val) globals.cache_path = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "encoding")) { + if(val) globals.opt_encoding = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "user-agent")) { + if(val) globals.user_agent = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "request-timeout")) { + if(val) globals.request_timeout = atoi(val); + } else if(!strcasecmp(var, "connect-timeout")) { + if(val) globals.connect_timeout = 
atoi(val); + } else if(!strcasecmp(var, "voice-name-as-language")) { + if(val) globals.fl_voice_name_as_language = switch_true(val); + } else if(!strcasecmp(var, "log-http-errors")) { + if(val) globals.fl_log_http_error = switch_true(val); + } else if(!strcasecmp(var, "cache-enable")) { + if(val) globals.fl_cache_enabled = switch_true(val); + } else if(!strcasecmp(var, "file-size-max")) { + if(val) globals.file_size_max = atoi(val); + } else if(!strcasecmp(var, "proxy")) { + if(val) globals.proxy = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "proxy-credentials")) { + if(val) globals.proxy_credentials = switch_core_strdup(pool, val); + } + } + } + + if((xmodels = switch_xml_child(cfg, "models"))) { + for(xmodel = switch_xml_child(xmodels, "model"); xmodel; xmodel = xmodel->next) { + char *lang = (char *) switch_xml_attr_soft(xmodel, "language"); + char *voice = (char *) switch_xml_attr_soft(xmodel, "voice"); + char *model = (char *) switch_xml_attr_soft(xmodel, "model"); + + tts_model_info_t *model_info = NULL; + + if(!lang || !voice || !model) { continue; } + + if(switch_core_hash_find(globals.models, lang)) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Language '%s' already registered\n", lang); + continue; + } + + if((model_info = switch_core_alloc(pool, sizeof(tts_model_info_t))) == NULL) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_core_alloc()\n"); + switch_goto_status(SWITCH_STATUS_GENERR, out); + } + model_info->lang = switch_core_strdup(pool, lang); + model_info->voice = switch_core_strdup(pool, voice); + model_info->model = switch_core_strdup(pool, model); + + switch_core_hash_insert(globals.models, model_info->lang, model_info); + } + } + + + if(!globals.api_url) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing required parameter: api-url\n"); + switch_goto_status(SWITCH_STATUS_GENERR, out); + } + if(!globals.api_key) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, 
"Missing required parameter: api-key\n"); + switch_goto_status(SWITCH_STATUS_GENERR, out); + } + + globals.tmp_path = SWITCH_GLOBAL_dirs.temp_dir; + globals.cache_path = (globals.cache_path == NULL ? "/tmp/openai-tts-cache" : globals.cache_path); + globals.opt_encoding = (globals.opt_encoding == NULL ? "mp3" : globals.opt_encoding); + globals.file_size_max = globals.file_size_max > 0 ? globals.file_size_max : FILE_SIZE_MAX; + + if(switch_directory_exists(globals.cache_path, NULL) != SWITCH_STATUS_SUCCESS) { + switch_dir_make(globals.cache_path, SWITCH_FPROT_OS_DEFAULT, NULL); + } + + *module_interface = switch_loadable_module_create_module_interface(pool, modname); + speech_interface = switch_loadable_module_create_interface(*module_interface, SWITCH_SPEECH_INTERFACE); + speech_interface->interface_name = "openai"; + + speech_interface->speech_open = speech_open; + speech_interface->speech_close = speech_close; + speech_interface->speech_feed_tts = speech_feed_tts; + speech_interface->speech_read_tts = speech_read_tts; + speech_interface->speech_flush_tts = speech_flush_tts; + + speech_interface->speech_text_param_tts = speech_text_param_tts; + speech_interface->speech_numeric_param_tts = speech_numeric_param_tts; + speech_interface->speech_float_param_tts = speech_float_param_tts; + + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "OpenAI-TTS (%s)\n", MOD_VERSION); +out: + if(xml) { + switch_xml_free(xml); + } + if(status != SWITCH_STATUS_SUCCESS) { + if(globals.models) { + switch_core_hash_destroy(&globals.models); + } + } + return status; +} + +SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_openai_tts_shutdown) { + + if(globals.models) { + switch_core_hash_destroy(&globals.models); + } + + return SWITCH_STATUS_SUCCESS; +} diff --git a/src/mod/asr_tts/mod_openai_tts/mod_openai_tts.h b/src/mod/asr_tts/mod_openai_tts/mod_openai_tts.h new file mode 100644 index 00000000000..1f9d1c53beb --- /dev/null +++ b/src/mod/asr_tts/mod_openai_tts/mod_openai_tts.h @@ -0,0 +1,59 
@@ +/* + * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application + * Copyright (C) 2005-2014, Anthony Minessale II + * + * Version: MPL 1.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Module Contributor(s): + * Konstantin Alexandrin + * + * + */ +#ifndef MOD_OPENAI_TTS_H +#define MOD_OPENAI_TTS_H + +#include +#include + +#define MOD_VERSION "1.0_apiv1" +#define MOD_CONFIG_NAME "openai_tts.conf" +#define FILE_SIZE_MAX (2*1024*1024) +// #define OAITTS_DEBUG + +typedef struct { + char *lang; + char *voice; + char *model; +} tts_model_info_t; + +typedef struct { + switch_memory_pool_t *pool; + switch_file_handle_t *fhnd; + switch_buffer_t *curl_recv_buffer; + tts_model_info_t *model_info; + char *curl_send_buffer_ref; + char *language; + char *alt_voice; + char *alt_model; + char *dst_file; + uint32_t samplerate; + uint32_t channels; + size_t curl_send_buffer_len; +} tts_ctx_t; + +char *enc2ext(const char *fmt); +char *escape_dquotes(const char *string); + +switch_status_t write_file(char *file_name, switch_byte_t *buf, uint32_t buf_len); + +#endif diff --git a/src/mod/asr_tts/mod_openai_tts/utils.c b/src/mod/asr_tts/mod_openai_tts/utils.c new file mode 100644 index 00000000000..df16d332c92 --- /dev/null +++ b/src/mod/asr_tts/mod_openai_tts/utils.c @@ -0,0 +1,85 @@ +/* + * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application + * Copyright (C) 2005-2014, Anthony Minessale II + * + * Version: MPL 1.1 + * + * The contents of this file are subject to the Mozilla Public 
License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Module Contributor(s): + * Konstantin Alexandrin + * + * + */ +#include "mod_openai_tts.h" + +char *enc2ext(const char *fmt) { + if(strcasecmp(fmt, "mp3") == 0) { return "mp3"; } + return (char *)fmt; +} + +char *escape_dquotes(const char *string) { + size_t string_len = strlen(string); + size_t i; + size_t n = 0; + size_t dest_len = 0; + char *dest; + + dest_len = strlen(string) + 1; + for (i = 0; i < string_len; i++) { + switch (string[i]) { + case '\"': dest_len += 1; break; + } + } + + dest = (char *) malloc(sizeof(char) * dest_len); + switch_assert(dest); + + for (i = 0; i < string_len; i++) { + switch (string[i]) { + case '\"': + dest[n++] = '\\'; + dest[n++] = '\"'; + break; + default: + dest[n++] = string[i]; + } + } + dest[n++] = '\0'; + + switch_assert(n == dest_len); + return dest; +} + +switch_status_t write_file(char *file_name, switch_byte_t *buf, uint32_t buf_len) { + switch_status_t status = SWITCH_STATUS_SUCCESS; + switch_memory_pool_t *pool = NULL; + switch_size_t len = buf_len; + switch_file_t *fd = NULL; + + if(switch_core_new_memory_pool(&pool) != SWITCH_STATUS_SUCCESS) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_core_new_memory_pool() fail\n"); + switch_goto_status(SWITCH_STATUS_GENERR, out); + } + if((status = switch_file_open(&fd, file_name, (SWITCH_FOPEN_WRITE | SWITCH_FOPEN_TRUNCATE | SWITCH_FOPEN_CREATE), SWITCH_FPROT_OS_DEFAULT, pool)) != SWITCH_STATUS_SUCCESS) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Open fail: %s\n", file_name); + goto out; + } + if((status = 
switch_file_write(fd, buf, &len)) != SWITCH_STATUS_SUCCESS) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Write fail (%s)\n", file_name); + } + switch_file_close(fd); +out: + if(pool) { + switch_core_destroy_memory_pool(&pool); + } + return status; +} diff --git a/src/mod/asr_tts/mod_piper_tts/Makefile.am b/src/mod/asr_tts/mod_piper_tts/Makefile.am new file mode 100644 index 00000000000..013d7df268c --- /dev/null +++ b/src/mod/asr_tts/mod_piper_tts/Makefile.am @@ -0,0 +1,11 @@ + +include $(top_srcdir)/build/modmake.rulesam +MODNAME=mod_piper_tts + +mod_LTLIBRARIES = mod_piper_tts.la +mod_piper_tts_la_SOURCES = mod_piper_tts.c +mod_piper_tts_la_CFLAGS = $(AM_CFLAGS) -I. +mod_piper_tts_la_LIBADD = $(switch_builddir)/libfreeswitch.la +mod_piper_tts_la_LDFLAGS = -avoid-version -module -no-undefined -shared + +$(am_mod_piper_tts_la_OBJECTS): mod_piper_tts.h diff --git a/src/mod/asr_tts/mod_piper_tts/conf/autoload_configs/piper_tts.conf.xml b/src/mod/asr_tts/mod_piper_tts/conf/autoload_configs/piper_tts.conf.xml new file mode 100644 index 00000000000..76d52c24f89 --- /dev/null +++ b/src/mod/asr_tts/mod_piper_tts/conf/autoload_configs/piper_tts.conf.xml @@ -0,0 +1,22 @@ + + + + + + + + + + + + + + + + + + + + diff --git a/src/mod/asr_tts/mod_piper_tts/conf/dialplan/example.xml b/src/mod/asr_tts/mod_piper_tts/conf/dialplan/example.xml new file mode 100644 index 00000000000..e4e09e41f63 --- /dev/null +++ b/src/mod/asr_tts/mod_piper_tts/conf/dialplan/example.xml @@ -0,0 +1,9 @@ + + + + + + + + + diff --git a/src/mod/asr_tts/mod_piper_tts/mod_piper_tts.c b/src/mod/asr_tts/mod_piper_tts/mod_piper_tts.c new file mode 100644 index 00000000000..0fd10560845 --- /dev/null +++ b/src/mod/asr_tts/mod_piper_tts/mod_piper_tts.c @@ -0,0 +1,339 @@ +/* + * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application + * Copyright (C) 2005-2014, Anthony Minessale II + * + * Version: MPL 1.1 + * + * The contents of this file are subject to the Mozilla Public 
License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Module Contributor(s): + * Konstantin Alexandrin + * + * + * Provides the ability to use PIPER TTS in the Freeswitch + * https://github.com/rhasspy/piper + * + * + * Development repository: + * https://github.com/akscf/mod_piper_tts + * + */ +#include "mod_piper_tts.h" + +static piper_globals_t globals; + +SWITCH_MODULE_LOAD_FUNCTION(mod_piper_tts_load); +SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_piper_tts_shutdown); +SWITCH_MODULE_DEFINITION(mod_piper_tts, mod_piper_tts_load, mod_piper_tts_shutdown, NULL); + + +static piper_model_info_t *piper_lookup_model(const char *lang) { + piper_model_info_t *model = NULL; + + if(!lang) { + return NULL; + } + + switch_mutex_lock(globals.mutex); + model = switch_core_hash_find(globals.models, lang); + switch_mutex_unlock(globals.mutex); + + return model; +} + +static switch_status_t speech_open(switch_speech_handle_t *sh, const char *voice, int samplerate, int channels, switch_speech_flag_t *flags) { + char name_uuid[SWITCH_UUID_FORMATTED_LENGTH + 1] = { 0 }; + switch_status_t status = SWITCH_STATUS_SUCCESS; + tts_ctx_t *tts_ctx = NULL; + + tts_ctx = switch_core_alloc(sh->memory_pool, sizeof(tts_ctx_t)); + tts_ctx->pool = sh->memory_pool; + tts_ctx->fhnd = switch_core_alloc(tts_ctx->pool, sizeof(switch_file_handle_t)); + tts_ctx->voice = switch_core_strdup(tts_ctx->pool, voice); + tts_ctx->language = (globals.fl_voice_as_language && voice ? 
switch_core_strdup(sh->memory_pool, voice) : "en"); + tts_ctx->channels = channels; + tts_ctx->samplerate = samplerate; + + sh->private_info = tts_ctx; + + if(tts_ctx->language) { + tts_ctx->model_info = piper_lookup_model(tts_ctx->language); + if(!tts_ctx->model_info) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Language '%s' not registered!\n", tts_ctx->language); + switch_goto_status(SWITCH_STATUS_FALSE, out); + } + } + + if(!globals.fl_cache_enabled) { + switch_uuid_str((char *)name_uuid, sizeof(name_uuid)); + tts_ctx->dst_fname = switch_core_sprintf(sh->memory_pool, "%s%spiper-%s.%s", + globals.tmp_path, + SWITCH_PATH_SEPARATOR, + name_uuid, + PIPER_FILE_ENCODING + ); + } + +out: + return status; +} + +static switch_status_t speech_close(switch_speech_handle_t *sh, switch_speech_flag_t *flags) { + tts_ctx_t *tts_ctx = (tts_ctx_t *) sh->private_info; + assert(tts_ctx != NULL); + + if(switch_test_flag(tts_ctx->fhnd, SWITCH_FILE_OPEN)) { + switch_core_file_close(tts_ctx->fhnd); + } + + if(tts_ctx->dst_fname && !globals.fl_cache_enabled) { + unlink(tts_ctx->dst_fname); + } + + return SWITCH_STATUS_SUCCESS; +} + +static switch_status_t speech_feed_tts(switch_speech_handle_t *sh, char *text, switch_speech_flag_t *flags) { + tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info; + char digest[SWITCH_MD5_DIGEST_STRING_SIZE + 1] = { 0 }; + switch_status_t status = SWITCH_STATUS_SUCCESS; + + assert(tts_ctx != NULL); + + if(!tts_ctx->dst_fname) { + switch_md5_string(digest, (void *)text, strlen(text)); + tts_ctx->dst_fname = switch_core_sprintf(sh->memory_pool, "%s%s%s.%s", + globals.cache_path, + SWITCH_PATH_SEPARATOR, + digest, + PIPER_FILE_ENCODING + ); + } + + if(switch_file_exists(tts_ctx->dst_fname, tts_ctx->pool) == SWITCH_STATUS_SUCCESS) { + if((status = switch_core_file_open(tts_ctx->fhnd, tts_ctx->dst_fname, tts_ctx->channels, tts_ctx->samplerate, + (SWITCH_FILE_FLAG_READ | SWITCH_FILE_DATA_SHORT), sh->memory_pool)) != SWITCH_STATUS_SUCCESS) { + + 
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open file: %s\n", tts_ctx->dst_fname); + switch_goto_status(SWITCH_STATUS_FALSE, out); + } + } else { + char *cmd = NULL; + char *textq = NULL; + + if(!tts_ctx->model_info) { + if(tts_ctx->language) { + tts_ctx->model_info = piper_lookup_model(tts_ctx->language); + } + if(!tts_ctx->model_info) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to lookup the model for lang: %s\n", tts_ctx->language); + switch_goto_status(SWITCH_STATUS_FALSE, out); + } + } + + textq = switch_util_quote_shell_arg(text); + cmd = switch_mprintf("echo %s | %s %s --model '%s' --output_file '%s'", + textq, globals.piper_bin, + globals.piper_opts ? globals.piper_opts : "", + tts_ctx->model_info->model, + tts_ctx->dst_fname + ); + +#ifdef PIPER_DEBUG + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "PIPER-CMD: [%s]\n", cmd); +#endif + + if(switch_system(cmd, SWITCH_TRUE)) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to perform cmd: %s\n", cmd); + status = SWITCH_STATUS_FALSE; + } + + switch_safe_free(textq); + switch_safe_free(cmd); + + if(status == SWITCH_STATUS_SUCCESS) { + if(switch_file_exists(tts_ctx->dst_fname, tts_ctx->pool) == SWITCH_STATUS_SUCCESS) { + if((status = switch_core_file_open(tts_ctx->fhnd, tts_ctx->dst_fname, tts_ctx->channels, tts_ctx->samplerate, + (SWITCH_FILE_FLAG_READ | SWITCH_FILE_DATA_SHORT), sh->memory_pool)) != SWITCH_STATUS_SUCCESS) { + + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open file: %s\n", tts_ctx->dst_fname); + switch_goto_status(SWITCH_STATUS_FALSE, out); + } + } else { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "File not found: %s\n", tts_ctx->dst_fname); + switch_goto_status(SWITCH_STATUS_FALSE, out); + } + } + } + +out: + return status; +} + +static switch_status_t speech_read_tts(switch_speech_handle_t *sh, void *data, size_t *data_len, switch_speech_flag_t *flags) { + tts_ctx_t *tts_ctx = 
(tts_ctx_t *) sh->private_info; + size_t len = (*data_len / sizeof(int16_t)); + + assert(tts_ctx != NULL); + + if(tts_ctx->fhnd->file_interface == NULL) { + return SWITCH_STATUS_FALSE; + } + + if(switch_core_file_read(tts_ctx->fhnd, data, &len) != SWITCH_STATUS_SUCCESS) { + switch_core_file_close(tts_ctx->fhnd); + return SWITCH_STATUS_FALSE; + } + + *data_len = (len * sizeof(int16_t)); + if(!data_len) { + switch_core_file_close(tts_ctx->fhnd); + return SWITCH_STATUS_BREAK; + } + + return SWITCH_STATUS_SUCCESS; +} + +static void speech_flush_tts(switch_speech_handle_t *sh) { + tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info; + + assert(tts_ctx != NULL); + + if(tts_ctx->fhnd != NULL && tts_ctx->fhnd->file_interface != NULL) { + switch_core_file_close(tts_ctx->fhnd); + } +} + +static void speech_text_param_tts(switch_speech_handle_t *sh, char *param, const char *val) { + tts_ctx_t *tts_ctx = (tts_ctx_t *) sh->private_info; + + assert(tts_ctx != NULL); + + if(strcasecmp(param, "lang") == 0) { + if(val) { tts_ctx->language = switch_core_strdup(sh->memory_pool, val); } + } else if(strcasecmp(param, "voice") == 0) { + if(val) { tts_ctx->voice = switch_core_strdup(sh->memory_pool, val); } + } +} + +static void speech_numeric_param_tts(switch_speech_handle_t *sh, char *param, int val) { +} + +static void speech_float_param_tts(switch_speech_handle_t *sh, char *param, double val) { +} + +// --------------------------------------------------------------------------------------------------------------------------------------------- +// main +// --------------------------------------------------------------------------------------------------------------------------------------------- +SWITCH_MODULE_LOAD_FUNCTION(mod_piper_tts_load) { + switch_status_t status = SWITCH_STATUS_SUCCESS; + switch_xml_t cfg, xml, settings, param, xmodels, xmodel; + switch_speech_interface_t *speech_interface; + + memset(&globals, 0, sizeof(globals)); + switch_mutex_init(&globals.mutex, 
SWITCH_MUTEX_NESTED, pool); + switch_core_hash_init(&globals.models); + + if((xml = switch_xml_open_cfg(MOD_CONFIG_NAME, &cfg, NULL)) == NULL) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open configuration: %s\n", MOD_CONFIG_NAME); + switch_goto_status(SWITCH_STATUS_GENERR, out); + } + + if((settings = switch_xml_child(cfg, "settings"))) { + for(param = switch_xml_child(settings, "param"); param; param = param->next) { + char *var = (char *) switch_xml_attr_soft(param, "name"); + char *val = (char *) switch_xml_attr_soft(param, "value"); + + if(!strcasecmp(var, "cache-path")) { + if(val) globals.cache_path = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "piper-bin")) { + if(val) globals.piper_bin = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "piper-opts")) { + if(val) globals.piper_opts = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "voice-name-as-language")) { + if(val) globals.fl_voice_as_language = switch_true(val); + } else if(!strcasecmp(var, "cache-enable")) { + if(val) globals.fl_cache_enabled = switch_true(val); + } + } + } + + if((xmodels = switch_xml_child(cfg, "models"))) { + for(xmodel = switch_xml_child(xmodels, "model"); xmodel; xmodel = xmodel->next) { + char *lang = (char *) switch_xml_attr_soft(xmodel, "language"); + char *model = (char *) switch_xml_attr_soft(xmodel, "model"); + piper_model_info_t *model_info = NULL; + + if(!lang || !model) { continue; } + + if(switch_core_hash_find(globals.models, lang)) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Language '%s' already registered\n", lang); + continue; + } + + if((model_info = switch_core_alloc(pool, sizeof(piper_model_info_t))) == NULL) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_core_alloc()\n"); + switch_goto_status(SWITCH_STATUS_GENERR, out); + } + model_info->lang = switch_core_strdup(pool, lang); + model_info->model = switch_core_strdup(pool, model); + + 
switch_core_hash_insert(globals.models, model_info->lang, model_info); + } + } + + if(!globals.piper_bin) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "piper-bin - not determined!\n"); + switch_goto_status(SWITCH_STATUS_FALSE, out); + } + + globals.tmp_path = SWITCH_GLOBAL_dirs.temp_dir; + globals.cache_path = (globals.cache_path == NULL ? "/tmp/piper-tts-cache" : globals.cache_path); + + if(switch_directory_exists(globals.cache_path, NULL) != SWITCH_STATUS_SUCCESS) { + switch_dir_make(globals.cache_path, SWITCH_FPROT_OS_DEFAULT, NULL); + } + + *module_interface = switch_loadable_module_create_module_interface(pool, modname); + speech_interface = switch_loadable_module_create_interface(*module_interface, SWITCH_SPEECH_INTERFACE); + speech_interface->interface_name = "piper"; + + speech_interface->speech_open = speech_open; + speech_interface->speech_close = speech_close; + speech_interface->speech_feed_tts = speech_feed_tts; + speech_interface->speech_read_tts = speech_read_tts; + speech_interface->speech_flush_tts = speech_flush_tts; + + speech_interface->speech_text_param_tts = speech_text_param_tts; + speech_interface->speech_float_param_tts = speech_float_param_tts; + speech_interface->speech_numeric_param_tts = speech_numeric_param_tts; + + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "PiperTTS (%s)\n", MOD_VERSION); +out: + if(xml) { + switch_xml_free(xml); + } + if(status != SWITCH_STATUS_SUCCESS) { + if(globals.models) { + switch_core_hash_destroy(&globals.models); + } + } + return status; +} + +SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_piper_tts_shutdown) { + + if(globals.models) { + switch_core_hash_destroy(&globals.models); + } + + return SWITCH_STATUS_SUCCESS; +} diff --git a/src/mod/asr_tts/mod_piper_tts/mod_piper_tts.h b/src/mod/asr_tts/mod_piper_tts/mod_piper_tts.h new file mode 100644 index 00000000000..ea6c1b9de1d --- /dev/null +++ b/src/mod/asr_tts/mod_piper_tts/mod_piper_tts.h @@ -0,0 +1,60 @@ +/* + * FreeSWITCH Modular Media 
Switching Software Library / Soft-Switch Application + * Copyright (C) 2005-2014, Anthony Minessale II + * + * Version: MPL 1.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Module Contributor(s): + * Konstantin Alexandrin + * + * + */ +#ifndef MOD_PIPER_TTS_H +#define MOD_PIPER_TTS_H + +#include + +#define MOD_VERSION "1.0" +#define MOD_CONFIG_NAME "piper_tts.conf" +#define PIPER_FILE_ENCODING "wav" +// #define PIPER_DEBUG + +typedef struct { + switch_mutex_t *mutex; + switch_hash_t *models; + const char *tmp_path; + const char *cache_path; + const char *piper_bin; + const char *piper_opts; + uint8_t fl_cache_enabled; + uint8_t fl_voice_as_language; +} piper_globals_t; + +typedef struct { + char *lang; + char *model; +} piper_model_info_t; + +typedef struct { + piper_model_info_t *model_info; + switch_memory_pool_t *pool; + switch_file_handle_t *fhnd; + char *language; + char *voice; + char *dst_fname; + uint32_t samplerate; + uint32_t channels; +} tts_ctx_t; + + +#endif From 1340dc8ca456293ccdd9b9503a44cc6597e7c372 Mon Sep 17 00:00:00 2001 From: aks Date: Mon, 30 Dec 2024 19:42:17 +0400 Subject: [PATCH 4/6] added mod_curl_tts --- src/mod/asr_tts/mod_curl_tts/Makefile.am | 11 + .../conf/autoload_configs/curl_tts.conf.xml | 22 ++ .../mod_curl_tts/conf/dialplan/example.xml | 8 + src/mod/asr_tts/mod_curl_tts/curl.c | 185 +++++++++++ src/mod/asr_tts/mod_curl_tts/mod_curl_tts.c | 302 ++++++++++++++++++ src/mod/asr_tts/mod_curl_tts/mod_curl_tts.h | 72 +++++ src/mod/asr_tts/mod_curl_tts/utils.c | 80 +++++ 7 files changed, 
680 insertions(+) create mode 100644 src/mod/asr_tts/mod_curl_tts/Makefile.am create mode 100644 src/mod/asr_tts/mod_curl_tts/conf/autoload_configs/curl_tts.conf.xml create mode 100644 src/mod/asr_tts/mod_curl_tts/conf/dialplan/example.xml create mode 100644 src/mod/asr_tts/mod_curl_tts/curl.c create mode 100644 src/mod/asr_tts/mod_curl_tts/mod_curl_tts.c create mode 100644 src/mod/asr_tts/mod_curl_tts/mod_curl_tts.h create mode 100644 src/mod/asr_tts/mod_curl_tts/utils.c diff --git a/src/mod/asr_tts/mod_curl_tts/Makefile.am b/src/mod/asr_tts/mod_curl_tts/Makefile.am new file mode 100644 index 00000000000..22b75838bfc --- /dev/null +++ b/src/mod/asr_tts/mod_curl_tts/Makefile.am @@ -0,0 +1,11 @@ + +include $(top_srcdir)/build/modmake.rulesam +MODNAME=mod_curl_tts + +mod_LTLIBRARIES = mod_curl_tts.la +mod_curl_tts_la_SOURCES = mod_curl_tts.c utils.c curl.c +mod_curl_tts_la_CFLAGS = $(AM_CFLAGS) -I. +mod_curl_tts_la_LIBADD = $(switch_builddir)/libfreeswitch.la +mod_curl_tts_la_LDFLAGS = -avoid-version -module -no-undefined -shared + +$(am_mod_curl_tts_la_OBJECTS): mod_curl_tts.h diff --git a/src/mod/asr_tts/mod_curl_tts/conf/autoload_configs/curl_tts.conf.xml b/src/mod/asr_tts/mod_curl_tts/conf/autoload_configs/curl_tts.conf.xml new file mode 100644 index 00000000000..b4520fa11cf --- /dev/null +++ b/src/mod/asr_tts/mod_curl_tts/conf/autoload_configs/curl_tts.conf.xml @@ -0,0 +1,22 @@ + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/mod/asr_tts/mod_curl_tts/conf/dialplan/example.xml b/src/mod/asr_tts/mod_curl_tts/conf/dialplan/example.xml new file mode 100644 index 00000000000..86f9e43ceb2 --- /dev/null +++ b/src/mod/asr_tts/mod_curl_tts/conf/dialplan/example.xml @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/src/mod/asr_tts/mod_curl_tts/curl.c b/src/mod/asr_tts/mod_curl_tts/curl.c new file mode 100644 index 00000000000..b2fe58c9ea1 --- /dev/null +++ b/src/mod/asr_tts/mod_curl_tts/curl.c @@ -0,0 +1,185 @@ +/* + * FreeSWITCH Modular Media Switching Software 
Library / Soft-Switch Application + * Copyright (C) 2005-2014, Anthony Minessale II + * + * Version: MPL 1.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Module Contributor(s): + * Konstantin Alexandrin + * + * + */ +#include "mod_curl_tts.h" + +extern globals_t globals; + +static size_t curl_io_write_callback(char *buffer, size_t size, size_t nitems, void *user_data) { + tts_ctx_t *tts_ctx = (tts_ctx_t *)user_data; + size_t len = (size * nitems); + + if(len > 0 && tts_ctx->curl_recv_buffer) { + switch_buffer_write(tts_ctx->curl_recv_buffer, buffer, len); + } + + return len; +} + +static size_t curl_io_read_callback(char *buffer, size_t size, size_t nitems, void *user_data) { + tts_ctx_t *tts_ctx = (tts_ctx_t *)user_data; + size_t nmax = (size * nitems); + size_t ncur = (tts_ctx->curl_send_buffer_len > nmax) ? 
nmax : tts_ctx->curl_send_buffer_len; + + memmove(buffer, tts_ctx->curl_send_buffer_ref, ncur); + tts_ctx->curl_send_buffer_ref += ncur; + tts_ctx->curl_send_buffer_len -= ncur; + + return ncur; +} + + +switch_status_t curl_perform(tts_ctx_t *tts_ctx, char *text) { + switch_status_t status = SWITCH_STATUS_SUCCESS; + CURL *curl_handle = NULL; + switch_curl_slist_t *headers = NULL; + switch_CURLcode curl_ret = 0; + long http_resp = 0; + char *pdata = NULL; + char *qtext = NULL; + + if(!tts_ctx->api_url) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "api_url not determined\n"); + return SWITCH_STATUS_FALSE; + } + + if(text) { + qtext = escape_dquotes(text); + } + + if(tts_ctx->curl_params && !switch_core_hash_empty(tts_ctx->curl_params)) { + const void *hkey = NULL; void *hval = NULL; + switch_hash_index_t *hidx = NULL; + cJSON *jopts = NULL; + + jopts = cJSON_CreateObject(); + for(hidx = switch_core_hash_first_iter(tts_ctx->curl_params, hidx); hidx; hidx = switch_core_hash_next(&hidx)) { + switch_core_hash_this(hidx, &hkey, NULL, &hval); + if(hkey && hval) { + cJSON_AddItemToObject(jopts, (char *)hkey, cJSON_CreateString((char *)hval)); + } + } + + cJSON_AddItemToObject(jopts, "language", cJSON_CreateString((char *)tts_ctx->language)); + cJSON_AddItemToObject(jopts, "text", cJSON_CreateString((char *)qtext)); + + pdata = cJSON_PrintUnformatted(jopts); + cJSON_Delete(jopts); + } else { + cJSON *jopts = NULL; + + jopts = cJSON_CreateObject(); + + cJSON_AddItemToObject(jopts, "language", cJSON_CreateString((char *)tts_ctx->language)); + cJSON_AddItemToObject(jopts, "text", cJSON_CreateString((char *)qtext)); + + pdata = cJSON_PrintUnformatted(jopts); + cJSON_Delete(jopts); + } + + // switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "POST: [%s]\n", pdata); + + tts_ctx->media_ctype = NULL; + tts_ctx->curl_send_buffer_len = strlen(pdata); + tts_ctx->curl_send_buffer_ref = pdata; + + curl_handle = switch_curl_easy_init(); + + headers = 
switch_curl_slist_append(headers, "Content-Type: application/json; charset=utf-8"); + headers = switch_curl_slist_append(headers, "Expect:"); + + switch_curl_easy_setopt(curl_handle, CURLOPT_HTTPHEADER, headers); + switch_curl_easy_setopt(curl_handle, CURLOPT_POST, 1); + switch_curl_easy_setopt(curl_handle, CURLOPT_NOSIGNAL, 1); + + switch_curl_easy_setopt(curl_handle, CURLOPT_POSTFIELDSIZE, tts_ctx->curl_send_buffer_len); + switch_curl_easy_setopt(curl_handle, CURLOPT_POSTFIELDS, (void *) pdata); + switch_curl_easy_setopt(curl_handle, CURLOPT_READFUNCTION, curl_io_read_callback); + switch_curl_easy_setopt(curl_handle, CURLOPT_READDATA, (void *) tts_ctx); + + switch_curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, curl_io_write_callback); + switch_curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *) tts_ctx); + + if(globals.connect_timeout > 0) { + switch_curl_easy_setopt(curl_handle, CURLOPT_CONNECTTIMEOUT, globals.connect_timeout); + } + if(globals.request_timeout > 0) { + switch_curl_easy_setopt(curl_handle, CURLOPT_TIMEOUT, globals.request_timeout); + } + if(globals.user_agent) { + switch_curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, globals.user_agent); + } + + if(strncasecmp(tts_ctx->api_url, "https", 5) == 0) { + switch_curl_easy_setopt(curl_handle, CURLOPT_SSL_VERIFYPEER, 0); + switch_curl_easy_setopt(curl_handle, CURLOPT_SSL_VERIFYHOST, 0); + } + if(globals.proxy) { + if(globals.proxy_credentials != NULL) { + switch_curl_easy_setopt(curl_handle, CURLOPT_PROXYAUTH, CURLAUTH_ANY); + switch_curl_easy_setopt(curl_handle, CURLOPT_PROXYUSERPWD, globals.proxy_credentials); + } + if(strncasecmp(globals.proxy, "https", 5) == 0) { + switch_curl_easy_setopt(curl_handle, CURLOPT_PROXY_SSL_VERIFYPEER, 0); + } + switch_curl_easy_setopt(curl_handle, CURLOPT_PROXY, globals.proxy); + } + + if(tts_ctx->api_key) { + curl_easy_setopt(curl_handle, CURLOPT_XOAUTH2_BEARER, tts_ctx->api_key); + curl_easy_setopt(curl_handle, CURLOPT_HTTPAUTH, CURLAUTH_BEARER); + } + + 
switch_curl_easy_setopt(curl_handle, CURLOPT_URL, tts_ctx->api_url); + + curl_ret = switch_curl_easy_perform(curl_handle); + if(!curl_ret) { + switch_curl_easy_getinfo(curl_handle, CURLINFO_RESPONSE_CODE, &http_resp); + if(!http_resp) { switch_curl_easy_getinfo(curl_handle, CURLINFO_HTTP_CONNECTCODE, &http_resp); } + } else { + http_resp = curl_ret; + } + + if(http_resp != 200) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "http-error=[%ld] (%s)\n", http_resp, tts_ctx->api_url); + status = SWITCH_STATUS_FALSE; + } else { + char *ct = NULL; + if(!curl_easy_getinfo(curl_handle, CURLINFO_CONTENT_TYPE, &ct) && ct) { + tts_ctx->media_ctype = switch_core_strdup(tts_ctx->pool, ct); + } + } + + if(tts_ctx->curl_recv_buffer) { + if(switch_buffer_inuse(tts_ctx->curl_recv_buffer) > 0) { + switch_buffer_write(tts_ctx->curl_recv_buffer, "\0", 1); + } + } + + if(curl_handle) { switch_curl_easy_cleanup(curl_handle); } + if(headers) { switch_curl_slist_free_all(headers); } + + switch_safe_free(pdata); + switch_safe_free(qtext); + + return status; +} + diff --git a/src/mod/asr_tts/mod_curl_tts/mod_curl_tts.c b/src/mod/asr_tts/mod_curl_tts/mod_curl_tts.c new file mode 100644 index 00000000000..36ef4e31a1d --- /dev/null +++ b/src/mod/asr_tts/mod_curl_tts/mod_curl_tts.c @@ -0,0 +1,302 @@ +/* + * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application + * Copyright (C) 2005-2014, Anthony Minessale II + * + * Version: MPL 1.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. 
+ * + * Module Contributor(s): + * Konstantin Alexandrin + * + * Provides the ability to interact with TTS services over HTTP + * + * Development repository: + * https://github.com/akscf/mod_curl_tts + * + */ +#include "mod_curl_tts.h" + +globals_t globals; + +SWITCH_MODULE_LOAD_FUNCTION(mod_curl_tts_load); +SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_curl_tts_shutdown); +SWITCH_MODULE_DEFINITION(mod_curl_tts, mod_curl_tts_load, mod_curl_tts_shutdown, NULL); + +// --------------------------------------------------------------------------------------------------------------------------------------------- +// speech api +// --------------------------------------------------------------------------------------------------------------------------------------------- +static switch_status_t speech_open(switch_speech_handle_t *sh, const char *voice, int samplerate, int channels, switch_speech_flag_t *flags) { + switch_status_t status = SWITCH_STATUS_SUCCESS; + tts_ctx_t *tts_ctx = NULL; + + tts_ctx = switch_core_alloc(sh->memory_pool, sizeof(tts_ctx_t)); + tts_ctx->pool = sh->memory_pool; + tts_ctx->fhnd = switch_core_alloc(tts_ctx->pool, sizeof(switch_file_handle_t)); + tts_ctx->language = (globals.fl_voice_name_as_language && voice) ? 
switch_core_strdup(sh->memory_pool, voice) : NULL; + tts_ctx->samplerate = samplerate; + tts_ctx->channels = channels; + tts_ctx->api_url = globals.api_url; + tts_ctx->api_key = globals.api_key; + + sh->private_info = tts_ctx; + + if((status = switch_buffer_create_dynamic(&tts_ctx->curl_recv_buffer, 1024, 8192, globals.file_size_max)) != SWITCH_STATUS_SUCCESS) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_buffer_create_dynamic() fail\n"); + goto out; + } + + if((status = switch_core_hash_init(&tts_ctx->curl_params)) != SWITCH_STATUS_SUCCESS) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_core_hash_init()\n"); + switch_goto_status(SWITCH_STATUS_GENERR, out); + } + +out: + return status; +} + +static switch_status_t speech_close(switch_speech_handle_t *sh, switch_speech_flag_t *flags) { + tts_ctx_t *tts_ctx = (tts_ctx_t *) sh->private_info; + assert(tts_ctx != NULL); + + if(switch_test_flag(tts_ctx->fhnd, SWITCH_FILE_OPEN)) { + switch_core_file_close(tts_ctx->fhnd); + } + + if(tts_ctx->curl_recv_buffer) { + switch_buffer_destroy(&tts_ctx->curl_recv_buffer); + } + + if(!globals.fl_cache_enabled) { + if(tts_ctx->mp3_name) unlink(tts_ctx->mp3_name); + if(tts_ctx->wav_name) unlink(tts_ctx->wav_name); + } + + if(tts_ctx->curl_params) { + switch_core_hash_destroy(&tts_ctx->curl_params); + } + + return SWITCH_STATUS_SUCCESS; +} + +static switch_status_t speech_feed_tts(switch_speech_handle_t *sh, char *text, switch_speech_flag_t *flags) { + tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info; + switch_status_t status = SWITCH_STATUS_SUCCESS; + char digest[SWITCH_MD5_DIGEST_STRING_SIZE + 1] = { 0 }; + const void *ptr = NULL; + uint32_t recv_len = 0; + + assert(tts_ctx != NULL); + + switch_md5_string(digest, (void *)text, strlen(text)); + if(!tts_ctx->mp3_name) { + tts_ctx->mp3_name = switch_core_sprintf(sh->memory_pool, "%s%s%s.mp3", globals.cache_path, SWITCH_PATH_SEPARATOR, digest); + } + if(!tts_ctx->wav_name) { + 
tts_ctx->wav_name = switch_core_sprintf(sh->memory_pool, "%s%s%s.wav", globals.cache_path, SWITCH_PATH_SEPARATOR, digest); + } + + if(switch_file_exists(tts_ctx->mp3_name, tts_ctx->pool) == SWITCH_STATUS_SUCCESS) { + if((status = switch_core_file_open(tts_ctx->fhnd, tts_ctx->mp3_name, tts_ctx->channels, tts_ctx->samplerate, + (SWITCH_FILE_FLAG_READ | SWITCH_FILE_DATA_SHORT), sh->memory_pool)) != SWITCH_STATUS_SUCCESS) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open file: %s\n", tts_ctx->mp3_name); + status = SWITCH_STATUS_FALSE; + goto out; + } + } else if(switch_file_exists(tts_ctx->wav_name, tts_ctx->pool) == SWITCH_STATUS_SUCCESS) { + if((status = switch_core_file_open(tts_ctx->fhnd, tts_ctx->wav_name, tts_ctx->channels, tts_ctx->samplerate, + (SWITCH_FILE_FLAG_READ | SWITCH_FILE_DATA_SHORT), sh->memory_pool)) != SWITCH_STATUS_SUCCESS) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open file: %s\n", tts_ctx->wav_name); + status = SWITCH_STATUS_FALSE; + goto out; + } + } else { + switch_buffer_zero(tts_ctx->curl_recv_buffer); + status = curl_perform(tts_ctx , text); + recv_len = switch_buffer_peek_zerocopy(tts_ctx->curl_recv_buffer, &ptr); + + if(status == SWITCH_STATUS_SUCCESS) { + char *dst_name = NULL; + + if(strcasecmp(tts_ctx->media_ctype, "audio/mpeg") == 0) { + dst_name = tts_ctx->mp3_name; + } else if(strcasecmp(tts_ctx->media_ctype, "audio/wav") == 0) { + dst_name = tts_ctx->wav_name; + } else { + status = SWITCH_STATUS_FALSE; + } + + if(status == SWITCH_STATUS_SUCCESS) { + if((status = write_file(dst_name, (switch_byte_t *)ptr, recv_len)) == SWITCH_STATUS_SUCCESS) { + if((status = switch_core_file_open(tts_ctx->fhnd, dst_name, tts_ctx->channels, tts_ctx->samplerate, + (SWITCH_FILE_FLAG_READ | SWITCH_FILE_DATA_SHORT), sh->memory_pool)) != SWITCH_STATUS_SUCCESS) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open file: %s\n", dst_name); + goto out; + } + } + } else { + 
if(!dst_name) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unsupported media-type (%s)\n", tts_ctx->media_ctype); + } else if(globals.fl_log_http_error) { + if(recv_len > 0) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Services response: %s\n", (char *)ptr); + } + } + } + } + } +out: + return status; +} + +static switch_status_t speech_read_tts(switch_speech_handle_t *sh, void *data, size_t *data_len, switch_speech_flag_t *flags) { + tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info; + size_t len = (*data_len / sizeof(int16_t)); + + assert(tts_ctx != NULL); + + if(tts_ctx->fhnd->file_interface == NULL) { + return SWITCH_STATUS_FALSE; + } + + if(switch_core_file_read(tts_ctx->fhnd, data, &len) != SWITCH_STATUS_SUCCESS) { + switch_core_file_close(tts_ctx->fhnd); + return SWITCH_STATUS_FALSE; + } + + *data_len = (len * sizeof(int16_t)); + if(!data_len) { + switch_core_file_close(tts_ctx->fhnd); + return SWITCH_STATUS_BREAK; + } + + return SWITCH_STATUS_SUCCESS; +} + +static void speech_flush_tts(switch_speech_handle_t *sh) { + tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info; + + assert(tts_ctx != NULL); + + if(tts_ctx->fhnd != NULL && tts_ctx->fhnd->file_interface != NULL) { + switch_core_file_close(tts_ctx->fhnd); + } +} + +static void speech_text_param_tts(switch_speech_handle_t *sh, char *param, const char *val) { + tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info; + + assert(tts_ctx != NULL); + + if(strcasecmp(param, "url") == 0) { + if(val) tts_ctx->api_url = switch_core_strdup(sh->memory_pool, val); + } else if(strcasecmp(param, "key") == 0) { + if(val) tts_ctx->api_key = switch_core_strdup(sh->memory_pool, val); + } else if(strcasecmp(param, "language") == 0) { + if(val) tts_ctx->api_key = switch_core_strdup(sh->memory_pool, val); + } else if(strcasecmp(param, "text") == 0) { + // reserved (ignore) + } else { + if(tts_ctx->curl_params && val) { + switch_core_hash_insert(tts_ctx->curl_params, param, 
switch_core_strdup(sh->memory_pool, val)); + } + } +} + +static void speech_numeric_param_tts(switch_speech_handle_t *sh, char *param, int val) { +} + +static void speech_float_param_tts(switch_speech_handle_t *sh, char *param, double val) { +} + +// --------------------------------------------------------------------------------------------------------------------------------------------- +// main +// --------------------------------------------------------------------------------------------------------------------------------------------- +SWITCH_MODULE_LOAD_FUNCTION(mod_curl_tts_load) { + switch_status_t status = SWITCH_STATUS_SUCCESS; + switch_xml_t cfg, xml, settings, param; + switch_speech_interface_t *speech_interface; + + memset(&globals, 0, sizeof(globals)); + switch_mutex_init(&globals.mutex, SWITCH_MUTEX_NESTED, pool); + + if((xml = switch_xml_open_cfg(MOD_CONFIG_NAME, &cfg, NULL)) == NULL) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open configuration: %s\n", MOD_CONFIG_NAME); + switch_goto_status(SWITCH_STATUS_GENERR, out); + } + + if((settings = switch_xml_child(cfg, "settings"))) { + for (param = switch_xml_child(settings, "param"); param; param = param->next) { + char *var = (char *) switch_xml_attr_soft(param, "name"); + char *val = (char *) switch_xml_attr_soft(param, "value"); + + if(!strcasecmp(var, "api-url")) { + if(val) globals.api_url = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "api-key")) { + if(val) globals.api_key = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "cache-path")) { + if(val) globals.cache_path = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "user-agent")) { + if(val) globals.user_agent = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "request-timeout")) { + if(val) globals.request_timeout = atoi(val); + } else if(!strcasecmp(var, "connect-timeout")) { + if(val) globals.connect_timeout = atoi(val); + } else if(!strcasecmp(var, 
"voice-name-as-language")) { + if(val) globals.fl_voice_name_as_language = switch_true(val); + } else if(!strcasecmp(var, "log-http-errors")) { + if(val) globals.fl_log_http_error = switch_true(val); + } else if(!strcasecmp(var, "cache-enable")) { + if(val) globals.fl_cache_enabled = switch_true(val); + } else if(!strcasecmp(var, "file-size-max")) { + if(val) globals.file_size_max = atoi(val); + } else if(!strcasecmp(var, "proxy")) { + if(val) globals.proxy = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "proxy-credentials")) { + if(val) globals.proxy_credentials = switch_core_strdup(pool, val); + } + } + } + + globals.tmp_path = SWITCH_GLOBAL_dirs.temp_dir; + globals.cache_path = (globals.cache_path == NULL ? "/tmp/curl-tts-cache" : globals.cache_path); + globals.file_size_max = globals.file_size_max > 0 ? globals.file_size_max : FILE_SIZE_MAX; + + if(switch_directory_exists(globals.cache_path, NULL) != SWITCH_STATUS_SUCCESS) { + switch_dir_make(globals.cache_path, SWITCH_FPROT_OS_DEFAULT, NULL); + } + + *module_interface = switch_loadable_module_create_module_interface(pool, modname); + speech_interface = switch_loadable_module_create_interface(*module_interface, SWITCH_SPEECH_INTERFACE); + speech_interface->interface_name = "curl"; + + speech_interface->speech_open = speech_open; + speech_interface->speech_close = speech_close; + speech_interface->speech_feed_tts = speech_feed_tts; + speech_interface->speech_read_tts = speech_read_tts; + speech_interface->speech_flush_tts = speech_flush_tts; + + speech_interface->speech_text_param_tts = speech_text_param_tts; + speech_interface->speech_numeric_param_tts = speech_numeric_param_tts; + speech_interface->speech_float_param_tts = speech_float_param_tts; + + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "CURL-TTS (%s)\n", MOD_VERSION); +out: + if(xml) { + switch_xml_free(xml); + } + return status; +} + +SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_curl_tts_shutdown) { + + return SWITCH_STATUS_SUCCESS; 
+} diff --git a/src/mod/asr_tts/mod_curl_tts/mod_curl_tts.h b/src/mod/asr_tts/mod_curl_tts/mod_curl_tts.h new file mode 100644 index 00000000000..8539852e0f3 --- /dev/null +++ b/src/mod/asr_tts/mod_curl_tts/mod_curl_tts.h @@ -0,0 +1,72 @@ +/* + * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application + * Copyright (C) 2005-2014, Anthony Minessale II + * + * Version: MPL 1.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Module Contributor(s): + * Konstantin Alexandrin + * + * + */ +#ifndef MOD_CURL_TTS_H +#define MOD_CURL_TTS_H + +#include +#include + +#define MOD_VERSION "1.0.0" +#define MOD_CONFIG_NAME "curl_tts.conf" +#define FILE_SIZE_MAX (2*1024*1024) + +typedef struct { + switch_mutex_t *mutex; + char *cache_path; + char *tmp_path; + char *user_agent; + char *api_url; + char *api_key; + char *proxy; + char *proxy_credentials; + uint32_t file_size_max; + uint32_t request_timeout; // seconds + uint32_t connect_timeout; // seconds + uint8_t fl_voice_name_as_language; + uint8_t fl_log_http_error; + uint8_t fl_cache_enabled; +} globals_t; + +typedef struct { + switch_memory_pool_t *pool; + switch_file_handle_t *fhnd; + switch_buffer_t *curl_recv_buffer; + switch_hash_t *curl_params; + char *curl_send_buffer_ref; + char *api_url; + char *api_key; + char *language; + char *mp3_name; + char *wav_name; + char *media_ctype; + uint32_t samplerate; + uint32_t channels; + size_t curl_send_buffer_len; +} tts_ctx_t; + + +char *escape_dquotes(const char *string); +switch_status_t write_file(char 
*file_name, switch_byte_t *buf, uint32_t buf_len); +switch_status_t curl_perform(tts_ctx_t *tts_ctx, char *text); + + +#endif diff --git a/src/mod/asr_tts/mod_curl_tts/utils.c b/src/mod/asr_tts/mod_curl_tts/utils.c new file mode 100644 index 00000000000..422cb64a86c --- /dev/null +++ b/src/mod/asr_tts/mod_curl_tts/utils.c @@ -0,0 +1,80 @@ +/* + * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application + * Copyright (C) 2005-2014, Anthony Minessale II + * + * Version: MPL 1.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Module Contributor(s): + * Konstantin Alexandrin + * + * + */ +#include "mod_curl_tts.h" + +char *escape_dquotes(const char *string) { + size_t string_len = strlen(string); + size_t i; + size_t n = 0; + size_t dest_len = 0; + char *dest; + + dest_len = strlen(string) + 1; + for (i = 0; i < string_len; i++) { + switch (string[i]) { + case '\"': dest_len += 1; break; + } + } + + dest = (char *) malloc(sizeof(char) * dest_len); + switch_assert(dest); + + for (i = 0; i < string_len; i++) { + switch (string[i]) { + case '\"': + dest[n++] = '\\'; + dest[n++] = '\"'; + break; + default: + dest[n++] = string[i]; + } + } + dest[n++] = '\0'; + + switch_assert(n == dest_len); + return dest; +} + +switch_status_t write_file(char *file_name, switch_byte_t *buf, uint32_t buf_len) { + switch_status_t status = SWITCH_STATUS_SUCCESS; + switch_memory_pool_t *pool = NULL; + switch_size_t len = buf_len; + switch_file_t *fd = NULL; + + if(switch_core_new_memory_pool(&pool) != SWITCH_STATUS_SUCCESS) 
{ + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_core_new_memory_pool() fail\n"); + switch_goto_status(SWITCH_STATUS_GENERR, out); + } + if((status = switch_file_open(&fd, file_name, (SWITCH_FOPEN_WRITE | SWITCH_FOPEN_TRUNCATE | SWITCH_FOPEN_CREATE), SWITCH_FPROT_OS_DEFAULT, pool)) != SWITCH_STATUS_SUCCESS) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Open fail: %s\n", file_name); + goto out; + } + if((status = switch_file_write(fd, buf, &len)) != SWITCH_STATUS_SUCCESS) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Write fail (%s)\n", file_name); + } + switch_file_close(fd); +out: + if(pool) { + switch_core_destroy_memory_pool(&pool); + } + return status; +} From 3d4a9133b78296d060adf02e2569fc3187d8695a Mon Sep 17 00:00:00 2001 From: aks Date: Mon, 30 Dec 2024 20:11:43 +0400 Subject: [PATCH 5/6] cleaning master --- src/mod/asr_tts/mod_curl_tts/Makefile.am | 11 - .../conf/autoload_configs/curl_tts.conf.xml | 22 - .../mod_curl_tts/conf/dialplan/example.xml | 8 - src/mod/asr_tts/mod_curl_tts/curl.c | 185 ------- src/mod/asr_tts/mod_curl_tts/mod_curl_tts.c | 302 ----------- src/mod/asr_tts/mod_curl_tts/mod_curl_tts.h | 72 --- src/mod/asr_tts/mod_curl_tts/utils.c | 80 --- src/mod/asr_tts/mod_google_tts/Makefile.am | 11 - .../conf/autoload_configs/google_tts.conf.xml | 29 - .../mod_google_tts/conf/dialplan/example.xml | 8 - .../asr_tts/mod_google_tts/mod_google_tts.c | 509 ------------------ .../asr_tts/mod_google_tts/mod_google_tts.h | 58 -- src/mod/asr_tts/mod_google_tts/utils.c | 112 ---- src/mod/asr_tts/mod_openai_tts/Makefile.am | 11 - .../conf/autoload_configs/openai_tts.conf.xml | 30 -- .../mod_openai_tts/conf/dialplan/example.xml | 9 - .../asr_tts/mod_openai_tts/mod_openai_tts.c | 494 ----------------- .../asr_tts/mod_openai_tts/mod_openai_tts.h | 59 -- src/mod/asr_tts/mod_openai_tts/utils.c | 85 --- src/mod/asr_tts/mod_piper_tts/Makefile.am | 11 - .../conf/autoload_configs/piper_tts.conf.xml | 22 - 
.../mod_piper_tts/conf/dialplan/example.xml | 9 - src/mod/asr_tts/mod_piper_tts/mod_piper_tts.c | 339 ------------ src/mod/asr_tts/mod_piper_tts/mod_piper_tts.h | 60 --- 24 files changed, 2536 deletions(-) delete mode 100644 src/mod/asr_tts/mod_curl_tts/Makefile.am delete mode 100644 src/mod/asr_tts/mod_curl_tts/conf/autoload_configs/curl_tts.conf.xml delete mode 100644 src/mod/asr_tts/mod_curl_tts/conf/dialplan/example.xml delete mode 100644 src/mod/asr_tts/mod_curl_tts/curl.c delete mode 100644 src/mod/asr_tts/mod_curl_tts/mod_curl_tts.c delete mode 100644 src/mod/asr_tts/mod_curl_tts/mod_curl_tts.h delete mode 100644 src/mod/asr_tts/mod_curl_tts/utils.c delete mode 100644 src/mod/asr_tts/mod_google_tts/Makefile.am delete mode 100644 src/mod/asr_tts/mod_google_tts/conf/autoload_configs/google_tts.conf.xml delete mode 100644 src/mod/asr_tts/mod_google_tts/conf/dialplan/example.xml delete mode 100644 src/mod/asr_tts/mod_google_tts/mod_google_tts.c delete mode 100644 src/mod/asr_tts/mod_google_tts/mod_google_tts.h delete mode 100644 src/mod/asr_tts/mod_google_tts/utils.c delete mode 100644 src/mod/asr_tts/mod_openai_tts/Makefile.am delete mode 100644 src/mod/asr_tts/mod_openai_tts/conf/autoload_configs/openai_tts.conf.xml delete mode 100644 src/mod/asr_tts/mod_openai_tts/conf/dialplan/example.xml delete mode 100644 src/mod/asr_tts/mod_openai_tts/mod_openai_tts.c delete mode 100644 src/mod/asr_tts/mod_openai_tts/mod_openai_tts.h delete mode 100644 src/mod/asr_tts/mod_openai_tts/utils.c delete mode 100644 src/mod/asr_tts/mod_piper_tts/Makefile.am delete mode 100644 src/mod/asr_tts/mod_piper_tts/conf/autoload_configs/piper_tts.conf.xml delete mode 100644 src/mod/asr_tts/mod_piper_tts/conf/dialplan/example.xml delete mode 100644 src/mod/asr_tts/mod_piper_tts/mod_piper_tts.c delete mode 100644 src/mod/asr_tts/mod_piper_tts/mod_piper_tts.h diff --git a/src/mod/asr_tts/mod_curl_tts/Makefile.am b/src/mod/asr_tts/mod_curl_tts/Makefile.am deleted file mode 100644 index 
22b75838bfc..00000000000 --- a/src/mod/asr_tts/mod_curl_tts/Makefile.am +++ /dev/null @@ -1,11 +0,0 @@ - -include $(top_srcdir)/build/modmake.rulesam -MODNAME=mod_curl_tts - -mod_LTLIBRARIES = mod_curl_tts.la -mod_curl_tts_la_SOURCES = mod_curl_tts.c utils.c curl.c -mod_curl_tts_la_CFLAGS = $(AM_CFLAGS) -I. -mod_curl_tts_la_LIBADD = $(switch_builddir)/libfreeswitch.la -mod_curl_tts_la_LDFLAGS = -avoid-version -module -no-undefined -shared - -$(am_mod_curl_tts_la_OBJECTS): mod_curl_tts.h diff --git a/src/mod/asr_tts/mod_curl_tts/conf/autoload_configs/curl_tts.conf.xml b/src/mod/asr_tts/mod_curl_tts/conf/autoload_configs/curl_tts.conf.xml deleted file mode 100644 index b4520fa11cf..00000000000 --- a/src/mod/asr_tts/mod_curl_tts/conf/autoload_configs/curl_tts.conf.xml +++ /dev/null @@ -1,22 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/mod/asr_tts/mod_curl_tts/conf/dialplan/example.xml b/src/mod/asr_tts/mod_curl_tts/conf/dialplan/example.xml deleted file mode 100644 index 86f9e43ceb2..00000000000 --- a/src/mod/asr_tts/mod_curl_tts/conf/dialplan/example.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - diff --git a/src/mod/asr_tts/mod_curl_tts/curl.c b/src/mod/asr_tts/mod_curl_tts/curl.c deleted file mode 100644 index b2fe58c9ea1..00000000000 --- a/src/mod/asr_tts/mod_curl_tts/curl.c +++ /dev/null @@ -1,185 +0,0 @@ -/* - * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application - * Copyright (C) 2005-2014, Anthony Minessale II - * - * Version: MPL 1.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. 
See the License - * for the specific language governing rights and limitations under the - * License. - * - * Module Contributor(s): - * Konstantin Alexandrin - * - * - */ -#include "mod_curl_tts.h" - -extern globals_t globals; - -static size_t curl_io_write_callback(char *buffer, size_t size, size_t nitems, void *user_data) { - tts_ctx_t *tts_ctx = (tts_ctx_t *)user_data; - size_t len = (size * nitems); - - if(len > 0 && tts_ctx->curl_recv_buffer) { - switch_buffer_write(tts_ctx->curl_recv_buffer, buffer, len); - } - - return len; -} - -static size_t curl_io_read_callback(char *buffer, size_t size, size_t nitems, void *user_data) { - tts_ctx_t *tts_ctx = (tts_ctx_t *)user_data; - size_t nmax = (size * nitems); - size_t ncur = (tts_ctx->curl_send_buffer_len > nmax) ? nmax : tts_ctx->curl_send_buffer_len; - - memmove(buffer, tts_ctx->curl_send_buffer_ref, ncur); - tts_ctx->curl_send_buffer_ref += ncur; - tts_ctx->curl_send_buffer_len -= ncur; - - return ncur; -} - - -switch_status_t curl_perform(tts_ctx_t *tts_ctx, char *text) { - switch_status_t status = SWITCH_STATUS_SUCCESS; - CURL *curl_handle = NULL; - switch_curl_slist_t *headers = NULL; - switch_CURLcode curl_ret = 0; - long http_resp = 0; - char *pdata = NULL; - char *qtext = NULL; - - if(!tts_ctx->api_url) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "api_url not determined\n"); - return SWITCH_STATUS_FALSE; - } - - if(text) { - qtext = escape_dquotes(text); - } - - if(tts_ctx->curl_params && !switch_core_hash_empty(tts_ctx->curl_params)) { - const void *hkey = NULL; void *hval = NULL; - switch_hash_index_t *hidx = NULL; - cJSON *jopts = NULL; - - jopts = cJSON_CreateObject(); - for(hidx = switch_core_hash_first_iter(tts_ctx->curl_params, hidx); hidx; hidx = switch_core_hash_next(&hidx)) { - switch_core_hash_this(hidx, &hkey, NULL, &hval); - if(hkey && hval) { - cJSON_AddItemToObject(jopts, (char *)hkey, cJSON_CreateString((char *)hval)); - } - } - - cJSON_AddItemToObject(jopts, "language", 
cJSON_CreateString((char *)tts_ctx->language)); - cJSON_AddItemToObject(jopts, "text", cJSON_CreateString((char *)qtext)); - - pdata = cJSON_PrintUnformatted(jopts); - cJSON_Delete(jopts); - } else { - cJSON *jopts = NULL; - - jopts = cJSON_CreateObject(); - - cJSON_AddItemToObject(jopts, "language", cJSON_CreateString((char *)tts_ctx->language)); - cJSON_AddItemToObject(jopts, "text", cJSON_CreateString((char *)qtext)); - - pdata = cJSON_PrintUnformatted(jopts); - cJSON_Delete(jopts); - } - - // switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "POST: [%s]\n", pdata); - - tts_ctx->media_ctype = NULL; - tts_ctx->curl_send_buffer_len = strlen(pdata); - tts_ctx->curl_send_buffer_ref = pdata; - - curl_handle = switch_curl_easy_init(); - - headers = switch_curl_slist_append(headers, "Content-Type: application/json; charset=utf-8"); - headers = switch_curl_slist_append(headers, "Expect:"); - - switch_curl_easy_setopt(curl_handle, CURLOPT_HTTPHEADER, headers); - switch_curl_easy_setopt(curl_handle, CURLOPT_POST, 1); - switch_curl_easy_setopt(curl_handle, CURLOPT_NOSIGNAL, 1); - - switch_curl_easy_setopt(curl_handle, CURLOPT_POSTFIELDSIZE, tts_ctx->curl_send_buffer_len); - switch_curl_easy_setopt(curl_handle, CURLOPT_POSTFIELDS, (void *) pdata); - switch_curl_easy_setopt(curl_handle, CURLOPT_READFUNCTION, curl_io_read_callback); - switch_curl_easy_setopt(curl_handle, CURLOPT_READDATA, (void *) tts_ctx); - - switch_curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, curl_io_write_callback); - switch_curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *) tts_ctx); - - if(globals.connect_timeout > 0) { - switch_curl_easy_setopt(curl_handle, CURLOPT_CONNECTTIMEOUT, globals.connect_timeout); - } - if(globals.request_timeout > 0) { - switch_curl_easy_setopt(curl_handle, CURLOPT_TIMEOUT, globals.request_timeout); - } - if(globals.user_agent) { - switch_curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, globals.user_agent); - } - - if(strncasecmp(tts_ctx->api_url, 
"https", 5) == 0) { - switch_curl_easy_setopt(curl_handle, CURLOPT_SSL_VERIFYPEER, 0); - switch_curl_easy_setopt(curl_handle, CURLOPT_SSL_VERIFYHOST, 0); - } - if(globals.proxy) { - if(globals.proxy_credentials != NULL) { - switch_curl_easy_setopt(curl_handle, CURLOPT_PROXYAUTH, CURLAUTH_ANY); - switch_curl_easy_setopt(curl_handle, CURLOPT_PROXYUSERPWD, globals.proxy_credentials); - } - if(strncasecmp(globals.proxy, "https", 5) == 0) { - switch_curl_easy_setopt(curl_handle, CURLOPT_PROXY_SSL_VERIFYPEER, 0); - } - switch_curl_easy_setopt(curl_handle, CURLOPT_PROXY, globals.proxy); - } - - if(tts_ctx->api_key) { - curl_easy_setopt(curl_handle, CURLOPT_XOAUTH2_BEARER, tts_ctx->api_key); - curl_easy_setopt(curl_handle, CURLOPT_HTTPAUTH, CURLAUTH_BEARER); - } - - switch_curl_easy_setopt(curl_handle, CURLOPT_URL, tts_ctx->api_url); - - curl_ret = switch_curl_easy_perform(curl_handle); - if(!curl_ret) { - switch_curl_easy_getinfo(curl_handle, CURLINFO_RESPONSE_CODE, &http_resp); - if(!http_resp) { switch_curl_easy_getinfo(curl_handle, CURLINFO_HTTP_CONNECTCODE, &http_resp); } - } else { - http_resp = curl_ret; - } - - if(http_resp != 200) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "http-error=[%ld] (%s)\n", http_resp, tts_ctx->api_url); - status = SWITCH_STATUS_FALSE; - } else { - char *ct = NULL; - if(!curl_easy_getinfo(curl_handle, CURLINFO_CONTENT_TYPE, &ct) && ct) { - tts_ctx->media_ctype = switch_core_strdup(tts_ctx->pool, ct); - } - } - - if(tts_ctx->curl_recv_buffer) { - if(switch_buffer_inuse(tts_ctx->curl_recv_buffer) > 0) { - switch_buffer_write(tts_ctx->curl_recv_buffer, "\0", 1); - } - } - - if(curl_handle) { switch_curl_easy_cleanup(curl_handle); } - if(headers) { switch_curl_slist_free_all(headers); } - - switch_safe_free(pdata); - switch_safe_free(qtext); - - return status; -} - diff --git a/src/mod/asr_tts/mod_curl_tts/mod_curl_tts.c b/src/mod/asr_tts/mod_curl_tts/mod_curl_tts.c deleted file mode 100644 index 36ef4e31a1d..00000000000 --- 
a/src/mod/asr_tts/mod_curl_tts/mod_curl_tts.c +++ /dev/null @@ -1,302 +0,0 @@ -/* - * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application - * Copyright (C) 2005-2014, Anthony Minessale II - * - * Version: MPL 1.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * Module Contributor(s): - * Konstantin Alexandrin - * - * Provides the ability to interact with TTS services over HTTP - * - * Development repository: - * https://github.com/akscf/mod_curl_tts - * - */ -#include "mod_curl_tts.h" - -globals_t globals; - -SWITCH_MODULE_LOAD_FUNCTION(mod_curl_tts_load); -SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_curl_tts_shutdown); -SWITCH_MODULE_DEFINITION(mod_curl_tts, mod_curl_tts_load, mod_curl_tts_shutdown, NULL); - -// --------------------------------------------------------------------------------------------------------------------------------------------- -// speech api -// --------------------------------------------------------------------------------------------------------------------------------------------- -static switch_status_t speech_open(switch_speech_handle_t *sh, const char *voice, int samplerate, int channels, switch_speech_flag_t *flags) { - switch_status_t status = SWITCH_STATUS_SUCCESS; - tts_ctx_t *tts_ctx = NULL; - - tts_ctx = switch_core_alloc(sh->memory_pool, sizeof(tts_ctx_t)); - tts_ctx->pool = sh->memory_pool; - tts_ctx->fhnd = switch_core_alloc(tts_ctx->pool, sizeof(switch_file_handle_t)); - tts_ctx->language = (globals.fl_voice_name_as_language && voice) ? 
switch_core_strdup(sh->memory_pool, voice) : NULL; - tts_ctx->samplerate = samplerate; - tts_ctx->channels = channels; - tts_ctx->api_url = globals.api_url; - tts_ctx->api_key = globals.api_key; - - sh->private_info = tts_ctx; - - if((status = switch_buffer_create_dynamic(&tts_ctx->curl_recv_buffer, 1024, 8192, globals.file_size_max)) != SWITCH_STATUS_SUCCESS) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_buffer_create_dynamic() fail\n"); - goto out; - } - - if((status = switch_core_hash_init(&tts_ctx->curl_params)) != SWITCH_STATUS_SUCCESS) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_core_hash_init()\n"); - switch_goto_status(SWITCH_STATUS_GENERR, out); - } - -out: - return status; -} - -static switch_status_t speech_close(switch_speech_handle_t *sh, switch_speech_flag_t *flags) { - tts_ctx_t *tts_ctx = (tts_ctx_t *) sh->private_info; - assert(tts_ctx != NULL); - - if(switch_test_flag(tts_ctx->fhnd, SWITCH_FILE_OPEN)) { - switch_core_file_close(tts_ctx->fhnd); - } - - if(tts_ctx->curl_recv_buffer) { - switch_buffer_destroy(&tts_ctx->curl_recv_buffer); - } - - if(!globals.fl_cache_enabled) { - if(tts_ctx->mp3_name) unlink(tts_ctx->mp3_name); - if(tts_ctx->wav_name) unlink(tts_ctx->wav_name); - } - - if(tts_ctx->curl_params) { - switch_core_hash_destroy(&tts_ctx->curl_params); - } - - return SWITCH_STATUS_SUCCESS; -} - -static switch_status_t speech_feed_tts(switch_speech_handle_t *sh, char *text, switch_speech_flag_t *flags) { - tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info; - switch_status_t status = SWITCH_STATUS_SUCCESS; - char digest[SWITCH_MD5_DIGEST_STRING_SIZE + 1] = { 0 }; - const void *ptr = NULL; - uint32_t recv_len = 0; - - assert(tts_ctx != NULL); - - switch_md5_string(digest, (void *)text, strlen(text)); - if(!tts_ctx->mp3_name) { - tts_ctx->mp3_name = switch_core_sprintf(sh->memory_pool, "%s%s%s.mp3", globals.cache_path, SWITCH_PATH_SEPARATOR, digest); - } - if(!tts_ctx->wav_name) { - 
tts_ctx->wav_name = switch_core_sprintf(sh->memory_pool, "%s%s%s.wav", globals.cache_path, SWITCH_PATH_SEPARATOR, digest); - } - - if(switch_file_exists(tts_ctx->mp3_name, tts_ctx->pool) == SWITCH_STATUS_SUCCESS) { - if((status = switch_core_file_open(tts_ctx->fhnd, tts_ctx->mp3_name, tts_ctx->channels, tts_ctx->samplerate, - (SWITCH_FILE_FLAG_READ | SWITCH_FILE_DATA_SHORT), sh->memory_pool)) != SWITCH_STATUS_SUCCESS) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open file: %s\n", tts_ctx->mp3_name); - status = SWITCH_STATUS_FALSE; - goto out; - } - } else if(switch_file_exists(tts_ctx->wav_name, tts_ctx->pool) == SWITCH_STATUS_SUCCESS) { - if((status = switch_core_file_open(tts_ctx->fhnd, tts_ctx->wav_name, tts_ctx->channels, tts_ctx->samplerate, - (SWITCH_FILE_FLAG_READ | SWITCH_FILE_DATA_SHORT), sh->memory_pool)) != SWITCH_STATUS_SUCCESS) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open file: %s\n", tts_ctx->wav_name); - status = SWITCH_STATUS_FALSE; - goto out; - } - } else { - switch_buffer_zero(tts_ctx->curl_recv_buffer); - status = curl_perform(tts_ctx , text); - recv_len = switch_buffer_peek_zerocopy(tts_ctx->curl_recv_buffer, &ptr); - - if(status == SWITCH_STATUS_SUCCESS) { - char *dst_name = NULL; - - if(strcasecmp(tts_ctx->media_ctype, "audio/mpeg") == 0) { - dst_name = tts_ctx->mp3_name; - } else if(strcasecmp(tts_ctx->media_ctype, "audio/wav") == 0) { - dst_name = tts_ctx->wav_name; - } else { - status = SWITCH_STATUS_FALSE; - } - - if(status == SWITCH_STATUS_SUCCESS) { - if((status = write_file(dst_name, (switch_byte_t *)ptr, recv_len)) == SWITCH_STATUS_SUCCESS) { - if((status = switch_core_file_open(tts_ctx->fhnd, dst_name, tts_ctx->channels, tts_ctx->samplerate, - (SWITCH_FILE_FLAG_READ | SWITCH_FILE_DATA_SHORT), sh->memory_pool)) != SWITCH_STATUS_SUCCESS) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open file: %s\n", dst_name); - goto out; - } - } - } else { - 
if(!dst_name) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unsupported media-type (%s)\n", tts_ctx->media_ctype); - } else if(globals.fl_log_http_error) { - if(recv_len > 0) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Services response: %s\n", (char *)ptr); - } - } - } - } - } -out: - return status; -} - -static switch_status_t speech_read_tts(switch_speech_handle_t *sh, void *data, size_t *data_len, switch_speech_flag_t *flags) { - tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info; - size_t len = (*data_len / sizeof(int16_t)); - - assert(tts_ctx != NULL); - - if(tts_ctx->fhnd->file_interface == NULL) { - return SWITCH_STATUS_FALSE; - } - - if(switch_core_file_read(tts_ctx->fhnd, data, &len) != SWITCH_STATUS_SUCCESS) { - switch_core_file_close(tts_ctx->fhnd); - return SWITCH_STATUS_FALSE; - } - - *data_len = (len * sizeof(int16_t)); - if(!data_len) { - switch_core_file_close(tts_ctx->fhnd); - return SWITCH_STATUS_BREAK; - } - - return SWITCH_STATUS_SUCCESS; -} - -static void speech_flush_tts(switch_speech_handle_t *sh) { - tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info; - - assert(tts_ctx != NULL); - - if(tts_ctx->fhnd != NULL && tts_ctx->fhnd->file_interface != NULL) { - switch_core_file_close(tts_ctx->fhnd); - } -} - -static void speech_text_param_tts(switch_speech_handle_t *sh, char *param, const char *val) { - tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info; - - assert(tts_ctx != NULL); - - if(strcasecmp(param, "url") == 0) { - if(val) tts_ctx->api_url = switch_core_strdup(sh->memory_pool, val); - } else if(strcasecmp(param, "key") == 0) { - if(val) tts_ctx->api_key = switch_core_strdup(sh->memory_pool, val); - } else if(strcasecmp(param, "language") == 0) { - if(val) tts_ctx->api_key = switch_core_strdup(sh->memory_pool, val); - } else if(strcasecmp(param, "text") == 0) { - // reserved (ignore) - } else { - if(tts_ctx->curl_params && val) { - switch_core_hash_insert(tts_ctx->curl_params, param, 
switch_core_strdup(sh->memory_pool, val)); - } - } -} - -static void speech_numeric_param_tts(switch_speech_handle_t *sh, char *param, int val) { -} - -static void speech_float_param_tts(switch_speech_handle_t *sh, char *param, double val) { -} - -// --------------------------------------------------------------------------------------------------------------------------------------------- -// main -// --------------------------------------------------------------------------------------------------------------------------------------------- -SWITCH_MODULE_LOAD_FUNCTION(mod_curl_tts_load) { - switch_status_t status = SWITCH_STATUS_SUCCESS; - switch_xml_t cfg, xml, settings, param; - switch_speech_interface_t *speech_interface; - - memset(&globals, 0, sizeof(globals)); - switch_mutex_init(&globals.mutex, SWITCH_MUTEX_NESTED, pool); - - if((xml = switch_xml_open_cfg(MOD_CONFIG_NAME, &cfg, NULL)) == NULL) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open configuration: %s\n", MOD_CONFIG_NAME); - switch_goto_status(SWITCH_STATUS_GENERR, out); - } - - if((settings = switch_xml_child(cfg, "settings"))) { - for (param = switch_xml_child(settings, "param"); param; param = param->next) { - char *var = (char *) switch_xml_attr_soft(param, "name"); - char *val = (char *) switch_xml_attr_soft(param, "value"); - - if(!strcasecmp(var, "api-url")) { - if(val) globals.api_url = switch_core_strdup(pool, val); - } else if(!strcasecmp(var, "api-key")) { - if(val) globals.api_key = switch_core_strdup(pool, val); - } else if(!strcasecmp(var, "cache-path")) { - if(val) globals.cache_path = switch_core_strdup(pool, val); - } else if(!strcasecmp(var, "user-agent")) { - if(val) globals.user_agent = switch_core_strdup(pool, val); - } else if(!strcasecmp(var, "request-timeout")) { - if(val) globals.request_timeout = atoi(val); - } else if(!strcasecmp(var, "connect-timeout")) { - if(val) globals.connect_timeout = atoi(val); - } else if(!strcasecmp(var, 
"voice-name-as-language")) { - if(val) globals.fl_voice_name_as_language = switch_true(val); - } else if(!strcasecmp(var, "log-http-errors")) { - if(val) globals.fl_log_http_error = switch_true(val); - } else if(!strcasecmp(var, "cache-enable")) { - if(val) globals.fl_cache_enabled = switch_true(val); - } else if(!strcasecmp(var, "file-size-max")) { - if(val) globals.file_size_max = atoi(val); - } else if(!strcasecmp(var, "proxy")) { - if(val) globals.proxy = switch_core_strdup(pool, val); - } else if(!strcasecmp(var, "proxy-credentials")) { - if(val) globals.proxy_credentials = switch_core_strdup(pool, val); - } - } - } - - globals.tmp_path = SWITCH_GLOBAL_dirs.temp_dir; - globals.cache_path = (globals.cache_path == NULL ? "/tmp/curl-tts-cache" : globals.cache_path); - globals.file_size_max = globals.file_size_max > 0 ? globals.file_size_max : FILE_SIZE_MAX; - - if(switch_directory_exists(globals.cache_path, NULL) != SWITCH_STATUS_SUCCESS) { - switch_dir_make(globals.cache_path, SWITCH_FPROT_OS_DEFAULT, NULL); - } - - *module_interface = switch_loadable_module_create_module_interface(pool, modname); - speech_interface = switch_loadable_module_create_interface(*module_interface, SWITCH_SPEECH_INTERFACE); - speech_interface->interface_name = "curl"; - - speech_interface->speech_open = speech_open; - speech_interface->speech_close = speech_close; - speech_interface->speech_feed_tts = speech_feed_tts; - speech_interface->speech_read_tts = speech_read_tts; - speech_interface->speech_flush_tts = speech_flush_tts; - - speech_interface->speech_text_param_tts = speech_text_param_tts; - speech_interface->speech_numeric_param_tts = speech_numeric_param_tts; - speech_interface->speech_float_param_tts = speech_float_param_tts; - - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "CURL-TTS (%s)\n", MOD_VERSION); -out: - if(xml) { - switch_xml_free(xml); - } - return status; -} - -SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_curl_tts_shutdown) { - - return SWITCH_STATUS_SUCCESS; 
-} diff --git a/src/mod/asr_tts/mod_curl_tts/mod_curl_tts.h b/src/mod/asr_tts/mod_curl_tts/mod_curl_tts.h deleted file mode 100644 index 8539852e0f3..00000000000 --- a/src/mod/asr_tts/mod_curl_tts/mod_curl_tts.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application - * Copyright (C) 2005-2014, Anthony Minessale II - * - * Version: MPL 1.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * Module Contributor(s): - * Konstantin Alexandrin - * - * - */ -#ifndef MOD_CURL_TTS_H -#define MOD_CURL_TTS_H - -#include -#include - -#define MOD_VERSION "1.0.0" -#define MOD_CONFIG_NAME "curl_tts.conf" -#define FILE_SIZE_MAX (2*1024*1024) - -typedef struct { - switch_mutex_t *mutex; - char *cache_path; - char *tmp_path; - char *user_agent; - char *api_url; - char *api_key; - char *proxy; - char *proxy_credentials; - uint32_t file_size_max; - uint32_t request_timeout; // seconds - uint32_t connect_timeout; // seconds - uint8_t fl_voice_name_as_language; - uint8_t fl_log_http_error; - uint8_t fl_cache_enabled; -} globals_t; - -typedef struct { - switch_memory_pool_t *pool; - switch_file_handle_t *fhnd; - switch_buffer_t *curl_recv_buffer; - switch_hash_t *curl_params; - char *curl_send_buffer_ref; - char *api_url; - char *api_key; - char *language; - char *mp3_name; - char *wav_name; - char *media_ctype; - uint32_t samplerate; - uint32_t channels; - size_t curl_send_buffer_len; -} tts_ctx_t; - - -char *escape_dquotes(const char *string); -switch_status_t write_file(char 
*file_name, switch_byte_t *buf, uint32_t buf_len); -switch_status_t curl_perform(tts_ctx_t *tts_ctx, char *text); - - -#endif diff --git a/src/mod/asr_tts/mod_curl_tts/utils.c b/src/mod/asr_tts/mod_curl_tts/utils.c deleted file mode 100644 index 422cb64a86c..00000000000 --- a/src/mod/asr_tts/mod_curl_tts/utils.c +++ /dev/null @@ -1,80 +0,0 @@ -/* - * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application - * Copyright (C) 2005-2014, Anthony Minessale II - * - * Version: MPL 1.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. 
- * - * Module Contributor(s): - * Konstantin Alexandrin - * - * - */ -#include "mod_curl_tts.h" - -char *escape_dquotes(const char *string) { - size_t string_len = strlen(string); - size_t i; - size_t n = 0; - size_t dest_len = 0; - char *dest; - - dest_len = strlen(string) + 1; - for (i = 0; i < string_len; i++) { - switch (string[i]) { - case '\"': dest_len += 1; break; - } - } - - dest = (char *) malloc(sizeof(char) * dest_len); - switch_assert(dest); - - for (i = 0; i < string_len; i++) { - switch (string[i]) { - case '\"': - dest[n++] = '\\'; - dest[n++] = '\"'; - break; - default: - dest[n++] = string[i]; - } - } - dest[n++] = '\0'; - - switch_assert(n == dest_len); - return dest; -} - -switch_status_t write_file(char *file_name, switch_byte_t *buf, uint32_t buf_len) { - switch_status_t status = SWITCH_STATUS_SUCCESS; - switch_memory_pool_t *pool = NULL; - switch_size_t len = buf_len; - switch_file_t *fd = NULL; - - if(switch_core_new_memory_pool(&pool) != SWITCH_STATUS_SUCCESS) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_core_new_memory_pool() fail\n"); - switch_goto_status(SWITCH_STATUS_GENERR, out); - } - if((status = switch_file_open(&fd, file_name, (SWITCH_FOPEN_WRITE | SWITCH_FOPEN_TRUNCATE | SWITCH_FOPEN_CREATE), SWITCH_FPROT_OS_DEFAULT, pool)) != SWITCH_STATUS_SUCCESS) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Open fail: %s\n", file_name); - goto out; - } - if((status = switch_file_write(fd, buf, &len)) != SWITCH_STATUS_SUCCESS) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Write fail (%s)\n", file_name); - } - switch_file_close(fd); -out: - if(pool) { - switch_core_destroy_memory_pool(&pool); - } - return status; -} diff --git a/src/mod/asr_tts/mod_google_tts/Makefile.am b/src/mod/asr_tts/mod_google_tts/Makefile.am deleted file mode 100644 index a0acc9a7dc1..00000000000 --- a/src/mod/asr_tts/mod_google_tts/Makefile.am +++ /dev/null @@ -1,11 +0,0 @@ - -include 
$(top_srcdir)/build/modmake.rulesam -MODNAME=mod_google_tts - -mod_LTLIBRARIES = mod_google_tts.la -mod_google_tts_la_SOURCES = mod_google_tts.c utils.c -mod_google_tts_la_CFLAGS = $(AM_CFLAGS) -I. -mod_google_tts_la_LIBADD = $(switch_builddir)/libfreeswitch.la -mod_google_tts_la_LDFLAGS = -avoid-version -module -no-undefined -shared - -$(am_mod_google_tts_la_OBJECTS): mod_google_tts.h diff --git a/src/mod/asr_tts/mod_google_tts/conf/autoload_configs/google_tts.conf.xml b/src/mod/asr_tts/mod_google_tts/conf/autoload_configs/google_tts.conf.xml deleted file mode 100644 index bc85f56ca0f..00000000000 --- a/src/mod/asr_tts/mod_google_tts/conf/autoload_configs/google_tts.conf.xml +++ /dev/null @@ -1,29 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/mod/asr_tts/mod_google_tts/conf/dialplan/example.xml b/src/mod/asr_tts/mod_google_tts/conf/dialplan/example.xml deleted file mode 100644 index 0d571e43a02..00000000000 --- a/src/mod/asr_tts/mod_google_tts/conf/dialplan/example.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - diff --git a/src/mod/asr_tts/mod_google_tts/mod_google_tts.c b/src/mod/asr_tts/mod_google_tts/mod_google_tts.c deleted file mode 100644 index 01793278da6..00000000000 --- a/src/mod/asr_tts/mod_google_tts/mod_google_tts.c +++ /dev/null @@ -1,509 +0,0 @@ -/* - * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application - * Copyright (C) 2005-2014, Anthony Minessale II - * - * Version: MPL 1.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. 
- * - * Module Contributor(s): - * Konstantin Alexandrin - * - * - * Provides the ability to use Google TTS service in the Freeswitch - * https://cloud.google.com/text-to-speech/docs/reference/rest - * - * Development repository: - * https://github.com/akscf/mod_google_tts - * - */ -#include "mod_google_tts.h" - -static struct { - char *file_ext; - char *cache_path; - char *tmp_path; - char *opt_gender; - char *opt_encoding; - char *user_agent; - char *api_url; - char *api_key; - char *proxy; - char *proxy_credentials; - char *api_url_ep; - uint32_t file_size_max; - uint32_t request_timeout; // seconds - uint32_t connect_timeout; // seconds - uint8_t fl_voice_name_as_lang; - uint8_t fl_log_http_error; - uint8_t fl_cache_enabled; -} globals; - -SWITCH_MODULE_LOAD_FUNCTION(mod_google_tts_load); -SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_google_tts_shutdown); -SWITCH_MODULE_DEFINITION(mod_google_tts, mod_google_tts_load, mod_google_tts_shutdown, NULL); - - -static size_t curl_io_write_callback(char *buffer, size_t size, size_t nitems, void *user_data) { - tts_ctx_t *tts_ctx = (tts_ctx_t *)user_data; - size_t len = (size * nitems); - - if(len > 0 && tts_ctx->curl_recv_buffer) { - switch_buffer_write(tts_ctx->curl_recv_buffer, buffer, len); - } - - return len; -} - -static size_t curl_io_read_callback(char *buffer, size_t size, size_t nitems, void *user_data) { - tts_ctx_t *tts_ctx = (tts_ctx_t *)user_data; - size_t nmax = (size * nitems); - size_t ncur = (tts_ctx->curl_send_buffer_len > nmax) ? nmax : tts_ctx->curl_send_buffer_len; - - memmove(buffer, tts_ctx->curl_send_buffer_ref, ncur); - tts_ctx->curl_send_buffer_ref += ncur; - tts_ctx->curl_send_buffer_len -= ncur; - - return ncur; -} - -static switch_status_t curl_perform(tts_ctx_t *tts_ctx, char *text) { - switch_status_t status = SWITCH_STATUS_SUCCESS; - CURL *curl_handle = NULL; - switch_curl_slist_t *headers = NULL; - switch_CURLcode curl_ret = 0; - long http_resp = 0; - const char *xgender = (tts_ctx->gender ? 
tts_ctx->gender : globals.opt_gender); - const char *ygender = (!globals.fl_voice_name_as_lang && tts_ctx->voice_name) ? tts_ctx->voice_name : NULL; - char *pdata = NULL; - char *qtext = NULL; - - if(text) { - qtext = escape_squotes(text); - } - - pdata = switch_mprintf( - "{'input':{'text':'%s'},'voice':{'ssmlGender':'%s', 'languageCode':'%s'},'audioConfig':{'audioEncoding':'%s', 'sampleRateHertz':'%d'}}\n\n", - qtext ? qtext : "", - ygender ? ygender : xgender, - tts_ctx->lang_code, - globals.opt_encoding, - tts_ctx->samplerate - ); - -#ifdef GTTS_DEBUG - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "CURL: URL=[%s], PDATA=[%s]\n", globals.api_url_ep, pdata); -#endif - - tts_ctx->curl_send_buffer_len = strlen(pdata); - tts_ctx->curl_send_buffer_ref = pdata; - - curl_handle = switch_curl_easy_init(); - - headers = switch_curl_slist_append(headers, "Content-Type: application/json; charset=utf-8"); - headers = switch_curl_slist_append(headers, "Expect:"); - - switch_curl_easy_setopt(curl_handle, CURLOPT_HTTPHEADER, headers); - switch_curl_easy_setopt(curl_handle, CURLOPT_POST, 1); - switch_curl_easy_setopt(curl_handle, CURLOPT_NOSIGNAL, 1); - - switch_curl_easy_setopt(curl_handle, CURLOPT_POSTFIELDSIZE, tts_ctx->curl_send_buffer_len); - switch_curl_easy_setopt(curl_handle, CURLOPT_POSTFIELDS, (void *)pdata); - switch_curl_easy_setopt(curl_handle, CURLOPT_READFUNCTION, curl_io_read_callback); - switch_curl_easy_setopt(curl_handle, CURLOPT_READDATA, (void *)tts_ctx); - - switch_curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, curl_io_write_callback); - switch_curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)tts_ctx); - - if(globals.connect_timeout > 0) { - switch_curl_easy_setopt(curl_handle, CURLOPT_CONNECTTIMEOUT, globals.connect_timeout); - } - if(globals.request_timeout > 0) { - switch_curl_easy_setopt(curl_handle, CURLOPT_TIMEOUT, globals.request_timeout); - } - if(globals.user_agent) { - switch_curl_easy_setopt(curl_handle, 
CURLOPT_USERAGENT, globals.user_agent); - } - if(strncasecmp(globals.api_url_ep, "https", 5) == 0) { - switch_curl_easy_setopt(curl_handle, CURLOPT_SSL_VERIFYPEER, 0); - switch_curl_easy_setopt(curl_handle, CURLOPT_SSL_VERIFYHOST, 0); - } - if(globals.proxy) { - if(globals.proxy_credentials != NULL) { - switch_curl_easy_setopt(curl_handle, CURLOPT_PROXYAUTH, CURLAUTH_ANY); - switch_curl_easy_setopt(curl_handle, CURLOPT_PROXYUSERPWD, globals.proxy_credentials); - } - if(strncasecmp(globals.proxy, "https", 5) == 0) { - switch_curl_easy_setopt(curl_handle, CURLOPT_PROXY_SSL_VERIFYPEER, 0); - } - switch_curl_easy_setopt(curl_handle, CURLOPT_PROXY, globals.proxy); - } - - switch_curl_easy_setopt(curl_handle, CURLOPT_URL, globals.api_url_ep); - - curl_ret = switch_curl_easy_perform(curl_handle); - if(!curl_ret) { - switch_curl_easy_getinfo(curl_handle, CURLINFO_RESPONSE_CODE, &http_resp); - if(!http_resp) { switch_curl_easy_getinfo(curl_handle, CURLINFO_HTTP_CONNECTCODE, &http_resp); } - } else { - http_resp = curl_ret; - } - - if(http_resp != 200) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "http-error=[%ld] (%s)\n", http_resp, globals.api_url); - status = SWITCH_STATUS_FALSE; - } - - if(tts_ctx->curl_recv_buffer) { - if(switch_buffer_inuse(tts_ctx->curl_recv_buffer) > 0) { - switch_buffer_write(tts_ctx->curl_recv_buffer, "\0", 1); - } - } - - if(curl_handle) { switch_curl_easy_cleanup(curl_handle); } - if(headers) { switch_curl_slist_free_all(headers); } - - switch_safe_free(pdata); - switch_safe_free(qtext); - return status; -} - -static switch_status_t extract_audio(tts_ctx_t *tts_ctx, char *buf_in, uint32_t buf_len) { - switch_status_t status = SWITCH_STATUS_SUCCESS; - switch_memory_pool_t *pool = tts_ctx->pool; - switch_file_t *fd = NULL; - char *buf_out = NULL, *ptr = NULL; - size_t len = buf_len, dec_len = 0; - uint32_t ofs1 = 0, ofs2 = 0; - - if((ptr = strnstr(buf_in, "\"audioContent\"", len)) == NULL) { - switch_log_printf(SWITCH_CHANNEL_LOG, 
SWITCH_LOG_WARNING, "Malformed media content\n"); - switch_goto_status(SWITCH_STATUS_FALSE, out); - } - for(ofs1 = ((ptr - buf_in) + 14); ofs1 < len; ofs1++) { - if(buf_in[ofs1] == '"') { ofs1++; break; } - } - if(ofs1 >= len) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Malformed media content\n"); - switch_goto_status(SWITCH_STATUS_FALSE, out); - } - for(ofs2 = len; ofs2 > ofs1; ofs2--) { - if(buf_in[ofs2] == '"') { buf_in[ofs2]='\0'; ofs2--; break; } - } - if(ofs2 <= ofs1) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Malformed media content\n"); - switch_goto_status(SWITCH_STATUS_FALSE, out); - } - ptr = (void *)(buf_in + ofs1); - len = (ofs2 - ofs1); - dec_len = BASE64_DEC_SZ(len); - - if(dec_len < 4 ) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Malformed media content\n"); - switch_goto_status(SWITCH_STATUS_FALSE, out); - } - - if((buf_out = switch_core_alloc(pool, dec_len)) == NULL) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_core_alloc() failed\n"); - switch_goto_status(SWITCH_STATUS_GENERR, out); - } - - len = switch_b64_decode(ptr, buf_out, dec_len); - if(len != dec_len) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "switch_b64_decode: (len != dec_len)\n"); - dec_len = len; - } - - status = switch_file_open(&fd, tts_ctx->dst_file, - (SWITCH_FOPEN_WRITE | SWITCH_FOPEN_CREATE | SWITCH_FOPEN_TRUNCATE | SWITCH_FOPEN_BINARY), - (SWITCH_FPROT_UREAD | SWITCH_FPROT_UWRITE), pool); - if(status != SWITCH_STATUS_SUCCESS) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Unable to create output file (%s)\n", tts_ctx->dst_file); - switch_goto_status(SWITCH_STATUS_FALSE, out); - } - - status = switch_file_write(fd, buf_out, &len); - if(status != SWITCH_STATUS_SUCCESS || len != dec_len) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Unable to write into file (%s)\n", tts_ctx->dst_file); - switch_goto_status(SWITCH_STATUS_FALSE, out); - } - -out: - 
if(fd) { - switch_file_close(fd); - } - return status; -} - -// --------------------------------------------------------------------------------------------------------------------------------------------- -// speech api -// --------------------------------------------------------------------------------------------------------------------------------------------- -static switch_status_t speech_open(switch_speech_handle_t *sh, const char *voice, int samplerate, int channels, switch_speech_flag_t *flags) { - switch_status_t status = SWITCH_STATUS_SUCCESS; - char name_uuid[SWITCH_UUID_FORMATTED_LENGTH + 1] = { 0 }; - tts_ctx_t *tts_ctx = NULL; - - tts_ctx = switch_core_alloc(sh->memory_pool, sizeof(tts_ctx_t)); - tts_ctx->pool = sh->memory_pool; - tts_ctx->fhnd = switch_core_alloc(tts_ctx->pool, sizeof(switch_file_handle_t)); - tts_ctx->voice_name = switch_core_strdup(tts_ctx->pool, voice); - tts_ctx->lang_code = (globals.fl_voice_name_as_lang && voice) ? switch_core_strdup(sh->memory_pool, lang2bcp47(voice)) : "en-gb"; - tts_ctx->channels = channels; - tts_ctx->samplerate = samplerate; - tts_ctx->dst_file = NULL; - - sh->private_info = tts_ctx; - - if((status = switch_buffer_create_dynamic(&tts_ctx->curl_recv_buffer, 1024, 8192, globals.file_size_max)) != SWITCH_STATUS_SUCCESS) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_buffer_create_dynamic()\n"); - goto out; - } - - if(!globals.fl_cache_enabled) { - switch_uuid_str((char *)name_uuid, sizeof(name_uuid)); - tts_ctx->dst_file = switch_core_sprintf(sh->memory_pool, "%s%sgoogle-%s.%s", - globals.tmp_path, - SWITCH_PATH_SEPARATOR, - name_uuid, - globals.file_ext - ); - } - -out: - return status; -} - -static switch_status_t speech_close(switch_speech_handle_t *sh, switch_speech_flag_t *flags) { - tts_ctx_t *tts_ctx = (tts_ctx_t *) sh->private_info; - assert(tts_ctx != NULL); - - if(switch_test_flag(tts_ctx->fhnd, SWITCH_FILE_OPEN)) { - switch_core_file_close(tts_ctx->fhnd); - } - - 
if(tts_ctx->curl_recv_buffer) { - switch_buffer_destroy(&tts_ctx->curl_recv_buffer); - } - - if(tts_ctx->dst_file && !globals.fl_cache_enabled) { - unlink(tts_ctx->dst_file); - } - - return SWITCH_STATUS_SUCCESS; -} - -static switch_status_t speech_feed_tts(switch_speech_handle_t *sh, char *text, switch_speech_flag_t *flags) { - tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info; - switch_status_t status = SWITCH_STATUS_SUCCESS; - char digest[SWITCH_MD5_DIGEST_STRING_SIZE + 1] = { 0 }; - const void *ptr = NULL; - uint32_t recv_len = 0; - - assert(tts_ctx != NULL); - - if(!tts_ctx->dst_file) { - switch_md5_string(digest, (void *) text, strlen(text)); - tts_ctx->dst_file = switch_core_sprintf(sh->memory_pool, "%s%s%s.%s", - globals.cache_path, - SWITCH_PATH_SEPARATOR, - digest, - globals.file_ext - ); - } - - if(switch_file_exists(tts_ctx->dst_file, tts_ctx->pool) == SWITCH_STATUS_SUCCESS) { - if((status = switch_core_file_open(tts_ctx->fhnd, tts_ctx->dst_file, tts_ctx->channels, tts_ctx->samplerate, - (SWITCH_FILE_FLAG_READ | SWITCH_FILE_DATA_SHORT), sh->memory_pool)) != SWITCH_STATUS_SUCCESS) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open file: %s\n", tts_ctx->dst_file); - switch_goto_status(SWITCH_STATUS_FALSE, out); - } - } else { - switch_buffer_zero(tts_ctx->curl_recv_buffer); - status = curl_perform(tts_ctx , text); - recv_len = switch_buffer_peek_zerocopy(tts_ctx->curl_recv_buffer, &ptr); - if(status == SWITCH_STATUS_SUCCESS) { - if((status = extract_audio(tts_ctx, (char *)ptr, recv_len)) == SWITCH_STATUS_SUCCESS) { - if((status = switch_core_file_open(tts_ctx->fhnd, tts_ctx->dst_file, tts_ctx->channels, tts_ctx->samplerate, - (SWITCH_FILE_FLAG_READ | SWITCH_FILE_DATA_SHORT), sh->memory_pool)) != SWITCH_STATUS_SUCCESS) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open file: %s\n", tts_ctx->dst_file); - switch_goto_status(SWITCH_STATUS_FALSE, out); - } - } else { - switch_log_printf(SWITCH_CHANNEL_LOG, 
SWITCH_LOG_ERROR, "Unable to extract media\n"); - status = SWITCH_STATUS_FALSE; - } - } else { - if(globals.fl_log_http_error && recv_len > 0) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Services response: %s\n", (char *)ptr); - } - } - } -out: - return status; -} - -static switch_status_t speech_read_tts(switch_speech_handle_t *sh, void *data, size_t *data_len, switch_speech_flag_t *flags) { - tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info; - size_t len = (*data_len / sizeof(int16_t)); - - assert(tts_ctx != NULL); - - if(tts_ctx->fhnd->file_interface == NULL) { - return SWITCH_STATUS_FALSE; - } - - if(switch_core_file_read(tts_ctx->fhnd, data, &len) != SWITCH_STATUS_SUCCESS) { - switch_core_file_close(tts_ctx->fhnd); - return SWITCH_STATUS_FALSE; - } - - *data_len = (len * sizeof(int16_t)); - if(!data_len) { - switch_core_file_close(tts_ctx->fhnd); - return SWITCH_STATUS_BREAK; - } - - return SWITCH_STATUS_SUCCESS; -} - -static void speech_flush_tts(switch_speech_handle_t *sh) { - tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info; - - assert(tts_ctx != NULL); - - if(tts_ctx->fhnd != NULL && tts_ctx->fhnd->file_interface != NULL) { - switch_core_file_close(tts_ctx->fhnd); - } -} - -static void speech_text_param_tts(switch_speech_handle_t *sh, char *param, const char *val) { - tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info; - - assert(tts_ctx != NULL); - - if(strcasecmp(param, "lang") == 0) { - if(val) tts_ctx->lang_code = switch_core_strdup(sh->memory_pool, lang2bcp47(val)); - } else if(strcasecmp(param, "gender") == 0) { - if(val) tts_ctx->gender = switch_core_strdup(sh->memory_pool, fmt_gender(val)); - } -} - -static void speech_numeric_param_tts(switch_speech_handle_t *sh, char *param, int val) { -} - -static void speech_float_param_tts(switch_speech_handle_t *sh, char *param, double val) { -} - -// 
--------------------------------------------------------------------------------------------------------------------------------------------- -// main -// --------------------------------------------------------------------------------------------------------------------------------------------- -SWITCH_MODULE_LOAD_FUNCTION(mod_google_tts_load) { - switch_status_t status = SWITCH_STATUS_SUCCESS; - switch_xml_t cfg, xml, settings, param; - switch_speech_interface_t *speech_interface; - - memset(&globals, 0, sizeof(globals)); - - if((xml = switch_xml_open_cfg(MOD_CONFIG_NAME, &cfg, NULL)) == NULL) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open configuration: %s\n", MOD_CONFIG_NAME); - switch_goto_status(SWITCH_STATUS_GENERR, out); - } - - if((settings = switch_xml_child(cfg, "settings"))) { - for (param = switch_xml_child(settings, "param"); param; param = param->next) { - char *var = (char *)switch_xml_attr_soft(param, "name"); - char *val = (char *)switch_xml_attr_soft(param, "value"); - - if(!strcasecmp(var, "api-url")) { - if(val) globals.api_url = switch_core_strdup(pool, val); - } else if(!strcasecmp(var, "api-key")) { - if(val) globals.api_key = switch_core_strdup(pool, val); - } else if(!strcasecmp(var, "cache-path")) { - if(val) globals.cache_path = switch_core_strdup(pool, val); - } else if(!strcasecmp(var, "gender")) { - if(val) globals.opt_gender = switch_core_strdup(pool, val); - } else if(!strcasecmp(var, "encoding")) { - if(val) globals.opt_encoding = switch_core_strdup(pool, val); - } else if(!strcasecmp(var, "user-agent")) { - if(val) globals.user_agent = switch_core_strdup(pool, val); - } else if(!strcasecmp(var, "request-timeout")) { - if(val) globals.request_timeout = atoi(val); - } else if(!strcasecmp(var, "connect-timeout")) { - if(val) globals.connect_timeout = atoi(val); - } else if(!strcasecmp(var, "voice-name-as-language")) { - if(val) globals.fl_voice_name_as_lang = switch_true(val); - } else if(!strcasecmp(var, 
"log-http-errors")) { - if(val) globals.fl_log_http_error = switch_true(val); - } else if(!strcasecmp(var, "cache-enable")) { - if(val) globals.fl_cache_enabled = switch_true(val); - } else if(!strcasecmp(var, "file-size-max")) { - if(val) globals.file_size_max = atoi(val); - } else if(!strcasecmp(var, "proxy")) { - if(val) globals.proxy = switch_core_strdup(pool, val); - } else if(!strcasecmp(var, "proxy-credentials")) { - if(val) globals.proxy_credentials = switch_core_strdup(pool, val); - } - } - } - - if(!globals.api_url) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing required parameter: api-url\n"); - switch_goto_status(SWITCH_STATUS_GENERR, out); - } - if(!globals.api_key) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing required parameter: api-key\n"); - switch_goto_status(SWITCH_STATUS_GENERR, out); - } - - globals.tmp_path = SWITCH_GLOBAL_dirs.temp_dir; - globals.api_url_ep = switch_string_replace(globals.api_url, "${api-key}", globals.api_key); - globals.cache_path = (globals.cache_path == NULL ? "/tmp/google-tts-cache" : globals.cache_path); - globals.opt_gender = fmt_gender(globals.opt_gender == NULL ? "female" : globals.opt_gender); - globals.opt_encoding = fmt_encode(globals.opt_encoding == NULL ? "mp3" : globals.opt_encoding); - globals.file_size_max = globals.file_size_max > 0 ? 
globals.file_size_max : FILE_SIZE_MAX; - globals.file_ext = fmt_enct2fext(globals.opt_encoding); - - if(!globals.api_url_ep) { - globals.api_url_ep = strdup(globals.api_key); - } - - if(switch_directory_exists(globals.cache_path, NULL) != SWITCH_STATUS_SUCCESS) { - switch_dir_make(globals.cache_path, SWITCH_FPROT_OS_DEFAULT, NULL); - } - - *module_interface = switch_loadable_module_create_module_interface(pool, modname); - speech_interface = switch_loadable_module_create_interface(*module_interface, SWITCH_SPEECH_INTERFACE); - speech_interface->interface_name = "google"; - - speech_interface->speech_open = speech_open; - speech_interface->speech_close = speech_close; - speech_interface->speech_feed_tts = speech_feed_tts; - speech_interface->speech_read_tts = speech_read_tts; - speech_interface->speech_flush_tts = speech_flush_tts; - - speech_interface->speech_text_param_tts = speech_text_param_tts; - speech_interface->speech_numeric_param_tts = speech_numeric_param_tts; - speech_interface->speech_float_param_tts = speech_float_param_tts; - - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "GoogleTTS (%s)\n", MOD_VERSION); -out: - if(xml) { - switch_xml_free(xml); - } - return status; -} - -SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_google_tts_shutdown) { - - switch_safe_free(globals.api_url_ep); - - return SWITCH_STATUS_SUCCESS; -} diff --git a/src/mod/asr_tts/mod_google_tts/mod_google_tts.h b/src/mod/asr_tts/mod_google_tts/mod_google_tts.h deleted file mode 100644 index 18090f9feed..00000000000 --- a/src/mod/asr_tts/mod_google_tts/mod_google_tts.h +++ /dev/null @@ -1,58 +0,0 @@ -/* - * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application - * Copyright (C) 2005-2014, Anthony Minessale II - * - * Version: MPL 1.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * Module Contributor(s): - * Konstantin Alexandrin - * - * - */ -#ifndef MOD_GOOGLE_TTS_H -#define MOD_GOOGLE_TTS_H - -#include -#include - -#define MOD_VERSION "1.0_gcp_api_v1" -#define MOD_CONFIG_NAME "google_tts.conf" -#define FILE_SIZE_MAX (2*1024*1024) -#define BASE64_DEC_SZ(n) ((n*3)/4) -//#define GTTS_DEBUG - -typedef struct { - switch_memory_pool_t *pool; - switch_file_handle_t *fhnd; - switch_buffer_t *curl_recv_buffer; - char *curl_send_buffer_ref; - char *lang_code; - char *gender; - char *voice_name; - char *dst_file; - uint32_t samplerate; - uint32_t channels; - size_t curl_send_buffer_len; -} tts_ctx_t; - - -/* utils.c */ -char *lang2bcp47(const char *lng); -char *fmt_enct2fext(const char *fmt); -char *fmt_gender(const char *gender); -char *fmt_encode(const char *fmt); - -char *strnstr(const char *s, const char *find, size_t slen); -char *escape_squotes(const char *string); - -#endif diff --git a/src/mod/asr_tts/mod_google_tts/utils.c b/src/mod/asr_tts/mod_google_tts/utils.c deleted file mode 100644 index b83043199c9..00000000000 --- a/src/mod/asr_tts/mod_google_tts/utils.c +++ /dev/null @@ -1,112 +0,0 @@ -/* - * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application - * Copyright (C) 2005-2014, Anthony Minessale II - * - * Version: MPL 1.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * Module Contributor(s): - * Konstantin Alexandrin - * - * - */ -#include "mod_google_tts.h" - -char *lang2bcp47(const char *lng) { - if(strcasecmp(lng, "en") == 0) { return "en-gb"; } - if(strcasecmp(lng, "de") == 0) { return "de-de"; } - if(strcasecmp(lng, "es") == 0) { return "es-es"; } - if(strcasecmp(lng, "it") == 0) { return "it-it"; } - if(strcasecmp(lng, "ru") == 0) { return "ru-ru"; } - return (char *)lng; -} - -char *fmt_gender(const char *gender) { - if(strcasecmp(gender, "male") == 0) { return "MALE"; } - if(strcasecmp(gender, "female") == 0) { return "FEMALE"; } - return (char *)gender; -} - -char *fmt_encode(const char *fmt) { - if(strcasecmp(fmt, "mp3") == 0) { return "MP3"; } - if(strcasecmp(fmt, "wav") == 0) { return "LINEAR16"; } - if(strcasecmp(fmt, "ulaw") == 0) { return "MULAW"; } - if(strcasecmp(fmt, "alaw") == 0) { return "ALAW"; } - return (char *)fmt; -} - -char *fmt_enct2fext(const char *fmt) { - if(strcasecmp(fmt, "mp3") == 0) { return "mp3"; } - if(strcasecmp(fmt, "linear16") == 0) { return "wav"; } - if(strcasecmp(fmt, "mulaw") == 0) { return "ulaw"; } - if(strcasecmp(fmt, "alaw") == 0) { return "alaw"; } - return (char *)fmt; -} - -char *escape_squotes(const char *string) { - size_t string_len = strlen(string); - size_t i; - size_t n = 0; - size_t dest_len = 0; - char *dest; - - dest_len = strlen(string) + 1; - for (i = 0; i < string_len; i++) { - switch (string[i]) { - case '\'': dest_len += 1; break; - } - } - - dest = (char *) malloc(sizeof(char) * dest_len); - switch_assert(dest); - - for (i = 0; i < string_len; i++) { - switch (string[i]) { - case '\'': - dest[n++] = '\\'; - dest[n++] = '\''; - break; - default: 
- dest[n++] = string[i]; - } - } - dest[n++] = '\0'; - - switch_assert(n == dest_len); - return dest; -} - -/*- - * Copyright (c) 2001 Mike Barcroft - * Copyright (c) 1990, 1993 - * The Regents of the University of California. All rights reserved. - */ -char *strnstr(const char *s, const char *find, size_t slen) { - char c, sc; - size_t len; - - if ((c = *find++) != '\0') { - len = strlen(find); - do { - do { - if (slen-- < 1 || (sc = *s++) == '\0') - return (NULL); - } while (sc != c); - if (len > slen) - return (NULL); - } while (strncmp(s, find, len) != 0); - s--; - } - - return ((char *)s); -} - diff --git a/src/mod/asr_tts/mod_openai_tts/Makefile.am b/src/mod/asr_tts/mod_openai_tts/Makefile.am deleted file mode 100644 index c15cee17586..00000000000 --- a/src/mod/asr_tts/mod_openai_tts/Makefile.am +++ /dev/null @@ -1,11 +0,0 @@ - -include $(top_srcdir)/build/modmake.rulesam -MODNAME=mod_openai_tts - -mod_LTLIBRARIES = mod_openai_tts.la -mod_openai_tts_la_SOURCES = mod_openai_tts.c utils.c -mod_openai_tts_la_CFLAGS = $(AM_CFLAGS) -I. 
-mod_openai_tts_la_LIBADD = $(switch_builddir)/libfreeswitch.la -mod_openai_tts_la_LDFLAGS = -avoid-version -module -no-undefined -shared - -$(am_mod_openai_tts_la_OBJECTS): mod_openai_tts.h diff --git a/src/mod/asr_tts/mod_openai_tts/conf/autoload_configs/openai_tts.conf.xml b/src/mod/asr_tts/mod_openai_tts/conf/autoload_configs/openai_tts.conf.xml deleted file mode 100644 index 5ebf5c5d588..00000000000 --- a/src/mod/asr_tts/mod_openai_tts/conf/autoload_configs/openai_tts.conf.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/src/mod/asr_tts/mod_openai_tts/conf/dialplan/example.xml b/src/mod/asr_tts/mod_openai_tts/conf/dialplan/example.xml deleted file mode 100644 index 3829423d2cd..00000000000 --- a/src/mod/asr_tts/mod_openai_tts/conf/dialplan/example.xml +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - diff --git a/src/mod/asr_tts/mod_openai_tts/mod_openai_tts.c b/src/mod/asr_tts/mod_openai_tts/mod_openai_tts.c deleted file mode 100644 index acedadfca16..00000000000 --- a/src/mod/asr_tts/mod_openai_tts/mod_openai_tts.c +++ /dev/null @@ -1,494 +0,0 @@ -/* - * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application - * Copyright (C) 2005-2014, Anthony Minessale II - * - * Version: MPL 1.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. 
- * - * Module Contributor(s): - * Konstantin Alexandrin - * - * - * Provides the ability to use OpenAI TTS service in the Freeswitch - * https://platform.openai.com/docs/guides/text-to-speech - * - * Development repository: - * https://github.com/akscf/mod_openai_tts - * - */ -#include "mod_openai_tts.h" - -static struct { - switch_mutex_t *mutex; - switch_hash_t *models; - char *cache_path; - char *tmp_path; - char *opt_encoding; - char *user_agent; - char *api_url; - char *api_key; - char *proxy; - char *proxy_credentials; - uint32_t file_size_max; - uint32_t request_timeout; // seconds - uint32_t connect_timeout; // seconds - uint8_t fl_voice_name_as_language; - uint8_t fl_log_http_error; - uint8_t fl_cache_enabled; -} globals; - -SWITCH_MODULE_LOAD_FUNCTION(mod_openai_tts_load); -SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_openai_tts_shutdown); -SWITCH_MODULE_DEFINITION(mod_openai_tts, mod_openai_tts_load, mod_openai_tts_shutdown, NULL); - -static tts_model_info_t *tts_model_lookup(const char *lang) { - tts_model_info_t *model = NULL; - - if(!lang) { return NULL; } - - switch_mutex_lock(globals.mutex); - model = switch_core_hash_find(globals.models, lang); - switch_mutex_unlock(globals.mutex); - - return model; -} - -static size_t curl_io_write_callback(char *buffer, size_t size, size_t nitems, void *user_data) { - tts_ctx_t *tts_ctx = (tts_ctx_t *)user_data; - size_t len = (size * nitems); - - if(len > 0 && tts_ctx->curl_recv_buffer) { - switch_buffer_write(tts_ctx->curl_recv_buffer, buffer, len); - } - - return len; -} - -static size_t curl_io_read_callback(char *buffer, size_t size, size_t nitems, void *user_data) { - tts_ctx_t *tts_ctx = (tts_ctx_t *)user_data; - size_t nmax = (size * nitems); - size_t ncur = (tts_ctx->curl_send_buffer_len > nmax) ? 
nmax : tts_ctx->curl_send_buffer_len; - - memmove(buffer, tts_ctx->curl_send_buffer_ref, ncur); - tts_ctx->curl_send_buffer_ref += ncur; - tts_ctx->curl_send_buffer_len -= ncur; - - return ncur; -} - -static switch_status_t curl_perform(tts_ctx_t *tts_ctx, char *text) { - switch_status_t status = SWITCH_STATUS_SUCCESS; - CURL *curl_handle = NULL; - switch_curl_slist_t *headers = NULL; - switch_CURLcode curl_ret = 0; - long http_resp = 0; - const char *voice_local = (tts_ctx->alt_voice ? tts_ctx->alt_voice : tts_ctx->model_info->voice); - const char *model_local = (tts_ctx->alt_model ? tts_ctx->alt_model : tts_ctx->model_info->model); - char *pdata = NULL; - char *qtext = NULL; - - if(text) { - qtext = escape_dquotes(text); - } - pdata = switch_mprintf("{\"model\":\"%s\",\"voice\":\"%s\",\"input\":\"%s\"}\n", - model_local, - voice_local, - qtext ? qtext : "" - ); - -#ifdef OAITTS_DEBUG - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "CURL: URL=[%s], PDATA=[%s]\n", globals.api_url, pdata); -#endif - - tts_ctx->curl_send_buffer_len = strlen(pdata); - tts_ctx->curl_send_buffer_ref = pdata; - - curl_handle = switch_curl_easy_init(); - - headers = switch_curl_slist_append(headers, "Content-Type: application/json; charset=utf-8"); - headers = switch_curl_slist_append(headers, "Expect:"); - - switch_curl_easy_setopt(curl_handle, CURLOPT_HTTPHEADER, headers); - switch_curl_easy_setopt(curl_handle, CURLOPT_POST, 1); - switch_curl_easy_setopt(curl_handle, CURLOPT_NOSIGNAL, 1); - - switch_curl_easy_setopt(curl_handle, CURLOPT_POSTFIELDSIZE, tts_ctx->curl_send_buffer_len); - switch_curl_easy_setopt(curl_handle, CURLOPT_POSTFIELDS, (void *) pdata); - switch_curl_easy_setopt(curl_handle, CURLOPT_READFUNCTION, curl_io_read_callback); - switch_curl_easy_setopt(curl_handle, CURLOPT_READDATA, (void *) tts_ctx); - - switch_curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, curl_io_write_callback); - switch_curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *) 
tts_ctx); - - if(globals.connect_timeout > 0) { - switch_curl_easy_setopt(curl_handle, CURLOPT_CONNECTTIMEOUT, globals.connect_timeout); - } - if(globals.request_timeout > 0) { - switch_curl_easy_setopt(curl_handle, CURLOPT_TIMEOUT, globals.request_timeout); - } - if(globals.user_agent) { - switch_curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, globals.user_agent); - } - - if(strncasecmp(globals.api_url, "https", 5) == 0) { - switch_curl_easy_setopt(curl_handle, CURLOPT_SSL_VERIFYPEER, 0); - switch_curl_easy_setopt(curl_handle, CURLOPT_SSL_VERIFYHOST, 0); - } - if(globals.proxy) { - if(globals.proxy_credentials != NULL) { - switch_curl_easy_setopt(curl_handle, CURLOPT_PROXYAUTH, CURLAUTH_ANY); - switch_curl_easy_setopt(curl_handle, CURLOPT_PROXYUSERPWD, globals.proxy_credentials); - } - if(strncasecmp(globals.proxy, "https", 5) == 0) { - switch_curl_easy_setopt(curl_handle, CURLOPT_PROXY_SSL_VERIFYPEER, 0); - } - switch_curl_easy_setopt(curl_handle, CURLOPT_PROXY, globals.proxy); - } - - curl_easy_setopt(curl_handle, CURLOPT_XOAUTH2_BEARER, globals.api_key); - curl_easy_setopt(curl_handle, CURLOPT_HTTPAUTH, CURLAUTH_BEARER); - - switch_curl_easy_setopt(curl_handle, CURLOPT_URL, globals.api_url); - - curl_ret = switch_curl_easy_perform(curl_handle); - if(!curl_ret) { - switch_curl_easy_getinfo(curl_handle, CURLINFO_RESPONSE_CODE, &http_resp); - if(!http_resp) { switch_curl_easy_getinfo(curl_handle, CURLINFO_HTTP_CONNECTCODE, &http_resp); } - } else { - http_resp = curl_ret; - } - - if(http_resp != 200) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "http-error=[%ld] (%s)\n", http_resp, globals.api_url); - status = SWITCH_STATUS_FALSE; - } - - if(tts_ctx->curl_recv_buffer) { - if(switch_buffer_inuse(tts_ctx->curl_recv_buffer) > 0) { - switch_buffer_write(tts_ctx->curl_recv_buffer, "\0", 1); - } - } - - if(curl_handle) { switch_curl_easy_cleanup(curl_handle); } - if(headers) { switch_curl_slist_free_all(headers); } - - switch_safe_free(pdata); - 
switch_safe_free(qtext); - return status; -} - - -// --------------------------------------------------------------------------------------------------------------------------------------------- -// speech api -// --------------------------------------------------------------------------------------------------------------------------------------------- -static switch_status_t speech_open(switch_speech_handle_t *sh, const char *voice, int samplerate, int channels, switch_speech_flag_t *flags) { - switch_status_t status = SWITCH_STATUS_SUCCESS; - char name_uuid[SWITCH_UUID_FORMATTED_LENGTH + 1] = { 0 }; - tts_ctx_t *tts_ctx = NULL; - - tts_ctx = switch_core_alloc(sh->memory_pool, sizeof(tts_ctx_t)); - tts_ctx->pool = sh->memory_pool; - tts_ctx->fhnd = switch_core_alloc(tts_ctx->pool, sizeof(switch_file_handle_t)); - tts_ctx->language = (globals.fl_voice_name_as_language && voice) ? switch_core_strdup(sh->memory_pool, voice) : NULL; - tts_ctx->channels = channels; - tts_ctx->samplerate = samplerate; - tts_ctx->dst_file = NULL; - - sh->private_info = tts_ctx; - - if(tts_ctx->language) { - tts_ctx->model_info = tts_model_lookup(tts_ctx->language); - } - - if((status = switch_buffer_create_dynamic(&tts_ctx->curl_recv_buffer, 1024, 8192, globals.file_size_max)) != SWITCH_STATUS_SUCCESS) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_buffer_create_dynamic() fail\n"); - goto out; - } - - if(!globals.fl_cache_enabled) { - switch_uuid_str((char *)name_uuid, sizeof(name_uuid)); - tts_ctx->dst_file = switch_core_sprintf(sh->memory_pool, "%s%sopenai-%s.%s", - globals.tmp_path, - SWITCH_PATH_SEPARATOR, - name_uuid, - enc2ext(globals.opt_encoding) - ); - } - -out: - return status; -} - -static switch_status_t speech_close(switch_speech_handle_t *sh, switch_speech_flag_t *flags) { - tts_ctx_t *tts_ctx = (tts_ctx_t *) sh->private_info; - assert(tts_ctx != NULL); - - if(switch_test_flag(tts_ctx->fhnd, SWITCH_FILE_OPEN)) { - switch_core_file_close(tts_ctx->fhnd); 
- } - - if(tts_ctx->curl_recv_buffer) { - switch_buffer_destroy(&tts_ctx->curl_recv_buffer); - } - - if(tts_ctx->dst_file && !globals.fl_cache_enabled) { - unlink(tts_ctx->dst_file); - } - - return SWITCH_STATUS_SUCCESS; -} - -static switch_status_t speech_feed_tts(switch_speech_handle_t *sh, char *text, switch_speech_flag_t *flags) { - tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info; - switch_status_t status = SWITCH_STATUS_SUCCESS; - char digest[SWITCH_MD5_DIGEST_STRING_SIZE + 1] = { 0 }; - const void *ptr = NULL; - uint32_t recv_len = 0; - - assert(tts_ctx != NULL); - - if(!tts_ctx->dst_file) { - switch_md5_string(digest, (void *)text, strlen(text)); - tts_ctx->dst_file = switch_core_sprintf(sh->memory_pool, "%s%s%s.%s", - globals.cache_path, - SWITCH_PATH_SEPARATOR, - digest, - enc2ext(globals.opt_encoding) - ); - } - - if(switch_file_exists(tts_ctx->dst_file, tts_ctx->pool) == SWITCH_STATUS_SUCCESS) { - if((status = switch_core_file_open(tts_ctx->fhnd, tts_ctx->dst_file, tts_ctx->channels, tts_ctx->samplerate, - (SWITCH_FILE_FLAG_READ | SWITCH_FILE_DATA_SHORT), sh->memory_pool)) != SWITCH_STATUS_SUCCESS) { - - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open file: %s\n", tts_ctx->dst_file); - status = SWITCH_STATUS_FALSE; - goto out; - } - } else { - if(tts_ctx->alt_voice == NULL && tts_ctx->model_info == NULL) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "voice not determined\n"); - status = SWITCH_STATUS_FALSE; goto out; - } - if(tts_ctx->alt_model == NULL && tts_ctx->model_info == NULL) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "model not determined\n"); - status = SWITCH_STATUS_FALSE; goto out; - } - - switch_buffer_zero(tts_ctx->curl_recv_buffer); - status = curl_perform(tts_ctx , text); - recv_len = switch_buffer_peek_zerocopy(tts_ctx->curl_recv_buffer, &ptr); - - if(status == SWITCH_STATUS_SUCCESS) { - if((status = write_file(tts_ctx->dst_file, (switch_byte_t *)ptr, recv_len)) == 
SWITCH_STATUS_SUCCESS) { - if((status = switch_core_file_open(tts_ctx->fhnd, tts_ctx->dst_file, tts_ctx->channels, tts_ctx->samplerate, - (SWITCH_FILE_FLAG_READ | SWITCH_FILE_DATA_SHORT), sh->memory_pool)) != SWITCH_STATUS_SUCCESS) { - - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open file: %s\n", tts_ctx->dst_file); - goto out; - } - } - } else { - if(globals.fl_log_http_error && recv_len > 0) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Services response: %s\n", (char *)ptr); - } - } - } -out: - return status; -} - -static switch_status_t speech_read_tts(switch_speech_handle_t *sh, void *data, size_t *data_len, switch_speech_flag_t *flags) { - tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info; - size_t len = (*data_len / sizeof(int16_t)); - - assert(tts_ctx != NULL); - - if(tts_ctx->fhnd->file_interface == NULL) { - return SWITCH_STATUS_FALSE; - } - - if(switch_core_file_read(tts_ctx->fhnd, data, &len) != SWITCH_STATUS_SUCCESS) { - switch_core_file_close(tts_ctx->fhnd); - return SWITCH_STATUS_FALSE; - } - - *data_len = (len * sizeof(int16_t)); - if(!data_len) { - switch_core_file_close(tts_ctx->fhnd); - return SWITCH_STATUS_BREAK; - } - - return SWITCH_STATUS_SUCCESS; -} - -static void speech_flush_tts(switch_speech_handle_t *sh) { - tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info; - - assert(tts_ctx != NULL); - - if(tts_ctx->fhnd != NULL && tts_ctx->fhnd->file_interface != NULL) { - switch_core_file_close(tts_ctx->fhnd); - } -} - -static void speech_text_param_tts(switch_speech_handle_t *sh, char *param, const char *val) { - tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info; - - assert(tts_ctx != NULL); - - if(strcasecmp(param, "voice") == 0) { - if(val) { tts_ctx->alt_voice = switch_core_strdup(sh->memory_pool, val); } - } else if(strcasecmp(param, "model") == 0) { - if(val) { tts_ctx->alt_model = switch_core_strdup(sh->memory_pool, val); } - } -} - -static void speech_numeric_param_tts(switch_speech_handle_t *sh, 
char *param, int val) { -} - -static void speech_float_param_tts(switch_speech_handle_t *sh, char *param, double val) { -} - -// --------------------------------------------------------------------------------------------------------------------------------------------- -// main -// --------------------------------------------------------------------------------------------------------------------------------------------- -SWITCH_MODULE_LOAD_FUNCTION(mod_openai_tts_load) { - switch_status_t status = SWITCH_STATUS_SUCCESS; - switch_xml_t cfg, xml, settings, param, xmodels, xmodel; - switch_speech_interface_t *speech_interface; - - memset(&globals, 0, sizeof(globals)); - switch_mutex_init(&globals.mutex, SWITCH_MUTEX_NESTED, pool); - switch_core_hash_init(&globals.models); - - if((xml = switch_xml_open_cfg(MOD_CONFIG_NAME, &cfg, NULL)) == NULL) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open configuration: %s\n", MOD_CONFIG_NAME); - switch_goto_status(SWITCH_STATUS_GENERR, out); - } - - if((settings = switch_xml_child(cfg, "settings"))) { - for (param = switch_xml_child(settings, "param"); param; param = param->next) { - char *var = (char *) switch_xml_attr_soft(param, "name"); - char *val = (char *) switch_xml_attr_soft(param, "value"); - - if(!strcasecmp(var, "api-url")) { - if(val) globals.api_url = switch_core_strdup(pool, val); - } else if(!strcasecmp(var, "api-key")) { - if(val) globals.api_key = switch_core_strdup(pool, val); - } else if(!strcasecmp(var, "cache-path")) { - if(val) globals.cache_path = switch_core_strdup(pool, val); - } else if(!strcasecmp(var, "encoding")) { - if(val) globals.opt_encoding = switch_core_strdup(pool, val); - } else if(!strcasecmp(var, "user-agent")) { - if(val) globals.user_agent = switch_core_strdup(pool, val); - } else if(!strcasecmp(var, "request-timeout")) { - if(val) globals.request_timeout = atoi(val); - } else if(!strcasecmp(var, "connect-timeout")) { - if(val) globals.connect_timeout = 
atoi(val); - } else if(!strcasecmp(var, "voice-name-as-language")) { - if(val) globals.fl_voice_name_as_language = switch_true(val); - } else if(!strcasecmp(var, "log-http-errors")) { - if(val) globals.fl_log_http_error = switch_true(val); - } else if(!strcasecmp(var, "cache-enable")) { - if(val) globals.fl_cache_enabled = switch_true(val); - } else if(!strcasecmp(var, "file-size-max")) { - if(val) globals.file_size_max = atoi(val); - } else if(!strcasecmp(var, "proxy")) { - if(val) globals.proxy = switch_core_strdup(pool, val); - } else if(!strcasecmp(var, "proxy-credentials")) { - if(val) globals.proxy_credentials = switch_core_strdup(pool, val); - } - } - } - - if((xmodels = switch_xml_child(cfg, "models"))) { - for(xmodel = switch_xml_child(xmodels, "model"); xmodel; xmodel = xmodel->next) { - char *lang = (char *) switch_xml_attr_soft(xmodel, "language"); - char *voice = (char *) switch_xml_attr_soft(xmodel, "voice"); - char *model = (char *) switch_xml_attr_soft(xmodel, "model"); - - tts_model_info_t *model_info = NULL; - - if(!lang || !voice || !model) { continue; } - - if(switch_core_hash_find(globals.models, lang)) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Language '%s' already registered\n", lang); - continue; - } - - if((model_info = switch_core_alloc(pool, sizeof(tts_model_info_t))) == NULL) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_core_alloc()\n"); - switch_goto_status(SWITCH_STATUS_GENERR, out); - } - model_info->lang = switch_core_strdup(pool, lang); - model_info->voice = switch_core_strdup(pool, voice); - model_info->model = switch_core_strdup(pool, model); - - switch_core_hash_insert(globals.models, model_info->lang, model_info); - } - } - - - if(!globals.api_url) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing required parameter: api-url\n"); - switch_goto_status(SWITCH_STATUS_GENERR, out); - } - if(!globals.api_key) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, 
"Missing required parameter: api-key\n"); - switch_goto_status(SWITCH_STATUS_GENERR, out); - } - - globals.tmp_path = SWITCH_GLOBAL_dirs.temp_dir; - globals.cache_path = (globals.cache_path == NULL ? "/tmp/openai-tts-cache" : globals.cache_path); - globals.opt_encoding = (globals.opt_encoding == NULL ? "mp3" : globals.opt_encoding); - globals.file_size_max = globals.file_size_max > 0 ? globals.file_size_max : FILE_SIZE_MAX; - - if(switch_directory_exists(globals.cache_path, NULL) != SWITCH_STATUS_SUCCESS) { - switch_dir_make(globals.cache_path, SWITCH_FPROT_OS_DEFAULT, NULL); - } - - *module_interface = switch_loadable_module_create_module_interface(pool, modname); - speech_interface = switch_loadable_module_create_interface(*module_interface, SWITCH_SPEECH_INTERFACE); - speech_interface->interface_name = "openai"; - - speech_interface->speech_open = speech_open; - speech_interface->speech_close = speech_close; - speech_interface->speech_feed_tts = speech_feed_tts; - speech_interface->speech_read_tts = speech_read_tts; - speech_interface->speech_flush_tts = speech_flush_tts; - - speech_interface->speech_text_param_tts = speech_text_param_tts; - speech_interface->speech_numeric_param_tts = speech_numeric_param_tts; - speech_interface->speech_float_param_tts = speech_float_param_tts; - - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "OpenAI-TTS (%s)\n", MOD_VERSION); -out: - if(xml) { - switch_xml_free(xml); - } - if(status != SWITCH_STATUS_SUCCESS) { - if(globals.models) { - switch_core_hash_destroy(&globals.models); - } - } - return status; -} - -SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_openai_tts_shutdown) { - - if(globals.models) { - switch_core_hash_destroy(&globals.models); - } - - return SWITCH_STATUS_SUCCESS; -} diff --git a/src/mod/asr_tts/mod_openai_tts/mod_openai_tts.h b/src/mod/asr_tts/mod_openai_tts/mod_openai_tts.h deleted file mode 100644 index 1f9d1c53beb..00000000000 --- a/src/mod/asr_tts/mod_openai_tts/mod_openai_tts.h +++ /dev/null @@ -1,59 
+0,0 @@ -/* - * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application - * Copyright (C) 2005-2014, Anthony Minessale II - * - * Version: MPL 1.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * Module Contributor(s): - * Konstantin Alexandrin - * - * - */ -#ifndef MOD_OPENAI_TTS_H -#define MOD_OPENAI_TTS_H - -#include -#include - -#define MOD_VERSION "1.0_apiv1" -#define MOD_CONFIG_NAME "openai_tts.conf" -#define FILE_SIZE_MAX (2*1024*1024) -// #define OAITTS_DEBUG - -typedef struct { - char *lang; - char *voice; - char *model; -} tts_model_info_t; - -typedef struct { - switch_memory_pool_t *pool; - switch_file_handle_t *fhnd; - switch_buffer_t *curl_recv_buffer; - tts_model_info_t *model_info; - char *curl_send_buffer_ref; - char *language; - char *alt_voice; - char *alt_model; - char *dst_file; - uint32_t samplerate; - uint32_t channels; - size_t curl_send_buffer_len; -} tts_ctx_t; - -char *enc2ext(const char *fmt); -char *escape_dquotes(const char *string); - -switch_status_t write_file(char *file_name, switch_byte_t *buf, uint32_t buf_len); - -#endif diff --git a/src/mod/asr_tts/mod_openai_tts/utils.c b/src/mod/asr_tts/mod_openai_tts/utils.c deleted file mode 100644 index df16d332c92..00000000000 --- a/src/mod/asr_tts/mod_openai_tts/utils.c +++ /dev/null @@ -1,85 +0,0 @@ -/* - * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application - * Copyright (C) 2005-2014, Anthony Minessale II - * - * Version: MPL 1.1 - * - * The contents of this file are subject to the Mozilla 
Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * Module Contributor(s): - * Konstantin Alexandrin - * - * - */ -#include "mod_openai_tts.h" - -char *enc2ext(const char *fmt) { - if(strcasecmp(fmt, "mp3") == 0) { return "mp3"; } - return (char *)fmt; -} - -char *escape_dquotes(const char *string) { - size_t string_len = strlen(string); - size_t i; - size_t n = 0; - size_t dest_len = 0; - char *dest; - - dest_len = strlen(string) + 1; - for (i = 0; i < string_len; i++) { - switch (string[i]) { - case '\"': dest_len += 1; break; - } - } - - dest = (char *) malloc(sizeof(char) * dest_len); - switch_assert(dest); - - for (i = 0; i < string_len; i++) { - switch (string[i]) { - case '\"': - dest[n++] = '\\'; - dest[n++] = '\"'; - break; - default: - dest[n++] = string[i]; - } - } - dest[n++] = '\0'; - - switch_assert(n == dest_len); - return dest; -} - -switch_status_t write_file(char *file_name, switch_byte_t *buf, uint32_t buf_len) { - switch_status_t status = SWITCH_STATUS_SUCCESS; - switch_memory_pool_t *pool = NULL; - switch_size_t len = buf_len; - switch_file_t *fd = NULL; - - if(switch_core_new_memory_pool(&pool) != SWITCH_STATUS_SUCCESS) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_core_new_memory_pool() fail\n"); - switch_goto_status(SWITCH_STATUS_GENERR, out); - } - if((status = switch_file_open(&fd, file_name, (SWITCH_FOPEN_WRITE | SWITCH_FOPEN_TRUNCATE | SWITCH_FOPEN_CREATE), SWITCH_FPROT_OS_DEFAULT, pool)) != SWITCH_STATUS_SUCCESS) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Open fail: %s\n", file_name); - goto out; - } - if((status 
= switch_file_write(fd, buf, &len)) != SWITCH_STATUS_SUCCESS) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Write fail (%s)\n", file_name); - } - switch_file_close(fd); -out: - if(pool) { - switch_core_destroy_memory_pool(&pool); - } - return status; -} diff --git a/src/mod/asr_tts/mod_piper_tts/Makefile.am b/src/mod/asr_tts/mod_piper_tts/Makefile.am deleted file mode 100644 index 013d7df268c..00000000000 --- a/src/mod/asr_tts/mod_piper_tts/Makefile.am +++ /dev/null @@ -1,11 +0,0 @@ - -include $(top_srcdir)/build/modmake.rulesam -MODNAME=mod_piper_tts - -mod_LTLIBRARIES = mod_piper_tts.la -mod_piper_tts_la_SOURCES = mod_piper_tts.c -mod_piper_tts_la_CFLAGS = $(AM_CFLAGS) -I. -mod_piper_tts_la_LIBADD = $(switch_builddir)/libfreeswitch.la -mod_piper_tts_la_LDFLAGS = -avoid-version -module -no-undefined -shared - -$(am_mod_piper_tts_la_OBJECTS): mod_piper_tts.h diff --git a/src/mod/asr_tts/mod_piper_tts/conf/autoload_configs/piper_tts.conf.xml b/src/mod/asr_tts/mod_piper_tts/conf/autoload_configs/piper_tts.conf.xml deleted file mode 100644 index 76d52c24f89..00000000000 --- a/src/mod/asr_tts/mod_piper_tts/conf/autoload_configs/piper_tts.conf.xml +++ /dev/null @@ -1,22 +0,0 @@ - - - - - - - - - - - - - - - - - - - - diff --git a/src/mod/asr_tts/mod_piper_tts/conf/dialplan/example.xml b/src/mod/asr_tts/mod_piper_tts/conf/dialplan/example.xml deleted file mode 100644 index e4e09e41f63..00000000000 --- a/src/mod/asr_tts/mod_piper_tts/conf/dialplan/example.xml +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - diff --git a/src/mod/asr_tts/mod_piper_tts/mod_piper_tts.c b/src/mod/asr_tts/mod_piper_tts/mod_piper_tts.c deleted file mode 100644 index 0fd10560845..00000000000 --- a/src/mod/asr_tts/mod_piper_tts/mod_piper_tts.c +++ /dev/null @@ -1,339 +0,0 @@ -/* - * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application - * Copyright (C) 2005-2014, Anthony Minessale II - * - * Version: MPL 1.1 - * - * The contents of this file are subject to 
the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * Module Contributor(s): - * Konstantin Alexandrin - * - * - * Provides the ability to use PIPER TTS in the Freeswitch - * https://github.com/rhasspy/piper - * - * - * Development repository: - * https://github.com/akscf/mod_piper_tts - * - */ -#include "mod_piper_tts.h" - -static piper_globals_t globals; - -SWITCH_MODULE_LOAD_FUNCTION(mod_piper_tts_load); -SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_piper_tts_shutdown); -SWITCH_MODULE_DEFINITION(mod_piper_tts, mod_piper_tts_load, mod_piper_tts_shutdown, NULL); - - -static piper_model_info_t *piper_lookup_model(const char *lang) { - piper_model_info_t *model = NULL; - - if(!lang) { - return NULL; - } - - switch_mutex_lock(globals.mutex); - model = switch_core_hash_find(globals.models, lang); - switch_mutex_unlock(globals.mutex); - - return model; -} - -static switch_status_t speech_open(switch_speech_handle_t *sh, const char *voice, int samplerate, int channels, switch_speech_flag_t *flags) { - char name_uuid[SWITCH_UUID_FORMATTED_LENGTH + 1] = { 0 }; - switch_status_t status = SWITCH_STATUS_SUCCESS; - tts_ctx_t *tts_ctx = NULL; - - tts_ctx = switch_core_alloc(sh->memory_pool, sizeof(tts_ctx_t)); - tts_ctx->pool = sh->memory_pool; - tts_ctx->fhnd = switch_core_alloc(tts_ctx->pool, sizeof(switch_file_handle_t)); - tts_ctx->voice = switch_core_strdup(tts_ctx->pool, voice); - tts_ctx->language = (globals.fl_voice_as_language && voice ? 
switch_core_strdup(sh->memory_pool, voice) : "en"); - tts_ctx->channels = channels; - tts_ctx->samplerate = samplerate; - - sh->private_info = tts_ctx; - - if(tts_ctx->language) { - tts_ctx->model_info = piper_lookup_model(tts_ctx->language); - if(!tts_ctx->model_info) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Language '%s' not registered!\n", tts_ctx->language); - switch_goto_status(SWITCH_STATUS_FALSE, out); - } - } - - if(!globals.fl_cache_enabled) { - switch_uuid_str((char *)name_uuid, sizeof(name_uuid)); - tts_ctx->dst_fname = switch_core_sprintf(sh->memory_pool, "%s%spiper-%s.%s", - globals.tmp_path, - SWITCH_PATH_SEPARATOR, - name_uuid, - PIPER_FILE_ENCODING - ); - } - -out: - return status; -} - -static switch_status_t speech_close(switch_speech_handle_t *sh, switch_speech_flag_t *flags) { - tts_ctx_t *tts_ctx = (tts_ctx_t *) sh->private_info; - assert(tts_ctx != NULL); - - if(switch_test_flag(tts_ctx->fhnd, SWITCH_FILE_OPEN)) { - switch_core_file_close(tts_ctx->fhnd); - } - - if(tts_ctx->dst_fname && !globals.fl_cache_enabled) { - unlink(tts_ctx->dst_fname); - } - - return SWITCH_STATUS_SUCCESS; -} - -static switch_status_t speech_feed_tts(switch_speech_handle_t *sh, char *text, switch_speech_flag_t *flags) { - tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info; - char digest[SWITCH_MD5_DIGEST_STRING_SIZE + 1] = { 0 }; - switch_status_t status = SWITCH_STATUS_SUCCESS; - - assert(tts_ctx != NULL); - - if(!tts_ctx->dst_fname) { - switch_md5_string(digest, (void *)text, strlen(text)); - tts_ctx->dst_fname = switch_core_sprintf(sh->memory_pool, "%s%s%s.%s", - globals.cache_path, - SWITCH_PATH_SEPARATOR, - digest, - PIPER_FILE_ENCODING - ); - } - - if(switch_file_exists(tts_ctx->dst_fname, tts_ctx->pool) == SWITCH_STATUS_SUCCESS) { - if((status = switch_core_file_open(tts_ctx->fhnd, tts_ctx->dst_fname, tts_ctx->channels, tts_ctx->samplerate, - (SWITCH_FILE_FLAG_READ | SWITCH_FILE_DATA_SHORT), sh->memory_pool)) != SWITCH_STATUS_SUCCESS) { - - 
switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open file: %s\n", tts_ctx->dst_fname); - switch_goto_status(SWITCH_STATUS_FALSE, out); - } - } else { - char *cmd = NULL; - char *textq = NULL; - - if(!tts_ctx->model_info) { - if(tts_ctx->language) { - tts_ctx->model_info = piper_lookup_model(tts_ctx->language); - } - if(!tts_ctx->model_info) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to lookup the model for lang: %s\n", tts_ctx->language); - switch_goto_status(SWITCH_STATUS_FALSE, out); - } - } - - textq = switch_util_quote_shell_arg(text); - cmd = switch_mprintf("echo %s | %s %s --model '%s' --output_file '%s'", - textq, globals.piper_bin, - globals.piper_opts ? globals.piper_opts : "", - tts_ctx->model_info->model, - tts_ctx->dst_fname - ); - -#ifdef PIPER_DEBUG - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "PIPER-CMD: [%s]\n", cmd); -#endif - - if(switch_system(cmd, SWITCH_TRUE)) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to perform cmd: %s\n", cmd); - status = SWITCH_STATUS_FALSE; - } - - switch_safe_free(textq); - switch_safe_free(cmd); - - if(status == SWITCH_STATUS_SUCCESS) { - if(switch_file_exists(tts_ctx->dst_fname, tts_ctx->pool) == SWITCH_STATUS_SUCCESS) { - if((status = switch_core_file_open(tts_ctx->fhnd, tts_ctx->dst_fname, tts_ctx->channels, tts_ctx->samplerate, - (SWITCH_FILE_FLAG_READ | SWITCH_FILE_DATA_SHORT), sh->memory_pool)) != SWITCH_STATUS_SUCCESS) { - - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open file: %s\n", tts_ctx->dst_fname); - switch_goto_status(SWITCH_STATUS_FALSE, out); - } - } else { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "File not found: %s\n", tts_ctx->dst_fname); - switch_goto_status(SWITCH_STATUS_FALSE, out); - } - } - } - -out: - return status; -} - -static switch_status_t speech_read_tts(switch_speech_handle_t *sh, void *data, size_t *data_len, switch_speech_flag_t *flags) { - tts_ctx_t *tts_ctx = 
(tts_ctx_t *) sh->private_info; - size_t len = (*data_len / sizeof(int16_t)); - - assert(tts_ctx != NULL); - - if(tts_ctx->fhnd->file_interface == NULL) { - return SWITCH_STATUS_FALSE; - } - - if(switch_core_file_read(tts_ctx->fhnd, data, &len) != SWITCH_STATUS_SUCCESS) { - switch_core_file_close(tts_ctx->fhnd); - return SWITCH_STATUS_FALSE; - } - - *data_len = (len * sizeof(int16_t)); - if(!data_len) { - switch_core_file_close(tts_ctx->fhnd); - return SWITCH_STATUS_BREAK; - } - - return SWITCH_STATUS_SUCCESS; -} - -static void speech_flush_tts(switch_speech_handle_t *sh) { - tts_ctx_t *tts_ctx = (tts_ctx_t *)sh->private_info; - - assert(tts_ctx != NULL); - - if(tts_ctx->fhnd != NULL && tts_ctx->fhnd->file_interface != NULL) { - switch_core_file_close(tts_ctx->fhnd); - } -} - -static void speech_text_param_tts(switch_speech_handle_t *sh, char *param, const char *val) { - tts_ctx_t *tts_ctx = (tts_ctx_t *) sh->private_info; - - assert(tts_ctx != NULL); - - if(strcasecmp(param, "lang") == 0) { - if(val) { tts_ctx->language = switch_core_strdup(sh->memory_pool, val); } - } else if(strcasecmp(param, "voice") == 0) { - if(val) { tts_ctx->voice = switch_core_strdup(sh->memory_pool, val); } - } -} - -static void speech_numeric_param_tts(switch_speech_handle_t *sh, char *param, int val) { -} - -static void speech_float_param_tts(switch_speech_handle_t *sh, char *param, double val) { -} - -// --------------------------------------------------------------------------------------------------------------------------------------------- -// main -// --------------------------------------------------------------------------------------------------------------------------------------------- -SWITCH_MODULE_LOAD_FUNCTION(mod_piper_tts_load) { - switch_status_t status = SWITCH_STATUS_SUCCESS; - switch_xml_t cfg, xml, settings, param, xmodels, xmodel; - switch_speech_interface_t *speech_interface; - - memset(&globals, 0, sizeof(globals)); - switch_mutex_init(&globals.mutex, 
SWITCH_MUTEX_NESTED, pool); - switch_core_hash_init(&globals.models); - - if((xml = switch_xml_open_cfg(MOD_CONFIG_NAME, &cfg, NULL)) == NULL) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open configuration: %s\n", MOD_CONFIG_NAME); - switch_goto_status(SWITCH_STATUS_GENERR, out); - } - - if((settings = switch_xml_child(cfg, "settings"))) { - for(param = switch_xml_child(settings, "param"); param; param = param->next) { - char *var = (char *) switch_xml_attr_soft(param, "name"); - char *val = (char *) switch_xml_attr_soft(param, "value"); - - if(!strcasecmp(var, "cache-path")) { - if(val) globals.cache_path = switch_core_strdup(pool, val); - } else if(!strcasecmp(var, "piper-bin")) { - if(val) globals.piper_bin = switch_core_strdup(pool, val); - } else if(!strcasecmp(var, "piper-opts")) { - if(val) globals.piper_opts = switch_core_strdup(pool, val); - } else if(!strcasecmp(var, "voice-name-as-language")) { - if(val) globals.fl_voice_as_language = switch_true(val); - } else if(!strcasecmp(var, "cache-enable")) { - if(val) globals.fl_cache_enabled = switch_true(val); - } - } - } - - if((xmodels = switch_xml_child(cfg, "models"))) { - for(xmodel = switch_xml_child(xmodels, "model"); xmodel; xmodel = xmodel->next) { - char *lang = (char *) switch_xml_attr_soft(xmodel, "language"); - char *model = (char *) switch_xml_attr_soft(xmodel, "model"); - piper_model_info_t *model_info = NULL; - - if(!lang || !model) { continue; } - - if(switch_core_hash_find(globals.models, lang)) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Language '%s' already registered\n", lang); - continue; - } - - if((model_info = switch_core_alloc(pool, sizeof(piper_model_info_t))) == NULL) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_core_alloc()\n"); - switch_goto_status(SWITCH_STATUS_GENERR, out); - } - model_info->lang = switch_core_strdup(pool, lang); - model_info->model = switch_core_strdup(pool, model); - - 
switch_core_hash_insert(globals.models, model_info->lang, model_info); - } - } - - if(!globals.piper_bin) { - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "piper-bin - not determined!\n"); - switch_goto_status(SWITCH_STATUS_FALSE, out); - } - - globals.tmp_path = SWITCH_GLOBAL_dirs.temp_dir; - globals.cache_path = (globals.cache_path == NULL ? "/tmp/piper-tts-cache" : globals.cache_path); - - if(switch_directory_exists(globals.cache_path, NULL) != SWITCH_STATUS_SUCCESS) { - switch_dir_make(globals.cache_path, SWITCH_FPROT_OS_DEFAULT, NULL); - } - - *module_interface = switch_loadable_module_create_module_interface(pool, modname); - speech_interface = switch_loadable_module_create_interface(*module_interface, SWITCH_SPEECH_INTERFACE); - speech_interface->interface_name = "piper"; - - speech_interface->speech_open = speech_open; - speech_interface->speech_close = speech_close; - speech_interface->speech_feed_tts = speech_feed_tts; - speech_interface->speech_read_tts = speech_read_tts; - speech_interface->speech_flush_tts = speech_flush_tts; - - speech_interface->speech_text_param_tts = speech_text_param_tts; - speech_interface->speech_float_param_tts = speech_float_param_tts; - speech_interface->speech_numeric_param_tts = speech_numeric_param_tts; - - switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "PiperTTS (%s)\n", MOD_VERSION); -out: - if(xml) { - switch_xml_free(xml); - } - if(status != SWITCH_STATUS_SUCCESS) { - if(globals.models) { - switch_core_hash_destroy(&globals.models); - } - } - return status; -} - -SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_piper_tts_shutdown) { - - if(globals.models) { - switch_core_hash_destroy(&globals.models); - } - - return SWITCH_STATUS_SUCCESS; -} diff --git a/src/mod/asr_tts/mod_piper_tts/mod_piper_tts.h b/src/mod/asr_tts/mod_piper_tts/mod_piper_tts.h deleted file mode 100644 index ea6c1b9de1d..00000000000 --- a/src/mod/asr_tts/mod_piper_tts/mod_piper_tts.h +++ /dev/null @@ -1,60 +0,0 @@ -/* - * FreeSWITCH Modular 
Media Switching Software Library / Soft-Switch Application - * Copyright (C) 2005-2014, Anthony Minessale II - * - * Version: MPL 1.1 - * - * The contents of this file are subject to the Mozilla Public License Version - * 1.1 (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * http://www.mozilla.org/MPL/ - * - * Software distributed under the License is distributed on an "AS IS" basis, - * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License - * for the specific language governing rights and limitations under the - * License. - * - * Module Contributor(s): - * Konstantin Alexandrin - * - * - */ -#ifndef MOD_PIPER_TTS_H -#define MOD_PIPER_TTS_H - -#include - -#define MOD_VERSION "1.0" -#define MOD_CONFIG_NAME "piper_tts.conf" -#define PIPER_FILE_ENCODING "wav" -// #define PIPER_DEBUG - -typedef struct { - switch_mutex_t *mutex; - switch_hash_t *models; - const char *tmp_path; - const char *cache_path; - const char *piper_bin; - const char *piper_opts; - uint8_t fl_cache_enabled; - uint8_t fl_voice_as_language; -} piper_globals_t; - -typedef struct { - char *lang; - char *model; -} piper_model_info_t; - -typedef struct { - piper_model_info_t *model_info; - switch_memory_pool_t *pool; - switch_file_handle_t *fhnd; - char *language; - char *voice; - char *dst_fname; - uint32_t samplerate; - uint32_t channels; -} tts_ctx_t; - - -#endif From 4bf81d34584795bf3e9a9e8d67b9e19c7654ea1a Mon Sep 17 00:00:00 2001 From: aks Date: Sun, 18 May 2025 16:06:38 +0400 Subject: [PATCH 6/6] added: mod_google_asr, mod_openai_asr --- build/modules.conf.in | 5 +- configure.ac | 5 +- src/mod/asr_tts/mod_google_asr/Makefile.am | 12 + .../conf/autoload_configs/google_asr.conf.xml | 42 + .../mod_google_asr/conf/dialplan/dialplan.xml | 11 + src/mod/asr_tts/mod_google_asr/curl.c | 129 +++ .../asr_tts/mod_google_asr/mod_google_asr.c | 792 ++++++++++++++++++ .../asr_tts/mod_google_asr/mod_google_asr.h 
| 149 ++++ src/mod/asr_tts/mod_google_asr/utils.c | 171 ++++ src/mod/asr_tts/mod_openai_asr/Makefile.am | 12 + .../conf/autoload_configs/openai_asr.conf.xml | 26 + .../mod_openai_asr/conf/dialplan/dialplan.xml | 8 + src/mod/asr_tts/mod_openai_asr/curl.c | 127 +++ .../asr_tts/mod_openai_asr/mod_openai_asr.c | 729 ++++++++++++++++ .../asr_tts/mod_openai_asr/mod_openai_asr.h | 110 +++ src/mod/asr_tts/mod_openai_asr/utils.c | 149 ++++ 16 files changed, 2471 insertions(+), 6 deletions(-) create mode 100644 src/mod/asr_tts/mod_google_asr/Makefile.am create mode 100644 src/mod/asr_tts/mod_google_asr/conf/autoload_configs/google_asr.conf.xml create mode 100644 src/mod/asr_tts/mod_google_asr/conf/dialplan/dialplan.xml create mode 100644 src/mod/asr_tts/mod_google_asr/curl.c create mode 100644 src/mod/asr_tts/mod_google_asr/mod_google_asr.c create mode 100644 src/mod/asr_tts/mod_google_asr/mod_google_asr.h create mode 100644 src/mod/asr_tts/mod_google_asr/utils.c create mode 100644 src/mod/asr_tts/mod_openai_asr/Makefile.am create mode 100644 src/mod/asr_tts/mod_openai_asr/conf/autoload_configs/openai_asr.conf.xml create mode 100644 src/mod/asr_tts/mod_openai_asr/conf/dialplan/dialplan.xml create mode 100644 src/mod/asr_tts/mod_openai_asr/curl.c create mode 100644 src/mod/asr_tts/mod_openai_asr/mod_openai_asr.c create mode 100644 src/mod/asr_tts/mod_openai_asr/mod_openai_asr.h create mode 100644 src/mod/asr_tts/mod_openai_asr/utils.c diff --git a/build/modules.conf.in b/build/modules.conf.in index 13838e8a635..c2e0d7c08b4 100755 --- a/build/modules.conf.in +++ b/build/modules.conf.in @@ -47,9 +47,8 @@ applications/mod_voicemail #asr_tts/mod_flite #asr_tts/mod_pocketsphinx #asr_tts/mod_tts_commandline -#asr_tts/mod_google_tts -#asr_tts/mod_openai_tts -#asr_tts/mod_piper_tts +#asr_tts/mod_google_asr +#asr_tts/mod_openai_asr codecs/mod_amr #codecs/mod_amrwb codecs/mod_b64 diff --git a/configure.ac b/configure.ac index 8ee57af16dd..c9c807f8d4b 100755 --- a/configure.ac +++ 
b/configure.ac @@ -2016,9 +2016,8 @@ AC_CONFIG_FILES([Makefile src/mod/asr_tts/mod_flite/Makefile src/mod/asr_tts/mod_pocketsphinx/Makefile src/mod/asr_tts/mod_tts_commandline/Makefile - src/mod/asr_tts/mod_google_tts/Makefile - src/mod/asr_tts/mod_openai_tts/Makefile - src/mod/asr_tts/mod_piper_tts/Makefile + src/mod/asr_tts/mod_google_asr/Makefile + src/mod/asr_tts/mod_openai_asr/Makefile src/mod/codecs/mod_amr/Makefile src/mod/codecs/mod_amrwb/Makefile src/mod/codecs/mod_b64/Makefile diff --git a/src/mod/asr_tts/mod_google_asr/Makefile.am b/src/mod/asr_tts/mod_google_asr/Makefile.am new file mode 100644 index 00000000000..a829a855c9c --- /dev/null +++ b/src/mod/asr_tts/mod_google_asr/Makefile.am @@ -0,0 +1,12 @@ + +include $(top_srcdir)/build/modmake.rulesam + +MODNAME=mod_google_asr +mod_LTLIBRARIES = mod_google_asr.la +mod_google_asr_la_SOURCES = mod_google_asr.c utils.c curl.c +mod_google_asr_la_CFLAGS = $(AM_CFLAGS) -I. -Wno-pointer-arith +mod_google_asr_la_LIBADD = $(switch_builddir)/libfreeswitch.la +mod_google_asr_la_LDFLAGS = -avoid-version -module -no-undefined -shared + +$(am_mod_google_asr_la_OBJECTS): mod_google_asr.h + diff --git a/src/mod/asr_tts/mod_google_asr/conf/autoload_configs/google_asr.conf.xml b/src/mod/asr_tts/mod_google_asr/conf/autoload_configs/google_asr.conf.xml new file mode 100644 index 00000000000..76772fa7776 --- /dev/null +++ b/src/mod/asr_tts/mod_google_asr/conf/autoload_configs/google_asr.conf.xml @@ -0,0 +1,42 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/mod/asr_tts/mod_google_asr/conf/dialplan/dialplan.xml b/src/mod/asr_tts/mod_google_asr/conf/dialplan/dialplan.xml new file mode 100644 index 00000000000..121cc2b13d4 --- /dev/null +++ b/src/mod/asr_tts/mod_google_asr/conf/dialplan/dialplan.xml @@ -0,0 +1,11 @@ + + + + + + + + + + + diff --git a/src/mod/asr_tts/mod_google_asr/curl.c b/src/mod/asr_tts/mod_google_asr/curl.c new file mode 100644 index 
00000000000..636a49c59c6 --- /dev/null +++ b/src/mod/asr_tts/mod_google_asr/curl.c @@ -0,0 +1,129 @@ +/* + * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application + * Copyright (C) 2005-2014, Anthony Minessale II + * + * Version: MPL 1.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * Module Contributor(s): + * Konstantin Alexandrin + * + * + */ +#include "mod_google_asr.h" + +static size_t curl_io_write_callback(char *buffer, size_t size, size_t nitems, void *user_data) { + asr_ctx_t *asr_ctx = (asr_ctx_t *)user_data; + size_t len = (size * nitems); + + if(len > 0 && asr_ctx->curl_recv_buffer_ref) { + switch_buffer_write(asr_ctx->curl_recv_buffer_ref, buffer, len); + } + + return len; +} + +static size_t curl_io_read_callback(char *buffer, size_t size, size_t nitems, void *user_data) { + asr_ctx_t *asr_ctx = (asr_ctx_t *)user_data; + size_t nmax = (size * nitems); + size_t ncur = (asr_ctx->curl_send_buffer_len > nmax) ? 
nmax : asr_ctx->curl_send_buffer_len; + + if(ncur > 0) { + memmove(buffer, asr_ctx->curl_send_buffer_ref, ncur); + asr_ctx->curl_send_buffer_ref += ncur; + asr_ctx->curl_send_buffer_len -= ncur; + } + + return ncur; +} + +switch_status_t curl_perform(asr_ctx_t *asr_ctx, globals_t *globals) { + switch_status_t status = SWITCH_STATUS_SUCCESS; + CURL *curl_handle = NULL; + switch_curl_slist_t *headers = NULL; + char *epurl = NULL; + switch_CURLcode curl_ret = 0; + long http_resp = 0; + + if(asr_ctx->api_key) { + epurl = switch_string_replace(globals->api_url, "${api-key}", asr_ctx->api_key); + } else { + epurl = strdup(globals->api_url); + } + + curl_handle = switch_curl_easy_init(); + headers = switch_curl_slist_append(headers, "Content-Type: application/json; charset=utf-8"); + + switch_curl_easy_setopt(curl_handle, CURLOPT_HTTPHEADER, headers); + switch_curl_easy_setopt(curl_handle, CURLOPT_POST, 1); + switch_curl_easy_setopt(curl_handle, CURLOPT_NOSIGNAL, 1); + switch_curl_easy_setopt(curl_handle, CURLOPT_READFUNCTION, curl_io_read_callback); + switch_curl_easy_setopt(curl_handle, CURLOPT_READDATA, (void *)asr_ctx); + switch_curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, curl_io_write_callback); + switch_curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)asr_ctx); + + if(globals->connect_timeout > 0) { + switch_curl_easy_setopt(curl_handle, CURLOPT_CONNECTTIMEOUT, globals->connect_timeout); + } + if(globals->request_timeout > 0) { + switch_curl_easy_setopt(curl_handle, CURLOPT_TIMEOUT, globals->request_timeout); + } + if(globals->user_agent) { + switch_curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, globals->user_agent); + } + if(strncasecmp(epurl, "https", 5) == 0) { + switch_curl_easy_setopt(curl_handle, CURLOPT_SSL_VERIFYPEER, 0); + switch_curl_easy_setopt(curl_handle, CURLOPT_SSL_VERIFYHOST, 0); + } + if(globals->proxy) { + if(globals->proxy_credentials != NULL) { + switch_curl_easy_setopt(curl_handle, CURLOPT_PROXYAUTH, CURLAUTH_ANY); + 
switch_curl_easy_setopt(curl_handle, CURLOPT_PROXYUSERPWD, globals->proxy_credentials); + } + if(strncasecmp(globals->proxy, "https", 5) == 0) { + switch_curl_easy_setopt(curl_handle, CURLOPT_PROXY_SSL_VERIFYPEER, 0); + } + switch_curl_easy_setopt(curl_handle, CURLOPT_PROXY, globals->proxy); + } + + switch_curl_easy_setopt(curl_handle, CURLOPT_URL, epurl); + + curl_ret = switch_curl_easy_perform(curl_handle); + if(!curl_ret) { + switch_curl_easy_getinfo(curl_handle, CURLINFO_RESPONSE_CODE, &http_resp); + if(!http_resp) { switch_curl_easy_getinfo(curl_handle, CURLINFO_HTTP_CONNECTCODE, &http_resp); } + } else { + http_resp = curl_ret; + } + + if(http_resp != 200) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "http-error=[%ld] (%s)\n", http_resp, globals->api_url); + status = SWITCH_STATUS_FALSE; + } + + if(asr_ctx->curl_recv_buffer_ref) { + if(switch_buffer_inuse(asr_ctx->curl_recv_buffer_ref) > 0) { + switch_buffer_write(asr_ctx->curl_recv_buffer_ref, "\0", 1); + } + } + + if(curl_handle) { + switch_curl_easy_cleanup(curl_handle); + } + + if(headers) { + switch_curl_slist_free_all(headers); + } + + switch_safe_free(epurl); + return status; +} diff --git a/src/mod/asr_tts/mod_google_asr/mod_google_asr.c b/src/mod/asr_tts/mod_google_asr/mod_google_asr.c new file mode 100644 index 00000000000..736577b26be --- /dev/null +++ b/src/mod/asr_tts/mod_google_asr/mod_google_asr.c @@ -0,0 +1,792 @@ +/* + * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application + * Copyright (C) 2005-2014, Anthony Minessale II + * + * Version: MPL 1.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. 
See the License + * for the specific language governing rights and limitations under the + * License. + * + * Module Contributor(s): + * Konstantin Alexandrin + * + * + * Google Speech-To-Text service for the Freeswitch. + * https://cloud.google.com/speech-to-text/docs/reference/rest + * + * Development repository: + * https://github.com/akscf/mod_google_asr + * + */ +#include "mod_google_asr.h" + +globals_t globals; + +SWITCH_MODULE_LOAD_FUNCTION(mod_google_asr_load); +SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_google_asr_shutdown); +SWITCH_MODULE_DEFINITION(mod_google_asr, mod_google_asr_load, mod_google_asr_shutdown, NULL); + + +static void *SWITCH_THREAD_FUNC transcribe_thread(switch_thread_t *thread, void *obj) { + volatile asr_ctx_t *_ref = (asr_ctx_t *)obj; + asr_ctx_t *asr_ctx = (asr_ctx_t *)_ref; + switch_status_t status = SWITCH_STATUS_FALSE; + switch_byte_t *base64_buffer = NULL; + switch_byte_t *curl_send_buffer = NULL; + switch_buffer_t *chunk_buffer = NULL; + switch_buffer_t *curl_recv_buffer = NULL; + switch_memory_pool_t *pool = NULL; + time_t speech_timeout = 0; + uint32_t base64_buffer_size = 0, chunk_buffer_size = 0, recv_len = 0; + uint32_t schunks = 0; + uint8_t fl_cbuff_overflow = SWITCH_FALSE; + const void *curl_recv_buffer_ptr = NULL; + void *pop = NULL; + + switch_mutex_lock(asr_ctx->mutex); + asr_ctx->refs++; + switch_mutex_unlock(asr_ctx->mutex); + + if(switch_core_new_memory_pool(&pool) != SWITCH_STATUS_SUCCESS) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "switch_core_new_memory_pool()\n"); + goto out; + } + if(switch_buffer_create_dynamic(&curl_recv_buffer, 1024, 4096, 32648) != SWITCH_STATUS_SUCCESS) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_buffer_create_dynamic()\n"); + goto out; + } + + while(SWITCH_TRUE) { + if(globals.fl_shutdown || asr_ctx->fl_destroyed ) { + break; + } + + if(chunk_buffer_size == 0) { + switch_mutex_lock(asr_ctx->mutex); + chunk_buffer_size = asr_ctx->chunk_buffer_size; + 
switch_mutex_unlock(asr_ctx->mutex); + + if(chunk_buffer_size > 0) { + if(switch_buffer_create(pool, &chunk_buffer, chunk_buffer_size) != SWITCH_STATUS_SUCCESS) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "switch_buffer_create()\n"); + break; + } + switch_buffer_zero(chunk_buffer); + } + + goto timer_next; + } + + fl_cbuff_overflow = SWITCH_FALSE; + while(switch_queue_trypop(asr_ctx->q_audio, &pop) == SWITCH_STATUS_SUCCESS) { + xdata_buffer_t *audio_buffer = (xdata_buffer_t *)pop; + if(globals.fl_shutdown || asr_ctx->fl_destroyed ) { + xdata_buffer_free(&audio_buffer); + break; + } + if(audio_buffer && audio_buffer->len) { + if(switch_buffer_write(chunk_buffer, audio_buffer->data, audio_buffer->len) >= chunk_buffer_size) { + fl_cbuff_overflow = SWITCH_TRUE; + break; + } + schunks++; + } + xdata_buffer_free(&audio_buffer); + } + + if(fl_cbuff_overflow) { + speech_timeout = 1; + } else { + if(schunks && asr_ctx->vad_state == SWITCH_VAD_STATE_STOP_TALKING) { + if(!speech_timeout) { + speech_timeout = asr_ctx->silence_sec + switch_epoch_time_now(NULL); + } + } + if(speech_timeout && (asr_ctx->vad_state == SWITCH_VAD_STATE_START_TALKING || asr_ctx->vad_state == SWITCH_VAD_STATE_TALKING)) { + speech_timeout = 0; + } + } + + if(speech_timeout && speech_timeout <= switch_epoch_time_now(NULL)) { + const void *chunk_buffer_ptr = NULL; + uint32_t buf_len = switch_buffer_peek_zerocopy(chunk_buffer, &chunk_buffer_ptr); + uint32_t b64_len = BASE64_ENC_SZ(buf_len) + 1; + uint32_t stt_failed = 0; + + if(base64_buffer_size == 0 || base64_buffer_size < b64_len) { + if(base64_buffer_size > 0) { switch_safe_free(base64_buffer); } + switch_zmalloc(base64_buffer, b64_len); + base64_buffer_size = b64_len; + } else { + memset(base64_buffer, 0x0, b64_len); + } + + if(switch_b64_encode((uint8_t *)chunk_buffer_ptr, buf_len, base64_buffer, base64_buffer_size) == SWITCH_STATUS_SUCCESS) { + curl_send_buffer = (switch_byte_t *)switch_mprintf( "{'config':{" \ + 
"'languageCode':'%s', 'encoding':'%s', 'sampleRateHertz':'%u', 'audioChannelCount':'%u', 'maxAlternatives':'%u', " \ + "'profanityFilter':'%s', 'enableWordTimeOffsets':'%s', 'enableWordConfidence':'%s', 'enableAutomaticPunctuation':'%s', " \ + "'enableSpokenPunctuation':'%s', 'enableSpokenEmojis':'%s', 'model':'%s', 'useEnhanced':'%s', " \ + " 'diarizationConfig':{'enableSpeakerDiarization': '%s', 'minSpeakerCount': '%u', 'maxSpeakerCount': '%u'}, " \ + "'metadata':{'interactionType':'%s', 'microphoneDistance':'%s', 'recordingDeviceType':'%s'}}, 'audio':{'content':'%s'}}", + asr_ctx->lang, + globals.opt_encoding, + asr_ctx->samplerate, + asr_ctx->channels, + asr_ctx->opt_max_alternatives, + BOOL2STR(asr_ctx->opt_enable_profanity_filter), + BOOL2STR(asr_ctx->opt_enable_word_time_offsets), + BOOL2STR(asr_ctx->opt_enable_word_confidence), + BOOL2STR(asr_ctx->opt_enable_automatic_punctuation), + BOOL2STR(asr_ctx->opt_enable_spoken_punctuation), + BOOL2STR(asr_ctx->opt_enable_spoken_emojis), + asr_ctx->opt_speech_model, + BOOL2STR(asr_ctx->opt_use_enhanced_model), + BOOL2STR(asr_ctx->opt_enable_speaker_diarization), + asr_ctx->opt_diarization_min_speaker_count, + asr_ctx->opt_diarization_max_speaker_count, + asr_ctx->opt_meta_interaction_type, + asr_ctx->opt_meta_microphone_distance, + asr_ctx->opt_meta_recording_device_type, + base64_buffer + ); + + asr_ctx->curl_send_buffer_ref = curl_send_buffer; + asr_ctx->curl_send_buffer_len = strlen((const char *)curl_send_buffer); + asr_ctx->curl_recv_buffer_ref = curl_recv_buffer; + + for(int rqtry = 0; rqtry < asr_ctx->retries_on_error; rqtry++) { + switch_buffer_zero(curl_recv_buffer); + status = curl_perform(asr_ctx, &globals); + if(status == SWITCH_STATUS_SUCCESS || globals.fl_shutdown || asr_ctx->fl_destroyed) { break; } + switch_yield(1000); + } + + recv_len = switch_buffer_peek_zerocopy(curl_recv_buffer, &curl_recv_buffer_ptr); + if(status == SWITCH_STATUS_SUCCESS) { + if(curl_recv_buffer_ptr && recv_len) { + char *txt = 
parse_response((char *)curl_recv_buffer_ptr, NULL); +#ifdef MOD_GOOGLE_ASR_DEBUG + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Service response [%s]\n", (char *)curl_recv_buffer_ptr); + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Text [%s]\n", txt ? txt : "null"); +#endif + if(!txt) txt = strdup(""); + if(switch_queue_trypush(asr_ctx->q_text, txt) == SWITCH_STATUS_SUCCESS) { + switch_mutex_lock(asr_ctx->mutex); + asr_ctx->transcription_results++; + switch_mutex_unlock(asr_ctx->mutex); + } else { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Queue is full!\n"); + switch_safe_free(txt); + } + } else { + stt_failed = 1; + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Empty service response!\n"); + } + } else { + stt_failed = 1; + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to perform request!\n"); + } + switch_safe_free(curl_send_buffer); + } else { + stt_failed = 1; + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_b64_encode() failed\n"); + } + + if(stt_failed) { + char *txt = strdup("[transcription failed]"); + if(switch_queue_trypush(asr_ctx->q_text, txt) == SWITCH_STATUS_SUCCESS) { + switch_mutex_lock(asr_ctx->mutex); + asr_ctx->transcription_results++; + switch_mutex_unlock(asr_ctx->mutex); + } else { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Queue is full!\n"); + switch_safe_free(txt); + } + } + + schunks = 0; + speech_timeout = 0; + switch_buffer_zero(chunk_buffer); + } + + timer_next: + switch_yield(10000); + } + +out: + switch_safe_free(base64_buffer); + switch_safe_free(curl_send_buffer); + + if(curl_recv_buffer) { + switch_buffer_destroy(&curl_recv_buffer); + } + if(chunk_buffer) { + switch_buffer_destroy(&chunk_buffer); + } + if(pool) { + switch_core_destroy_memory_pool(&pool); + } + + switch_mutex_lock(asr_ctx->mutex); + if(asr_ctx->refs > 0) asr_ctx->refs--; + switch_mutex_unlock(asr_ctx->mutex); + + switch_mutex_lock(globals.mutex); + 
if(globals.active_threads) globals.active_threads--; + switch_mutex_unlock(globals.mutex); + + return NULL; +} + +// --------------------------------------------------------------------------------------------------------------------------------------------- +// asr interface +// --------------------------------------------------------------------------------------------------------------------------------------------- +static switch_status_t asr_open(switch_asr_handle_t *ah, const char *codec, int samplerate, const char *dest, switch_asr_flag_t *flags) { + switch_status_t status = SWITCH_STATUS_SUCCESS; + switch_threadattr_t *attr = NULL; + switch_thread_t *thread = NULL; + asr_ctx_t *asr_ctx = NULL; + + if(strcmp(codec, "L16") !=0) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unsupported encoding: %s\n", codec); + switch_goto_status(SWITCH_STATUS_FALSE, out); + } + + if((asr_ctx = switch_core_alloc(ah->memory_pool, sizeof(asr_ctx_t))) == NULL) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_core_alloc()\n"); + switch_goto_status(SWITCH_STATUS_GENERR, out); + } + + asr_ctx->channels = 1; + asr_ctx->chunk_buffer_size = 0; + asr_ctx->samplerate = samplerate; + asr_ctx->silence_sec = globals.speech_silence_sec; + asr_ctx->lang = (char *)globals.default_lang; + asr_ctx->api_key = globals.api_key; + asr_ctx->retries_on_error = globals.retries_on_error; + + asr_ctx->opt_max_alternatives = globals.opt_max_alternatives; + asr_ctx->opt_enable_profanity_filter = globals.opt_enable_profanity_filter; + asr_ctx->opt_enable_word_time_offsets = globals.opt_enable_word_time_offsets; + asr_ctx->opt_enable_word_confidence = globals.opt_enable_word_confidence; + asr_ctx->opt_enable_automatic_punctuation = globals.opt_enable_automatic_punctuation; + asr_ctx->opt_enable_spoken_punctuation = globals.opt_enable_spoken_punctuation; + asr_ctx->opt_enable_spoken_emojis = globals.opt_enable_spoken_emojis; + asr_ctx->opt_meta_interaction_type = 
globals.opt_meta_interaction_type; + asr_ctx->opt_meta_microphone_distance = globals.opt_meta_microphone_distance; + asr_ctx->opt_meta_recording_device_type = globals.opt_meta_recording_device_type; + asr_ctx->opt_speech_model = globals.opt_speech_model; + asr_ctx->opt_use_enhanced_model = globals.opt_use_enhanced_model; + asr_ctx->opt_enable_speaker_diarization = SWITCH_FALSE; + asr_ctx->opt_diarization_min_speaker_count = 1; + asr_ctx->opt_diarization_max_speaker_count = 1; + + if((status = switch_mutex_init(&asr_ctx->mutex, SWITCH_MUTEX_NESTED, ah->memory_pool)) != SWITCH_STATUS_SUCCESS) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_mutex_init()\n"); + switch_goto_status(SWITCH_STATUS_GENERR, out); + } + + switch_queue_create(&asr_ctx->q_audio, QUEUE_SIZE, ah->memory_pool); + switch_queue_create(&asr_ctx->q_text, QUEUE_SIZE, ah->memory_pool); + + asr_ctx->vad_buffer = NULL; + asr_ctx->frame_len = 0; + asr_ctx->vad_buffer_size = 0; + asr_ctx->vad_stored_frames = 0; + asr_ctx->fl_vad_first_cycle = SWITCH_TRUE; + + if((asr_ctx->vad = switch_vad_init(asr_ctx->samplerate, asr_ctx->channels)) == NULL) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_vad_init()\n"); + switch_goto_status(SWITCH_STATUS_GENERR, out); + } + switch_vad_set_mode(asr_ctx->vad, -1); + switch_vad_set_param(asr_ctx->vad, "debug", globals.fl_vad_debug); + if(globals.vad_silence_ms > 0) { switch_vad_set_param(asr_ctx->vad, "silence_ms", globals.vad_silence_ms); } + if(globals.vad_voice_ms > 0) { switch_vad_set_param(asr_ctx->vad, "voice_ms", globals.vad_voice_ms); } + if(globals.vad_threshold > 0) { switch_vad_set_param(asr_ctx->vad, "thresh", globals.vad_threshold); } + + ah->private_info = asr_ctx; + + switch_mutex_lock(globals.mutex); + globals.active_threads++; + switch_mutex_unlock(globals.mutex); + + switch_threadattr_create(&attr, ah->memory_pool); + switch_threadattr_detach_set(attr, 1); + switch_threadattr_stacksize_set(attr, 
SWITCH_THREAD_STACKSIZE); + switch_thread_create(&thread, attr, transcribe_thread, asr_ctx, ah->memory_pool); + +out: + return status; +} + +static switch_status_t asr_close(switch_asr_handle_t *ah, switch_asr_flag_t *flags) { + asr_ctx_t *asr_ctx = (asr_ctx_t *)ah->private_info; + uint8_t fl_wloop = SWITCH_TRUE; + + assert(asr_ctx != NULL); + + asr_ctx->fl_abort = SWITCH_TRUE; + asr_ctx->fl_destroyed = SWITCH_TRUE; + + switch_mutex_lock(asr_ctx->mutex); + fl_wloop = (asr_ctx->refs != 0); + switch_mutex_unlock(asr_ctx->mutex); + + if(fl_wloop) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Waiting for unlock (refs=%u)...\n", asr_ctx->refs); + while(fl_wloop) { + switch_mutex_lock(asr_ctx->mutex); + fl_wloop = (asr_ctx->refs != 0); + switch_mutex_unlock(asr_ctx->mutex); + switch_yield(100000); + } + } + + if(asr_ctx->q_audio) { + xdata_buffer_queue_clean(asr_ctx->q_audio); + switch_queue_term(asr_ctx->q_audio); + } + if(asr_ctx->q_text) { + text_queue_clean(asr_ctx->q_text); + switch_queue_term(asr_ctx->q_text); + } + if(asr_ctx->vad) { + switch_vad_destroy(&asr_ctx->vad); + } + + if(asr_ctx->vad_buffer) { + switch_buffer_destroy(&asr_ctx->vad_buffer); + } + + switch_set_flag(ah, SWITCH_ASR_FLAG_CLOSED); + + return SWITCH_STATUS_SUCCESS; +} + +static switch_status_t asr_feed(switch_asr_handle_t *ah, void *data, unsigned int data_len, switch_asr_flag_t *flags) { + asr_ctx_t *asr_ctx = (asr_ctx_t *)ah->private_info; + switch_vad_state_t vad_state = 0; + uint8_t fl_has_audio = SWITCH_FALSE; + + assert(asr_ctx != NULL); + + if(switch_test_flag(ah, SWITCH_ASR_FLAG_CLOSED)) { + return SWITCH_STATUS_BREAK; + } + if(asr_ctx->fl_destroyed || asr_ctx->fl_abort) { + return SWITCH_STATUS_BREAK; + } + if(asr_ctx->fl_pause) { + return SWITCH_STATUS_SUCCESS; + } + if(!data || !data_len) { + return SWITCH_STATUS_BREAK; + } + + if(data_len > 0 && asr_ctx->frame_len == 0) { + switch_mutex_lock(asr_ctx->mutex); + asr_ctx->frame_len = data_len; + asr_ctx->vad_buffer_size 
= asr_ctx->frame_len * VAD_STORE_FRAMES; + asr_ctx->chunk_buffer_size = asr_ctx->samplerate * globals.speech_max_sec; + switch_mutex_unlock(asr_ctx->mutex); + + if(switch_buffer_create(ah->memory_pool, &asr_ctx->vad_buffer, asr_ctx->vad_buffer_size) != SWITCH_STATUS_SUCCESS) { + asr_ctx->vad_buffer_size = 0; + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_buffer_create()\n"); + } + } + + if(asr_ctx->vad_buffer_size) { + if(asr_ctx->vad_state == SWITCH_VAD_STATE_STOP_TALKING || (asr_ctx->vad_state == vad_state && vad_state == SWITCH_VAD_STATE_NONE)) { + if(data_len <= asr_ctx->frame_len) { + if(asr_ctx->vad_stored_frames >= VAD_STORE_FRAMES) { + switch_buffer_zero(asr_ctx->vad_buffer); + asr_ctx->vad_stored_frames = 0; + asr_ctx->fl_vad_first_cycle = SWITCH_FALSE; + } + switch_buffer_write(asr_ctx->vad_buffer, data, MIN(asr_ctx->frame_len, data_len)); + asr_ctx->vad_stored_frames++; + } + } + + vad_state = switch_vad_process(asr_ctx->vad, (int16_t *)data, (data_len / sizeof(int16_t))); + if(vad_state == SWITCH_VAD_STATE_START_TALKING) { + asr_ctx->vad_state = vad_state; + fl_has_audio = SWITCH_TRUE; + } else if (vad_state == SWITCH_VAD_STATE_STOP_TALKING) { + asr_ctx->vad_state = vad_state; + fl_has_audio = SWITCH_FALSE; + switch_vad_reset(asr_ctx->vad); + } else if (vad_state == SWITCH_VAD_STATE_TALKING) { + asr_ctx->vad_state = vad_state; + fl_has_audio = SWITCH_TRUE; + } + } else { + fl_has_audio = SWITCH_TRUE; + } + + if(fl_has_audio) { + if(vad_state == SWITCH_VAD_STATE_START_TALKING && asr_ctx->vad_stored_frames > 0) { + xdata_buffer_t *tau_buf = NULL; + const void *ptr = NULL; + switch_size_t vblen = 0; + uint32_t rframes = 0, rlen = 0; + int ofs = 0; + + if((vblen = switch_buffer_peek_zerocopy(asr_ctx->vad_buffer, &ptr)) && ptr && vblen > 0) { + rframes = (asr_ctx->vad_stored_frames >= VAD_RECOVERY_FRAMES ? VAD_RECOVERY_FRAMES : (asr_ctx->fl_vad_first_cycle ? 
asr_ctx->vad_stored_frames : VAD_RECOVERY_FRAMES)); + rlen = (rframes * asr_ctx->frame_len); + ofs = (vblen - rlen); + + if(ofs < 0) { + uint32_t hdr_sz = -ofs; + uint32_t hdr_ofs = (asr_ctx->vad_buffer_size - hdr_sz); + + switch_zmalloc(tau_buf, sizeof(xdata_buffer_t)); + + tau_buf->len = (hdr_sz + vblen + data_len); + switch_malloc(tau_buf->data, tau_buf->len); + + memcpy(tau_buf->data, (void *)(ptr + hdr_ofs), hdr_sz); + memcpy(tau_buf->data + hdr_sz , (void *)(ptr + 0), vblen); + memcpy(tau_buf->data + rlen, data, data_len); + + if(switch_queue_trypush(asr_ctx->q_audio, tau_buf) != SWITCH_STATUS_SUCCESS) { + xdata_buffer_free(&tau_buf); + } + + switch_buffer_zero(asr_ctx->vad_buffer); + asr_ctx->vad_stored_frames = 0; + } else { + switch_zmalloc(tau_buf, sizeof(xdata_buffer_t)); + + tau_buf->len = (rlen + data_len); + switch_malloc(tau_buf->data, tau_buf->len); + + memcpy(tau_buf->data, (void *)(ptr + ofs), rlen); + memcpy(tau_buf->data + rlen, data, data_len); + + if(switch_queue_trypush(asr_ctx->q_audio, tau_buf) != SWITCH_STATUS_SUCCESS) { + xdata_buffer_free(&tau_buf); + } + + switch_buffer_zero(asr_ctx->vad_buffer); + asr_ctx->vad_stored_frames = 0; + } + } + } else { + xdata_buffer_push(asr_ctx->q_audio, data, data_len); + } + } + + return SWITCH_STATUS_SUCCESS; +} + +static switch_status_t asr_check_results(switch_asr_handle_t *ah, switch_asr_flag_t *flags) { + asr_ctx_t *asr_ctx = (asr_ctx_t *)ah->private_info; + + assert(asr_ctx != NULL); + + if(asr_ctx->fl_pause) { + return SWITCH_STATUS_FALSE; + } + + return (asr_ctx->transcription_results > 0 ? 
SWITCH_STATUS_SUCCESS : SWITCH_STATUS_FALSE); +} + +static switch_status_t asr_get_results(switch_asr_handle_t *ah, char **xmlstr, switch_asr_flag_t *flags) { + asr_ctx_t *asr_ctx = (asr_ctx_t *)ah->private_info; + switch_status_t status = SWITCH_STATUS_FALSE; + void *pop = NULL; + + assert(asr_ctx != NULL); + + if(switch_queue_trypop(asr_ctx->q_text, &pop) == SWITCH_STATUS_SUCCESS) { + if(pop) { + *xmlstr = (char *)pop; + status = SWITCH_STATUS_SUCCESS; + + switch_mutex_lock(asr_ctx->mutex); + if(asr_ctx->transcription_results > 0) asr_ctx->transcription_results--; + switch_mutex_unlock(asr_ctx->mutex); + } + } + + return status; +} + +static switch_status_t asr_start_input_timers(switch_asr_handle_t *ah) { + asr_ctx_t *asr_ctx = (asr_ctx_t *)ah->private_info; + + assert(asr_ctx != NULL); + + asr_ctx->fl_start_timers = SWITCH_TRUE; + + return SWITCH_STATUS_SUCCESS; +} + +static switch_status_t asr_pause(switch_asr_handle_t *ah) { + asr_ctx_t *asr_ctx = (asr_ctx_t *)ah->private_info; + + assert(asr_ctx != NULL); + + asr_ctx->fl_pause = SWITCH_TRUE; + + return SWITCH_STATUS_SUCCESS; +} + +static switch_status_t asr_resume(switch_asr_handle_t *ah) { + asr_ctx_t *asr_ctx = (asr_ctx_t *)ah->private_info; + + assert(asr_ctx != NULL); + + asr_ctx->fl_pause = SWITCH_FALSE; + + return SWITCH_STATUS_SUCCESS; +} + +static void asr_text_param(switch_asr_handle_t *ah, char *param, const char *val) { + asr_ctx_t *asr_ctx = (asr_ctx_t *)ah->private_info; + + assert(asr_ctx != NULL); + + if(strcasecmp(param, "lang") == 0) { + if(val) asr_ctx->lang = switch_core_strdup(ah->memory_pool, gcp_get_language(val)); + } else if(strcasecmp(param, "silence") == 0) { + if(val) asr_ctx->silence_sec = atoi(val); + } else if(strcasecmp(param, "key") == 0) { + if(val) asr_ctx->api_key = switch_core_strdup(ah->memory_pool, val); + } else if(!strcasecmp(param, "speech-model")) { + if(val) asr_ctx->opt_speech_model = switch_core_strdup(ah->memory_pool, val); + } else if(!strcasecmp(param, 
"use-enhanced-model")) { + if(val) asr_ctx->opt_use_enhanced_model = switch_true(val); + } else if(!strcasecmp(param, "max-alternatives")) { + if(val) asr_ctx->opt_max_alternatives = atoi(val); + } else if(!strcasecmp(param, "enable-word-time-offsets")) { + if(val) asr_ctx->opt_enable_word_time_offsets = switch_true(val); + } else if(!strcasecmp(param, "enable-enable-word-confidence;")) { + if(val) asr_ctx->opt_enable_word_confidence = switch_true(val); + } else if(!strcasecmp(param, "enable-profanity-filter")) { + if(val) asr_ctx->opt_enable_profanity_filter = switch_true(val); + } else if(!strcasecmp(param, "enable-automatic-punctuation")) { + if(val) asr_ctx->opt_enable_automatic_punctuation = switch_true(val); + } else if(!strcasecmp(param, "enable-spoken-punctuation")) { + if(val) asr_ctx->opt_enable_spoken_punctuation = switch_true(val); + } else if(!strcasecmp(param, "enable-spoken-emojis")) { + if(val) asr_ctx->opt_enable_spoken_emojis = switch_true(val); + } else if(!strcasecmp(param, "microphone-distance")) { + if(val) asr_ctx->opt_meta_microphone_distance = switch_core_strdup(ah->memory_pool, gcp_get_microphone_distance(val)); + } else if(!strcasecmp(param, "recording-device-type")) { + if(val) asr_ctx->opt_meta_recording_device_type = switch_core_strdup(ah->memory_pool, gcp_get_recording_device(val)); + } else if(!strcasecmp(param, "interaction-type")) { + if(val) asr_ctx->opt_meta_interaction_type = switch_core_strdup(ah->memory_pool, gcp_get_interaction(val)); + } else if(!strcasecmp(param, "enable-speaker-diarizatio")) { + if(val) asr_ctx->opt_enable_speaker_diarization = switch_true(val); + } else if(!strcasecmp(param, "diarization-min-speakers")) { + if(val) asr_ctx->opt_diarization_min_speaker_count = atoi(val); + } else if(!strcasecmp(param, "diarization-max-speakers")) { + if(val) asr_ctx->opt_diarization_max_speaker_count = atoi(val); + } +} + +static void asr_numeric_param(switch_asr_handle_t *ah, char *param, int val) { +} + +static void 
asr_float_param(switch_asr_handle_t *ah, char *param, double val) { +} + +static switch_status_t asr_load_grammar(switch_asr_handle_t *ah, const char *grammar, const char *name) { + return SWITCH_STATUS_SUCCESS; +} + +static switch_status_t asr_unload_grammar(switch_asr_handle_t *ah, const char *name) { + return SWITCH_STATUS_SUCCESS; +} + +#define CMD_SYNTAX "path_to/filename.(mp3|wav) []\n" +SWITCH_STANDARD_API(google_asr_cmd_handler) { + //switch_status_t status = 0; + char *mycmd = NULL, *argv[10] = { 0 }; int argc = 0; + + if (!zstr(cmd)) { + mycmd = strdup(cmd); + switch_assert(mycmd); + argc = switch_separate_string(mycmd, ' ', argv, (sizeof(argv) / sizeof(argv[0]))); + } + if(argc == 0) { + goto usage; + } + + // + // todo + // + + stream->write_function(stream, "-ERR: not yet implemented\n"); + goto out; +usage: + stream->write_function(stream, "-ERR:\nUsage: %s\n", CMD_SYNTAX); + +out: + + switch_safe_free(mycmd); + return SWITCH_STATUS_SUCCESS; +} + +// --------------------------------------------------------------------------------------------------------------------------------------------- +// main +// --------------------------------------------------------------------------------------------------------------------------------------------- +SWITCH_MODULE_LOAD_FUNCTION(mod_google_asr_load) { + switch_status_t status = SWITCH_STATUS_SUCCESS; + switch_xml_t cfg, xml, settings, param; + switch_api_interface_t *commands_interface; + switch_asr_interface_t *asr_interface; + + memset(&globals, 0, sizeof(globals)); + switch_mutex_init(&globals.mutex, SWITCH_MUTEX_NESTED, pool); + + if((xml = switch_xml_open_cfg(MOD_CONFIG_NAME, &cfg, NULL)) == NULL) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open configuration: %s\n", MOD_CONFIG_NAME); + switch_goto_status(SWITCH_STATUS_GENERR, out); + } + + if((settings = switch_xml_child(cfg, "settings"))) { + for (param = switch_xml_child(settings, "param"); param; param = param->next) { + 
char *var = (char *) switch_xml_attr_soft(param, "name"); + char *val = (char *) switch_xml_attr_soft(param, "value"); + + if(!strcasecmp(var, "vad-silence-ms")) { + if(val) globals.vad_silence_ms = atoi (val); + } else if(!strcasecmp(var, "vad-voice-ms")) { + if(val) globals.vad_voice_ms = atoi (val); + } else if(!strcasecmp(var, "vad-threshold")) { + if(val) globals.vad_threshold = atoi (val); + } else if(!strcasecmp(var, "vad-debug")) { + if(val) globals.fl_vad_debug = switch_true(val); + } else if(!strcasecmp(var, "api-key")) { + if(val) globals.api_key = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "api-url")) { + if(val) globals.api_url = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "user-agent")) { + if(val) globals.user_agent = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "proxy")) { + if(val) globals.proxy = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "proxy-credentials")) { + if(val) globals.proxy_credentials = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "default-language")) { + if(val) globals.default_lang = switch_core_strdup(pool, gcp_get_language(val)); + } else if(!strcasecmp(var, "encoding")) { + if(val) globals.opt_encoding = switch_core_strdup(pool, gcp_get_encoding(val)); + } else if(!strcasecmp(var, "speech-max-sec")) { + if(val) globals.speech_max_sec = atoi(val); + } else if(!strcasecmp(var, "speech-silence-sec")) { + if(val) globals.speech_silence_sec = atoi(val); + } else if(!strcasecmp(var, "request-timeout")) { + if(val) globals.request_timeout = atoi(val); + } else if(!strcasecmp(var, "connect-timeout")) { + if(val) globals.connect_timeout = atoi(val); + } else if(!strcasecmp(var, "retries-on-error")) { + if(val) globals.retries_on_error = atoi(val); + } else if(!strcasecmp(var, "speech-model")) { + if(val) globals.opt_speech_model = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "use-enhanced-model")) { + if(val) globals.opt_use_enhanced_model = 
switch_true(val); + } else if(!strcasecmp(var, "max-alternatives")) { + if(val) globals.opt_max_alternatives = atoi(val); + } else if(!strcasecmp(var, "enable-word-time-offsets")) { + if(val) globals.opt_enable_word_time_offsets = switch_true(val); + } else if(!strcasecmp(var, "enable-word-confidence")) { + if(val) globals.opt_enable_word_confidence = switch_true(val); + } else if(!strcasecmp(var, "enable-profanity-filter")) { + if(val) globals.opt_enable_profanity_filter = switch_true(val); + } else if(!strcasecmp(var, "enable-automatic-punctuation")) { + if(val) globals.opt_enable_automatic_punctuation = switch_true(val); + } else if(!strcasecmp(var, "enable-spoken-punctuation")) { + if(val) globals.opt_enable_spoken_punctuation = switch_true(val); + } else if(!strcasecmp(var, "enable-spoken-emojis")) { + if(val) globals.opt_enable_spoken_emojis = switch_true(val); + } else if(!strcasecmp(var, "microphone-distance")) { + if(val) globals.opt_meta_microphone_distance = switch_core_strdup(pool, gcp_get_microphone_distance(val)); + } else if(!strcasecmp(var, "recording-device-type")) { + if(val) globals.opt_meta_recording_device_type = switch_core_strdup(pool, gcp_get_recording_device(val)); + } else if(!strcasecmp(var, "interaction-type")) { + if(val) globals.opt_meta_interaction_type = switch_core_strdup(pool, gcp_get_interaction(val)); + } + } + } + + if(!globals.api_url) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing required parameter: api-url\n"); + switch_goto_status(SWITCH_STATUS_GENERR, out); + } + + globals.speech_max_sec = !globals.speech_max_sec ? 35 : globals.speech_max_sec; + globals.speech_silence_sec = !globals.speech_silence_sec ? 3 : globals.speech_silence_sec; + globals.opt_encoding = globals.opt_encoding ? globals.opt_encoding : gcp_get_encoding("l16"); + globals.opt_speech_model = globals.opt_speech_model ? globals.opt_speech_model : "phone_call"; + globals.opt_max_alternatives = globals.opt_max_alternatives > 0 ? 
globals.opt_max_alternatives : 1; + globals.opt_meta_microphone_distance = globals.opt_meta_microphone_distance ? globals.opt_meta_microphone_distance : gcp_get_microphone_distance("unspecified"); + globals.opt_meta_recording_device_type = globals.opt_meta_recording_device_type ? globals.opt_meta_recording_device_type : gcp_get_recording_device("unspecified"); + globals.opt_meta_interaction_type = globals.opt_meta_interaction_type ? globals.opt_meta_interaction_type : gcp_get_interaction("unspecified"); + globals.retries_on_error = !globals.retries_on_error ? 1 : globals.retries_on_error; + + globals.tmp_path = switch_core_sprintf(pool, "%s%sgoogle-asr-cache", SWITCH_GLOBAL_dirs.temp_dir, SWITCH_PATH_SEPARATOR); + if(switch_directory_exists(globals.tmp_path, NULL) != SWITCH_STATUS_SUCCESS) { + switch_dir_make(globals.tmp_path, SWITCH_FPROT_OS_DEFAULT, NULL); + } + + *module_interface = switch_loadable_module_create_module_interface(pool, modname); + SWITCH_ADD_API(commands_interface, "google_asr_transcript", "Google speech-to-text", google_asr_cmd_handler, CMD_SYNTAX); + + asr_interface = switch_loadable_module_create_interface(*module_interface, SWITCH_ASR_INTERFACE); + asr_interface->interface_name = "google"; + asr_interface->asr_open = asr_open; + asr_interface->asr_close = asr_close; + asr_interface->asr_feed = asr_feed; + asr_interface->asr_pause = asr_pause; + asr_interface->asr_resume = asr_resume; + asr_interface->asr_check_results = asr_check_results; + asr_interface->asr_get_results = asr_get_results; + asr_interface->asr_start_input_timers = asr_start_input_timers; + asr_interface->asr_text_param = asr_text_param; + asr_interface->asr_numeric_param = asr_numeric_param; + asr_interface->asr_float_param = asr_float_param; + asr_interface->asr_load_grammar = asr_load_grammar; + asr_interface->asr_unload_grammar = asr_unload_grammar; + + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "Google-ASR (%s)\n", MOD_VERSION); +out: + if(xml) { + 
switch_xml_free(xml); + } + return status; +} + +SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_google_asr_shutdown) { + uint8_t fl_wloop = SWITCH_TRUE; + + globals.fl_shutdown = SWITCH_TRUE; + + switch_mutex_lock(globals.mutex); + fl_wloop = (globals.active_threads > 0); + switch_mutex_unlock(globals.mutex); + + if(fl_wloop) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_WARNING, "Waiting for termination (%d) threads...\n", globals.active_threads); + while(fl_wloop) { + switch_mutex_lock(globals.mutex); + fl_wloop = (globals.active_threads > 0); + switch_mutex_unlock(globals.mutex); + switch_yield(100000); + } + } + + return SWITCH_STATUS_SUCCESS; +} diff --git a/src/mod/asr_tts/mod_google_asr/mod_google_asr.h b/src/mod/asr_tts/mod_google_asr/mod_google_asr.h new file mode 100644 index 00000000000..ec56f244876 --- /dev/null +++ b/src/mod/asr_tts/mod_google_asr/mod_google_asr.h @@ -0,0 +1,149 @@ +/* + * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application + * Copyright (C) 2005-2014, Anthony Minessale II + * + * Version: MPL 1.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. 
+ *
+ * Module Contributor(s):
+ * Konstantin Alexandrin
+ *
+ *
+ *
+ */
+#ifndef MOD_GOOGLE_ASR_H
+#define MOD_GOOGLE_ASR_H
+
+#include
+#include
+#include
+
+/* Guarded so a MIN/MAX already supplied by an included header is not redefined. */
+#ifndef MIN
+#define MIN(a,b) (((a)<(b))?(a):(b))
+#endif
+#ifndef MAX
+#define MAX(a,b) (((a)>(b))?(a):(b))
+#endif
+
+#define MOD_CONFIG_NAME "google_asr.conf"
+#define MOD_VERSION "1.0.4"
+#define QUEUE_SIZE 128
+#define VAD_STORE_FRAMES 64
+#define VAD_RECOVERY_FRAMES 20
+/* Arguments are parenthesized so expressions such as BASE64_ENC_SZ(a + b) expand correctly. */
+#define BASE64_ENC_SZ(n) (4*(((n)+2)/3))
+#define BOOL2STR(v) ((v) ? "true" : "false")
+
+//#define MOD_GOOGLE_ASR_DEBUG
+
+/* Module-wide settings and state, shared by all ASR sessions. */
+typedef struct {
+    switch_mutex_t *mutex;
+    uint32_t active_threads;
+    uint32_t speech_max_sec;
+    uint32_t speech_silence_sec;
+    uint32_t vad_silence_ms;
+    uint32_t vad_voice_ms;
+    uint32_t vad_threshold;
+    uint32_t request_timeout; // seconds
+    uint32_t connect_timeout; // seconds
+    uint32_t retries_on_error;
+    uint8_t fl_vad_debug;
+    uint8_t fl_shutdown;
+    char *tmp_path;
+    char *api_key;
+    char *api_url;
+    char *user_agent;
+    char *default_lang;
+    char *proxy;
+    char *proxy_credentials;
+    char *opt_encoding;
+    char *opt_speech_model;
+    char *opt_meta_microphone_distance;
+    char *opt_meta_recording_device_type;
+    char *opt_meta_interaction_type;
+    uint32_t opt_max_alternatives;
+    uint32_t opt_use_enhanced_model;
+    uint32_t opt_enable_word_time_offsets;
+    uint32_t opt_enable_word_confidence;
+    uint32_t opt_enable_profanity_filter;
+    uint32_t opt_enable_automatic_punctuation;
+    uint32_t opt_enable_spoken_punctuation;
+    uint32_t opt_enable_spoken_emojis;
+} globals_t;
+
+
+/* Per-session ASR context. */
+typedef struct {
+    switch_memory_pool_t *pool;
+    switch_vad_t *vad;
+    switch_buffer_t *vad_buffer;
+    switch_mutex_t *mutex;
+    switch_queue_t *q_audio;
+    switch_queue_t *q_text;
+    switch_buffer_t *curl_recv_buffer_ref;
+    switch_byte_t *curl_send_buffer_ref;
+    char *api_key;
+    char *lang;
+    switch_vad_state_t vad_state;
+    uint32_t retries_on_error;
+    uint32_t curl_send_buffer_len;
+    uint32_t transcription_results;
+    uint32_t vad_buffer_size;
+    uint32_t vad_stored_frames;
+    uint32_t chunk_buffer_size;
+    uint32_t refs;
+    uint32_t samplerate;
+    uint32_t channels;
+    uint32_t frame_len;
+    uint32_t silence_sec;
+    uint8_t fl_start_timers;
+    uint8_t fl_pause;
+    uint8_t fl_vad_first_cycle;
+    uint8_t fl_destroyed;
+    uint8_t fl_abort;
+    //
+    char *opt_speech_model;
+    char *opt_meta_microphone_distance;
+    char *opt_meta_recording_device_type;
+    char *opt_meta_interaction_type;
+    uint32_t opt_max_alternatives;
+    uint32_t opt_use_enhanced_model;
+    uint32_t opt_enable_word_time_offsets;
+    uint32_t opt_enable_word_confidence;
+    uint32_t opt_enable_profanity_filter;
+    uint32_t opt_enable_automatic_punctuation;
+    uint32_t opt_enable_spoken_punctuation;
+    uint32_t opt_enable_spoken_emojis;
+    uint32_t opt_enable_speaker_diarization;
+    uint32_t opt_diarization_min_speaker_count;
+    uint32_t opt_diarization_max_speaker_count;
+} asr_ctx_t;
+
+/* Heap-allocated byte buffer of `len` bytes, passed through the queues. */
+typedef struct {
+    uint32_t len;
+    switch_byte_t *data;
+} xdata_buffer_t;
+
+
+/* curl.c */
+switch_status_t curl_perform(asr_ctx_t *asr_ctx, globals_t *globals);
+
+/* utils.c */
+switch_status_t xdata_buffer_push(switch_queue_t *queue, switch_byte_t *data, uint32_t data_len);
+switch_status_t xdata_buffer_alloc(xdata_buffer_t **out, switch_byte_t *data, uint32_t data_len);
+void xdata_buffer_free(xdata_buffer_t **buf);
+void xdata_buffer_queue_clean(switch_queue_t *queue);
+void text_queue_clean(switch_queue_t *queue);
+char *parse_response(char *data, switch_stream_handle_t *stream);
+
+char *gcp_get_language(const char *val);
+char *gcp_get_encoding(const char *val);
+char *gcp_get_microphone_distance(const char *val);
+char *gcp_get_recording_device(const char *val);
+char *gcp_get_interaction(const char *val);
+
+
+#endif
diff --git a/src/mod/asr_tts/mod_google_asr/utils.c b/src/mod/asr_tts/mod_google_asr/utils.c
new file mode 100644
index 00000000000..9842da557d3
--- /dev/null
+++ b/src/mod/asr_tts/mod_google_asr/utils.c
@@ -0,0 +1,171 @@
+/*
+ * FreeSWITCH Modular Media Switching Software Library / Soft-Switch
Application
+ * Copyright (C) 2005-2014, Anthony Minessale II
+ *
+ * Version: MPL 1.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * Module Contributor(s):
+ * Konstantin Alexandrin
+ *
+ *
+ */
+#include "mod_google_asr.h"
+
+extern globals_t globals;
+
+/**
+ ** https://cloud.google.com/speech-to-text/docs/reference/rest/v1/RecognitionConfig
+ ** https://cloud.google.com/speech-to-text/docs/speech-to-text-supported-languages
+ **
+ **/
+/* Maps a short language code (case-insensitive) to a full locale tag; unknown values are returned unchanged. */
+char *gcp_get_language(const char *val) {
+    if(strcasecmp(val, "en") == 0) { return "en-US"; }
+    if(strcasecmp(val, "de") == 0) { return "de-DE"; }
+    if(strcasecmp(val, "es") == 0) { return "es-US"; }
+    if(strcasecmp(val, "it") == 0) { return "it-IT"; }
+    if(strcasecmp(val, "ru") == 0) { return "ru-RU"; }
+    return (char *)val;
+}
+
+/* Maps a configuration alias to the service's encoding name; unknown values are returned unchanged. */
+char *gcp_get_encoding(const char *val) {
+    if(strcasecmp(val, "unspecified") == 0) { return "ENCODING_UNSPECIFIED"; }
+    if(strcasecmp(val, "l16") == 0) { return "LINEAR16"; }
+    if(strcasecmp(val, "flac") == 0) { return "FLAC"; }
+    if(strcasecmp(val, "ulaw") == 0) { return "MULAW"; }
+    if(strcasecmp(val, "amr") == 0) { return "AMR"; }
+    return (char *)val;
+}
+
+/* Maps a configuration alias to the microphone-distance constant; unknown values are returned unchanged. */
+char *gcp_get_microphone_distance(const char *val) {
+    if(strcasecmp(val, "unspecified") == 0) { return "MICROPHONE_DISTANCE_UNSPECIFIED"; }
+    if(strcasecmp(val, "nearfield") == 0) { return "NEARFIELD"; }
+    if(strcasecmp(val, "midfield") == 0) { return "MIDFIELD"; }
+    if(strcasecmp(val, "farfield") == 0) { return "FARFIELD"; }
+    return (char *)val;
+}
+
+/* Maps a configuration alias to the recording-device constant; unknown values are returned unchanged. */
+char *gcp_get_recording_device(const char *val) {
+    if(strcasecmp(val, "unspecified") == 0) { return "RECORDING_DEVICE_TYPE_UNSPECIFIED"; }
+    if(strcasecmp(val, "smartphone") == 0) { return "SMARTPHONE"; }
+    if(strcasecmp(val, "pc") == 0) { return "PC"; }
+    if(strcasecmp(val, "phone_line") == 0) { return "PHONE_LINE"; }
+    if(strcasecmp(val, "vehicle") == 0) { return "VEHICLE"; }
+    if(strcasecmp(val, "other_outdoor_device") == 0) { return "OTHER_OUTDOOR_DEVICE"; }
+    if(strcasecmp(val, "other_indoor_device") == 0) { return "OTHER_INDOOR_DEVICE"; }
+    return (char *)val;
+}
+
+/* Maps a configuration alias to the interaction-type constant; unknown values are returned unchanged. */
+char *gcp_get_interaction(const char *val) {
+    if(strcasecmp(val, "unspecified") == 0) { return "INTERACTION_TYPE_UNSPECIFIED"; }
+    if(strcasecmp(val, "discussion") == 0) { return "DISCUSSION"; }
+    if(strcasecmp(val, "presentation") == 0) { return "PRESENTATION"; }
+    if(strcasecmp(val, "phone_call") == 0) { return "PHONE_CALL"; }
+    if(strcasecmp(val, "voicemail") == 0) { return "VOICEMAIL"; }
+    /* the misspelled alias was the only one accepted before; kept so existing configs still work */
+    if(strcasecmp(val, "voicemal") == 0) { return "VOICEMAIL"; }
+    if(strcasecmp(val, "professionally_produced") == 0) { return "PROFESSIONALLY_PRODUCED"; }
+    if(strcasecmp(val, "voice_search") == 0) { return "VOICE_SEARCH"; }
+    if(strcasecmp(val, "voice_command") == 0) { return "VOICE_COMMAND"; }
+    if(strcasecmp(val, "dictation") == 0) { return "DICTATION"; }
+    return (char *)val;
+}
+
+/*
+ * Allocates an xdata_buffer_t and, when data_len is non-zero, a private copy
+ * of data_len bytes from data. The result is stored in *out; release it with
+ * xdata_buffer_free(). Always returns SWITCH_STATUS_SUCCESS.
+ */
+switch_status_t xdata_buffer_alloc(xdata_buffer_t **out, switch_byte_t *data, uint32_t data_len) {
+    xdata_buffer_t *buf = NULL;
+
+    switch_zmalloc(buf, sizeof(xdata_buffer_t));
+
+    if(data_len) {
+        switch_malloc(buf->data, data_len);
+        switch_assert(buf->data);
+
+        buf->len = data_len;
+        memcpy(buf->data, data, data_len);
+    }
+
+    *out = buf;
+    return SWITCH_STATUS_SUCCESS;
+}
+
+/* Frees the buffer and its payload; *buf is reset to NULL so a repeated call is harmless. */
+void xdata_buffer_free(xdata_buffer_t **buf) {
+    if(buf && *buf) {
+        switch_safe_free((*buf)->data);
+        free(*buf);
+        *buf = NULL;
+    }
+}
+
+/* Pops and frees every xdata_buffer_t still waiting in the queue. */
+void xdata_buffer_queue_clean(switch_queue_t *queue) {
+    xdata_buffer_t *data = NULL;
+
+    if(!queue || !switch_queue_size(queue)) { return; }
+
+    while(switch_queue_trypop(queue, (void *) &data) == SWITCH_STATUS_SUCCESS) {
+        if(data) { xdata_buffer_free(&data); }
+    }
+}
+
+/*
+ * Copies data into a new xdata_buffer_t and pushes it without blocking.
+ * Returns SWITCH_STATUS_FALSE (and frees the copy) when the push fails.
+ */
+switch_status_t xdata_buffer_push(switch_queue_t *queue, switch_byte_t *data, uint32_t data_len) {
+    xdata_buffer_t *buff = NULL;
+
+    if(xdata_buffer_alloc(&buff, data, data_len) == SWITCH_STATUS_SUCCESS) {
+        if(switch_queue_trypush(queue, buff) == SWITCH_STATUS_SUCCESS) {
+            return SWITCH_STATUS_SUCCESS;
+        }
+        xdata_buffer_free(&buff);
+    }
+    return SWITCH_STATUS_FALSE;
+}
+
+/* Pops and frees every entry still waiting in the text queue. */
+void text_queue_clean(switch_queue_t *queue) {
+    void *data = NULL;
+
+    if(!queue || !switch_queue_size(queue)) {
+        return;
+    }
+
+    while(switch_queue_trypop(queue, (void *)&data) == SWITCH_STATUS_SUCCESS) {
+        switch_safe_free(data);
+    }
+}
+
+/*
+ * Extracts results[0].alternatives[0].transcript from a JSON response.
+ * Returns a strdup()'ed string the caller must free, or NULL when data is
+ * NULL, is not valid JSON, or lacks that field. `stream` is not used.
+ */
+char *parse_response(char *data, switch_stream_handle_t *stream) {
+    char *result = NULL;
+    cJSON *json = NULL;
+
+    if(!data) {
+        return NULL;
+    }
+
+    if((json = cJSON_Parse(data)) != NULL) {
+        cJSON *jres = cJSON_GetObjectItem(json, "results");
+        if(jres && cJSON_GetArraySize(jres) > 0) {
+            cJSON *jelem = cJSON_GetArrayItem(jres, 0);
+            if(jelem) {
+                jres = cJSON_GetObjectItem(jelem, "alternatives");
+                if(jres && cJSON_GetArraySize(jres) > 0) {
+                    jelem = cJSON_GetArrayItem(jres, 0);
+                    if(jelem) {
+                        cJSON *jt = cJSON_GetObjectItem(jelem, "transcript");
+                        if(jt && jt->valuestring) {
+                            result = strdup(jt->valuestring);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    if(json) {
+        cJSON_Delete(json);
+    }
+
+    return result;
+}
diff --git a/src/mod/asr_tts/mod_openai_asr/Makefile.am b/src/mod/asr_tts/mod_openai_asr/Makefile.am
new file mode 100644
index 00000000000..2213c833904
--- /dev/null
+++ b/src/mod/asr_tts/mod_openai_asr/Makefile.am
@@ -0,0 +1,12 @@
+
+include $(top_srcdir)/build/modmake.rulesam
+
+MODNAME=mod_openai_asr
+mod_LTLIBRARIES = mod_openai_asr.la
+mod_openai_asr_la_SOURCES = mod_openai_asr.c utils.c curl.c
+mod_openai_asr_la_CFLAGS = $(AM_CFLAGS) -I.
-Wno-pointer-arith +mod_openai_asr_la_LIBADD = $(switch_builddir)/libfreeswitch.la +mod_openai_asr_la_LDFLAGS = -avoid-version -module -no-undefined -shared + +$(am_mod_openai_asr_la_OBJECTS): mod_openai_asr.h + diff --git a/src/mod/asr_tts/mod_openai_asr/conf/autoload_configs/openai_asr.conf.xml b/src/mod/asr_tts/mod_openai_asr/conf/autoload_configs/openai_asr.conf.xml new file mode 100644 index 00000000000..99b35e99dfd --- /dev/null +++ b/src/mod/asr_tts/mod_openai_asr/conf/autoload_configs/openai_asr.conf.xml @@ -0,0 +1,26 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/mod/asr_tts/mod_openai_asr/conf/dialplan/dialplan.xml b/src/mod/asr_tts/mod_openai_asr/conf/dialplan/dialplan.xml new file mode 100644 index 00000000000..e72bd6f5927 --- /dev/null +++ b/src/mod/asr_tts/mod_openai_asr/conf/dialplan/dialplan.xml @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/src/mod/asr_tts/mod_openai_asr/curl.c b/src/mod/asr_tts/mod_openai_asr/curl.c new file mode 100644 index 00000000000..622aafe9334 --- /dev/null +++ b/src/mod/asr_tts/mod_openai_asr/curl.c @@ -0,0 +1,127 @@ +/* + * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application + * Copyright (C) 2005-2014, Anthony Minessale II + * + * Version: MPL 1.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. 
+ *
+ * Module Contributor(s):
+ * Konstantin Alexandrin
+ *
+ *
+ */
+#include "mod_openai_asr.h"
+
+/*
+ * libcurl write callback: appends the received bytes to the switch_buffer_t
+ * passed as user_data. Returns the number of bytes consumed; returning 0 for
+ * a non-empty chunk makes libcurl abort the transfer with a write error, so a
+ * response that does not fit the buffer is no longer truncated silently.
+ */
+static size_t curl_io_write_callback(char *buffer, size_t size, size_t nitems, void *user_data) {
+    switch_buffer_t *recv_buffer = (switch_buffer_t *)user_data;
+    size_t len = (size * nitems);
+
+    if(len > 0 && recv_buffer) {
+        if(switch_buffer_write(recv_buffer, buffer, len) == 0) {
+            return 0;
+        }
+    }
+
+    return len;
+}
+
+/*
+ * Uploads `filename` together with `model_name` as a multipart form to
+ * globals->api_url and collects the response body in recv_buffer.
+ *
+ * recv_buffer - response sink; a terminating NUL is appended when non-empty
+ * api_key     - optional bearer token
+ * model_name  - value of the "model" form field
+ * filename    - path of the file sent as the "file" form field
+ * globals     - timeouts, user agent, proxy settings and api_url
+ *
+ * Returns SWITCH_STATUS_SUCCESS only for an HTTP 200 response whose body
+ * could be stored and terminated; SWITCH_STATUS_FALSE otherwise.
+ */
+switch_status_t curl_perform(switch_buffer_t *recv_buffer, char *api_key, char *model_name, char *filename, globals_t *globals) {
+    switch_status_t status = SWITCH_STATUS_SUCCESS;
+    CURL *curl_handle = NULL;
+    curl_mime *form = NULL;
+    curl_mimepart *field1=NULL, *field2=NULL;
+    switch_curl_slist_t *headers = NULL;
+    switch_CURLcode curl_ret = 0;
+    long http_resp = 0;
+
+    if(!globals || !globals->api_url) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "api_url is not defined\n");
+        return SWITCH_STATUS_FALSE;
+    }
+
+    curl_handle = switch_curl_easy_init();
+    if(!curl_handle) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_curl_easy_init()\n");
+        return SWITCH_STATUS_FALSE;
+    }
+
+    /* build the complete header list before handing it to libcurl */
+    headers = switch_curl_slist_append(headers, "Content-Type: multipart/form-data");
+    headers = switch_curl_slist_append(headers, "Expect:");
+
+    /* numeric options are read by libcurl as long, so they are passed as long */
+    switch_curl_easy_setopt(curl_handle, CURLOPT_HTTPHEADER, headers);
+    switch_curl_easy_setopt(curl_handle, CURLOPT_POST, 1L);
+    switch_curl_easy_setopt(curl_handle, CURLOPT_NOSIGNAL, 1L);
+    switch_curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, curl_io_write_callback);
+    switch_curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *) recv_buffer);
+
+    if(globals->connect_timeout > 0) {
+        switch_curl_easy_setopt(curl_handle, CURLOPT_CONNECTTIMEOUT, (long)globals->connect_timeout);
+    }
+    if(globals->request_timeout > 0) {
+        switch_curl_easy_setopt(curl_handle, CURLOPT_TIMEOUT, (long)globals->request_timeout);
+    }
+    if(globals->user_agent) {
+        switch_curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, globals->user_agent);
+    }
+    if(strncasecmp(globals->api_url, "https", 5) == 0) {
+        /* NOTE(review): certificate and host verification are switched off, so the
+         * bearer token can be exposed to a man-in-the-middle. Behaviour kept as is;
+         * consider making this configurable. */
+        switch_curl_easy_setopt(curl_handle, CURLOPT_SSL_VERIFYPEER, 0L);
+        switch_curl_easy_setopt(curl_handle, CURLOPT_SSL_VERIFYHOST, 0L);
+    }
+    if(globals->proxy) {
+        if(globals->proxy_credentials != NULL) {
+            switch_curl_easy_setopt(curl_handle, CURLOPT_PROXYAUTH, CURLAUTH_ANY);
+            switch_curl_easy_setopt(curl_handle, CURLOPT_PROXYUSERPWD, globals->proxy_credentials);
+        }
+        if(strncasecmp(globals->proxy, "https", 5) == 0) {
+            switch_curl_easy_setopt(curl_handle, CURLOPT_PROXY_SSL_VERIFYPEER, 0L);
+        }
+        switch_curl_easy_setopt(curl_handle, CURLOPT_PROXY, globals->proxy);
+    }
+
+    if(api_key) {
+        curl_easy_setopt(curl_handle, CURLOPT_XOAUTH2_BEARER, api_key);
+        curl_easy_setopt(curl_handle, CURLOPT_HTTPAUTH, CURLAUTH_BEARER);
+    }
+
+    if((form = curl_mime_init(curl_handle))) {
+        if((field1 = curl_mime_addpart(form))) {
+            curl_mime_name(field1, "model");
+            curl_mime_data(field1, model_name, CURL_ZERO_TERMINATED);
+        }
+        if((field2 = curl_mime_addpart(form))) {
+            curl_mime_name(field2, "file");
+            curl_mime_filedata(field2, filename);
+        }
+        switch_curl_easy_setopt(curl_handle, CURLOPT_MIMEPOST, form);
+    }
+
+    switch_curl_easy_setopt(curl_handle, CURLOPT_URL, globals->api_url);
+
+    curl_ret = switch_curl_easy_perform(curl_handle);
+    if(!curl_ret) {
+        switch_curl_easy_getinfo(curl_handle, CURLINFO_RESPONSE_CODE, &http_resp);
+        if(!http_resp) { switch_curl_easy_getinfo(curl_handle, CURLINFO_HTTP_CONNECTCODE, &http_resp); }
+    } else {
+        /* transport failure: the libcurl error code is reported in place of an HTTP status */
+        http_resp = curl_ret;
+    }
+
+    if(http_resp != 200) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "http-error=[%ld] (%s)\n", http_resp, globals->api_url);
+        status = SWITCH_STATUS_FALSE;
+    }
+
+    if(recv_buffer) {
+        if(switch_buffer_inuse(recv_buffer) > 0) {
+            /* callers read the body as a C string, so an unterminated body is a failure */
+            if(switch_buffer_write(recv_buffer, "\0", 1) == 0) {
+                switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Response doesn't fit into the receive buffer\n");
+                status = SWITCH_STATUS_FALSE;
+            }
+        }
+    }
+
+    if(curl_handle) {
+        switch_curl_easy_cleanup(curl_handle);
+    }
+    if(form) {
+        curl_mime_free(form);
+    }
+    if(headers) {
+        switch_curl_slist_free_all(headers);
+    }
+
+    return status;
+}
diff --git a/src/mod/asr_tts/mod_openai_asr/mod_openai_asr.c b/src/mod/asr_tts/mod_openai_asr/mod_openai_asr.c
new file mode 100644
index 00000000000..88923db07a0
--- /dev/null
+++ b/src/mod/asr_tts/mod_openai_asr/mod_openai_asr.c
@@ -0,0 +1,729 @@
+/*
+ * FreeSWITCH Modular Media Switching Software
Library / Soft-Switch Application
+ * Copyright (C) 2005-2014, Anthony Minessale II
+ *
+ * Version: MPL 1.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * Module Contributor(s):
+ * Konstantin Alexandrin
+ *
+ *
+ * OpenAI Speech-To-Text service for the Freeswitch.
+ * https://platform.openai.com/docs/guides/speech-to-text
+ *
+ * Development repository:
+ * https://github.com/akscf/mod_openai_asr
+ *
+ */
+#include "mod_openai_asr.h"
+
+globals_t globals;
+
+SWITCH_MODULE_LOAD_FUNCTION(mod_openai_asr_load);
+SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_openai_asr_shutdown);
+SWITCH_MODULE_DEFINITION(mod_openai_asr, mod_openai_asr_load, mod_openai_asr_shutdown, NULL);
+
+/*
+ * Per-session worker thread.
+ * Drains asr_ctx->q_audio into a chunk buffer; once the silence timeout
+ * expires or the chunk buffer is full, the chunk is written to a file with
+ * chunk_write(), sent through curl_perform() and the resulting text (or
+ * "[transcription failed]") is pushed to asr_ctx->q_text.
+ * Holds a reference on asr_ctx (refs) while running and decrements
+ * globals.active_threads on exit.
+ */
+static void *SWITCH_THREAD_FUNC transcribe_thread(switch_thread_t *thread, void *obj) {
+    volatile asr_ctx_t *_ref = (asr_ctx_t *)obj;
+    asr_ctx_t *asr_ctx = (asr_ctx_t *)_ref;
+    switch_status_t status = SWITCH_STATUS_FALSE;
+    switch_buffer_t *chunk_buffer = NULL;
+    switch_buffer_t *curl_recv_buffer = NULL;
+    switch_memory_pool_t *pool = NULL;
+    cJSON *json = NULL;
+    time_t sentence_timeout = 0;
+    uint32_t schunks = 0;
+    uint32_t chunk_buffer_size = 0;
+    uint8_t fl_cbuff_overflow = SWITCH_FALSE;
+    void *pop = NULL;
+
+    switch_mutex_lock(asr_ctx->mutex);
+    asr_ctx->refs++;
+    switch_mutex_unlock(asr_ctx->mutex);
+
+    if(switch_core_new_memory_pool(&pool) != SWITCH_STATUS_SUCCESS) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "switch_core_new_memory_pool()\n");
+        goto out;
+    }
+    if(switch_buffer_create_dynamic(&curl_recv_buffer, 1024, 2048, 8192) != SWITCH_STATUS_SUCCESS) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_buffer_create_dynamic()\n");
+        goto out;
+    }
+
+    while(SWITCH_TRUE) {
+        if(globals.fl_shutdown || asr_ctx->fl_destroyed) {
+            break;
+        }
+        /* the chunk size is published by asr_feed() once the first frame arrives */
+        if(chunk_buffer_size == 0) {
+            switch_mutex_lock(asr_ctx->mutex);
+            chunk_buffer_size = asr_ctx->chunk_buffer_size;
+            switch_mutex_unlock(asr_ctx->mutex);
+
+            if(chunk_buffer_size > 0) {
+                if(switch_buffer_create(pool, &chunk_buffer, chunk_buffer_size) != SWITCH_STATUS_SUCCESS) {
+                    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_CRIT, "switch_buffer_create()\n");
+                    break;
+                }
+                switch_buffer_zero(chunk_buffer);
+            }
+            goto timer_next;
+        }
+
+        fl_cbuff_overflow = SWITCH_FALSE;
+        while(switch_queue_trypop(asr_ctx->q_audio, &pop) == SWITCH_STATUS_SUCCESS) {
+            xdata_buffer_t *audio_buffer = (xdata_buffer_t *)pop;
+            if(globals.fl_shutdown || asr_ctx->fl_destroyed ) {
+                xdata_buffer_free(&audio_buffer);
+                break;
+            }
+            if(audio_buffer && audio_buffer->len) {
+                switch_size_t wr = switch_buffer_write(chunk_buffer, audio_buffer->data, audio_buffer->len);
+                /* wr == 0: the frame did not fit (it is dropped); either way the chunk must be flushed */
+                if(wr == 0 || wr >= chunk_buffer_size) {
+                    fl_cbuff_overflow = SWITCH_TRUE;
+                    /* release the popped frame before leaving the loop, otherwise it leaks */
+                    xdata_buffer_free(&audio_buffer);
+                    break;
+                }
+                schunks++;
+            }
+            xdata_buffer_free(&audio_buffer);
+        }
+
+        if(fl_cbuff_overflow) {
+            /* a timestamp in the past forces an immediate flush below */
+            sentence_timeout = 1;
+        } else {
+            if(schunks && asr_ctx->vad_state == SWITCH_VAD_STATE_STOP_TALKING) {
+                if(!sentence_timeout) {
+                    sentence_timeout = asr_ctx->silence_sec + switch_epoch_time_now(NULL);
+                }
+            }
+            if(sentence_timeout && (asr_ctx->vad_state == SWITCH_VAD_STATE_START_TALKING || asr_ctx->vad_state == SWITCH_VAD_STATE_TALKING)) {
+                sentence_timeout = 0;
+            }
+        }
+
+        if(sentence_timeout && sentence_timeout <= switch_epoch_time_now(NULL)) {
+            const void *chunk_buffer_ptr = NULL;
+            const void *http_response_ptr = NULL;
+            uint32_t buf_len = 0, http_recv_len = 0, stt_failed = 0;
+            char *chunk_fname = NULL;
+
+            if((buf_len = switch_buffer_peek_zerocopy(chunk_buffer, &chunk_buffer_ptr)) > 0 && chunk_buffer_ptr) {
+                chunk_fname = chunk_write((switch_byte_t *)chunk_buffer_ptr, buf_len, asr_ctx->channels, asr_ctx->samplerate, globals.opt_encoding);
+            }
+            if(chunk_fname) {
+                /* never reuse the outcome of a previous request if the loop below runs zero times */
+                status = SWITCH_STATUS_FALSE;
+                for(uint32_t rqtry = 0; rqtry < asr_ctx->retries_on_error; rqtry++) {
+                    switch_buffer_zero(curl_recv_buffer);
+                    status = curl_perform(curl_recv_buffer, asr_ctx->opt_api_key, asr_ctx->opt_model, chunk_fname, &globals);
+                    if(status == SWITCH_STATUS_SUCCESS || globals.fl_shutdown || asr_ctx->fl_destroyed) { break; }
+                    switch_yield(1000);
+                }
+
+                http_recv_len = switch_buffer_peek_zerocopy(curl_recv_buffer, &http_response_ptr);
+                if(status == SWITCH_STATUS_SUCCESS) {
+                    if(http_response_ptr && http_recv_len) {
+                        char *txt = parse_response((char *)http_response_ptr, NULL);
+#ifdef MOD_OPENAI_ASR_DEBUG
+                        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Service response [%s]\n", (char *)http_response_ptr);
+                        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Text [%s]\n", txt ? txt : "null");
+#endif
+                        if(!txt) txt = strdup("");
+                        if(switch_queue_trypush(asr_ctx->q_text, txt) == SWITCH_STATUS_SUCCESS) {
+                            switch_mutex_lock(asr_ctx->mutex);
+                            asr_ctx->transcription_results++;
+                            switch_mutex_unlock(asr_ctx->mutex);
+                        } else {
+                            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Queue is full!\n");
+                            switch_safe_free(txt);
+                        }
+                    } else {
+                        stt_failed = 1;
+                        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Empty service response!\n");
+                    }
+                } else {
+                    stt_failed = 1;
+                    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to perform request!\n");
+                }
+
+                if(stt_failed) {
+                    char *txt = strdup("[transcription failed]");
+                    if(switch_queue_trypush(asr_ctx->q_text, txt) == SWITCH_STATUS_SUCCESS) {
+                        switch_mutex_lock(asr_ctx->mutex);
+                        asr_ctx->transcription_results++;
+                        switch_mutex_unlock(asr_ctx->mutex);
+                    } else {
+                        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Queue is full!\n");
+                        switch_safe_free(txt);
+                    }
+                }
+
+                schunks = 0;
+                sentence_timeout = 0;
+                unlink(chunk_fname);
+                switch_safe_free(chunk_fname);
+                switch_buffer_zero(chunk_buffer);
+            }
+        }
+
+        timer_next:
+        switch_yield(10000);
+    }
+
+out:
+    if(json != NULL) {
+        cJSON_Delete(json);
+    }
+    if(curl_recv_buffer) {
+        switch_buffer_destroy(&curl_recv_buffer);
+    }
+    if(chunk_buffer) {
+        switch_buffer_destroy(&chunk_buffer);
+    }
+    if(pool) {
+        switch_core_destroy_memory_pool(&pool);
+    }
+
+    switch_mutex_lock(asr_ctx->mutex);
+    if(asr_ctx->refs > 0) asr_ctx->refs--;
+    switch_mutex_unlock(asr_ctx->mutex);
+
+    switch_mutex_lock(globals.mutex);
+    if(globals.active_threads) globals.active_threads--;
+    switch_mutex_unlock(globals.mutex);
+
+    return NULL;
+}
+
+// ---------------------------------------------------------------------------------------------------------------------------------------------
+static switch_status_t asr_open(switch_asr_handle_t *ah, const char *codec, int samplerate, const char *dest, switch_asr_flag_t *flags) {
+    switch_status_t status = SWITCH_STATUS_SUCCESS;
+    switch_threadattr_t *attr = NULL;
+    switch_thread_t *thread = NULL;
+    asr_ctx_t *asr_ctx = NULL;
+
+    if(strcmp(codec, "L16") !=0) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unsupported encoding (%s)\n", codec);
+        switch_goto_status(SWITCH_STATUS_FALSE, out);
+    }
+
+    if((asr_ctx = switch_core_alloc(ah->memory_pool, sizeof(asr_ctx_t))) == NULL) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_core_alloc()\n");
+        switch_goto_status(SWITCH_STATUS_GENERR, out);
+    }
+
+    asr_ctx->channels = 1;
+    asr_ctx->chunk_buffer_size = 0;
+    asr_ctx->samplerate = samplerate;
+    asr_ctx->silence_sec = globals.speech_silence_sec;
+    asr_ctx->retries_on_error = globals.retries_on_error;
+
+    asr_ctx->opt_model = globals.opt_model;
+    asr_ctx->opt_api_key = globals.api_key;
+
+    if((status = switch_mutex_init(&asr_ctx->mutex, SWITCH_MUTEX_NESTED, ah->memory_pool)) != SWITCH_STATUS_SUCCESS) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_mutex_init()\n");
+
switch_goto_status(SWITCH_STATUS_GENERR, out);
+    }
+
+    /* without both queues the session cannot work, so a failed creation aborts the open */
+    if(switch_queue_create(&asr_ctx->q_audio, QUEUE_SIZE, ah->memory_pool) != SWITCH_STATUS_SUCCESS) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_queue_create()\n");
+        switch_goto_status(SWITCH_STATUS_GENERR, out);
+    }
+    if(switch_queue_create(&asr_ctx->q_text, QUEUE_SIZE, ah->memory_pool) != SWITCH_STATUS_SUCCESS) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_queue_create()\n");
+        switch_goto_status(SWITCH_STATUS_GENERR, out);
+    }
+
+    asr_ctx->vad_buffer = NULL;
+    asr_ctx->frame_len = 0;
+    asr_ctx->vad_buffer_size = 0;
+    asr_ctx->vad_stored_frames = 0;
+    asr_ctx->fl_vad_first_cycle = SWITCH_TRUE;
+
+    if((asr_ctx->vad = switch_vad_init(asr_ctx->samplerate, asr_ctx->channels)) == NULL) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_vad_init()\n");
+        switch_goto_status(SWITCH_STATUS_GENERR, out);
+    }
+    switch_vad_set_mode(asr_ctx->vad, -1);
+    switch_vad_set_param(asr_ctx->vad, "debug", globals.fl_vad_debug);
+    if(globals.vad_silence_ms > 0) { switch_vad_set_param(asr_ctx->vad, "silence_ms", globals.vad_silence_ms); }
+    if(globals.vad_voice_ms > 0) { switch_vad_set_param(asr_ctx->vad, "voice_ms", globals.vad_voice_ms); }
+    if(globals.vad_threshold > 0) { switch_vad_set_param(asr_ctx->vad, "thresh", globals.vad_threshold); }
+
+    ah->private_info = asr_ctx;
+
+    /* counted before the thread starts; the thread decrements it on exit */
+    switch_mutex_lock(globals.mutex);
+    globals.active_threads++;
+    switch_mutex_unlock(globals.mutex);
+
+    switch_threadattr_create(&attr, ah->memory_pool);
+    switch_threadattr_detach_set(attr, 1);
+    switch_threadattr_stacksize_set(attr, SWITCH_THREAD_STACKSIZE);
+    if(switch_thread_create(&thread, attr, transcribe_thread, asr_ctx, ah->memory_pool) != SWITCH_STATUS_SUCCESS) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_thread_create()\n");
+
+        /* no thread will ever decrement the counter, so undo the increment here;
+         * otherwise module shutdown would wait forever */
+        switch_mutex_lock(globals.mutex);
+        if(globals.active_threads) globals.active_threads--;
+        switch_mutex_unlock(globals.mutex);
+
+        switch_vad_destroy(&asr_ctx->vad);
+        status = SWITCH_STATUS_GENERR;
+    }
+
+out:
+    return status;
+}
+
+/*
+ * Closes the ASR session: flags the context as aborted/destroyed, waits
+ * (polling every 100 ms) until no thread holds a reference on it, then drains
+ * and terminates both queues, destroys the VAD and its buffer and marks the
+ * handle with SWITCH_ASR_FLAG_CLOSED.
+ */
+static switch_status_t asr_close(switch_asr_handle_t *ah, switch_asr_flag_t *flags) {
+    asr_ctx_t *asr_ctx = (asr_ctx_t *)ah->private_info;
+    uint8_t fl_wloop = SWITCH_TRUE;
+
+    assert(asr_ctx != NULL);
+
+    asr_ctx->fl_abort = SWITCH_TRUE;
+    asr_ctx->fl_destroyed = SWITCH_TRUE;
+
+    switch_mutex_lock(asr_ctx->mutex);
+    fl_wloop = (asr_ctx->refs != 0);
+    switch_mutex_unlock(asr_ctx->mutex);
+
+    if(fl_wloop) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Waiting for unlock (refs=%d)...\n", asr_ctx->refs);
+        while(fl_wloop) {
+            switch_mutex_lock(asr_ctx->mutex);
+            fl_wloop = (asr_ctx->refs != 0);
+            switch_mutex_unlock(asr_ctx->mutex);
+            switch_yield(100000);
+        }
+    }
+
+    if(asr_ctx->q_audio) {
+        xdata_buffer_queue_clean(asr_ctx->q_audio);
+        switch_queue_term(asr_ctx->q_audio);
+    }
+    if(asr_ctx->q_text) {
+        text_queue_clean(asr_ctx->q_text);
+        switch_queue_term(asr_ctx->q_text);
+    }
+    if(asr_ctx->vad) {
+        switch_vad_destroy(&asr_ctx->vad);
+    }
+    if(asr_ctx->vad_buffer) {
+        switch_buffer_destroy(&asr_ctx->vad_buffer);
+    }
+
+    switch_set_flag(ah, SWITCH_ASR_FLAG_CLOSED);
+
+    return SWITCH_STATUS_SUCCESS;
+}
+
+static switch_status_t asr_feed(switch_asr_handle_t *ah, void *data, unsigned int data_len, switch_asr_flag_t *flags) {
+    asr_ctx_t *asr_ctx = (asr_ctx_t *) ah->private_info;
+    switch_vad_state_t vad_state = 0;
+    uint8_t fl_has_audio = SWITCH_FALSE;
+
+    assert(asr_ctx != NULL);
+
+    if(switch_test_flag(ah, SWITCH_ASR_FLAG_CLOSED)) {
+        return SWITCH_STATUS_BREAK;
+    }
+    if(asr_ctx->fl_destroyed || asr_ctx->fl_abort) {
+        return SWITCH_STATUS_BREAK;
+    }
+    if(asr_ctx->fl_pause) {
+        return SWITCH_STATUS_SUCCESS;
+    }
+    if(!data || !data_len) {
+        return SWITCH_STATUS_BREAK;
+    }
+
+    if(data_len > 0 && asr_ctx->frame_len == 0) {
+        switch_mutex_lock(asr_ctx->mutex);
+        asr_ctx->frame_len = data_len;
+        asr_ctx->vad_buffer_size = asr_ctx->frame_len * VAD_STORE_FRAMES;
+        asr_ctx->chunk_buffer_size = asr_ctx->samplerate * globals.speech_max_sec;
+        switch_mutex_unlock(asr_ctx->mutex);
+
+        if(switch_buffer_create(ah->memory_pool, &asr_ctx->vad_buffer, asr_ctx->vad_buffer_size) != SWITCH_STATUS_SUCCESS) {
+            asr_ctx->vad_buffer_size = 0;
+            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "switch_buffer_create()\n");
+        }
+    }
+
+    if(asr_ctx->vad_buffer_size) {
+        if(asr_ctx->vad_state == SWITCH_VAD_STATE_STOP_TALKING || (asr_ctx->vad_state == vad_state && vad_state == SWITCH_VAD_STATE_NONE)) {
+            if(data_len <= asr_ctx->frame_len) {
+                if(asr_ctx->vad_stored_frames >= VAD_STORE_FRAMES) {
+
switch_buffer_zero(asr_ctx->vad_buffer);
+                    asr_ctx->vad_stored_frames = 0;
+                    asr_ctx->fl_vad_first_cycle = SWITCH_FALSE;
+                }
+                switch_buffer_write(asr_ctx->vad_buffer, data, MIN(asr_ctx->frame_len, data_len));
+                asr_ctx->vad_stored_frames++;
+            }
+        }
+
+        vad_state = switch_vad_process(asr_ctx->vad, (int16_t *)data, (data_len / sizeof(int16_t)));
+        if(vad_state == SWITCH_VAD_STATE_START_TALKING) {
+            asr_ctx->vad_state = vad_state;
+            fl_has_audio = SWITCH_TRUE;
+        } else if (vad_state == SWITCH_VAD_STATE_STOP_TALKING) {
+            asr_ctx->vad_state = vad_state;
+            fl_has_audio = SWITCH_FALSE;
+            switch_vad_reset(asr_ctx->vad);
+        } else if (vad_state == SWITCH_VAD_STATE_TALKING) {
+            asr_ctx->vad_state = vad_state;
+            fl_has_audio = SWITCH_TRUE;
+        }
+    } else {
+        /* no VAD pre-buffer available: every frame is forwarded */
+        fl_has_audio = SWITCH_TRUE;
+    }
+
+    if(fl_has_audio) {
+        if(vad_state == SWITCH_VAD_STATE_START_TALKING && asr_ctx->vad_stored_frames > 0) {
+            /* speech just started: prepend the most recent pre-buffered frames to this frame */
+            xdata_buffer_t *tau_buf = NULL;
+            const void *ptr = NULL;
+            switch_size_t vblen = 0;
+            uint32_t rframes = 0, rlen = 0;
+            int ofs = 0;
+
+            if((vblen = switch_buffer_peek_zerocopy(asr_ctx->vad_buffer, &ptr)) && ptr && vblen > 0) {
+                /* byte-typed view so the offsets below do not rely on void-pointer arithmetic */
+                const switch_byte_t *vad_data = (const switch_byte_t *)ptr;
+
+                rframes = (asr_ctx->vad_stored_frames >= VAD_RECOVERY_FRAMES ? VAD_RECOVERY_FRAMES : (asr_ctx->fl_vad_first_cycle ? asr_ctx->vad_stored_frames : VAD_RECOVERY_FRAMES));
+                rlen = (rframes * asr_ctx->frame_len);
+                /* signed difference computed explicitly instead of relying on unsigned wrap-around */
+                ofs = ((int)vblen - (int)rlen);
+
+                if(ofs < 0) {
+                    /* fewer bytes stored than requested: the missing head is taken from the end of
+                     * the buffer storage.
+                     * NOTE(review): presumably these are frames left from the previous fill cycle - confirm */
+                    uint32_t hdr_sz = -ofs;
+                    uint32_t hdr_ofs = (asr_ctx->vad_buffer_size - hdr_sz);
+
+                    switch_zmalloc(tau_buf, sizeof(xdata_buffer_t));
+
+                    tau_buf->len = (hdr_sz + vblen + data_len);
+                    switch_malloc(tau_buf->data, tau_buf->len);
+
+                    memcpy(tau_buf->data, vad_data + hdr_ofs, hdr_sz);
+                    memcpy(tau_buf->data + hdr_sz , vad_data, vblen);
+                    /* hdr_sz + vblen == rlen, so the current frame lands right behind the recovered audio */
+                    memcpy(tau_buf->data + rlen, data, data_len);
+
+                    if(switch_queue_trypush(asr_ctx->q_audio, tau_buf) != SWITCH_STATUS_SUCCESS) {
+                        xdata_buffer_free(&tau_buf);
+                    }
+
+                    switch_buffer_zero(asr_ctx->vad_buffer);
+                    asr_ctx->vad_stored_frames = 0;
+                } else {
+                    switch_zmalloc(tau_buf, sizeof(xdata_buffer_t));
+
+                    tau_buf->len = (rlen + data_len);
+                    switch_malloc(tau_buf->data, tau_buf->len);
+
+                    memcpy(tau_buf->data, vad_data + ofs, rlen);
+                    memcpy(tau_buf->data + rlen, data, data_len);
+
+                    if(switch_queue_trypush(asr_ctx->q_audio, tau_buf) != SWITCH_STATUS_SUCCESS) {
+                        xdata_buffer_free(&tau_buf);
+                    }
+
+                    switch_buffer_zero(asr_ctx->vad_buffer);
+                    asr_ctx->vad_stored_frames = 0;
+                }
+            }
+        } else {
+            xdata_buffer_push(asr_ctx->q_audio, data, data_len);
+        }
+    }
+
+    return SWITCH_STATUS_SUCCESS;
+}
+
+static switch_status_t asr_check_results(switch_asr_handle_t *ah, switch_asr_flag_t *flags) {
+    asr_ctx_t *asr_ctx = (asr_ctx_t *)ah->private_info;
+
+    assert(asr_ctx != NULL);
+
+    if(asr_ctx->fl_pause) {
+        return SWITCH_STATUS_FALSE;
+    }
+
+    return (asr_ctx->transcription_results > 0 ?
SWITCH_STATUS_SUCCESS : SWITCH_STATUS_FALSE);
+}
+
+/*
+ * Hands the next queued transcription to the caller through *xmlstr and
+ * decrements the pending-results counter. Returns SWITCH_STATUS_FALSE when
+ * nothing is queued.
+ * NOTE(review): the string is heap-allocated by the worker thread; presumably
+ * the caller is expected to free it - confirm.
+ */
+static switch_status_t asr_get_results(switch_asr_handle_t *ah, char **xmlstr, switch_asr_flag_t *flags) {
+    asr_ctx_t *asr_ctx = (asr_ctx_t *)ah->private_info;
+    switch_status_t status = SWITCH_STATUS_FALSE;
+    void *pop = NULL;
+
+    assert(asr_ctx != NULL);
+
+    if(switch_queue_trypop(asr_ctx->q_text, &pop) == SWITCH_STATUS_SUCCESS) {
+        if(pop) {
+            *xmlstr = (char *)pop;
+            status = SWITCH_STATUS_SUCCESS;
+
+            switch_mutex_lock(asr_ctx->mutex);
+            if(asr_ctx->transcription_results > 0) asr_ctx->transcription_results--;
+            switch_mutex_unlock(asr_ctx->mutex);
+        }
+    }
+
+    return status;
+}
+
+/* Records that input timers were requested (sets fl_start_timers). */
+static switch_status_t asr_start_input_timers(switch_asr_handle_t *ah) {
+    asr_ctx_t *asr_ctx = (asr_ctx_t *)ah->private_info;
+
+    assert(asr_ctx != NULL);
+
+    asr_ctx->fl_start_timers = SWITCH_TRUE;
+
+    return SWITCH_STATUS_SUCCESS;
+}
+
+/* Pauses the session: asr_feed() ignores audio while fl_pause is set. */
+static switch_status_t asr_pause(switch_asr_handle_t *ah) {
+    asr_ctx_t *asr_ctx = (asr_ctx_t *)ah->private_info;
+
+    assert(asr_ctx != NULL);
+
+    asr_ctx->fl_pause = SWITCH_TRUE;
+
+    return SWITCH_STATUS_SUCCESS;
+}
+
+/* Resumes a paused session (clears fl_pause). */
+static switch_status_t asr_resume(switch_asr_handle_t *ah) {
+    asr_ctx_t *asr_ctx = (asr_ctx_t *)ah->private_info;
+
+    assert(asr_ctx != NULL);
+
+    asr_ctx->fl_pause = SWITCH_FALSE;
+
+    return SWITCH_STATUS_SUCCESS;
+}
+
+/*
+ * Per-session text parameters (names are case-insensitive):
+ *   lang, model, key - copied into the handle's memory pool
+ *   silence          - silence timeout in seconds; negative values are ignored
+ * A NULL name or value leaves the session unchanged.
+ */
+static void asr_text_param(switch_asr_handle_t *ah, char *param, const char *val) {
+    asr_ctx_t *asr_ctx = (asr_ctx_t *)ah->private_info;
+
+    assert(asr_ctx != NULL);
+
+    if(!param) {
+        return;
+    }
+
+    if(strcasecmp(param, "lang") == 0) {
+        if(val) asr_ctx->opt_lang = switch_core_strdup(ah->memory_pool, val);
+    } else if(strcasecmp(param, "model") == 0) {
+        if(val) asr_ctx->opt_model = switch_core_strdup(ah->memory_pool, val);
+    } else if(strcasecmp(param, "key") == 0) {
+        if(val) asr_ctx->opt_api_key = switch_core_strdup(ah->memory_pool, val);
+    } else if(strcasecmp(param, "silence") == 0) {
+        if(val) {
+            int sec = atoi(val);
+            /* silence_sec is unsigned: a negative value would wrap to a huge timeout */
+            if(sec >= 0) asr_ctx->silence_sec = (uint32_t)sec;
+        }
+    }
+}
+
+/* Numeric parameters are accepted and ignored. */
+static void asr_numeric_param(switch_asr_handle_t *ah, char *param, int val) {
+}
+
+/* Float parameters are accepted and ignored. */
+static void asr_float_param(switch_asr_handle_t *ah, char *param, double val) {
+}
+
+/* Grammars are not used by this module; the call always succeeds. */
+static switch_status_t asr_load_grammar(switch_asr_handle_t *ah, const char *grammar, const char *name) {
+    return SWITCH_STATUS_SUCCESS;
+}
+
+/* Grammars are not used by this module; the call always succeeds. */
+static switch_status_t asr_unload_grammar(switch_asr_handle_t *ah, const char *name) {
+    return SWITCH_STATUS_SUCCESS;
+}
+
+// ---------------------------------------------------------------------------------------------------------------------------------------------
+#define CMD_SYNTAX "path_to/filename.(mp3|wav) [key=altkey model=altModel]\n"
+SWITCH_STANDARD_API(openai_asr_cmd_handler) {
+    switch_status_t status = 0;
+    char *mycmd = NULL, *argv[10] = { 0 }; int argc = 0;
+    switch_buffer_t *recv_buf = NULL;
+    const void *response_ptr = NULL;
+    char *opt_api_key = globals.api_key;
+    char *opt_model = globals.opt_model;
+    char *file_name = NULL, *file_ext = NULL;
+    uint32_t recv_len = 0;
+
+    if (!zstr(cmd)) {
+        mycmd = strdup(cmd);
+        switch_assert(mycmd);
+        argc = switch_separate_string(mycmd, ' ', argv, (sizeof(argv) / sizeof(argv[0])));
+    }
+    if(argc == 0) {
+        goto usage;
+    }
+
+    file_name = argv[0];
+    if(switch_file_exists(file_name, NULL) != SWITCH_STATUS_SUCCESS) {
+        stream->write_function(stream, "-ERR: file not found (%s)\n", file_name);
+        goto out;
+    }
+
+    file_ext = strrchr(file_name, '.');
+    if(!file_ext) {
+        stream->write_function(stream, "-ERR: unsupported file encoding (null)\n");
+        goto out;
+    }
+
+    file_ext++;
+    if(strcasecmp("mp3", file_ext) && strcasecmp("wav", file_ext)) {
+        stream->write_function(stream, "-ERR: unsupported file encoding (%s)\n", file_ext);
+        goto out;
+    }
+
+    if(switch_buffer_create_dynamic(&recv_buf, 1024, 2048, 8192) != SWITCH_STATUS_SUCCESS) {
+        stream->write_function(stream, "-ERR: switch_buffer_create_dynamic()\n");
+        goto out;
+    }
+
+    if(argc > 1) {
+        for(int i = 1; i < argc; i++) {
+            char *kvp[2] = { 0 };
+            if(switch_separate_string(argv[i], '=', kvp, 2) >= 2)
{ + if(strcasecmp(kvp[0], "key") == 0) { + if(kvp[1]) opt_api_key = kvp[1]; + } else if(strcasecmp(kvp[0], "model") == 0) { + if(kvp[1]) opt_model = kvp[1]; + } + } + } + } + + status = curl_perform(recv_buf, opt_api_key, opt_model, file_name, &globals); + + recv_len = switch_buffer_peek_zerocopy(recv_buf, &response_ptr); + if(status == SWITCH_STATUS_SUCCESS && response_ptr && recv_len) { + char *txt = parse_response((char *)response_ptr, stream); + if(txt) { + stream->write_function(stream, "+OK: %s\n", txt); + } + switch_safe_free(txt); + } else { + stream->write_function(stream, "-ERR: unable to perform request\n"); + } + + goto out; +usage: + stream->write_function(stream, "-ERR:\nUsage: %s\n", CMD_SYNTAX); + +out: + if(recv_buf) { + switch_buffer_destroy(&recv_buf); + } + + switch_safe_free(mycmd); + return SWITCH_STATUS_SUCCESS; +} + +// --------------------------------------------------------------------------------------------------------------------------------------------- +// main +// --------------------------------------------------------------------------------------------------------------------------------------------- +SWITCH_MODULE_LOAD_FUNCTION(mod_openai_asr_load) { + switch_status_t status = SWITCH_STATUS_SUCCESS; + switch_xml_t cfg, xml, settings, param; + switch_api_interface_t *commands_interface; + switch_asr_interface_t *asr_interface; + + memset(&globals, 0, sizeof(globals)); + switch_mutex_init(&globals.mutex, SWITCH_MUTEX_NESTED, pool); + + if((xml = switch_xml_open_cfg(MOD_CONFIG_NAME, &cfg, NULL)) == NULL) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open configuration: %s\n", MOD_CONFIG_NAME); + switch_goto_status(SWITCH_STATUS_GENERR, out); + } + + if((settings = switch_xml_child(cfg, "settings"))) { + for (param = switch_xml_child(settings, "param"); param; param = param->next) { + char *var = (char *) switch_xml_attr_soft(param, "name"); + char *val = (char *) switch_xml_attr_soft(param, "value"); + + 
if(!strcasecmp(var, "vad-silence-ms")) { + if(val) globals.vad_silence_ms = atoi (val); + } else if(!strcasecmp(var, "vad-voice-ms")) { + if(val) globals.vad_voice_ms = atoi (val); + } else if(!strcasecmp(var, "vad-threshold")) { + if(val) globals.vad_threshold = atoi (val); + } else if(!strcasecmp(var, "vad-debug")) { + if(val) globals.fl_vad_debug = switch_true(val); + } else if(!strcasecmp(var, "api-key")) { + if(val) globals.api_key = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "api-url")) { + if(val) globals.api_url = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "user-agent")) { + if(val) globals.user_agent = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "proxy")) { + if(val) globals.proxy = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "proxy-credentials")) { + if(val) globals.proxy_credentials = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "encoding")) { + if(val) globals.opt_encoding = switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "model")) { + if(val) globals.opt_model= switch_core_strdup(pool, val); + } else if(!strcasecmp(var, "speech-max-sec")) { + if(val) globals.speech_max_sec = atoi(val); + } else if(!strcasecmp(var, "speech-silence-sec")) { + if(val) globals.speech_silence_sec = atoi(val); + } else if(!strcasecmp(var, "request-timeout")) { + if(val) globals.request_timeout = atoi(val); + } else if(!strcasecmp(var, "connect-timeout")) { + if(val) globals.connect_timeout = atoi(val); + } else if(!strcasecmp(var, "log-http-errors")) { + if(val) globals.fl_log_http_errors = switch_true(val); + } else if(!strcasecmp(var, "retries-on-error")) { + if(val) globals.retries_on_error = atoi(val); + } + } + } + + if(!globals.api_url) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing required parameter: api-url\n"); + switch_goto_status(SWITCH_STATUS_GENERR, out); + } + + globals.opt_encoding = globals.opt_encoding ? 
globals.opt_encoding : "wav"; + globals.speech_max_sec = !globals.speech_max_sec ? 35 : globals.speech_max_sec; + globals.speech_silence_sec = !globals.speech_silence_sec ? 3 : globals.speech_silence_sec; + globals.retries_on_error = !globals.retries_on_error ? 1 : globals.retries_on_error; + + globals.tmp_path = switch_core_sprintf(pool, "%s%sopenai-asr-cache", SWITCH_GLOBAL_dirs.temp_dir, SWITCH_PATH_SEPARATOR); + if(switch_directory_exists(globals.tmp_path, NULL) != SWITCH_STATUS_SUCCESS) { + switch_dir_make(globals.tmp_path, SWITCH_FPROT_OS_DEFAULT, NULL); + } + + *module_interface = switch_loadable_module_create_module_interface(pool, modname); + SWITCH_ADD_API(commands_interface, "openai_asr_transcript", "OpenAI speech-to-text", openai_asr_cmd_handler, CMD_SYNTAX); + + asr_interface = switch_loadable_module_create_interface(*module_interface, SWITCH_ASR_INTERFACE); + asr_interface->interface_name = "openai"; + asr_interface->asr_open = asr_open; + asr_interface->asr_close = asr_close; + asr_interface->asr_feed = asr_feed; + asr_interface->asr_pause = asr_pause; + asr_interface->asr_resume = asr_resume; + asr_interface->asr_check_results = asr_check_results; + asr_interface->asr_get_results = asr_get_results; + asr_interface->asr_start_input_timers = asr_start_input_timers; + asr_interface->asr_text_param = asr_text_param; + asr_interface->asr_numeric_param = asr_numeric_param; + asr_interface->asr_float_param = asr_float_param; + asr_interface->asr_load_grammar = asr_load_grammar; + asr_interface->asr_unload_grammar = asr_unload_grammar; + + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "OpenAI-ASR (%s)\n", MOD_VERSION); +out: + if(xml) { + switch_xml_free(xml); + } + return status; +} + +SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_openai_asr_shutdown) { + uint8_t fl_wloop = SWITCH_TRUE; + + globals.fl_shutdown = SWITCH_TRUE; + + switch_mutex_lock(globals.mutex); + fl_wloop = (globals.active_threads > 0); + switch_mutex_unlock(globals.mutex); + + 
if(fl_wloop) { + switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "Waiting for termination (%d) threads...\n", globals.active_threads); + while(fl_wloop) { + switch_mutex_lock(globals.mutex); + fl_wloop = (globals.active_threads > 0); + switch_mutex_unlock(globals.mutex); + switch_yield(100000); + } + } + + return SWITCH_STATUS_SUCCESS; +} diff --git a/src/mod/asr_tts/mod_openai_asr/mod_openai_asr.h b/src/mod/asr_tts/mod_openai_asr/mod_openai_asr.h new file mode 100644 index 00000000000..230cc68212c --- /dev/null +++ b/src/mod/asr_tts/mod_openai_asr/mod_openai_asr.h @@ -0,0 +1,110 @@ +/* + * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application + * Copyright (C) 2005-2014, Anthony Minessale II + * + * Version: MPL 1.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. 
+ *
+ * Module Contributor(s):
+ * Konstantin Alexandrin
+ *
+ *
+ */
+#ifndef MOD_OPENAI_ASR_H
+#define MOD_OPENAI_ASR_H
+
+#include
+#include
+#include
+/* NOTE: MIN/MAX are plain function-like macros; an argument with side effects may be evaluated twice */
+#define MIN(a,b) (((a)<(b))?(a):(b))
+#define MAX(a,b) (((a)>(b))?(a):(b))
+
+#define MOD_CONFIG_NAME "openai_asr.conf"
+#define MOD_VERSION "1.0.4"
+#define QUEUE_SIZE 128
+#define VAD_STORE_FRAMES 64
+#define VAD_RECOVERY_FRAMES 20
+
+//#define MOD_OPENAI_ASR_DEBUG
+/* Module-wide settings and state: zeroed and then filled from the configuration by the module load function. */
+typedef struct {
+    switch_mutex_t *mutex;            // taken around active_threads reads in module shutdown
+    uint32_t active_threads;          // module shutdown waits until this is 0
+    uint32_t speech_max_sec;          // config "speech-max-sec"; 35 when unset
+    uint32_t speech_silence_sec;      // config "speech-silence-sec"; 3 when unset
+    uint32_t vad_silence_ms;          // config "vad-silence-ms"
+    uint32_t vad_voice_ms;            // config "vad-voice-ms"
+    uint32_t vad_threshold;           // config "vad-threshold"
+    uint32_t request_timeout;         // seconds (config "request-timeout")
+    uint32_t connect_timeout;         // seconds (config "connect-timeout")
+    uint32_t retries_on_error;        // config "retries-on-error"; 1 when unset
+    uint8_t fl_vad_debug;             // config "vad-debug"
+    uint8_t fl_shutdown;              // set to SWITCH_TRUE by module shutdown
+    uint8_t fl_log_http_errors;       // config "log-http-errors"
+    char *tmp_path;                   // "<temp_dir>/openai-asr-cache"; chunk_write() creates its files here
+    char *api_key;                    // config "api-key"
+    char *api_url;                    // config "api-url"; the module refuses to load without it
+    char *user_agent;                 // config "user-agent"
+    char *proxy;                      // config "proxy"
+    char *proxy_credentials;          // config "proxy-credentials"
+    char *opt_encoding;               // config "encoding"; "wav" when unset
+    char *opt_model;                  // config "model"
+} globals_t;
+/* Per-handle recognizer state (stored in the ASR handle's private_info). */
+typedef struct {
+    switch_memory_pool_t *pool;
+    switch_vad_t *vad;
+    switch_buffer_t *vad_buffer;
+    switch_mutex_t *mutex;            // held while transcription_results is decremented in asr_get_results()
+    switch_queue_t *q_audio;
+    switch_queue_t *q_text;           // finished transcriptions; popped by asr_get_results()
+    switch_buffer_t *curl_recv_buffer_ref;
+    switch_vad_state_t vad_state;
+    char *opt_lang;                   // text param "lang"
+    char *opt_model;                  // text param "model"
+    char *opt_api_key;                // text param "key"
+    int32_t transcription_results;    // decremented (never below 0) each time a result is popped
+    uint32_t retries_on_error;
+    uint32_t vad_buffer_size;
+    uint32_t vad_stored_frames;
+    uint32_t chunk_buffer_size;
+    uint32_t refs;
+    uint32_t samplerate;
+    uint32_t channels;
+    uint32_t frame_len;
+    uint32_t silence_sec;             // text param "silence" (parsed with atoi)
+    uint8_t fl_start_timers;          // set by asr_start_input_timers()
+    uint8_t fl_pause;                 // set by asr_pause(), cleared by asr_resume()
+    uint8_t fl_vad_first_cycle;
+    uint8_t fl_destroyed;
+    uint8_t fl_abort;
+} asr_ctx_t;
+/* Heap-allocated byte buffer created by xdata_buffer_alloc() and released by xdata_buffer_free(). */
+typedef struct {
+    uint32_t len;                     // number of bytes stored in data
+    switch_byte_t *data;              // private copy of the caller's bytes
+} xdata_buffer_t;
+
+/* curl.c */
+switch_status_t curl_perform(switch_buffer_t *recv_buffer, char *api_key, char *model_name, char *filename, globals_t *globals);
+
+/* utils.c */
+char *chunk_write(switch_byte_t *buf, uint32_t buf_len, uint32_t channels, uint32_t samplerate, const char *file_ext); // returns a heap-allocated file path, or NULL on failure
+switch_status_t xdata_buffer_push(switch_queue_t *queue, switch_byte_t *data, uint32_t data_len);
+switch_status_t xdata_buffer_alloc(xdata_buffer_t **out, switch_byte_t *data, uint32_t data_len);
+void xdata_buffer_free(xdata_buffer_t **buf);
+void xdata_buffer_queue_clean(switch_queue_t *queue);
+void text_queue_clean(switch_queue_t *queue);
+char *parse_response(char *data, switch_stream_handle_t *stream); // returns a strdup()'d "text" value, or NULL
+
+#endif
diff --git a/src/mod/asr_tts/mod_openai_asr/utils.c b/src/mod/asr_tts/mod_openai_asr/utils.c
new file mode 100644
index 00000000000..ed9a620dc2c
--- /dev/null
+++ b/src/mod/asr_tts/mod_openai_asr/utils.c
@@ -0,0 +1,149 @@
+/*
+ * FreeSWITCH Modular Media Switching Software Library / Soft-Switch Application
+ * Copyright (C) 2005-2014, Anthony Minessale II
+ *
+ * Version: MPL 1.1
+ *
+ * The contents of this file are subject to the Mozilla Public License Version
+ * 1.1 (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ * http://www.mozilla.org/MPL/
+ *
+ * Software distributed under the License is distributed on an "AS IS" basis,
+ * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
+ * for the specific language governing rights and limitations under the
+ * License.
+ *
+ * Module Contributor(s):
+ * Konstantin Alexandrin
+ *
+ *
+ */
+#include "mod_openai_asr.h"
+
+extern globals_t globals;
+/* Allocates an xdata_buffer_t holding a private copy of data_len bytes from data; *out receives it (release with xdata_buffer_free). */
+switch_status_t xdata_buffer_alloc(xdata_buffer_t **out, switch_byte_t *data, uint32_t data_len) {
+    xdata_buffer_t *buf = NULL;
+
+    switch_zmalloc(buf, sizeof(xdata_buffer_t));
+    /* with data_len == 0 nothing is copied and buf->data / buf->len are left as switch_zmalloc produced them */
+    if(data_len) {
+        switch_malloc(buf->data, data_len);
+        switch_assert(buf->data);
+
+        buf->len = data_len;
+        memcpy(buf->data, data, data_len);
+    }
+
+    *out = buf;
+    return SWITCH_STATUS_SUCCESS;
+}
+/* Frees the payload and the buffer, then resets *buf to NULL so the caller is not left with a dangling pointer. NULL-safe. */
+void xdata_buffer_free(xdata_buffer_t **buf) {
+    if(buf && *buf) {
+        switch_safe_free((*buf)->data);
+        switch_safe_free(*buf);
+    }
+}
+/* Drains the queue, releasing every xdata_buffer_t still stored in it. NULL or empty queues are ignored. */
+void xdata_buffer_queue_clean(switch_queue_t *queue) {
+    xdata_buffer_t *data = NULL;
+
+    if(!queue || !switch_queue_size(queue)) {
+        return;
+    }
+
+    while(switch_queue_trypop(queue, (void *) &data) == SWITCH_STATUS_SUCCESS) {
+        if(data) { xdata_buffer_free(&data); }
+    }
+}
+/* Copies data into a new xdata_buffer_t and try-pushes it; the copy is released again when the push fails. */
+switch_status_t xdata_buffer_push(switch_queue_t *queue, switch_byte_t *data, uint32_t data_len) {
+    xdata_buffer_t *buff = NULL;
+
+    if(xdata_buffer_alloc(&buff, data, data_len) == SWITCH_STATUS_SUCCESS) {
+        if(switch_queue_trypush(queue, buff) == SWITCH_STATUS_SUCCESS) {
+            return SWITCH_STATUS_SUCCESS;
+        }
+        xdata_buffer_free(&buff);
+    }
+    return SWITCH_STATUS_FALSE;
+}
+/* Writes buf to a new "<tmp_path>/<uuid>.<ext>" file ("wav" when file_ext is NULL). Returns the heap-allocated path (caller frees), or NULL after removing the file on failure. */
+char *chunk_write(switch_byte_t *buf, uint32_t buf_len, uint32_t channels, uint32_t samplerate, const char *file_ext) {
+    switch_status_t status = SWITCH_STATUS_FALSE;
+    switch_size_t len = (buf_len / sizeof(int16_t));
+    switch_file_handle_t fh = { 0 };
+    char *file_name = NULL;
+    char name_uuid[SWITCH_UUID_FORMATTED_LENGTH + 1] = { 0 };
+    int flags = (SWITCH_FILE_FLAG_WRITE | SWITCH_FILE_DATA_SHORT);
+
+    switch_uuid_str((char *)name_uuid, sizeof(name_uuid));
+    file_name = switch_mprintf("%s%s%s.%s", globals.tmp_path, SWITCH_PATH_SEPARATOR, name_uuid, (file_ext == NULL ? "wav" : file_ext) );
+
+    if((status = switch_core_file_open(&fh, file_name, channels, samplerate, flags, NULL)) != SWITCH_STATUS_SUCCESS) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to open file (%s)\n", file_name);
+        goto out;
+    }
+
+    if((status = switch_core_file_write(&fh, buf, &len)) != SWITCH_STATUS_SUCCESS) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to write (%s)\n", file_name);
+        /* no goto here: the handle is open, so fall through to the close below before the failure cleanup */
+    }
+
+    switch_core_file_close(&fh);
+out:
+    if(status != SWITCH_STATUS_SUCCESS) {
+        if(file_name) {
+            unlink(file_name);
+            switch_safe_free(file_name);
+        }
+        return NULL;
+    }
+
+    return file_name;
+}
+/* Drains the queue, free()-ing every popped pointer. NULL or empty queues are ignored. */
+void text_queue_clean(switch_queue_t *queue) {
+    void *data = NULL;
+
+    if(!queue || !switch_queue_size(queue)) {
+        return;
+    }
+
+    while(switch_queue_trypop(queue, (void *)&data) == SWITCH_STATUS_SUCCESS) {
+        switch_safe_free(data);
+    }
+}
+/* Returns a strdup()'d copy of the JSON "text" string (caller frees), or NULL for NULL input, unparsable JSON, an "error" member, or a missing / non-string "text". */
+char *parse_response(char *data, switch_stream_handle_t *stream) {
+    char *result = NULL;
+    cJSON *json = NULL;
+
+    if(!data) {
+        return NULL;
+    }
+    /* parse failures and service errors are logged and, when stream is non-NULL, also reported on it */
+    if(!(json = cJSON_Parse(data))) {
+        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Unable to parse json (%s)\n", data);
+        if(stream) stream->write_function(stream, "-ERR: Unable to parse json (see log)\n");
+    } else {
+        cJSON *jres = cJSON_GetObjectItem(json, "error");
+        if(jres) {
+            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Service returns error (%s)\n", data);
+            if(stream) stream->write_function(stream, "-ERR: Service returns error (see log)\n");
+        } else {
+            cJSON *jtext = cJSON_GetObjectItem(json, "text");
+            if(jtext && jtext->valuestring) { /* valuestring is NULL for non-string items; never hand NULL to strdup() */
+                result = strdup(jtext->valuestring);
+            }
+        }
+    }
+
+    if(json) {
+        cJSON_Delete(json);
+    }
+
+    return result;
+}