From 57e319abece1200263dd88fd6211e9e7730050f3 Mon Sep 17 00:00:00 2001 From: Benjamin Joseph Date: Fri, 30 Sep 2016 10:32:06 -0700 Subject: [PATCH 001/102] comments, calling volume detector at 66hz --- kitsune/tensor/tinytensor_features.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/kitsune/tensor/tinytensor_features.c b/kitsune/tensor/tinytensor_features.c index 1de00fc7..bf2f8674 100644 --- a/kitsune/tensor/tinytensor_features.c +++ b/kitsune/tensor/tinytensor_features.c @@ -327,10 +327,26 @@ static uint8_t add_samples_and_get_mel(int16_t * maxmel,int16_t * avgmel, int16_ _this.speech_detector_counter++; //GET MEL FEATURES (one time slice in the mel spectrogram) tinytensor_features_get_mel_bank(melbank,fr,fi,temp16); + - if( (_this.speech_detector_counter & 0x3)==0 ) { - set_background_energy(fr, fi); - } + /*--------VOLUME CALCULATION CALL -------- + in theory, we are arrive at this point in this function @ 66.6666 Hz + currently: + Fs = 16000Hz, 400 samples per FFT (last 400 samples), @ 66.666 Hz + FFT_SIZE = 512 + + originally + Fs = 16000Hz, 256 samples per FFT (last 256 samples), @ 62.5Hz + FFT_SIZE = 256 + + I think we just normalize based on the number of samples, since Fs is the same + 256 / 400 = 0.64 = -3.87 dBenergy + + and for the purposes of disturbance calculations, let's just say that 62.5 Hz ~ 66.66 Hz and call it good. + ----------------------------------*/ + /***********/ + set_background_energy(fr, fi); + /***********/ //GET MAX temp16 = MIN_INT_16; From e1f1c68189af728d30e4a6c3207fc26b1fea86c2 Mon Sep 17 00:00:00 2001 From: Chris Date: Fri, 30 Sep 2016 11:24:30 -0700 Subject: [PATCH 002/102] don't race the alarm with the voice connection refresh --- kitsune/hlo_audio_tools.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kitsune/hlo_audio_tools.c b/kitsune/hlo_audio_tools.c index b7b44c94..61582053 100644 --- a/kitsune/hlo_audio_tools.c +++ b/kitsune/hlo_audio_tools.c @@ -373,7 +373,9 @@ int hlo_filter_voice_command(hlo_stream_t * input, hlo_stream_t * output, void * light_sensor_power(HIGH_POWER); if (ret < 0) { - stop_led_animation(0, 33); + if( ret != HLO_STREAM_EAGAIN ) { + stop_led_animation(0, 33); + } if (ret == HLO_STREAM_ERROR) { play_led_animation_solid(LED_MAX, LED_MAX, 0, 0, 1, 18, 1); } From 3f021eeacd08b619bb9cc70849b03238686f1082 Mon Sep 17 00:00:00 2001 From: Benjamin Joseph Date: Fri, 30 Sep 2016 16:19:17 -0700 Subject: [PATCH 003/102] removed unused stuff in audiofeatures, lots of cleanup --- kitsune/audio_types.h | 34 +----- kitsune/audiofeatures.c | 149 +++++++++++++-------------- kitsune/tensor/features_types.h | 24 +++++ kitsune/tensor/tinytensor_features.c | 21 ++-- kitsune/tensor/tinytensor_features.h | 15 +-- 5 files changed, 105 insertions(+), 138 deletions(-) create mode 100644 kitsune/tensor/features_types.h diff --git a/kitsune/audio_types.h b/kitsune/audio_types.h index c8299c7b..6e84b0ac 100644 --- a/kitsune/audio_types.h +++ b/kitsune/audio_types.h @@ -13,11 +13,7 @@ #define MIN_CLASSIFICATION_ENERGY (100) -#define AUDIO_FFT_SIZE_2N (8) -#define AUDIO_FFT_SIZE (1 << AUDIO_FFT_SIZE_2N) -#define EXPECTED_AUDIO_SAMPLE_RATE_HZ (AUDIO_SAMPLE_RATE) - -#define SAMPLE_RATE_IN_HZ (EXPECTED_AUDIO_SAMPLE_RATE_HZ / AUDIO_FFT_SIZE) +#define SAMPLE_RATE_IN_HZ (66) #define SAMPLE_PERIOD_IN_MILLISECONDS (1000 / SAMPLE_RATE_IN_HZ) #define NUM_AUDIO_FEATURES (16) @@ -37,39 +33,13 @@ extern "C" { #endif -typedef struct { - uint16_t psd_min_energy; - -} AudioFeaturesOptions_t; - -typedef 
struct { - uint16_t min_energy_classification; -} AudioClassifierOptions_t; - -typedef enum { - segmentCoherent -} ESegmentType_t; typedef struct { - int64_t t1; - int64_t t2; - int32_t duration; - ESegmentType_t type; - -} Segment_t; - -typedef struct { - int64_t samplecount; + int64_t samplecount; int16_t logenergy; - int8_t feats4bit[NUM_AUDIO_FEATURES]; } AudioFeatures_t; -/* FOR SAVING AUDIO / UPLOADING AUDIO - * - * Yes, some of these flags are mutually exclusive. Others aren ot. - */ - #define AUDIO_TRANSFER_FLAG_AUTO_CLOSE_OUTPUT (1 << 0) /* automatically close output stream when done */ diff --git a/kitsune/audiofeatures.c b/kitsune/audiofeatures.c index 5f7e3099..e12f200a 100644 --- a/kitsune/audiofeatures.c +++ b/kitsune/audiofeatures.c @@ -1,10 +1,12 @@ #include "audiofeatures.h" #include "fft.h" +#include #include #include /* abs */ #include "debugutils/debuglog.h" #include #include "hellomath.h" +#include "tensor/features_types.h" #ifdef USED_ON_DESKTOP #define LOGA(...) @@ -15,26 +17,6 @@ #define TOFIX(x,q)\ ((int32_t) ((x) * (float)(1 << (q)))) -/* - How is this all going to work? - -Extract features, one of which is total energy - -If average energy over some period is stable, then - - 1) See if this frame of features is similar to others. - 2) If not similar, store feature vector in memory - 3) Report this frame as being - a. interesting, - b. interesting but already observed (i.e. similar) - c. totally fucking uninteresting (ah, blissful silence) - 4) Some other piece of code will later - - - -Potential pitfalls as is: - - impulse noises may not register at all, but will certainly be noticed by a human - well we'll deal with that later, I can already think of a processing scheme to incorporate this. - */ - /*-------------------------------- * Memory sizes, constants, macros, and related items @@ -45,7 +27,7 @@ #define PSD_SIZE_2N (5) #define PSD_SIZE (1 << PSD_SIZE_2N) -#define SAMPLE_RATE_IN_HZ (EXPECTED_AUDIO_SAMPLE_RATE_HZ / AUDIO_FFT_SIZE) +#define SAMPLE_RATE_IN_HZ (66) #define SAMPLE_PERIOD_IN_MILLISECONDS (1000 / SAMPLE_RATE_IN_HZ) #define ENERGY_BUF_SIZE_2N (4) @@ -56,18 +38,6 @@ #define CHANGE_SIGNAL_BUF_SIZE (1 << CHANGE_SIGNAL_BUF_SIZE_2N) #define CHANGE_SIGNAL_BUF_MASK (CHANGE_SIGNAL_BUF_SIZE - 1) -#define STEADY_STATE_AVERAGING_PERIOD_2N (6) -#define STEADY_STATE_AVERAGING_PERIOD (1 << STEADY_STATE_AVERAGING_PERIOD_2N) - -#ifdef NO_EQUALIZATION - #define STARTUP_PERIOD_IN_MS (0) -#else - //default - #define STARTUP_PERIOD_IN_MS (10000) -#endif - -#define STARTUP_EQUALIZATION_COUNTS (STARTUP_PERIOD_IN_MS / SAMPLE_PERIOD_IN_MILLISECONDS) - #define QFIXEDPOINT (12) #define TRUE (1) @@ -76,10 +46,6 @@ #define MICROPHONE_NOISE_FLOOR_DB (0.0f) -/* Have fun tuning these magic numbers! - Perhaps eventually we will have some pre-canned - data to show you how? 
*/ - //the higher this gets, the less likely you are to be stable static const int16_t k_stable_likelihood_coefficient = TOFIX(1.0,QFIXEDPOINT); @@ -95,11 +61,6 @@ static const int32_t k_min_log_prob = TOFIX(-0.25f,QFIXEDPOINT); static const uint32_t k_stable_counts_to_be_considered_stable = STABLE_TIME_TO_BE_CONSIDERED_STABLE_IN_MILLISECONDS / SAMPLE_PERIOD_IN_MILLISECONDS; -/*-------------------------------- - * forward declarations - *--------------------------------*/ -void fix_window(int16_t fr[], int32_t n); - /*-------------------------------- * Types @@ -111,12 +72,6 @@ typedef enum { numChangeModes } EChangeModes_t; -typedef enum { - incoherent, - coherent, - numCoherencyModes -} ECoherencyModes_t; - typedef struct { int16_t energybuf[ENERGY_BUF_SIZE];//32 bytes int32_t energyaccumulator; @@ -133,29 +88,20 @@ typedef struct { int16_t energyStable; uint32_t stableCount; uint32_t stablePeriodCounter; - EChangeModes_t lastModes[3]; - int64_t modechangeTimes[3]; - uint8_t isValidSteadyStateSegment; - uint16_t psd_min_energy; + uint16_t min_energy; uint8_t statsLastIsStable; int16_t maxenergy; AudioFeatureCallback_t fpCallback; AudioOncePerMinuteDataCallback_t fpOncePerMinuteDataCallback; -} MelFeatures_t; +} AudioFeatures_t; -typedef enum { - eAudioSignalIsNotInteresting, - eAudioSignalIsDiverse, - eAudioSignalIsSimilar -} EAudioSignalSimilarity_t; - /*-------------------------------- * Static Memory Declarations *--------------------------------*/ -static MelFeatures_t _data; +static AudioFeatures_t _data; @@ -169,7 +115,7 @@ void init_background_energy(AudioOncePerMinuteDataCallback_t fpOncePerMinuteCall _data.fpOncePerMinuteDataCallback = fpOncePerMinuteCallback; - _data.psd_min_energy = MIN_ENERGY; + _data.min_energy = MIN_ENERGY; } @@ -218,7 +164,7 @@ static void UpdateEnergyStats(uint8_t isStable,int16_t logTotalEnergyAvg,int16_t //leaving stable mode -- therefore starting a disturbance if (!isStable && _data.statsLastIsStable) { - //LOGI("S->US\r\n"); + DISP("S->US\r\n"); _data.maxenergy = logTotalEnergy; } @@ -230,7 +176,7 @@ static void UpdateEnergyStats(uint8_t isStable,int16_t logTotalEnergyAvg,int16_t //entering stable mode --ending a disturbance if (isStable && !_data.statsLastIsStable ) { - //LOGI("US->S\r\n"); + DISP("US->S\r\n"); data.num_disturbances = 1; } @@ -440,24 +386,68 @@ static void UpdateChangeSignals(EChangeModes_t * pCurrentMode, const int16_t new } -void set_background_energy(const int16_t fr[], const int16_t fi[]) { - //enjoy this nice large stack. 
- //this can all go away if we get fftr to work, and do the - int16_t psd[PSD_SIZE]; +__attribute__((section(".ramcode"))) static void getvolume(int16_t * logTotalEnergy,int16_t * const int16_t fr[],const int16_t fi[],uint16_t min_energy, const int16_t log2scale) { + uint16_t i; + uint16_t ufr; + uint16_t ufi; + uint64_t utemp64; + uint64_t non_weighted_energy = 0; + uint64_t a_weighted_energy = 0; + int32_t temp32; + + const static uint16_t a_weight_q10[128] = { 0, 0, 100, 150, 263, 379, 489, + 510, 725, 763, 823, 859, 896, 934, 963, 994, 1024, 1054, 1085, 1093, + 1101, 1110, 1123, 1136, 1149, 1152, 1156, 1159, 1162, 1166, 1169, + 1172, 1176, 1166, 1170, 1174, 1178, 1182, 1184, 1185, 1187, 1188, + 1189, 1185, 1180, 1176, 1171, 1167, 1162, 1162, 1162, 1162, 1162, + 1162, 1162, 1162, 1161, 1159, 1157, 1156, 1154, 1152, 1151, 1149, + 1146, 1142, 1139, 1136, 1133, 1129, 1126, 1123, 1120, 1116, 1112, + 1107, 1103, 1098, 1094, 1089, 1085, 1081, 1076, 1072, 1067, 1063, + 1059, 1054, 1050, 1046, 1042, 1037, 1033, 1029, 1025, 1021, 1016, + 1012, 1012, 1009, 1005, 1002, 998, 995, 991, 987, 984, 981, 977, + 974, 970, 967, 963, 959, 956, 952, 948, 945, 941, 937, 934, 930, + 927, 923, 920, 916, 913, 913 }; + + uint16_t idx, ifft, iend; + + int16_t idx_shift = FEATURES_FFT_SIZE_2N - 7; + + for (ifft = 1; ifft < FEATURES_FFT_SIZE/2; ifft++) { + utemp64 = 0; + utemp64 += (int32_t)fr[ifft]*(int32_t)fr[ifft]; + utemp64 += (int32_t)fi[ifft]*(int32_t)fi[ifft]; + + idx = ifft >> idx_shift; + kit_assert(idx < 128); + + a_weighted_energy += (utemp64 * a_weight_q10[idx]) >> 10; + non_weighted_energy += utemp64; + } + - uint8_t log2scaleOfRawSignal; - int16_t logTotalEnergy; - int16_t logTotalEnergyAvg; + temp32 = FixedPointLog2Q10(a_weighted_energy + min_energy) - 2 * log2scale*1024; + + if (temp32 > INT16_MAX) { + temp32 = INT16_MAX; + } + + if (temp32 < INT16_MIN) { + temp32 = INT16_MIN; + } + *logTotalEnergy = (int16_t)temp32; + +} + + +void set_background_energy(const int16_t fr[], const int16_t fi[], int16_t log2scale) { + int16_t logTotalEnergy = 0; + int16_t logTotalEnergyAvg = 0; EChangeModes_t currentMode; uint8_t isStable; - /* Normalize time series signal */ - //ScaleInt16Vector(fr,&log2scaleOfRawSignal,AUDIO_FFT_SIZE,RAW_SAMPLES_SCALE); - log2scaleOfRawSignal = 0; - - /* Get PSD of variously spaced non-overlapping frequency windows*/ - logpsdmel(&logTotalEnergy,psd,fr,fi,log2scaleOfRawSignal,_data.psd_min_energy); //psd is now 64, and on a logarithmic scale after 1khz + /* compute volume */ + getvolume(&logTotalEnergy,fr,fi,_data.min_energy,log2scale); /* Determine stability of signal energy order to figure out when to estimate background spectrum */ logTotalEnergyAvg = MovingAverage16(_data.callcounter, logTotalEnergy, _data.energybuf, &_data.energyaccumulator,ENERGY_BUF_MASK,ENERGY_BUF_SIZE_2N); @@ -470,9 +460,9 @@ void set_background_energy(const int16_t fr[], const int16_t fi[]) { isStable = IsStable(currentMode,logTotalEnergyAvg); - //if (c++ == 255) { - LOGA("%d\n",GetAudioEnergyAsDBA(logTotalEnergyAvg)); - //} + if (c++ == 255) { + DISP("vol_energy=%d\r\n",GetAudioEnergyAsDBA(logTotalEnergyAvg)); + } UpdateEnergyStats(isStable,logTotalEnergyAvg,logTotalEnergy); @@ -482,5 +472,4 @@ void set_background_energy(const int16_t fr[], const int16_t fi[]) { /* Update counter. 
It's okay if this one rolls over*/ _data.callcounter++; - } diff --git a/kitsune/tensor/features_types.h b/kitsune/tensor/features_types.h new file mode 100644 index 00000000..cfb4abdd --- /dev/null +++ b/kitsune/tensor/features_types.h @@ -0,0 +1,24 @@ +#ifndef _FEATURESTYPES_H_ +#define _FEATURESTYPES_H_ + +#include + +#define NUM_SAMPLES_TO_RUN_FFT (240) +#define FFT_UNPADDED_SIZE (400) +#define BUF_SIZE_IN_SAMPLES (600) +#define NUM_MEL_BINS (40) +#define MEL_FEAT_BUF_TIME_LEN (157) + +#define FEATURES_FFT_SIZE_2N (9) +#define FEATURES_FFT_SIZE (1 << FEATURES_FFT_SIZE_2N) + +typedef void(*tinytensor_audio_feat_callback_t)(void * context, int16_t * feats); + +typedef enum { + start_speech, + stop_speech +} SpeechTransition_t; + +typedef void(*tinytensor_speech_detector_callback_t)(void * context, SpeechTransition_t transition); + +#endif //FEATURESTYPES_H_ diff --git a/kitsune/tensor/tinytensor_features.c b/kitsune/tensor/tinytensor_features.c index bf2f8674..196e220c 100644 --- a/kitsune/tensor/tinytensor_features.c +++ b/kitsune/tensor/tinytensor_features.c @@ -9,16 +9,11 @@ #include "tinytensor_math.h" -void set_background_energy(const int16_t fr[], const int16_t fi[]); +void set_background_energy(const int16_t fr[], const int16_t fi[], int16_t log2scale); #define USE_BACKGROUND_NORMALIZATION (1) #define BACKGROUND_NOISE_MAX_ATTENUATION (-2048) -//this controls how much less to "descale" the FFT output (NOT USED CURRENTLY) - -#define FFT_SIZE_2N (9) -#define FFT_SIZE (1 << FFT_SIZE_2N) - //0.95 in Q15 #define PREEMPHASIS (31129) @@ -152,7 +147,7 @@ void tinytensor_features_force_voice_activity_detection(void) { #define SPEECH_BIN_START (4) #define SPEECH_BIN_END (16) -#define ENERGY_END (FFT_SIZE/2) +#define ENERGY_END (FEATURES_FFT_SIZE/2) static void do_voice_activity_detection(int16_t * fr,int16_t * fi,int16_t input_scaling) { uint32_t i; int16_t log_energy_frac; @@ -235,8 +230,8 @@ static void do_voice_activity_detection(int16_t * fr,int16_t * fi,int16_t input_ __attribute__((section(".ramcode"))) static uint8_t add_samples_and_get_mel(int16_t * maxmel,int16_t * avgmel, int16_t * melbank, const int16_t * samples, const uint32_t num_samples) { - int16_t fr[FFT_SIZE] = {0}; - int16_t fi[FFT_SIZE] = {0}; + int16_t fr[FEATURES_FFT_SIZE] = {0}; + int16_t fi[FEATURES_FFT_SIZE] = {0}; const int16_t preemphasis_coeff = PREEMPHASIS; uint32_t i; @@ -249,7 +244,7 @@ static uint8_t add_samples_and_get_mel(int16_t * maxmel,int16_t * avgmel, int16_ /* add samples to circular buffer while current pointer is NUM_SAMPLES_TO_RUN_FFT behind the buffer pointer - then we copy the last FFT_UNPADDED_SIZE samples to the FFT buf, zero pad it up to FFT_SIZE + then we copy the last FFT_UNPADDED_SIZE samples to the FFT buf, zero pad it up to FEATURES_FFT_SIZE */ @@ -316,7 +311,7 @@ static uint8_t add_samples_and_get_mel(int16_t * maxmel,int16_t * avgmel, int16_ CHKCYC(" fft prep"); //PERFORM FFT - fft(fr,fi,FFT_SIZE_2N); + fft(fr,fi,FEATURES_FFT_SIZE_2N); CHKCYC("FFT"); @@ -341,11 +336,13 @@ static uint8_t add_samples_and_get_mel(int16_t * maxmel,int16_t * avgmel, int16_ I think we just normalize based on the number of samples, since Fs is the same 256 / 400 = 0.64 = -3.87 dBenergy + + ...and keep in mind that the FFT size is larger than it was before and for the purposes of disturbance calculations, let's just say that 62.5 Hz ~ 66.66 Hz and call it good. 
----------------------------------*/ /***********/ - set_background_energy(fr, fi); + set_background_energy(fr, fi,temp16); /***********/ //GET MAX diff --git a/kitsune/tensor/tinytensor_features.h b/kitsune/tensor/tinytensor_features.h index 2120ba34..5d02987e 100644 --- a/kitsune/tensor/tinytensor_features.h +++ b/kitsune/tensor/tinytensor_features.h @@ -8,22 +8,9 @@ extern "C" { #endif #include "tinytensor_types.h" -#define NUM_SAMPLES_TO_RUN_FFT (240) -#define FFT_UNPADDED_SIZE (400) -#define BUF_SIZE_IN_SAMPLES (600) -#define NUM_MEL_BINS (40) -#define MEL_FEAT_BUF_TIME_LEN (157) +#include "feautures_types.h" -typedef void(*tinytensor_audio_feat_callback_t)(void * context, int16_t * feats); - -typedef enum { - start_speech, - stop_speech -} SpeechTransition_t; - -typedef void(*tinytensor_speech_detector_callback_t)(void * context, SpeechTransition_t transition); - void tinytensor_features_initialize(void * results_context, tinytensor_audio_feat_callback_t results_callback,tinytensor_speech_detector_callback_t speech_detector_callback); void tinytensor_features_deinitialize(void); From 7180d6418e9896a70d9f0b633e916b5504fad4ba Mon Sep 17 00:00:00 2001 From: Benjamin Joseph Date: Fri, 30 Sep 2016 17:04:33 -0700 Subject: [PATCH 004/102] more cleanup, compiles --- kitsune/audio_types.h | 5 +- kitsune/audioclassifier.c | 438 ------------------------- kitsune/audioclassifier.h | 31 -- kitsune/audiofeatures.c | 39 ++- kitsune/audiofeatures.h | 4 +- kitsune/audiosimilarity.c | 431 ------------------------ kitsune/audiosimilarity.h | 25 -- kitsune/fft.c | 1 + kitsune/main/ccs/.cproject | 8 - kitsune/main/ccs/.project | 15 +- kitsune/main/ccs/hlo/audioclassifier.c | 438 ------------------------- kitsune/main/ccs/hlo/audioclassifier.h | 31 -- kitsune/tensor/tinytensor_features.h | 2 +- 13 files changed, 31 insertions(+), 1437 deletions(-) delete mode 100644 kitsune/audioclassifier.c delete mode 100644 kitsune/audioclassifier.h delete mode 100644 kitsune/audiosimilarity.c delete mode 100644 kitsune/audiosimilarity.h delete mode 100644 kitsune/main/ccs/hlo/audioclassifier.c delete mode 100644 kitsune/main/ccs/hlo/audioclassifier.h diff --git a/kitsune/audio_types.h b/kitsune/audio_types.h index 6e84b0ac..6bd91ae5 100644 --- a/kitsune/audio_types.h +++ b/kitsune/audio_types.h @@ -18,7 +18,9 @@ #define NUM_AUDIO_FEATURES (16) -#define OCTOGRAM_SIZE (AUDIO_FFT_SIZE_2N - 1) +#define OCTOGRAM_SIZE (7) +#define AUDIO_FFT_SIZE_2N (OCTOGRAM_SIZE + 1) +#define AUDIO_FFT_SIZE (1 << AUDIO_FFT_SIZE_2N) /* // use simplelink.h instead @@ -71,7 +73,6 @@ typedef struct { uint8_t isValid; } AudioOncePerMinuteData_t; -typedef void (*SegmentAndFeatureCallback_t)(const int16_t * feats, const Segment_t * pSegment); typedef void (*AudioFeatureCallback_t)(const AudioFeatures_t * pfeats); typedef void (*AudioOncePerMinuteDataCallback_t) (const AudioOncePerMinuteData_t * pdata); typedef void (*NotificationCallback_t)(void * context); diff --git a/kitsune/audioclassifier.c b/kitsune/audioclassifier.c deleted file mode 100644 index d8ff0c3a..00000000 --- a/kitsune/audioclassifier.c +++ /dev/null @@ -1,438 +0,0 @@ - -#include "audioclassifier.h" -#include "fft.h" -#include "debugutils/debuglog.h" -#include "debugutils/matmessageutils.h" -#include "hellomath.h" -#include "machinelearning/audiohmm.h" -#include -#include -//#include "uartstdio.h" - -#define CIRCULAR_FEATBUF_SIZE_2N (5) -#define CIRCULAR_BUF_SIZE (1 << CIRCULAR_FEATBUF_SIZE_2N) -#define CIRCULAR_BUF_MASK (CIRCULAR_BUF_SIZE - 1) -#define BUF_SIZE_IN_CHUNK (32) - 
-#define CLASSIFIER_BUF_LEN (16) -#define MAX_NUMBER_CLASSES (5) -#define EXPECTED_NUMBER_OF_CLASSIFIER_INPUTS (NUM_AUDIO_FEATURES) - -#define CLASS_OF_INTEREST_TO_ENABLE_CALLBACK (0) - -#define RECORD_DURATION_IN_MS (10000) -#define RECORD_DURATION_IN_FRAMES (RECORD_DURATION_IN_MS / SAMPLE_PERIOD_IN_MILLISECONDS) - -#define SNORING_LOG_LIK_THRESHOLD_Q10 (600) - - -#ifndef true -#define true (1) -#endif - -#ifndef false -#define false (0) -#endif - - - - -typedef struct { - uint8_t packedbuf[BUF_SIZE_IN_CHUNK][NUM_AUDIO_FEATURES/2];// 32 x 16 = 2^5 * 2^4 = 2^9 = 256 bytes - int16_t energy[BUF_SIZE_IN_CHUNK]; //32 * 2 = 64bytes - int64_t samplecount; // 8 bytes - int16_t maxenergy; // 2 bytes -} AudioFeatureChunk_t; //330 bytes - -typedef struct { - int8_t classifier_feat_buf[CLASSIFIER_BUF_LEN][NUM_AUDIO_FEATURES]; - uint16_t classifier_feat_idx; - - //cicular buffer of incoming data - uint8_t packedbuf[CIRCULAR_BUF_SIZE][NUM_AUDIO_FEATURES/2]; //32 * 8 = 256 bytes - int16_t totalenergy[CIRCULAR_BUF_SIZE];// - - uint16_t chunkbufidx; //current index of chunk to write to, wraps to zero when it >= chunk_buf_size - uint16_t numchunkbuf; //number of chunks written, saturated at chunk_buf_size - - AudioFeatureChunk_t * pchunkbuf; - uint32_t chunk_buf_size; - - - uint16_t incomingidx; - uint16_t numincoming; - - uint8_t isThereAnythingInteresting; - uint8_t isWorthClassifying; - - -} DataBuffer_t; - -typedef struct { - const DataBuffer_t * buf; - uint32_t currentidx; - uint32_t endidx; - uint8_t state; - int8_t unpackedbuffer[BUF_SIZE_IN_CHUNK][NUM_AUDIO_FEATURES]; //32 * 16 * 1 = 512 bytes - -} Encoder_t; - -static const char * k_id_feature_chunk = "feature_chunk"; -static const char * k_id_energy_chunk = "energy_chunk"; - -//"long term storage" -static DataBuffer_t _buffer; -static Classifier_t _classifier; -static Classifier_t _hmm; - - -static inline uint8_t pack_int8_to_int4(const int8_t x) { - const uint8_t sign = x < 0; - - return (x & 0x07) | (sign * 8); -} - -// assumes two's complement architecture (who the heck doesn't do this these days?) 
-static inline void unpack_int4_pair_to_int8(const uint8_t packed, int8_t * upper, int8_t * lower) { - *lower = packed & 0x07; - if (packed & 0x08) { - *lower |= 0xF8; //sign extension - } - - *upper = (packed & 0x70) >> 4; - if (packed & 0x80) { - *upper |= 0xF8; //sign extension - } - - -} - -static void PackFeats(uint8_t * datahead, const int8_t * feats4bit) { - uint8_t i; - uint8_t idx; - - - for (i = 0; i < NUM_AUDIO_FEATURES/2; i++) { - idx = 2*i; - - datahead[i] = pack_int8_to_int4(feats4bit[idx]); - datahead[i] |= (pack_int8_to_int4(feats4bit[idx + 1]) << 4); - } -} - - - -static void UnpackFeats8(int8_t * unpacked8, const uint8_t * datahead) { - uint8_t i; - for (i = 0; i < NUM_AUDIO_FEATURES/2; i++) { - //lsb first - - unpack_int4_pair_to_int8(datahead[i],unpacked8+1,unpacked8); - unpacked8 += 2; - } -} - -#if 0 -static void TestPackUnpack(void) { - int8_t input[NUM_AUDIO_FEATURES] = {-8,-7,-6,-5,-4,-3,-2,-1,0,1,2,3,4,5,6,7}; - int8_t out[NUM_AUDIO_FEATURES] = {0}; - uint8_t packed[NUM_AUDIO_FEATURES/2]; - int foo = 3; - PackFeats(packed, input); - UnpackFeats8(out, packed); - - foo++; - -} -#endif - -static void CopyCircularBufferToPermanentStorage(int64_t samplecount) { - - AudioFeatureChunk_t chunk; - uint16_t idx; - int32_t size1,size2; - uint16_t i; - int16_t max; - - idx = _buffer.incomingidx; //oldest - /* - t2 t1 - ---| |------- - - t1 t2 - |-------------| - */ - - //copy circular buf out in chronological order - size1 = CIRCULAR_BUF_SIZE - idx; - memcpy(&chunk.packedbuf[0][0],&_buffer.packedbuf[idx][0],size1*NUM_AUDIO_FEATURES/2*sizeof(uint8_t)); - memcpy(&chunk.energy[0],&_buffer.totalenergy[idx],size1*sizeof(int16_t)); - - if (size1 < CIRCULAR_BUF_SIZE) { - size2 = idx; - memcpy(&chunk.packedbuf[size1][0],&_buffer.packedbuf[0][0],size2*NUM_AUDIO_FEATURES/2*sizeof(uint8_t)); - memcpy(&chunk.energy[size1],&_buffer.totalenergy[0],size2*sizeof(int16_t)); - } - - /* find max in energy buffer */ - max = MIN_INT_16; - for (i = 0 ; i < CIRCULAR_BUF_SIZE; i++) { - if (_buffer.totalenergy[i] > max) { - max = _buffer.totalenergy[i]; - } - } - chunk.maxenergy = max; - chunk.samplecount = samplecount; - - - memcpy(&_buffer.pchunkbuf[_buffer.chunkbufidx],&chunk,sizeof(chunk)); - _buffer.chunkbufidx++; - - //wrap - if (_buffer.chunkbufidx >= _buffer.chunk_buf_size) { - _buffer.chunkbufidx -= _buffer.chunk_buf_size; - } - - //track how full the buffer is even if you wrapped - //if you wrapped, you're discarding data, but you're still full - if (_buffer.numchunkbuf < _buffer.chunk_buf_size) { - _buffer.numchunkbuf++; - } - -} - - - -void AudioClassifier_SetStorageBuffers(void * buffer, uint32_t buf_size_in_bytes) { - memset(&_buffer,0,sizeof(_buffer)); - - _buffer.pchunkbuf = (AudioFeatureChunk_t *)buffer; - _buffer.chunk_buf_size = buf_size_in_bytes / sizeof(AudioFeatureChunk_t); - -} - -void AudioClassifier_Init(RecordAudioCallback_t recordfunc) { - memset(&_buffer,0,sizeof(_buffer)); - memset(&_classifier,0,sizeof(Classifier_t)); - memset(&_hmm,0,sizeof(_hmm)); - -} - - -void AudioClassifier_DataCallback( AudioFeatures_t * pfeats) { - - uint16_t idx; - - - - /* - - Data comes in, and we save it to a circular buffer. 
- Classification is disabled for now - - */ - - /************************ - THE BUFFERING SECTION - ***********************/ - - //do nothing if we do not have a storage buffer allocated - if (!_buffer.pchunkbuf || _buffer.chunk_buf_size == 0) { - return; - } - - idx = _buffer.incomingidx; - PackFeats(_buffer.packedbuf[idx],pfeats->feats4bit); - - _buffer.totalenergy[idx] = pfeats->logenergy; - - //increment circular buffer index - _buffer.incomingidx++; - _buffer.incomingidx &= CIRCULAR_BUF_MASK; - - //increment until full - if (_buffer.numincoming < CIRCULAR_BUF_SIZE) { - _buffer.numincoming++; - } - - //everything is interesting - _buffer.isThereAnythingInteresting = true; - _buffer.isWorthClassifying = false; //don't classify anything for now - - - //if something interesting happend and the circular buffer is full - //dump it to storage - // if (_buffer.isThereAnythingInteresting == true && - if( _buffer.numincoming == CIRCULAR_BUF_SIZE ) - { - //this may block... hopefully not for too long? - CopyCircularBufferToPermanentStorage(pfeats->samplecount); - _buffer.numincoming = 0; //"empty" the buffer - _buffer.incomingidx = 0; - _buffer.isThereAnythingInteresting = false; - } - -} - -/* sadly this is not stateless, but was the only way to serialize chunks one at a time */ -static uint8_t GetNextMatrixCallback(uint8_t isFirst,const_MatDesc_t * pdesc,void * data) { - - - Encoder_t * encodedata = (Encoder_t *)data; - - const AudioFeatureChunk_t * pchunk; - int16_t * bufptr16 = (int16_t *) &encodedata->unpackedbuffer[0][0]; - const int16_t * beginning16 = (int16_t *) &encodedata->unpackedbuffer[0][0]; - - uint32_t i; - const uint32_t chunk_buf_size = encodedata->buf->chunk_buf_size; - const uint32_t numchunkbuf = encodedata->buf->numchunkbuf; - //assert(encodedata->buf == &_buffer); - - memset(pdesc,0,sizeof(const_MatDesc_t)); - - if (encodedata->buf->numchunkbuf == 0 || encodedata->buf->pchunkbuf == NULL) { - return MAT_MESSAGE_FAIL; //stop - } - - if (isFirst) { - encodedata->currentidx = 0; - encodedata->endidx = encodedata->buf->chunkbufidx; - - if (chunk_buf_size == numchunkbuf) { - encodedata->currentidx = encodedata->buf->chunkbufidx + 1; //oldest untouched - - if (encodedata->currentidx >= chunk_buf_size) { - encodedata->currentidx -= chunk_buf_size; - } - } - - encodedata->state = 1; - } - - pchunk = &encodedata->buf->pchunkbuf[encodedata->currentidx]; - - pdesc->t1 = pchunk->samplecount; - pdesc->t2 = pdesc->t1 + BUF_SIZE_IN_CHUNK*2; //*2 because we are decimating audio samples by 2 - - - if (encodedata->state == 1) { - pdesc->id = k_id_feature_chunk; - - for (i = 0; i < BUF_SIZE_IN_CHUNK; i++) { - UnpackFeats8(encodedata->unpackedbuffer[i], pchunk->packedbuf[i]); - } - -/* - for (int j = 0; j < BUF_SIZE_IN_CHUNK; j++) { - printf("up="); - for (i = 0; i < NUM_AUDIO_FEATURES; i++) { - printf("%d,",encodedata->unpackedbuffer[j][i]); - } - printf("\n"); - - } -*/ - - - pdesc->data.len = BUF_SIZE_IN_CHUNK * NUM_AUDIO_FEATURES; - pdesc->data.type = esint8; - pdesc->data.data.sint8 = &encodedata->unpackedbuffer[0][0]; - pdesc->rows = BUF_SIZE_IN_CHUNK; - pdesc->cols = NUM_AUDIO_FEATURES; - - encodedata->state = 2; - - } - else if (encodedata->state == 2) { - pdesc->id = k_id_energy_chunk; - - //re-use the unpacked buffer, but let's pretend it's 16 bit... 
- for (i = 0; i < BUF_SIZE_IN_CHUNK; i++) { - *bufptr16 = pchunk->energy[i]; - bufptr16++; - } - - /* - for (i = 0; i < BUF_SIZE_IN_CHUNK + 1; i++) { - printf("%d,",beginning16[i]); - } - printf("\n"); - */ - - - pdesc->data.len = BUF_SIZE_IN_CHUNK; - pdesc->data.type = esint16; - pdesc->data.data.sint16 = beginning16; - pdesc->rows = 1; - pdesc->cols = BUF_SIZE_IN_CHUNK; - - encodedata->state = 1; - encodedata->currentidx++; - - //wrap - if (encodedata->currentidx >= chunk_buf_size) { - encodedata->currentidx -= chunk_buf_size; - } - - //have we reached the end? - if (encodedata->currentidx == encodedata->endidx) { - encodedata->state = 0; - return MAT_MESSAGE_DONE; - } - - } - - return MAT_MESSAGE_CONTINUE; -} - -void AudioClassifier_ResetStorageBuffer(void) { - /* Buffer read out? Great, let's reset it */ - - _buffer.chunkbufidx = 0; - _buffer.numchunkbuf = 0; -} - -#include "proto_utils.h" -#include "proto_utils.h" -#include "sys_time.h" -#include "matrix.pb.h" -#include "debugutils/matmessageutils.h" - -#ifdef USED_ON_DESKTOP -uint32_t get_time(void) { - return 0; -} - -bool encode_device_id_string(pb_ostream_t *stream, const pb_field_t *field, void * const *arg) { - const char * k_device_id = "DESKTOP"; - return pb_encode_tag_for_field(stream, field) && pb_encode_string(stream, (uint8_t*)k_device_id, strlen(k_device_id)); -} - -#endif - -void * getMatrixClientMessage() { - //this code leaks references to these, can't have them on the stack - //also makes this function non reentrant - static Encoder_t encoderstruct; - static MatrixClientMessage mess; - static MatrixListEncodeContext_t matrix_list_context; - - memset(&mess, 0, sizeof(mess)); - mess.unix_time = get_time(); - mess.has_unix_time = 1; - mess.has_matrix_payload = 0; - - memset(&matrix_list_context, 0, sizeof(matrix_list_context)); - memset(&encoderstruct, 0, sizeof(encoderstruct)); - encoderstruct.buf = &_buffer; - - matrix_list_context.data = &encoderstruct; - matrix_list_context.func = GetNextMatrixCallback; - - mess.matrix_list.funcs.encode = write_mat_array; - mess.matrix_list.arg = (void *) &matrix_list_context; - mess.device_id.funcs.encode = encode_device_id_string; - - return &mess; -} diff --git a/kitsune/audioclassifier.h b/kitsune/audioclassifier.h deleted file mode 100644 index 1aff3f90..00000000 --- a/kitsune/audioclassifier.h +++ /dev/null @@ -1,31 +0,0 @@ -#ifndef _AUDIOCLASSIFIER_H_ -#define _AUDIOCLASSIFIER_H_ - -#include -#include "audio_types.h" -#include "machinelearning/machine_learning_types.h" - -#ifdef __cplusplus -extern "C" { -#endif - -void AudioClassifier_Init(RecordAudioCallback_t recordfunc); - -void AudioClassifier_SetStorageBuffers(void * buffer, uint32_t buf_size_in_bytes); - -void AudioClassifier_DataCallback(AudioFeatures_t * pfeats); - -uint32_t AudioClassifier_EncodeAudioFeatures(pb_ostream_t * stream, void * encode_data); - -void AudioClassifier_ResetStorageBuffer(void); - -void * getMatrixClientMessage(); - -#ifdef __cplusplus -} -#endif - - - -#endif //#ifndef _AUDIOCLASSIFIER_H_ - diff --git a/kitsune/audiofeatures.c b/kitsune/audiofeatures.c index e12f200a..f49d9443 100644 --- a/kitsune/audiofeatures.c +++ b/kitsune/audiofeatures.c @@ -7,11 +7,12 @@ #include #include "hellomath.h" #include "tensor/features_types.h" - #ifdef USED_ON_DESKTOP #define LOGA(...) 
+#include #else #include "uart_logger.h" +#include "kit_assert.h" #endif #define TOFIX(x,q)\ @@ -83,7 +84,6 @@ typedef struct { int16_t changebuf[CHANGE_SIGNAL_BUF_SIZE]; int32_t logProbOfModes[numChangeModes]; - int32_t logProbOfCoherencyModes[numCoherencyModes]; uint8_t isStable; int16_t energyStable; uint32_t stableCount; @@ -95,13 +95,13 @@ typedef struct { AudioFeatureCallback_t fpCallback; AudioOncePerMinuteDataCallback_t fpOncePerMinuteDataCallback; -} AudioFeatures_t; +} SimpleAudioFeatures_t; /*-------------------------------- * Static Memory Declarations *--------------------------------*/ -static AudioFeatures_t _data; +static SimpleAudioFeatures_t _data; @@ -386,10 +386,9 @@ static void UpdateChangeSignals(EChangeModes_t * pCurrentMode, const int16_t new } -__attribute__((section(".ramcode"))) static void getvolume(int16_t * logTotalEnergy,int16_t * const int16_t fr[],const int16_t fi[],uint16_t min_energy, const int16_t log2scale) { - uint16_t i; - uint16_t ufr; - uint16_t ufi; + +__attribute__((section(".ramcode"))) +static void getvolume(int16_t * logTotalEnergy, const int16_t * fr,const int16_t * fi,uint16_t min_energy, const int16_t log2scale) { uint64_t utemp64; uint64_t non_weighted_energy = 0; uint64_t a_weighted_energy = 0; @@ -408,20 +407,20 @@ __attribute__((section(".ramcode"))) static void getvolume(int16_t * logTotalEne 974, 970, 967, 963, 959, 956, 952, 948, 945, 941, 937, 934, 930, 927, 923, 920, 916, 913, 913 }; - uint16_t idx, ifft, iend; + uint16_t idx, ifft; - int16_t idx_shift = FEATURES_FFT_SIZE_2N - 7; + const int16_t idx_shift = FEATURES_FFT_SIZE_2N - 7; for (ifft = 1; ifft < FEATURES_FFT_SIZE/2; ifft++) { - utemp64 = 0; - utemp64 += (int32_t)fr[ifft]*(int32_t)fr[ifft]; - utemp64 += (int32_t)fi[ifft]*(int32_t)fi[ifft]; - - idx = ifft >> idx_shift; - kit_assert(idx < 128); - - a_weighted_energy += (utemp64 * a_weight_q10[idx]) >> 10; - non_weighted_energy += utemp64; + utemp64 = 0; + utemp64 += (int32_t)fr[ifft]*(int32_t)fr[ifft]; + utemp64 += (int32_t)fi[ifft]*(int32_t)fi[ifft]; + + idx = ifft >> idx_shift; + assert(idx < 128); + + a_weighted_energy += (utemp64 * a_weight_q10[idx]) >> 10; + non_weighted_energy += utemp64; } @@ -460,7 +459,7 @@ void set_background_energy(const int16_t fr[], const int16_t fi[], int16_t log2s isStable = IsStable(currentMode,logTotalEnergyAvg); - if (c++ == 255) { + if (_data.callcounter & 0xFF == 0) { DISP("vol_energy=%d\r\n",GetAudioEnergyAsDBA(logTotalEnergyAvg)); } diff --git a/kitsune/audiofeatures.h b/kitsune/audiofeatures.h index 3e9d0952..b171914d 100644 --- a/kitsune/audiofeatures.h +++ b/kitsune/audiofeatures.h @@ -17,8 +17,8 @@ extern "C" { /* exported for your enjoyment -- use these! 
*/ void init_background_energy(AudioOncePerMinuteDataCallback_t fpOncePerMinuteCallback); -/* Expects AUDIO_FFT_SIZE samples in samplebuf */ -void set_background_energy(const int16_t fr[], const int16_t fi[]); +/* Expects FEATURES_FFT_SIZE samples in samplebuf */ +void set_background_energy(const int16_t fr[], const int16_t fi[], int16_t log2scale); #ifdef __cplusplus } diff --git a/kitsune/audiosimilarity.c b/kitsune/audiosimilarity.c deleted file mode 100644 index 6a980667..00000000 --- a/kitsune/audiosimilarity.c +++ /dev/null @@ -1,431 +0,0 @@ -#include "audioclassifier.h" -#include -#include -#include /* abs */ -#include -#include "fft.h" -#include "debugutils/debuglog.h" -#include "debugutils/matmessageutils.h" - - - -#define ITEMS_SIZE_2N (8) -#define ITEMS_SIZE (1 << ITEMS_SIZE_2N) -#define ITEMS_SIZE_MASK (ITEMS_SIZE - 1) - -#define NUM_LIST_ITEMS (32) - -#define SIMILARITY_THRESHOLD (0.707f) - -/****************** - typedefs - *****************/ - - -typedef struct ListItem { - uint8_t listidx; - uint8_t updatecount; - struct ListItem * next; - struct ListItem * prev; -} ListItem_t; //~12 bytes - - -typedef struct { - /* For similarity */ - int8_t feats[NUM_LIST_ITEMS][NUM_AUDIO_FEATURES]; //32 * 16 = 512 bytes - uint8_t featsidx[NUM_LIST_ITEMS]; //32 bytes - ListItem_t listdata[NUM_LIST_ITEMS];//32 * 12 = 384 bytes - uint16_t featModeIndex; - - /* For storage */ - uint8_t occurencesindices[ITEMS_SIZE]; //256 bytes - uint8_t occurenceDurations[ITEMS_SIZE]; //256 bytes - uint16_t occurenceDeltaTimeSinceLastSegment[ITEMS_SIZE]; //512 bytes - //int8_t occurencesfeats[ITEMS_SIZE][NUM_AUDIO_FEATURES]; //32 * 16 = 512 bytes - - - uint16_t occurenceidx; - - uint16_t numItemsInOccurenceBuffer; - NotificationCallback_t fpNovelDataCallback; - int64_t lastUpdateTime; - int64_t firstUpdateTime; - uint8_t updateCountNoveltyThreshold; - - -} AudioClassifier_t; - - - -/****************** - static memory - *****************/ -static const int16_t k_similarity_threshold = TOFIX(SIMILARITY_THRESHOLD,10); -static const char * k_occurence_indices_buf_id = "occurenceIndices"; -static const char * k_occurence_durations_buf_id = "occurenceDurations"; -static const char * k_occurence_deltatimes_buf_id = "occurenceDeltaTimes"; -static const char * k_feat_vec_buf_id = "featVecs"; -static const char * k_feat_index_buf_id = "featIndices"; -static const char * k_occurence_info_buf_id = "occurenceInfo"; - -static ListItem_t * _pHead; -static ListItem_t * _pFree; -static ListItem_t * _pTail; - -static AudioClassifier_t _data; - - -/****************** - static functions - *****************/ -static ListItem_t * FindSimilarItem(const int8_t * featvec8) { - - int16_t cosvec; - ListItem_t * p; - int8_t * feat; - - //go through list to all stored feature vectors - //take dot product, normalized, of featvec8 with stored feat vecs - //if cosine of angle is greater than some threshold, we consider - //these vectors similar - p = _pHead; - while(p) { - feat = _data.feats[p->listidx]; - - //take dot product of each feature vector - cosvec = cosvec8(feat, featvec8, NUM_AUDIO_FEATURES); - - //if similar enough - if (cosvec > k_similarity_threshold) { - break; - } - - p = p->next; - } - - return p; -} - - -/* - List is arranged in descending order in time - - If the list fills up (i.e. no more free elements) we pop off the last - element in the list. 
- - Otherwise, we insert an element in the list - */ -static ListItem_t * AddItemToHeadOfList(const int8_t * featvec8,const Segment_t * pSeg) { - ListItem_t * p; - int8_t * featdata; - - //kill oldest item if we have no free space - if (_pFree == NULL) { - - if (!_pTail) { - //! \todo LOG ERROR! - return NULL; - } - - //pop - _pFree = _pTail; - _pTail = _pTail->prev; - - //terminate end - _pTail->next = NULL; - - _pFree->prev = NULL; - } - - //we will always have a free item here - if (!_pHead) { - //first add - _pHead = _pFree; - _pFree = _pFree->next; - _pHead->next = NULL; - _pFree->prev = NULL; - _pTail = _pHead; - } - else { - //insert new item at top - p = _pHead; //old head is p - - //new head is free - _pHead = _pFree; - - //pop head of free list - _pFree = _pFree->next; - if (_pFree) { - _pFree->prev = NULL; - } - - //new head next is old head - _pHead->next = p; - - //old head prev is new head - p->prev = _pHead; - } - - featdata = _data.feats[_pHead->listidx]; - - memcpy(featdata,featvec8,sizeof(int8_t)*NUM_AUDIO_FEATURES); - _data.featsidx[_pHead->listidx] = _data.featModeIndex; - _pHead->updatecount = 0; - return _pHead; -} - -static void MoveItemToHeadOfList(ListItem_t * p) { - - //I'm the head? - if (!p->prev) { - //yes so do nothing - return; - } - - //am I the tail? - if (!p->next) { - //yes, so pop from tail - _pTail = p->prev; - _pTail->next = NULL; - } - else { - //pop from the middle of the list - p->prev->next = p->next; - p->next->prev = p->prev; - } - - p->next = _pHead; - - _pHead = p; - _pHead->next->prev = _pHead; - _pHead->prev = NULL; - - -} - - - - - - -/****************** - exported functions - *****************/ - -void AudioClassifier_Init(uint8_t updateCountNoveltyThreshold,NotificationCallback_t novelDataCallback,MutexCallback_t fpLock, MutexCallback_t fpUnlock) { - uint16_t i; - - _data.fpNovelDataCallback = novelDataCallback; - - //set up the link list - memset(&_data,0,sizeof(AudioClassifier_t)); - - _pFree = &_data.listdata[0]; - _pHead = NULL; - _pTail = NULL; - for (i = 0; i < NUM_LIST_ITEMS - 1; i++) { - _data.listdata[i].next = &_data.listdata[i+1]; - _data.listdata[i+1].prev = &_data.listdata[i]; - } - - for (i = 0; i < NUM_LIST_ITEMS; i++) { - _data.listdata[i].listidx = i; - } - - _data.updateCountNoveltyThreshold = updateCountNoveltyThreshold; - _data.fpUnlock = fpUnlock; - _data.fpLock = fpLock; -} - -/* Call this after you pull the buffer */ -void AudioClassifier_ResetUpdateTime(void) { - - if (_data.fpLock) { - _data.fpLock(); - } - - _data.firstUpdateTime = 0; - _data.numItemsInOccurenceBuffer = 0; - - if (_data.fpUnlock) { - _data.fpUnlock(); - } -} - -/* - - - Set stream to NULL to get size of written buffer - - Set source and tags to NULL if you want. 
- - */ -uint32_t AudioClassifier_GetSerializedBuffer(pb_ostream_t * stream,const char * macbytes, uint32_t unix_time,const char * tags, const char * source) { - - uint32_t size = 0; - - - if (_data.fpLock) { - _data.fpLock(); - } - - { - const uint16_t info[2] = {_data.occurenceidx,_data.numItemsInOccurenceBuffer}; - - MatDesc_t descs[6] = { - {k_occurence_info_buf_id,tags,source,{},1,2,_data.firstUpdateTime,_data.lastUpdateTime}, - {k_occurence_indices_buf_id,tags,source,{},1,_data.numItemsInOccurenceBuffer,_data.firstUpdateTime,_data.lastUpdateTime}, - {k_occurence_durations_buf_id,tags,source,{},1,_data.numItemsInOccurenceBuffer,_data.firstUpdateTime,_data.lastUpdateTime}, - {k_occurence_deltatimes_buf_id,tags,source,{},1,_data.numItemsInOccurenceBuffer,_data.firstUpdateTime,_data.lastUpdateTime}, - {k_feat_index_buf_id,tags,source,{},1,NUM_LIST_ITEMS,_data.firstUpdateTime,_data.lastUpdateTime}, - {k_feat_vec_buf_id,tags,source,{},NUM_LIST_ITEMS,NUM_AUDIO_FEATURES,_data.firstUpdateTime,_data.lastUpdateTime}, - }; - - /*****************/ - descs[0].data.len = 2; - descs[0].data.type = euint16; - descs[0].data.data.uint16 = info; - - /*****************/ - descs[1].data.len = _data.numItemsInOccurenceBuffer; - descs[1].data.type = euint8; - descs[1].data.data.uint8 = _data.occurencesindices; - - /*****************/ - descs[2].data.len = _data.numItemsInOccurenceBuffer; - descs[2].data.type = euint8; - descs[2].data.data.uint8 = _data.occurenceDurations; - - /*****************/ - descs[3].data.len = _data.numItemsInOccurenceBuffer; - descs[3].data.type = euint16; - descs[3].data.data.uint16 = _data.occurenceDeltaTimeSinceLastSegment; - - /*****************/ - descs[4].data.len = NUM_LIST_ITEMS; - descs[4].data.type = euint8; - descs[4].data.data.uint8 = _data.featsidx; - - /*****************/ - descs[5].data.len = NUM_LIST_ITEMS*NUM_AUDIO_FEATURES; - descs[5].data.type = esint8; - descs[5].data.data.sint8 = &_data.feats[0][0]; - - - size = SetMatrixMessage(stream, macbytes, unix_time, descs, sizeof(descs) / sizeof(MatDesc_t)); - } - - - if (_data.fpUnlock) { - _data.fpUnlock(); - } - - return size; -} - - - -void AudioClassifier_DataCallback(int64_t samplecount, const AudioFeatures_t * feats) { - - uint8_t isNovel = FALSE; - - if (_data.fpLock) { - _data.fpLock(); - } - - { - int8_t featvec8[NUM_AUDIO_FEATURES]; - ListItem_t * pitem; - uint8_t duration; - int64_t deltaTimeSinceLastUpdate; - - //scale to int8 - Scale16VecTo8(featvec8,feats,NUM_AUDIO_FEATURES); - - //go through list and find a similar item (potentially NUM_LIST_ITEMS dot products) - pitem = FindSimilarItem(featvec8); - - if (pitem) { - //we found a similar item! 
- MoveItemToHeadOfList(pitem); - } - else { - //add similarity vector, because this is phresh - pitem = AddItemToHeadOfList(featvec8,pSegment); - - //increment (and possibly even rollover) our index number for the features - _data.featModeIndex++; - } - - //safety first - if (pitem) { - - //for the first N new feature vectors, let someone know that this is a new sound - if (pitem->updatecount++ < _data.updateCountNoveltyThreshold) { - isNovel = TRUE; - } - - - //compute how long it's been since the last segment came in - deltaTimeSinceLastUpdate = pSegment->t1 - _data.lastUpdateTime; - _data.lastUpdateTime = pSegment->t1; - - if (deltaTimeSinceLastUpdate > UINT16_MAX) { - deltaTimeSinceLastUpdate = UINT16_MAX; - } - - _data.occurenceDeltaTimeSinceLastSegment[_data.occurenceidx] = (uint16_t)deltaTimeSinceLastUpdate; - - if (!_data.firstUpdateTime) { - _data.firstUpdateTime = pSegment->t1; - } - - //add occurence to circular buffer, give it index of the feature vector to which is associated - _data.occurencesindices[_data.occurenceidx] = _data.featsidx[pitem->listidx]; - - //compute duration - if (pSegment->duration > UINT8_MAX) { - duration = UINT8_MAX; - } - else { - duration = pSegment->duration; - } - - _data.occurenceDurations[_data.occurenceidx] = duration; - - - //update occurence index, wrapping as necessary - _data.occurenceidx++; - _data.occurenceidx &= ITEMS_SIZE_MASK; - - //update number of items in occurence buffer - if (++_data.numItemsInOccurenceBuffer > ITEMS_SIZE ) { - _data.numItemsInOccurenceBuffer = ITEMS_SIZE; - } - - } - } - - if (_data.fpUnlock) { - _data.fpUnlock(); - } - - if (isNovel && _data.fpNovelDataCallback) { - _data.fpNovelDataCallback(); - } - -} - - - - - - - - - - - - - - - - diff --git a/kitsune/audiosimilarity.h b/kitsune/audiosimilarity.h deleted file mode 100644 index e7c8cf77..00000000 --- a/kitsune/audiosimilarity.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef _AUDIOSIMILARITY_H_ -#define _AUDIOSIMILARITY_H_ - -#include "audio_types.h" -#include - -#ifdef __cplusplus -extern "C" { -#endif - -void AudioClassifier_Init(uint8_t updateCountNoveltyThreshold,NotificationCallback_t novelDataCallback,MutexCallback_t fpLock, MutexCallback_t fpUnlock); - -void AudioClassifier_ResetUpdateTime(void); - -uint32_t AudioClassifier_GetSerializedBuffer(pb_ostream_t * stream,const char * macbytes, uint32_t unix_time,const char * tags, const char * source); - -void AudioClassifier_DataCallback(AudioFeatures_t * feats); - -#ifdef __cplusplus -} -#endif - - -#endif - diff --git a/kitsune/fft.c b/kitsune/fft.c index 9f37166b..b9c4d040 100644 --- a/kitsune/fft.c +++ b/kitsune/fft.c @@ -236,6 +236,7 @@ int fftr(int16_t f[], int32_t m) f[i] = tt; } fft(fi, fr, m-1); + return 0; } //requires 2N memory... for now diff --git a/kitsune/main/ccs/.cproject b/kitsune/main/ccs/.cproject index 82215328..d29d377e 100644 --- a/kitsune/main/ccs/.cproject +++ b/kitsune/main/ccs/.cproject @@ -186,14 +186,6 @@ - - - - - - - -