Skip to content

Commit ff98202

Browse files
committed
Retrained speech/music RNN
Larger network, using 8-bit weights
1 parent 877d3d2 commit ff98202

File tree

3 files changed

+642
-205
lines changed

3 files changed

+642
-205
lines changed

src/analysis.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -867,7 +867,7 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
867867
info->activity_probability = frame_probs[1];
868868
/* It seems like the RNN tends to have a bias towards speech and this
869869
warping of the probabilities compensates for it. */
870-
info->music_prob = frame_probs[0] * (2 - frame_probs[0]);
870+
info->music_prob = MAX16(1-10*(1-frame_probs[0]), MIN16(10*frame_probs[0], .12+.69*frame_probs[0]*(2-frame_probs[0])));
871871

872872
/*printf("%f %f %f\n", frame_probs[0], frame_probs[1], info->music_prob);*/
873873
#ifdef MLP_TRAINING

src/mlp.h

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -29,22 +29,22 @@
2929

3030
#include "opus_types.h"
3131

32-
#define WEIGHTS_SCALE (1.f/8192)
32+
#define WEIGHTS_SCALE (1.f/128)
3333

34-
#define MAX_NEURONS 20
34+
#define MAX_NEURONS 32
3535

3636
typedef struct {
37-
const opus_int16 *bias;
38-
const opus_int16 *input_weights;
37+
const opus_int8 *bias;
38+
const opus_int8 *input_weights;
3939
int nb_inputs;
4040
int nb_neurons;
4141
int sigmoid;
4242
} DenseLayer;
4343

4444
typedef struct {
45-
const opus_int16 *bias;
46-
const opus_int16 *input_weights;
47-
const opus_int16 *recurrent_weights;
45+
const opus_int8 *bias;
46+
const opus_int8 *input_weights;
47+
const opus_int8 *recurrent_weights;
4848
int nb_inputs;
4949
int nb_neurons;
5050
} GRULayer;

0 commit comments

Comments
 (0)