Skip to content

Commit 783ad76

Browse files
linfengz authored and felicialim committed
Revise celt_fir_c() to not pass in argument "mem"
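The history that celt_fir_c() used to receive through "mem" is either already stored immediately before the input "x" (in the reverse of the order "mem" used), or can easily be written immediately before "x" prior to calling the function. Removing the "mem" argument eliminates the redundant buffer copies inside the function; callers must now ensure that the "ord" samples preceding "x" hold the filter history. Update celt_fir_sse4_1() accordingly.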
1 parent c9ba552 commit 783ad76

File tree

7 files changed

+36
-83
lines changed

7 files changed

+36
-83
lines changed

celt/celt_decoder.c

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -556,10 +556,11 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
556556
} else {
557557
/* Pitch-based PLC */
558558
const opus_val16 *window;
559+
opus_val16 *exc;
559560
opus_val16 fade = Q15ONE;
560561
int pitch_index;
561562
VARDECL(opus_val32, etmp);
562-
VARDECL(opus_val16, exc);
563+
VARDECL(opus_val16, _exc);
563564

564565
if (loss_count == 0)
565566
{
@@ -570,7 +571,8 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
570571
}
571572

572573
ALLOC(etmp, overlap, opus_val32);
573-
ALLOC(exc, MAX_PERIOD, opus_val16);
574+
ALLOC(_exc, MAX_PERIOD+LPC_ORDER, opus_val16);
575+
exc = _exc+LPC_ORDER;
574576
window = mode->window;
575577
c=0; do {
576578
opus_val16 decay;
@@ -635,15 +637,14 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
635637
/* Initialize the LPC history with the samples just before the start
636638
of the region for which we're computing the excitation. */
637639
{
638-
opus_val16 lpc_mem[LPC_ORDER];
639640
for (i=0;i<LPC_ORDER;i++)
640641
{
641-
lpc_mem[i] =
642-
ROUND16(buf[DECODE_BUFFER_SIZE-exc_length-1-i], SIG_SHIFT);
642+
exc[MAX_PERIOD-exc_length-LPC_ORDER+i] =
643+
ROUND16(buf[DECODE_BUFFER_SIZE-exc_length-LPC_ORDER+i], SIG_SHIFT);
643644
}
644645
/* Compute the excitation for exc_length samples before the loss. */
645646
celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*LPC_ORDER,
646-
exc+MAX_PERIOD-exc_length, exc_length, LPC_ORDER, lpc_mem, st->arch);
647+
exc+MAX_PERIOD-exc_length, exc_length, LPC_ORDER, st->arch);
647648
}
648649

649650
/* Check if the waveform is decaying, and if so how fast.

celt/celt_lpc.c

Lines changed: 12 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -89,56 +89,47 @@ int p
8989

9090

9191
void celt_fir_c(
92-
const opus_val16 *_x,
92+
const opus_val16 *x,
9393
const opus_val16 *num,
94-
opus_val16 *_y,
94+
opus_val16 *y,
9595
int N,
9696
int ord,
97-
opus_val16 *mem,
9897
int arch)
9998
{
10099
int i,j;
101100
VARDECL(opus_val16, rnum);
102-
VARDECL(opus_val16, x);
103101
SAVE_STACK;
104102

105103
ALLOC(rnum, ord, opus_val16);
106-
ALLOC(x, N+ord, opus_val16);
107104
for(i=0;i<ord;i++)
108105
rnum[i] = num[ord-i-1];
109-
for(i=0;i<ord;i++)
110-
x[i] = mem[ord-i-1];
111-
for (i=0;i<N;i++)
112-
x[i+ord]=_x[i];
113-
for(i=0;i<ord;i++)
114-
mem[i] = _x[N-i-1];
115106
#ifdef SMALL_FOOTPRINT
116107
(void)arch;
117108
for (i=0;i<N;i++)
118109
{
119-
opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT);
110+
opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
120111
for (j=0;j<ord;j++)
121112
{
122-
sum = MAC16_16(sum,rnum[j],x[i+j]);
113+
sum = MAC16_16(sum,rnum[j],x[i+j-ord]);
123114
}
124-
_y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT));
115+
y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT));
125116
}
126117
#else
127118
for (i=0;i<N-3;i+=4)
128119
{
129120
opus_val32 sum[4]={0,0,0,0};
130-
xcorr_kernel(rnum, x+i, sum, ord, arch);
131-
_y[i ] = SATURATE16(ADD32(EXTEND32(_x[i ]), PSHR32(sum[0], SIG_SHIFT)));
132-
_y[i+1] = SATURATE16(ADD32(EXTEND32(_x[i+1]), PSHR32(sum[1], SIG_SHIFT)));
133-
_y[i+2] = SATURATE16(ADD32(EXTEND32(_x[i+2]), PSHR32(sum[2], SIG_SHIFT)));
134-
_y[i+3] = SATURATE16(ADD32(EXTEND32(_x[i+3]), PSHR32(sum[3], SIG_SHIFT)));
121+
xcorr_kernel(rnum, x+i-ord, sum, ord, arch);
122+
y[i ] = SATURATE16(ADD32(EXTEND32(x[i ]), PSHR32(sum[0], SIG_SHIFT)));
123+
y[i+1] = SATURATE16(ADD32(EXTEND32(x[i+1]), PSHR32(sum[1], SIG_SHIFT)));
124+
y[i+2] = SATURATE16(ADD32(EXTEND32(x[i+2]), PSHR32(sum[2], SIG_SHIFT)));
125+
y[i+3] = SATURATE16(ADD32(EXTEND32(x[i+3]), PSHR32(sum[3], SIG_SHIFT)));
135126
}
136127
for (;i<N;i++)
137128
{
138129
opus_val32 sum = 0;
139130
for (j=0;j<ord;j++)
140-
sum = MAC16_16(sum,rnum[j],x[i+j]);
141-
_y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT)));
131+
sum = MAC16_16(sum,rnum[j],x[i+j-ord]);
132+
y[i] = SATURATE16(ADD32(EXTEND32(x[i]), PSHR32(sum, SIG_SHIFT)));
142133
}
143134
#endif
144135
RESTORE_STACK;

celt/celt_lpc.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,11 @@ void celt_fir_c(
4545
opus_val16 *y,
4646
int N,
4747
int ord,
48-
opus_val16 *mem,
4948
int arch);
5049

5150
#if !defined(OVERRIDE_CELT_FIR)
52-
#define celt_fir(x, num, y, N, ord, mem, arch) \
53-
(celt_fir_c(x, num, y, N, ord, mem, arch))
51+
#define celt_fir(x, num, y, N, ord, arch) \
52+
(celt_fir_c(x, num, y, N, ord, arch))
5453
#endif
5554

5655
void celt_iir(const opus_val32 *x,

celt/x86/celt_lpc_sse.c

Lines changed: 10 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -40,63 +40,32 @@
4040

4141
#if defined(FIXED_POINT)
4242

43-
void celt_fir_sse4_1(const opus_val16 *_x,
43+
void celt_fir_sse4_1(const opus_val16 *x,
4444
const opus_val16 *num,
45-
opus_val16 *_y,
45+
opus_val16 *y,
4646
int N,
4747
int ord,
48-
opus_val16 *mem,
4948
int arch)
5049
{
5150
int i,j;
5251
VARDECL(opus_val16, rnum);
53-
VARDECL(opus_val16, x);
5452

5553
__m128i vecNoA;
5654
opus_int32 noA ;
5755
SAVE_STACK;
5856

5957
ALLOC(rnum, ord, opus_val16);
60-
ALLOC(x, N+ord, opus_val16);
6158
for(i=0;i<ord;i++)
6259
rnum[i] = num[ord-i-1];
63-
for(i=0;i<ord;i++)
64-
x[i] = mem[ord-i-1];
65-
66-
for (i=0;i<N-7;i+=8)
67-
{
68-
x[i+ord ]=_x[i ];
69-
x[i+ord+1]=_x[i+1];
70-
x[i+ord+2]=_x[i+2];
71-
x[i+ord+3]=_x[i+3];
72-
x[i+ord+4]=_x[i+4];
73-
x[i+ord+5]=_x[i+5];
74-
x[i+ord+6]=_x[i+6];
75-
x[i+ord+7]=_x[i+7];
76-
}
77-
78-
for (;i<N-3;i+=4)
79-
{
80-
x[i+ord ]=_x[i ];
81-
x[i+ord+1]=_x[i+1];
82-
x[i+ord+2]=_x[i+2];
83-
x[i+ord+3]=_x[i+3];
84-
}
85-
86-
for (;i<N;i++)
87-
x[i+ord]=_x[i];
88-
89-
for(i=0;i<ord;i++)
90-
mem[i] = _x[N-i-1];
9160
#ifdef SMALL_FOOTPRINT
9261
for (i=0;i<N;i++)
9362
{
94-
opus_val32 sum = SHL32(EXTEND32(_x[i]), SIG_SHIFT);
63+
opus_val32 sum = SHL32(EXTEND32(x[i]), SIG_SHIFT);
9564
for (j=0;j<ord;j++)
9665
{
97-
sum = MAC16_16(sum,rnum[j],x[i+j]);
66+
sum = MAC16_16(sum,rnum[j],x[i+j-ord]);
9867
}
99-
_y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT));
68+
y[i] = SATURATE16(PSHR32(sum, SIG_SHIFT));
10069
}
10170
#else
10271
noA = EXTEND32(1) << SIG_SHIFT >> 1;
@@ -107,22 +76,22 @@ void celt_fir_sse4_1(const opus_val16 *_x,
10776
opus_val32 sums[4] = {0};
10877
__m128i vecSum, vecX;
10978

110-
xcorr_kernel(rnum, x+i, sums, ord, arch);
79+
xcorr_kernel(rnum, x+i-ord, sums, ord, arch);
11180

11281
vecSum = _mm_loadu_si128((__m128i *)sums);
11382
vecSum = _mm_add_epi32(vecSum, vecNoA);
11483
vecSum = _mm_srai_epi32(vecSum, SIG_SHIFT);
115-
vecX = OP_CVTEPI16_EPI32_M64(_x + i);
84+
vecX = OP_CVTEPI16_EPI32_M64(x + i);
11685
vecSum = _mm_add_epi32(vecSum, vecX);
11786
vecSum = _mm_packs_epi32(vecSum, vecSum);
118-
_mm_storel_epi64((__m128i *)(_y + i), vecSum);
87+
_mm_storel_epi64((__m128i *)(y + i), vecSum);
11988
}
12089
for (;i<N;i++)
12190
{
12291
opus_val32 sum = 0;
12392
for (j=0;j<ord;j++)
124-
sum = MAC16_16(sum, rnum[j], x[i + j]);
125-
_y[i] = SATURATE16(ADD32(EXTEND32(_x[i]), PSHR32(sum, SIG_SHIFT)));
93+
sum = MAC16_16(sum, rnum[j], x[i+j-ord]);
94+
y[i] = SATURATE16(ADD32(EXTEND32(x[i]), PSHR32(sum, SIG_SHIFT)));
12695
}
12796

12897
#endif

celt/x86/celt_lpc_sse.h

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,11 @@ void celt_fir_sse4_1(
4141
opus_val16 *y,
4242
int N,
4343
int ord,
44-
opus_val16 *mem,
4544
int arch);
4645

4746
#if defined(OPUS_X86_PRESUME_SSE4_1)
48-
#define celt_fir(x, num, y, N, ord, mem, arch) \
49-
((void)arch, celt_fir_sse4_1(x, num, y, N, ord, mem, arch))
47+
#define celt_fir(x, num, y, N, ord, arch) \
48+
((void)arch, celt_fir_sse4_1(x, num, y, N, ord, arch))
5049

5150
#else
5251

@@ -56,11 +55,10 @@ extern void (*const CELT_FIR_IMPL[OPUS_ARCHMASK + 1])(
5655
opus_val16 *y,
5756
int N,
5857
int ord,
59-
opus_val16 *mem,
6058
int arch);
6159

62-
# define celt_fir(x, num, y, N, ord, mem, arch) \
63-
((*CELT_FIR_IMPL[(arch) & OPUS_ARCHMASK])(x, num, y, N, ord, mem, arch))
60+
# define celt_fir(x, num, y, N, ord, arch) \
61+
((*CELT_FIR_IMPL[(arch) & OPUS_ARCHMASK])(x, num, y, N, ord, arch))
6462

6563
#endif
6664
#endif

celt/x86/x86_celt_map.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@ void (*const CELT_FIR_IMPL[OPUS_ARCHMASK + 1])(
4747
opus_val16 *y,
4848
int N,
4949
int ord,
50-
opus_val16 *mem,
5150
int arch
5251
) = {
5352
celt_fir_c, /* non-sse */

silk/LPC_analysis_filter.c

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,6 @@ void silk_LPC_analysis_filter(
5757
{
5858
opus_int j;
5959
#if USE_CELT_FIR
60-
opus_int16 mem[SILK_MAX_ORDER_LPC];
6160
opus_int16 num[SILK_MAX_ORDER_LPC];
6261
#else
6362
int ix;
@@ -74,10 +73,7 @@ void silk_LPC_analysis_filter(
7473
for ( j = 0; j < d; j++ ) {
7574
num[ j ] = -B[ j ];
7675
}
77-
for (j=0;j<d;j++) {
78-
mem[ j ] = in[ d - j - 1 ];
79-
}
80-
celt_fir( in + d, num, out + d, len - d, d, mem, arch );
76+
celt_fir( in + d, num, out + d, len - d, d, arch );
8177
for ( j = 0; j < d; j++ ) {
8278
out[ j ] = 0;
8379
}

0 commit comments

Comments
 (0)