x86: float dsp: butterflies_float SSE

97c -> 49c
Some codecs could benefit from more unrolling, but AAC doesn't.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
Christophe Gisquet 2013-04-12 21:07:01 +02:00 committed by Michael Niedermayer
parent 295ce83e2f
commit 1a4007964c
2 changed files with 26 additions and 0 deletions

View file

@@ -263,3 +263,26 @@ cglobal scalarproduct_float, 3,3,2, v1, v2, offset
%endif
RET
;-----------------------------------------------------------------------------
; void ff_butterflies_float(float *src0, float *src1, int len);
;
; In-place butterfly over two float arrays. For every element i:
;     t       = src0[i] - src1[i]
;     src0[i] = src0[i] + src1[i]
;     src1[i] = t
;
; The loop consumes one whole vector (mmsize bytes) per iteration through
; mova, the aligned move, so both pointers have to be mmsize-aligned and both
; buffers have to be accessible up to len rounded up to a full vector.
; A len that is zero or negative returns without touching memory.
;-----------------------------------------------------------------------------
INIT_XMM sse
cglobal butterflies_float, 3,3,3, src0, src1, len
    movsxdifnidn lenq, lend             ; len arrives as a 32-bit int; widen it for addressing
    test      lenq, lenq
    jle       .end                      ; zero or negative length: nothing to process
    shl       lenq, 2                   ; element count -> byte count (4 bytes per float)
    lea       src0q, [src0q + lenq]     ; move both bases to the end of their arrays ...
    lea       src1q, [src1q + lenq]
    neg       lenq                      ; ... and walk a negative byte offset up towards 0
.loop:
    mova      m0, [src0q + lenq]
    mova      m1, [src1q + lenq]
    subps     m2, m0, m1                ; m2 = src0 - src1
    addps     m0, m0, m1                ; m0 = src0 + src1
    mova      [src1q + lenq], m2        ; difference is written back to src1
    mova      [src0q + lenq], m0        ; sum is written back to src0
    add       lenq, mmsize              ; next vector; the flags from this add drive the exit
    jl        .loop                     ; keep going while the offset is still negative
.end:
    REP_RET