x86: float dsp: butterflies_float SSE
97c -> 49c. Some codecs could benefit from more unrolling, but AAC doesn't. Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
parent
295ce83e2f
commit
1a4007964c
2 changed files with 26 additions and 0 deletions
|
@ -263,3 +263,26 @@ cglobal scalarproduct_float, 3,3,2, v1, v2, offset
|
|||
%endif
|
||||
RET
|
||||
|
||||
;-----------------------------------------------------------------------------
; void ff_butterflies_float(float *src0, float *src1, int len);
;
; In-place butterfly over two float arrays. For every element i:
;     sum     = src0[i] + src1[i]
;     diff    = src0[i] - src1[i]
;     src0[i] = sum
;     src1[i] = diff
;
; Each iteration handles mmsize bytes (one XMM register, i.e. 4 floats) with
; aligned loads/stores (mova), so:
;   - src0 and src1 are expected to be 16-byte aligned;
;   - len is effectively rounded up to a multiple of 4 elements.
; A len <= 0 returns immediately without touching memory.
;-----------------------------------------------------------------------------
INIT_XMM sse
cglobal butterflies_float, 3,3,3, src0, src1, len
    movsxdifnidn lenq, lend         ; widen the signed int len to pointer size
    test      lenq, lenq
    jle .end                        ; nothing to do for len <= 0 (a bare jz
                                    ; would let a negative len run one
                                    ; iteration on src0/src1)
    shl       lenq, 2               ; element count -> byte count
    add      src0q, lenq            ; point both arrays at their end ...
    add      src1q, lenq
    neg       lenq                  ; ... and index with a negative offset
                                    ; that counts up towards zero
.loop:
    mova        m0, [src0q + lenq]
    mova        m1, [src1q + lenq]
    subps       m2, m0, m1          ; m2 = src0 - src1
    addps       m0, m0, m1          ; m0 = src0 + src1
    mova        [src1q + lenq], m2
    mova        [src0q + lenq], m0
    add       lenq, mmsize
    jl .loop                        ; continue while the offset is negative
.end:
    REP_RET
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue