From 7cb545370bcda90b8a6cc65347b42ef362cea1f9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nils=20Wallm=C3=A9nius?=
Date: Wed, 22 May 2013 23:08:42 +0200
Subject: [PATCH] Slightly faster C_MULC for ARMv4.

Reorder register usage to take advantage of early termination on
multiplications and reorder a load instruction to hide its latency on
ARM9.
Speeds up decoding of a 64 kbps test file by 0.1MHz on an ARM7TDMI and
0.2MHz on an ARM9TDMI.

Signed-off-by: Timothy B. Terriberry
---
 celt/arm/kiss_fft_armv4.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/celt/arm/kiss_fft_armv4.h b/celt/arm/kiss_fft_armv4.h
index dc9e01b0..1f09eaba 100644
--- a/celt/arm/kiss_fft_armv4.h
+++ b/celt/arm/kiss_fft_armv4.h
@@ -96,18 +96,18 @@
         int tt__; \
         __asm__ __volatile__( \
             "#C_MULC\n\t" \
-            "ldm %[ap], {r0,r1}\n\t" \
             "ldrsh %[br], [%[bp], #0]\n\t" \
+            "ldm %[ap], {r0,r1}\n\t" \
             "ldrsh %[bi], [%[bp], #2]\n\t" \
             "smull %[tt], %[mr], r0, %[br]\n\t" \
             "smlal %[tt], %[mr], r1, %[bi]\n\t" \
             "rsb %[bi], %[bi], #0\n\t" \
-            "smull r1, %[mi], %[br], r1\n\t" \
+            "smull %[br], %[mi], r1, %[br]\n\t" \
             "mov %[tt], %[tt], lsr #15\n\t" \
-            "smlal r1, %[mi], r0, %[bi]\n\t" \
+            "smlal %[br], %[mi], r0, %[bi]\n\t" \
             "orr %[mr], %[tt], %[mr], lsl #17\n\t" \
-            "mov r1, r1, lsr #15\n\t" \
-            "orr %[mi], r1, %[mi], lsl #17\n\t" \
+            "mov %[br], %[br], lsr #15\n\t" \
+            "orr %[mi], %[br], %[mi], lsl #17\n\t" \
             : [mr]"=r"((m).r), [mi]"=r"((m).i), \
               [br]"=&r"(br__), [bi]"=r"(bi__), [tt]"=r"(tt__) \
             : [ap]"r"(&(a)), [bp]"r"(&(b)) \