Arduino_STM32/STM32F1/libraries/stm_fft/cr4_fft_64_stm32.asm

250 lines
7.6 KiB
NASM

/*;******************** (C) COPYRIGHT 2009 STMicroelectronics ********************
;* File Name : cr4_fft_64_stm32.s
;* Author : MCD Application Team
;* Version : V2.0.0
;* Date : 04/27/2009
;* Description : Optimized 64-point radix-4 complex FFT for Cortex-M3
;********************************************************************************
;* THE PRESENT FIRMWARE WHICH IS FOR GUIDANCE ONLY AIMS AT PROVIDING CUSTOMERS
;* WITH CODING INFORMATION REGARDING THEIR PRODUCTS IN ORDER FOR THEM TO SAVE TIME.
;* AS A RESULT, STMICROELECTRONICS SHALL NOT BE HELD LIABLE FOR ANY DIRECT,
;* INDIRECT OR CONSEQUENTIAL DAMAGES WITH RESPECT TO ANY CLAIMS ARISING FROM THE
;* CONTENT OF SUCH SOFTWARE AND/OR THE USE MADE BY CUSTOMERS OF THE CODING
;* INFORMATION CONTAINED HEREIN IN CONNECTION WITH THEIR PRODUCTS.
;*******************************************************************************/
/* Assembler setup: Cortex-M3 target, unified ARM/Thumb syntax, Thumb encoding.
   No floating-point instructions appear in this file. */
.cpu cortex-m3
.fpu softvfp
.syntax unified
.thumb
/* Everything that follows is emitted into the .text section. */
.text
/* Entry point exported to the linker. */
.global cr4_fft_64_stm32
/* External coefficient table. In this file it is only named by the
   commented-out "LDR.W r3, =TableFFT" alternative inside the function. */
.extern TableFFT
/* Used as the post-index byte step between the four input points that are
   loaded for each first-stage butterfly (preloop_v7). */
.equ NPT, 64
/*;*******************************************************************************
;* Function Name : cr4_fft_64_stm32
;* Description : complex radix-4 64 points FFT
;* Input : - R0 = pssOUT: Output array. The first stage writes it, the
;* two following passes then update it in place.
;* - R1 = pssIN: Input array (only read).
;* - R2 = Nbin: =64 number of points, this optimized FFT function
;* can only convert 64 points. The first stage always runs 16
;* butterflies; R2 is only used to locate the working buffer and
;* to derive the group count of the later passes.
;* Data format : every point is 4 bytes: two signed 16-bit halfwords at byte
;* offsets 0 ("lo") and 2 ("hi").
;* NOTE(review): presumably lo = real part, hi = imaginary part;
;* confirm against the caller.
;* Scaling : each of the three stages shifts its results right by 2 bits.
;* Registers : R4-R11 and LR are saved on entry and restored on exit.
;* R0-R3 and R12 are overwritten.
;* Output : None
;* Return : None
;********************************************************************************/
.thumb_func
cr4_fft_64_stm32:
/* Save the registers used as butterfly operands plus the return address. */
STMFD SP!, {R4-R11, LR}
/* r12 = bit-reversed copy of the butterfly counter (0 for the first pass),
   r3  = output write pointer,
   r0  = first-stage butterfly counter, 0..15. */
MOV r12, #0
MOV r3, r0
MOV r0,#0
/* ---------------------------------------------------------------------------
   First stage: 16 radix-4 butterflies without coefficient multiplies.
   Inputs are fetched in a permuted order, outputs are written sequentially.
   --------------------------------------------------------------------------- */
preloop_v7:
/* r14 = pssIN + (r12 >> 26). r12 is the 32-bit bit reversal of the counter,
   so for counters 0..15 the offset is the 4-bit-reversed counter times 4. */
ADD r14, r1, r12, LSR#26
/* Load four points spaced NPT bytes apart (hi halfword first, then the lo
   halfword with post-increment):
     P0 -> (r4 lo, r5 hi)    P1 -> (r8 lo, r9 hi)
     P2 -> (r6 lo, r7 hi)    P3 -> (r10 lo, r11 hi) */
LDRSH r5, [r14, #2]
LDRSH r4, [r14],#NPT
LDRSH r9, [r14, #2]
LDRSH r8, [r14],#NPT
LDRSH r7, [r14, #2]
LDRSH r6, [r14],#NPT
LDRSH r11, [r14, #2]
LDRSH r10, [r14],#NPT
/* (r8,r9) = P1 + P3, (r10,r11) = P1 - P3 (sum minus twice the subtrahend). */
ADD r8, r8, r10
ADD r9, r9, r11
SUB r10, r8, r10, LSL#1
SUB r11, r9, r11, LSL#1
/* (r4,r5) = (P0 + P2)/4, (r6,r7) = (P0 - P2)/4. Each operand is shifted
   separately, so results are exact only up to shift truncation. */
MOV r4, r4, ASR#2
MOV r5, r5, ASR#2
ADD r4, r4, r6, ASR#2
ADD r5, r5, r7, ASR#2
SUB r6, r4, r6, ASR#1
SUB r7, r5, r7, ASR#1
/* (r4,r5) = (P0 + P2 + P1 + P3)/4, (r8,r9) = (P0 + P2 - P1 - P3)/4. */
ADD r4, r4, r8, ASR#2
ADD r5, r5, r9, ASR#2
SUB r8, r4, r8, ASR#1
SUB r9, r5, r9, ASR#1
/* Cross terms; lo and hi of (P1 - P3) are swapped with opposite signs:
     r6  = (P0-P2).lo/4 + (P1-P3).hi/4     r7  = (P0-P2).hi/4 - (P1-P3).lo/4
     r11 = (P0-P2).lo/4 - (P1-P3).hi/4     r10 = (P0-P2).hi/4 + (P1-P3).lo/4
   With lo taken as the real part these are (P0-P2) - j(P1-P3) and
   (P0-P2) + j(P1-P3), each divided by 4. */
ADD r6, r6, r11, ASR#2
SUB r7, r7, r10, ASR#2
SUB r11, r6, r11, ASR#1
ADD r10, r7, r10, ASR#1
/* Write the four results to consecutive output points (4 bytes each).
   Note the last point takes r11 as lo and r10 as hi. */
STRH r5, [r3, #2]
STRH r4, [r3], #4
STRH r7, [r3, #2]
STRH r6, [r3], #4
STRH r9, [r3, #2]
STRH r8, [r3], #4
STRH r10, [r3, #2]
STRH r11, [r3], #4
/* Next butterfly: bump the counter, refresh its bit reversal, stop after 16. */
ADD r0, r0, #1
RBIT r12, r0
CMP r0,#16
BNE preloop_v7
/* r3 advanced 16 bytes per butterfly. Stepping back Nbin*4 bytes yields the
   working pointer for the in-place passes (start of pssOUT when Nbin = 64). */
SUB r1, r3, r2, LSL#2
/* r0 = byte distance between the four points of a butterfly in the next pass. */
MOV r0, #16
/* r2 = Nbin/16 = number of butterfly groups in the next pass. */
MOVS r2, r2, LSR#4
/*;------------------------------------------------------------------------------
; The FFT coefficients table can be stored into Flash or RAM.
; The following two lines of code allow selecting the method for coefficients
; storage.
; In the case of choosing coefficients in RAM, you have to:
; 1. Include the file table_fft.h, which is a part of the DSP library,
; in your main file.
; 2. Uncomment the line LDR.W r3, =TableFFT and comment out the line
; ADR r3, TableFFT_V7
; 3. Comment out all the TableFFT_V7 data.
;------------------------------------------------------------------------------*/
/* r3 = coefficient read pointer. */
ADR r3, TableFFT_V7
/*LDR.W r3, =TableFFT*/
/* ---------------------------------------------------------------------------
   Remaining passes, executed in place on the working buffer.
   On entry to each pass: r0 = point stride in bytes, r1 = buffer base,
   r2 = number of groups, r3 = first coefficient of the pass.
   With Nbin = 64 this runs twice: stride 16 (4 groups of 4 butterflies) and
   stride 64 (1 group of 16 butterflies).
   --------------------------------------------------------------------------- */
passloop_v7:
/* Keep the buffer base and the group count for the end of the pass. */
STMFD SP!, {r1,r2}
/* r12 = 3*stride; r1 now addresses the fourth point of the first butterfly. */
ADD r12, r0, r0, LSL#1
ADD r1, r1, r12
/* r2 packs two counters: bits 31..16 = butterflies left in the group,
   bits 15..0 = groups left. Bias the upper half by -1 so the BGE at the end
   of butterloop_v7 executes exactly stride/4 butterflies per group. */
SUB r2, r2, #1<<16
grouploop_v7:
/* Upper half of r2 += stride/4 (butterflies per group). */
ADD r2,r2,r0,LSL#(16-2)
butterloop_v7:
/* Point at base + 3*stride times the first coefficient pair (a, b):
     r12 = (hi - lo) * b
     r11 = hi*a + r12         = hi*(a+b) - lo*b
     r10 = lo*(a + 2b) + r12  = lo*(a+b) + hi*b
   With lo taken as the real part this is (lo + j*hi) * ((a+b) - j*b), done
   with three multiplies. Products are kept as full 32-bit values. */
LDRSH r5, [r1, #2]
LDRSH r4, [r1]
SUB r1, r1, r0
LDRSH r11, [r3, #2]
LDRSH r10, [r3]
ADD r3, r3, #4
SUB r14, r5, r4
MUL r12, r14, r11
ADD r14, r10, r11, LSL#1
MLA r11, r5, r10, r12
MLA r10, r4, r14, r12
/* Point at base + 2*stride times the second pair -> (r8 lo, r9 hi). */
LDRSH r5, [r1, #2]
LDRSH r4, [r1]
SUB r1, r1, r0
LDRSH r9, [r3, #2]
LDRSH r8, [r3]
ADD r3, r3, #4
SUB r14, r5, r4
MUL r12, r14, r9
ADD r14, r8, r9, LSL#1
MLA r9, r5, r8, r12
MLA r8, r4, r14, r12
/* Point at base + stride times the third pair -> (r6 lo, r7 hi). */
LDRSH r5, [r1, #2]
LDRSH r4, [r1]
SUB r1, r1, r0
LDRSH r7, [r3, #2]
LDRSH r6, [r3]
ADD r3, r3, #4
SUB r14, r5, r4
MUL r12, r14, r7
ADD r14, r6, r7, LSL#1
MLA r7, r5, r6, r12
MLA r6, r4, r14, r12
/* Point at base is used without a multiply -> (r4 lo, r5 hi). */
LDRSH r5, [r1, #2]
LDRSH r4, [r1]
/* Same register roles and add/subtract pattern as the first stage.
   r6..r11 are products that still carry the table scale (the unit
   coefficient is stored as 0x4000), hence the extra 14 in their shifts;
   r4/r5 are plain samples and only get the /4 stage scaling. */
ADD r8, r8, r10
ADD r9, r9, r11
SUB r10, r8, r10, LSL#1
SUB r11, r9, r11, LSL#1
MOV r4, r4, ASR#2
MOV r5, r5, ASR#2
ADD r4, r4, r6, ASR#(2+14)
ADD r5, r5, r7, ASR#(2+14)
SUB r6, r4, r6, ASR#(1+14)
SUB r7, r5, r7, ASR#(1+14)
ADD r4, r4, r8, ASR#(2+14)
ADD r5, r5, r9, ASR#(2+14)
SUB r8, r4, r8, ASR#(1+14)
SUB r9, r5, r9, ASR#(1+14)
ADD r6, r6, r11, ASR#(2+14)
SUB r7, r7, r10, ASR#(2+14)
SUB r11, r6, r11, ASR#(1+14)
ADD r10, r7, r10, ASR#(1+14)
/* Store in place: (r4,r5) at base, (r6,r7) at base+stride, (r8,r9) at
   base+2*stride, (r11 lo, r10 hi) at base+3*stride. The final post-increment
   leaves r1 on the fourth point of the next butterfly in this group. */
STRH r5, [r1, #2]
STRH r4, [r1]
ADD r1, r1, r0
STRH r7, [r1, #2]
STRH r6, [r1]
ADD r1, r1, r0
STRH r9, [r1, #2]
STRH r8, [r1]
ADD r1, r1, r0
STRH r10, [r1, #2]
STRH r11, [r1], #4
/* One butterfly done: decrement the upper-half counter, loop while >= 0. */
SUBS r2,r2, #1<<16
BGE butterloop_v7
/* r12 was used as multiply scratch above, so rebuild 3*stride. r1 has moved
   stride bytes inside the group; adding 3*stride places it on the fourth
   point of the first butterfly of the next group. */
ADD r12, r0, r0, LSL#1
ADD r1, r1, r12
/* One group done (lower-half counter). */
SUB r2, r2, #1
/* Z is set when the lower 16 bits of r2 (groups left) have reached zero. */
MOVS r14, r2, LSL#16
/* More groups left: rewind r3 by the 3*stride coefficient bytes consumed
   (12 bytes per butterfly) so every group reuses the same coefficients.
   After the last group r3 is left at the next pass's coefficients. */
IT ne
SUBNE r3, r3, r12
BNE grouploop_v7
/* End of pass: restore base and group count, then stride *= 4 and
   groups /= 4. Leave once the group count becomes zero. */
LDMFD sp!, {r1, r2}
MOV r0,r0,LSL#2
MOVS r2, r2, LSR#2
BNE passloop_v7
/* Restore the saved registers and return by loading the saved LR into PC. */
LDMFD SP!, {R4-R11, PC}
/* Built-in coefficient table, read sequentially through r3 by butterloop_v7.
   Each row holds the three 16-bit (a, b) pairs consumed by one butterfly, in
   the order they are applied: point at base+3*stride, point at base+2*stride,
   point at base+stride. The loop combines a pair with a sample (lo, hi) as
   lo*(a+b) + hi*b and hi*(a+b) - lo*b; the unit coefficient is (0x4000, 0).
   NOTE(review): a+b and b look like cosine and sine scaled by 0x4000
   (e.g. 0xdd5d + 0x3b21 = 0x187e); confirm against the table generator. */
TableFFT_V7:
/* Rows 1-4: pass with stride 16 (4 butterflies per group; the same four rows
   are reused for every group). */
.short 0x4000,0x0000, 0x4000,0x0000, 0x4000,0x0000
.short 0xdd5d,0x3b21, 0x22a3,0x187e, 0x0000,0x2d41
.short 0xa57e,0x2d41, 0x0000,0x2d41, 0xc000,0x4000
.short 0xdd5d,0xe782, 0xdd5d,0x3b21, 0xa57e,0x2d41
/* Rows 5-20: pass with stride 64 (one group of 16 butterflies). */
.short 0x4000,0x0000, 0x4000,0x0000, 0x4000,0x0000
.short 0x2aaa,0x1294, 0x396b,0x0646, 0x3249,0x0c7c
.short 0x11a8,0x238e, 0x3249,0x0c7c, 0x22a3,0x187e
.short 0xf721,0x3179, 0x2aaa,0x1294, 0x11a8,0x238e
.short 0xdd5d,0x3b21, 0x22a3,0x187e, 0x0000,0x2d41
.short 0xc695,0x3fb1, 0x1a46,0x1e2b, 0xee58,0x3537
.short 0xb4be,0x3ec5, 0x11a8,0x238e, 0xdd5d,0x3b21
.short 0xa963,0x3871, 0x08df,0x289a, 0xcdb7,0x3ec5
.short 0xa57e,0x2d41, 0x0000,0x2d41, 0xc000,0x4000
.short 0xa963,0x1e2b, 0xf721,0x3179, 0xb4be,0x3ec5
.short 0xb4be,0x0c7c, 0xee58,0x3537, 0xac61,0x3b21
.short 0xc695,0xf9ba, 0xe5ba,0x3871, 0xa73b,0x3537
.short 0xdd5d,0xe782, 0xdd5d,0x3b21, 0xa57e,0x2d41
.short 0xf721,0xd766, 0xd556,0x3d3f, 0xa73b,0x238e
.short 0x11a8,0xcac9, 0xcdb7,0x3ec5, 0xac61,0x187e
.short 0x2aaa,0xc2c1, 0xc695,0x3fb1, 0xb4be,0x0c7c
/* End of assembler input. */
.end
/******************* (C) COPYRIGHT 2009 STMicroelectronics *****END OF FILE****/