00001 /* ---------------------------------------------------------------------- 00002 * Copyright (C) 2010 ARM Limited. All rights reserved. 00003 * 00004 * $Date: 15. July 2011 00005 * $Revision: V1.0.10 00006 * 00007 * Project: CMSIS DSP Library 00008 * Title: arm_fir_decimate_fast_q15.c 00009 * 00010 * Description: Fast Q15 FIR Decimator. 00011 * 00012 * Target Processor: Cortex-M4/Cortex-M3 00013 * 00014 * Version 1.0.10 2011/7/15 00015 * Big Endian support added and Merged M0 and M3/M4 Source code. 00016 * 00017 * Version 1.0.3 2010/11/29 00018 * Re-organized the CMSIS folders and updated documentation. 00019 * 00020 * Version 1.0.2 2010/11/11 00021 * Documentation updated. 00022 * 00023 * Version 1.0.1 2010/10/05 00024 * Production release and review comments incorporated. 00025 * 00026 * Version 1.0.0 2010/09/20 00027 * Production release and review comments incorporated. 00028 * -------------------------------------------------------------------- */ 00029 00030 #include "arm_math.h" 00031 00063 void arm_fir_decimate_fast_q15( 00064 const arm_fir_decimate_instance_q15 * S, 00065 q15_t * pSrc, 00066 q15_t * pDst, 00067 uint32_t blockSize) 00068 { 00069 q15_t *pState = S->pState; /* State pointer */ 00070 q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */ 00071 q15_t *pStateCurnt; /* Points to the current sample of the state */ 00072 q15_t *px; /* Temporary pointer for state buffer */ 00073 q15_t *pb; /* Temporary pointer coefficient buffer */ 00074 q31_t x0, c0; /* Temporary variables to hold state and coefficient values */ 00075 q31_t sum0; /* Accumulators */ 00076 uint32_t numTaps = S->numTaps; /* Number of taps */ 00077 uint32_t i, blkCnt, tapCnt, outBlockSize = blockSize / S->M; /* Loop counters */ 00078 00079 00080 /* S->pState buffer contains previous frame (numTaps - 1) samples */ 00081 /* pStateCurnt points to the location where the new input data should be written */ 00082 pStateCurnt = S->pState + (numTaps - 1u); 00083 00084 /* Total number of output samples to be computed */ 00085 blkCnt = outBlockSize; 00086 00087 while(blkCnt > 0u) 00088 { 00089 /* Copy decimation factor number of new input samples into the state buffer */ 00090 i = S->M; 00091 00092 do 00093 { 00094 *pStateCurnt++ = *pSrc++; 00095 00096 } while(--i); 00097 00098 /*Set sum to zero */ 00099 sum0 = 0; 00100 00101 /* Initialize state pointer */ 00102 px = pState; 00103 00104 /* Initialize coeff pointer */ 00105 pb = pCoeffs; 00106 00107 /* Loop unrolling. Process 4 taps at a time. */ 00108 tapCnt = numTaps >> 2; 00109 00110 /* Loop over the number of taps. Unroll by a factor of 4. 00111 ** Repeat until we've computed numTaps-4 coefficients. */ 00112 while(tapCnt > 0u) 00113 { 00114 /* Read the Read b[numTaps-1] and b[numTaps-2] coefficients */ 00115 c0 = *__SIMD32(pb)++; 00116 00117 /* Read x[n-numTaps-1] and x[n-numTaps-2]sample */ 00118 x0 = *__SIMD32(px)++; 00119 00120 /* Perform the multiply-accumulate */ 00121 sum0 = __SMLAD(x0, c0, sum0); 00122 00123 /* Read the b[numTaps-3] and b[numTaps-4] coefficient */ 00124 c0 = *__SIMD32(pb)++; 00125 00126 /* Read x[n-numTaps-2] and x[n-numTaps-3] sample */ 00127 x0 = *__SIMD32(px)++; 00128 00129 /* Perform the multiply-accumulate */ 00130 sum0 = __SMLAD(x0, c0, sum0); 00131 00132 /* Decrement the loop counter */ 00133 tapCnt--; 00134 } 00135 00136 /* If the filter length is not a multiple of 4, compute the remaining filter taps */ 00137 tapCnt = numTaps % 0x4u; 00138 00139 while(tapCnt > 0u) 00140 { 00141 /* Read coefficients */ 00142 c0 = *pb++; 00143 00144 /* Fetch 1 state variable */ 00145 x0 = *px++; 00146 00147 /* Perform the multiply-accumulate */ 00148 sum0 = __SMLAD(x0, c0, sum0); 00149 00150 /* Decrement the loop counter */ 00151 tapCnt--; 00152 } 00153 00154 /* Advance the state pointer by the decimation factor 00155 * to process the next group of decimation factor number samples */ 00156 pState = pState + S->M; 00157 00158 /* Store filter output , smlad returns the values in 2.14 format */ 00159 /* so downsacle by 15 to get output in 1.15 */ 00160 *pDst++ = (q15_t) ((sum0 >> 15)); 00161 00162 /* Decrement the loop counter */ 00163 blkCnt--; 00164 } 00165 00166 /* Processing is complete. 00167 ** Now copy the last numTaps - 1 samples to the satrt of the state buffer. 00168 ** This prepares the state buffer for the next function call. */ 00169 00170 /* Points to the start of the state buffer */ 00171 pStateCurnt = S->pState; 00172 00173 i = (numTaps - 1u) >> 2u; 00174 00175 /* copy data */ 00176 while(i > 0u) 00177 { 00178 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 00179 *__SIMD32(pStateCurnt)++ = *__SIMD32(pState)++; 00180 00181 /* Decrement the loop counter */ 00182 i--; 00183 } 00184 00185 i = (numTaps - 1u) % 0x04u; 00186 00187 /* copy data */ 00188 while(i > 0u) 00189 { 00190 *pStateCurnt++ = *pState++; 00191 00192 /* Decrement the loop counter */ 00193 i--; 00194 } 00195 } 00196