Commit c35c8d2d authored by Christophe Favergeon
Browse files

Corrected issue #124

parent 72d6ccb7
Loading
Loading
Loading
Loading
+14 −2
Original line number Diff line number Diff line
@@ -58,10 +58,13 @@
  @remark
                   Refer to \ref arm_conv_opt_q15() for a faster implementation of this function using scratch buffers.
 */


#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"
#include "arm_vec_filtering.h"


void arm_conv_q15(
  const q15_t * pSrcA,
        uint32_t srcALen,
@@ -82,6 +85,7 @@ void arm_conv_q15(
    int32_t   block1, block2, block3;



    uint16x8_t decrIdxVec = vddupq_u16(7, 1);


@@ -107,6 +111,7 @@ void arm_conv_q15(
    block2 = srcALen - srcBLen + 1;
    block3 = srcBLen - 1;


    pA = pIn1;
    pB = pIn2 - 7;

@@ -192,7 +197,7 @@ void arm_conv_q15(
        pA++;
    }

-    for (i = block3; i >= 1; i -= 2)
+    for (i = block3; i >= 2; i -= 2)
    {
        uint32_t  count = i;
        int64_t   acc0 = 0LL;
@@ -206,7 +211,7 @@ void arm_conv_q15(
        *pDst++ = (q15_t) acc1;
        pA += 2;
    }
-    for (; i >= 1; i--)
+    for (; i > 0; i--)
    {
        uint32_t  count = i;
        int64_t   acc = 0LL;
@@ -218,6 +223,8 @@ void arm_conv_q15(
        *pDst++ = (q15_t) acc;
        pA++;
    }


}
#else
void arm_conv_q15(
@@ -241,6 +248,8 @@ void arm_conv_q15(
        uint32_t blockSize1, blockSize2, blockSize3;   /* Loop counters */
        uint32_t j, k, count, blkCnt;                  /* Loop counters */



  /* The algorithm implementation is based on the lengths of the inputs. */
  /* srcB is always made to slide across srcA. */
  /* So srcBLen is always considered as shorter or equal to srcALen */
@@ -280,6 +289,9 @@ void arm_conv_q15(
  blockSize1 = srcBLen - 1U;
  blockSize2 = srcALen - (srcBLen - 1U);




  /* --------------------------
   * Initializations of stage1
   * -------------------------*/
+1 −1
Original line number Diff line number Diff line
@@ -84,6 +84,7 @@ void arm_conv_q31(
    uint32_t  vddupStartIdx = 3;
    uint32x4_t decrIdxVec = vddupq_u32(vddupStartIdx, 1);


    if (srcALen < srcBLen)
    {
        /*
@@ -219,7 +220,6 @@ void arm_conv_q31(
        *pDst++ = (q31_t) acc;
        pA++;
    }

}

#else
+4 −2
Original line number Diff line number Diff line
@@ -55,6 +55,7 @@
  @remark
                   Refer to \ref arm_conv_opt_q7() for a faster implementation of this function.
 */

#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_helium_utils.h"

@@ -186,7 +187,7 @@ void arm_conv_q7(
        pA++;
    }

-    for (i = block3; i >= 1; i -= 2)
+    for (i = block3; i >= 2; i -= 2)
    {
        uint32_t  count = i;
        int32_t   acc0 = 0;
@@ -200,7 +201,7 @@ void arm_conv_q7(
        *pDst++ = (q7_t) acc1;
        pA += 2;
    }
-    for (; i >= 1; i--)
+    for (; i > 0; i--)
    {
        uint32_t  count = i;
        int32_t   acc = 0;
@@ -212,6 +213,7 @@ void arm_conv_q7(
        *pDst++ = (q7_t) acc;
        pA++;
    }

}

#else
+1 −0
Original line number Diff line number Diff line
@@ -42,6 +42,7 @@ a double precision computation.
          inpB, inputB.nbSamples(),
          outp);

        ASSERT_EMPTY_TAIL(output);
        ASSERT_SNR(ref,output,(q15_t)SNR_THRESHOLD);
        ASSERT_NEAR_EQ(ref,output,ABS_ERROR_Q15);

+1 −0
Original line number Diff line number Diff line
@@ -70,6 +70,7 @@ For tests of the error value of the Levinson Durbin algorithm
          inpB, inputB.nbSamples(),
          outp);

        ASSERT_EMPTY_TAIL(output);
        ASSERT_SNR(ref,output,(q31_t)SNR_THRESHOLD);
        ASSERT_NEAR_EQ(ref,output,ABS_ERROR_Q31);

Loading