Commit 5f86244b authored by Stephanos Ioannidis
Browse files

DSP: Fix vector type mismatches



This commit fixes the vector type mismatches in the MVE function
implementations that are treated as errors in GCC unless the
`-flax-vector-conversions` option is specified.

Note that most of these mismatches were already fixed upstream.

Signed-off-by: Stephanos Ioannidis <root@stephanos.io>
parent db837500
Loading
Loading
Loading
Loading
+13 −13
Original line number Diff line number Diff line
@@ -140,7 +140,7 @@ __STATIC_INLINE void arm_bitreversal_16_inpl_mve(
{
    uint32_t       *src = (uint32_t *) pSrc;
    int32_t         blkCnt;     /* loop counters */
    uint32x4_t      bitRevTabOff;
    uint16x8_t      bitRevTabOff;
    uint16x8_t      one = vdupq_n_u16(1);
    uint32x4_t      bitRevOff1Low, bitRevOff0Low;
    uint32x4_t      bitRevOff1High, bitRevOff0High;
@@ -152,8 +152,8 @@ __STATIC_INLINE void arm_bitreversal_16_inpl_mve(

    bitRevOff0Low = vmullbq_int_u16(bitRevTabOff, one);
    bitRevOff0High = vmulltq_int_u16(bitRevTabOff, one);
    bitRevOff0Low = vshrq_n_u16(bitRevOff0Low, 3);
    bitRevOff0High = vshrq_n_u16(bitRevOff0High, 3);
    bitRevOff0Low = (uint32x4_t)vshrq_n_u16((uint16x8_t)bitRevOff0Low, 3);
    bitRevOff0High = (uint32x4_t)vshrq_n_u16((uint16x8_t)bitRevOff0High, 3);

    blkCnt = (bitRevLen / 16);
    while (blkCnt > 0) {
@@ -162,8 +162,8 @@ __STATIC_INLINE void arm_bitreversal_16_inpl_mve(

        bitRevOff1Low = vmullbq_int_u16(bitRevTabOff, one);
        bitRevOff1High = vmulltq_int_u16(bitRevTabOff, one);
        bitRevOff1Low = vshrq_n_u16(bitRevOff1Low, 3);
        bitRevOff1High = vshrq_n_u16(bitRevOff1High, 3);
        bitRevOff1Low = (uint32x4_t)vshrq_n_u16((uint16x8_t)bitRevOff1Low, 3);
        bitRevOff1High = (uint32x4_t)vshrq_n_u16((uint16x8_t)bitRevOff1High, 3);

        inLow = vldrwq_gather_shifted_offset_u32(src, bitRevOff0Low);
        inHigh = vldrwq_gather_shifted_offset_u32(src, bitRevOff0High);
@@ -177,8 +177,8 @@ __STATIC_INLINE void arm_bitreversal_16_inpl_mve(

        bitRevOff0Low = vmullbq_int_u16(bitRevTabOff, one);
        bitRevOff0High = vmulltq_int_u16(bitRevTabOff, one);
        bitRevOff0Low = vshrq_n_u16(bitRevOff0Low, 3);
        bitRevOff0High = vshrq_n_u16(bitRevOff0High, 3);
        bitRevOff0Low = (uint32x4_t)vshrq_n_u16((uint16x8_t)bitRevOff0Low, 3);
        bitRevOff0High = (uint32x4_t)vshrq_n_u16((uint16x8_t)bitRevOff0High, 3);

        inLow = vldrwq_gather_shifted_offset_u32(src, bitRevOff1Low);
        inHigh = vldrwq_gather_shifted_offset_u32(src, bitRevOff1High);
@@ -211,8 +211,8 @@ __STATIC_INLINE void arm_bitreversal_16_inpl_mve(

        bitRevOff0Low = vmullbq_int_u16(bitRevTabOff, one);
        bitRevOff0High = vmulltq_int_u16(bitRevTabOff, one);
        bitRevOff0Low = vshrq_n_u16(bitRevOff0Low, 3);
        bitRevOff0High = vshrq_n_u16(bitRevOff0High, 3);
        bitRevOff0Low = (uint32x4_t)vshrq_n_u16((uint16x8_t)bitRevOff0Low, 3);
        bitRevOff0High = (uint32x4_t)vshrq_n_u16((uint16x8_t)bitRevOff0High, 3);

        inLow = vldrwq_gather_shifted_offset_z_u32(src, bitRevOff0Low, p);
        inHigh = vldrwq_gather_shifted_offset_z_u32(src, bitRevOff0High, p);
@@ -251,13 +251,13 @@ __STATIC_INLINE void arm_bitreversal_32_outpl_mve(void *pDst, void *pSrc, uint32
    while (blkCnt > 0) {
        uint64x2_t      vecIn;

        vecIn = vldrdq_gather_offset_u64(pSrc, (int64x2_t) bitRevOffs0);
        vecIn = vldrdq_gather_offset_u64(pSrc, (uint64x2_t) bitRevOffs0);
        idxOffs0 = idxOffs0 + 16;
        vst1q(pDst32, (uint32x4_t) vecIn);
        pDst32 += 4;
        bitRevOffs0 = vbrsrq(idxOffs0, bitRevPos);

        vecIn = vldrdq_gather_offset_u64(pSrc, (int64x2_t) bitRevOffs1);
        vecIn = vldrdq_gather_offset_u64(pSrc, (uint64x2_t) bitRevOffs1);
        idxOffs1 = idxOffs1 + 16;
        vst1q(pDst32, (uint32x4_t) vecIn);
        pDst32 += 4;
@@ -297,13 +297,13 @@ __STATIC_INLINE void arm_bitreversal_16_outpl_mve(void *pDst, void *pSrc, uint32
    while (blkCnt > 0) {
        uint32x4_t      vecIn;

        vecIn = vldrwq_gather_offset_s32(pSrc, bitRevOffs0);
        vecIn = (uint32x4_t)vldrwq_gather_offset_s32(pSrc, bitRevOffs0);
        idxOffs0 = idxOffs0 + 32;
        vst1q(pDst16, (uint16x8_t) vecIn);
        pDst16 += 8;
        bitRevOffs0 = vbrsrq(idxOffs0, bitRevPos);

        vecIn = vldrwq_gather_offset_s32(pSrc, bitRevOffs1);
        vecIn = (uint32x4_t)vldrwq_gather_offset_s32(pSrc, bitRevOffs1);
        idxOffs1 = idxOffs1 + 32;
        vst1q(pDst16, (uint16x8_t) vecIn);
        pDst16 += 8;
+2 −2
Original line number Diff line number Diff line
@@ -102,9 +102,9 @@ void arm_biquad_cascade_df1_q15(
        bCoeffs1[7] = a2;
        bCoeffs1[6] = a1;

        bCoeffs2 =
        bCoeffs2 = (q15x8_t)
            vsetq_lane_s32(vgetq_lane_s32((q31x4_t) bCoeffs0, 3), (q31x4_t) bCoeffs2, 3);
        bCoeffs3 =
        bCoeffs3 = (q15x8_t)
            vsetq_lane_s32(vgetq_lane_s32((q31x4_t) bCoeffs1, 3), (q31x4_t) bCoeffs3, 3);


+3 −3
Original line number Diff line number Diff line
@@ -178,7 +178,7 @@ arm_status arm_mat_ldlt_f32(

        int32x4_t vecOffs;
        int w;
        vecOffs = vidupq_u32((uint32_t)0, 1);
        vecOffs = (int32x4_t)vidupq_u32((uint32_t)0, 1);
        vecOffs = vmulq_n_s32(vecOffs,n);

        for(w=k+1; w<n; w+=4)
@@ -204,7 +204,7 @@ arm_status arm_mat_ldlt_f32(
             //pA[w*n+x] = pA[w*n+x] - pA[w*n+k] * (pA[x*n+k] * invA);


             vecX = vldrwq_gather_shifted_offset_z_f32(&pA[x*n+k], vecOffs, p0);
             vecX = vldrwq_gather_shifted_offset_z_f32(&pA[x*n+k], (uint32x4_t)vecOffs, p0);
             vecX = vmulq_m_n_f32(vuninitializedq_f32(),vecX,invA,p0);

             
@@ -247,7 +247,7 @@ arm_status arm_mat_ldlt_f32(

             vecA = vldrwq_z_f32(&pA[w*n+x],p0);
             
             vecX = vldrwq_gather_shifted_offset_z_f32(&pA[x*n+k], vecOffs, p0);
             vecX = vldrwq_gather_shifted_offset_z_f32(&pA[x*n+k], (uint32x4_t)vecOffs, p0);
             vecX = vmulq_m_n_f32(vuninitializedq_f32(),vecX,invA,p0);

             vecA = vfmsq_m(vecA, vecW, vecX, p0);
+2 −2
Original line number Diff line number Diff line
@@ -72,9 +72,9 @@ void arm_q15_to_float(
  {
      /* C = (float32_t) A / 32768 */
      /* convert from q15 to float and then store the results in the destination buffer */
      vecDst = vldrhq_s32(pSrcVec); 
      vecDst = (q15x8_t)vldrhq_s32(pSrcVec); 
      pSrcVec += 4;
      vstrwq(pDst, vcvtq_n_f32_s32(vecDst, 15));  
      vstrwq(pDst, vcvtq_n_f32_s32((int32x4_t)vecDst, 15));  
      pDst += 4;
      /*
       * Decrement the blockSize loop counter
+2 −2
Original line number Diff line number Diff line
@@ -70,9 +70,9 @@ void arm_q7_to_float(
    {
        /* C = (float32_t) A / 32768 */
        /* convert from q7 to float and then store the results in the destination buffer */
        vecDst = vldrbq_s32(pSrcVec);    
        vecDst = (q7x16_t)vldrbq_s32(pSrcVec);    
        pSrcVec += 4;
        vstrwq(pDst, vcvtq_n_f32_s32(vecDst, 7));   
        vstrwq(pDst, vcvtq_n_f32_s32((int32x4_t)vecDst, 7));   
        pDst += 4;
        /*
         * Decrement the blockSize loop counter
Loading