DSP: Fix vector type mismatches (5f86244b) · Commits · Wenxi XU / cmsis

CMSIS/DSP/PrivateInclude/arm_vec_fft.h

+13 −13

Original line number	Diff line number	Diff line
		@@ -140,7 +140,7 @@ __STATIC_INLINE void arm_bitreversal_16_inpl_mve(
		{
		uint32_t src = (uint32_t ) pSrc;
		int32_t blkCnt; /* loop counters */
		-uint32x4_t bitRevTabOff;
		+uint16x8_t bitRevTabOff;
		uint16x8_t one = vdupq_n_u16(1);
		uint32x4_t bitRevOff1Low, bitRevOff0Low;
		uint32x4_t bitRevOff1High, bitRevOff0High;
		@@ -152,8 +152,8 @@ __STATIC_INLINE void arm_bitreversal_16_inpl_mve(

		bitRevOff0Low = vmullbq_int_u16(bitRevTabOff, one);
		bitRevOff0High = vmulltq_int_u16(bitRevTabOff, one);
		-bitRevOff0Low = vshrq_n_u16(bitRevOff0Low, 3);
		-bitRevOff0High = vshrq_n_u16(bitRevOff0High, 3);
		+bitRevOff0Low = (uint32x4_t)vshrq_n_u16((uint16x8_t)bitRevOff0Low, 3);
		+bitRevOff0High = (uint32x4_t)vshrq_n_u16((uint16x8_t)bitRevOff0High, 3);

		blkCnt = (bitRevLen / 16);
		while (blkCnt > 0) {
		@@ -162,8 +162,8 @@ __STATIC_INLINE void arm_bitreversal_16_inpl_mve(

		bitRevOff1Low = vmullbq_int_u16(bitRevTabOff, one);
		bitRevOff1High = vmulltq_int_u16(bitRevTabOff, one);
		-bitRevOff1Low = vshrq_n_u16(bitRevOff1Low, 3);
		-bitRevOff1High = vshrq_n_u16(bitRevOff1High, 3);
		+bitRevOff1Low = (uint32x4_t)vshrq_n_u16((uint16x8_t)bitRevOff1Low, 3);
		+bitRevOff1High = (uint32x4_t)vshrq_n_u16((uint16x8_t)bitRevOff1High, 3);

		inLow = vldrwq_gather_shifted_offset_u32(src, bitRevOff0Low);
		inHigh = vldrwq_gather_shifted_offset_u32(src, bitRevOff0High);
		@@ -177,8 +177,8 @@ __STATIC_INLINE void arm_bitreversal_16_inpl_mve(

		bitRevOff0Low = vmullbq_int_u16(bitRevTabOff, one);
		bitRevOff0High = vmulltq_int_u16(bitRevTabOff, one);
		-bitRevOff0Low = vshrq_n_u16(bitRevOff0Low, 3);
		-bitRevOff0High = vshrq_n_u16(bitRevOff0High, 3);
		+bitRevOff0Low = (uint32x4_t)vshrq_n_u16((uint16x8_t)bitRevOff0Low, 3);
		+bitRevOff0High = (uint32x4_t)vshrq_n_u16((uint16x8_t)bitRevOff0High, 3);

		inLow = vldrwq_gather_shifted_offset_u32(src, bitRevOff1Low);
		inHigh = vldrwq_gather_shifted_offset_u32(src, bitRevOff1High);
		@@ -211,8 +211,8 @@ __STATIC_INLINE void arm_bitreversal_16_inpl_mve(

		bitRevOff0Low = vmullbq_int_u16(bitRevTabOff, one);
		bitRevOff0High = vmulltq_int_u16(bitRevTabOff, one);
		-bitRevOff0Low = vshrq_n_u16(bitRevOff0Low, 3);
		-bitRevOff0High = vshrq_n_u16(bitRevOff0High, 3);
		+bitRevOff0Low = (uint32x4_t)vshrq_n_u16((uint16x8_t)bitRevOff0Low, 3);
		+bitRevOff0High = (uint32x4_t)vshrq_n_u16((uint16x8_t)bitRevOff0High, 3);

		inLow = vldrwq_gather_shifted_offset_z_u32(src, bitRevOff0Low, p);
		inHigh = vldrwq_gather_shifted_offset_z_u32(src, bitRevOff0High, p);
		@@ -251,13 +251,13 @@ __STATIC_INLINE void arm_bitreversal_32_outpl_mve(void *pDst, void *pSrc, uint32
		while (blkCnt > 0) {
		uint64x2_t vecIn;

		-vecIn = vldrdq_gather_offset_u64(pSrc, (int64x2_t) bitRevOffs0);
		+vecIn = vldrdq_gather_offset_u64(pSrc, (uint64x2_t) bitRevOffs0);
		idxOffs0 = idxOffs0 + 16;
		vst1q(pDst32, (uint32x4_t) vecIn);
		pDst32 += 4;
		bitRevOffs0 = vbrsrq(idxOffs0, bitRevPos);

		-vecIn = vldrdq_gather_offset_u64(pSrc, (int64x2_t) bitRevOffs1);
		+vecIn = vldrdq_gather_offset_u64(pSrc, (uint64x2_t) bitRevOffs1);
		idxOffs1 = idxOffs1 + 16;
		vst1q(pDst32, (uint32x4_t) vecIn);
		pDst32 += 4;
		@@ -297,13 +297,13 @@ __STATIC_INLINE void arm_bitreversal_16_outpl_mve(void *pDst, void *pSrc, uint32
		while (blkCnt > 0) {
		uint32x4_t vecIn;

		-vecIn = vldrwq_gather_offset_s32(pSrc, bitRevOffs0);
		+vecIn = (uint32x4_t)vldrwq_gather_offset_s32(pSrc, bitRevOffs0);
		idxOffs0 = idxOffs0 + 32;
		vst1q(pDst16, (uint16x8_t) vecIn);
		pDst16 += 8;
		bitRevOffs0 = vbrsrq(idxOffs0, bitRevPos);

		-vecIn = vldrwq_gather_offset_s32(pSrc, bitRevOffs1);
		+vecIn = (uint32x4_t)vldrwq_gather_offset_s32(pSrc, bitRevOffs1);
		idxOffs1 = idxOffs1 + 32;
		vst1q(pDst16, (uint16x8_t) vecIn);
		pDst16 += 8;

CMSIS/DSP/Source/FilteringFunctions/arm_biquad_cascade_df1_q15.c

+2 −2

Original line number	Diff line number	Diff line
		@@ -102,9 +102,9 @@ void arm_biquad_cascade_df1_q15(
		bCoeffs1[7] = a2;
		bCoeffs1[6] = a1;

		-bCoeffs2 =
		+bCoeffs2 = (q15x8_t)
		vsetq_lane_s32(vgetq_lane_s32((q31x4_t) bCoeffs0, 3), (q31x4_t) bCoeffs2, 3);
		-bCoeffs3 =
		+bCoeffs3 = (q15x8_t)
		vsetq_lane_s32(vgetq_lane_s32((q31x4_t) bCoeffs1, 3), (q31x4_t) bCoeffs3, 3);

CMSIS/DSP/Source/MatrixFunctions/arm_mat_ldlt_f32.c

+3 −3

Original line number	Diff line number	Diff line
		@@ -178,7 +178,7 @@ arm_status arm_mat_ldlt_f32(

		int32x4_t vecOffs;
		int w;
		-vecOffs = vidupq_u32((uint32_t)0, 1);
		+vecOffs = (int32x4_t)vidupq_u32((uint32_t)0, 1);
		vecOffs = vmulq_n_s32(vecOffs,n);

		for(w=k+1; w<n; w+=4)
		@@ -204,7 +204,7 @@ arm_status arm_mat_ldlt_f32(
		//pA[w*n+x] = pA[w*n+x] - pA[w*n+k] * (pA[x*n+k] * invA);


		-vecX = vldrwq_gather_shifted_offset_z_f32(&pA[x*n+k], vecOffs, p0);
		+vecX = vldrwq_gather_shifted_offset_z_f32(&pA[x*n+k], (uint32x4_t)vecOffs, p0);
		vecX = vmulq_m_n_f32(vuninitializedq_f32(),vecX,invA,p0);


		@@ -247,7 +247,7 @@ arm_status arm_mat_ldlt_f32(

		vecA = vldrwq_z_f32(&pA[w*n+x],p0);

		-vecX = vldrwq_gather_shifted_offset_z_f32(&pA[x*n+k], vecOffs, p0);
		+vecX = vldrwq_gather_shifted_offset_z_f32(&pA[x*n+k], (uint32x4_t)vecOffs, p0);
		vecX = vmulq_m_n_f32(vuninitializedq_f32(),vecX,invA,p0);

		vecA = vfmsq_m(vecA, vecW, vecX, p0);

CMSIS/DSP/Source/SupportFunctions/arm_q15_to_float.c

+2 −2

Original line number	Diff line number	Diff line
		@@ -72,9 +72,9 @@ void arm_q15_to_float(
		{
		/* C = (float32_t) A / 32768 */
		/* convert from q15 to float and then store the results in the destination buffer */
		-vecDst = vldrhq_s32(pSrcVec);
		+vecDst = (q15x8_t)vldrhq_s32(pSrcVec);
		pSrcVec += 4;
		-vstrwq(pDst, vcvtq_n_f32_s32(vecDst, 15));
		+vstrwq(pDst, vcvtq_n_f32_s32((int32x4_t)vecDst, 15));
		pDst += 4;
		/*
		* Decrement the blockSize loop counter

CMSIS/DSP/Source/SupportFunctions/arm_q7_to_float.c

+2 −2

Original line number	Diff line number	Diff line
		@@ -70,9 +70,9 @@ void arm_q7_to_float(
		{
		/* C = (float32_t) A / 128 */
		/* convert from q7 to float and then store the results in the destination buffer */
		-vecDst = vldrbq_s32(pSrcVec);
		+vecDst = (q7x16_t)vldrbq_s32(pSrcVec);
		pSrcVec += 4;
		-vstrwq(pDst, vcvtq_n_f32_s32(vecDst, 7));
		+vstrwq(pDst, vcvtq_n_f32_s32((int32x4_t)vecDst, 7));
		pDst += 4;
		/*
		* Decrement the blockSize loop counter

Admin message