Commit 0e8b8ec8 authored by Ryan McClelland, committed by Ryan McClelland
Browse files

Revert "Removed some gcc workarounds for Helium that are no more needed."



This reverts commit e71a2ba1.

Signed-off-by: Ryan McClelland <rymcclel@gmail.com>
parent 893dba31
Loading
Loading
Loading
Loading
+6 −1
Original line number Diff line number Diff line
@@ -45,7 +45,12 @@
  @param[out]    pDst      points to the block of output data
  @param[in]     blockSize number of samples to process
 */
#if (defined(ARM_MATH_MVE_FLOAT16) && defined(ARM_MATH_HELIUM_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE)

/*
 * Build-time notice for GCC: when the float16 MVE extension is enabled and
 * auto-vectorization is not requested, report that the scalar implementation
 * of this function is the one being compiled.
 */
#if defined(ARM_DSP_BUILT_WITH_GCC)
#  if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#    pragma GCC warning "Scalar version of arm_biquad_cascade_stereo_df2T_f16 built. Helium version has build issues with gcc."
#  endif
#endif /* ARM_DSP_BUILT_WITH_GCC */

#if (defined(ARM_MATH_MVE_FLOAT16) && defined(ARM_MATH_HELIUM_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE) && !defined(ARM_DSP_BUILT_WITH_GCC)
ARM_DSP_ATTRIBUTE void arm_biquad_cascade_stereo_df2T_f16(
  const arm_biquad_cascade_stereo_df2T_instance_f16 * S,
  const float16_t * pSrc,
+5 −1
Original line number Diff line number Diff line
@@ -56,7 +56,11 @@
                   Refer to \ref arm_correlate_opt_q7() for a faster implementation of this function.
 */

#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
/*
 * GCC build notice. The condition tests ARM_MATH_MVEI, the same feature macro
 * that gates the Helium implementation of this q7 kernel below, so the
 * warning is emitted exactly when building with GCC is what forces the scalar
 * fallback. It was previously keyed on the float16 macro, which does not
 * control this integer kernel.
 */
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) && defined(ARM_DSP_BUILT_WITH_GCC)
#pragma GCC warning "Scalar version of arm_correlate_q7 built. Helium version has build issues with gcc."
#endif

#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) &&  !defined(ARM_DSP_BUILT_WITH_GCC)
#include "arm_helium_utils.h"

#include "arm_vec_filtering.h"
+5 −1
Original line number Diff line number Diff line
@@ -51,7 +51,11 @@
                   - \ref ARM_MATH_SIZE_MISMATCH : Matrix size check failed
 */

#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
/*
 * Build-time notice for GCC: when the float16 MVE extension is enabled and
 * auto-vectorization is not requested, report that the scalar implementation
 * of this function is the one being compiled.
 */
#if defined(ARM_DSP_BUILT_WITH_GCC)
#  if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#    pragma GCC warning "Scalar version of arm_mat_cmplx_mult_f16 built. Helium version has build issues with gcc."
#  endif
#endif /* ARM_DSP_BUILT_WITH_GCC */

#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) &&  !defined(ARM_DSP_BUILT_WITH_GCC)

#include "arm_helium_utils.h"

+6 −1
Original line number Diff line number Diff line
@@ -45,7 +45,12 @@
  @param[out]    pIndex     index of maximum value returned here
 */

#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
/*
 * GCC build notice. The condition tests ARM_MATH_MVEI, the same feature macro
 * that gates the Helium implementation of this q7 kernel below, so the
 * warning is emitted exactly when building with GCC is what forces the scalar
 * fallback. It was previously keyed on the float16 macro, which does not
 * control this integer kernel.
 */
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) && defined(ARM_DSP_BUILT_WITH_GCC)
#pragma GCC warning "Scalar version of arm_absmax_q7 built. Helium version has build issues with gcc."
#endif


#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) &&  !defined(ARM_DSP_BUILT_WITH_GCC)

#include <stdint.h>
#include "arm_helium_utils.h"
+10 −1
Original line number Diff line number Diff line
@@ -159,6 +159,11 @@ ARM_DSP_ATTRIBUTE void arm_rfft_q15(
#include "arm_helium_utils.h"
#include "arm_vec_fft.h"

/*
 * GCC-only complex-multiply helpers for q15 vectors, used by the GCC branch
 * of arm_split_rfft_q15. Each macro nests two explicitly typed (_s16)
 * intrinsics; the inner call is seeded with vuninitializedq_s16() cast to the
 * type of A.
 * NOTE(review): presumably these stand in for the generic
 * MVE_CMPLX_MULT_FX_AxB / MVE_CMPLX_MULT_FX_AxConjB macros because those do
 * not build with GCC - confirm against arm_helium_utils.h.
 * A and B are each expanded twice, so pass arguments without side effects.
 */
#if defined(ARM_DSP_BUILT_WITH_GCC)
/* A x B (per the macro name): vqdmlsdhq_s16 result accumulated by vqdmladhxq_s16. */
#define MVE_CMPLX_MULT_FX_AxB_S16(A,B)          vqdmladhxq_s16(vqdmlsdhq_s16((__typeof(A))vuninitializedq_s16(), A, B), A, B)
/* A x conj(B) (per the macro name): vqdmlsdhxq_s16 result accumulated by vqdmladhq_s16. */
#define MVE_CMPLX_MULT_FX_AxConjB_S16(A,B)      vqdmladhq_s16(vqdmlsdhxq_s16((__typeof(A))vuninitializedq_s16(), A, B), A, B)

#endif 

ARM_DSP_ATTRIBUTE void arm_split_rfft_q15(
        q15_t * pSrc,
@@ -200,9 +205,13 @@ ARM_DSP_ATTRIBUTE void arm_split_rfft_q15(
        q15x8_t         coefA = vldrhq_gather_shifted_offset_s16(pCoefAb, offsetCoef);
        q15x8_t         coefB = vldrhq_gather_shifted_offset_s16(pCoefBb, offsetCoef);


#if defined(ARM_DSP_BUILT_WITH_GCC)
        q15x8_t         out = vhaddq_s16(MVE_CMPLX_MULT_FX_AxB_S16(in1, coefA),
                                     MVE_CMPLX_MULT_FX_AxConjB_S16(coefB, in2));
#else
        q15x8_t         out = vhaddq_s16(MVE_CMPLX_MULT_FX_AxB(in1, coefA, q15x8_t),
                                         MVE_CMPLX_MULT_FX_AxConjB(coefB, in2, q15x8_t));
#endif
        vst1q_s16(pOut1, out);
        pOut1 += 8;

Loading