Commit 99e15d3d authored by Christophe Favergeon
Browse files

Corrected issue #131

parent 03658076
Loading
Loading
Loading
Loading
+22 −22
Original line number Diff line number Diff line
@@ -213,7 +213,7 @@ extern const float32_t log_tab[4*8];
extern const float64_t log_tab_64[2*8];

#ifndef DOXYGEN_SKIP_THIS
inline float32x4_t vfloorq_f32(float32x4_t val)
static inline float32x4_t vfloorq_f32(float32x4_t val)
{
    static const float32_t CONST_1[4] = {1.f,1.f,1.f,1.f};

@@ -223,7 +223,7 @@ inline float32x4_t vfloorq_f32(float32x4_t val)
    return vbslq_f32(vcgtq_f32(r, val), vsubq_f32(r, vld1q_f32(CONST_1)), r);
}

inline float32x2_t vinvsqrt_f32(float32x2_t x)
static inline float32x2_t vinvsqrt_f32(float32x2_t x)
{
    float32x2_t sqrt_reciprocal = vrsqrte_f32(x);
    sqrt_reciprocal             = vmul_f32(vrsqrts_f32(vmul_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
@@ -232,7 +232,7 @@ inline float32x2_t vinvsqrt_f32(float32x2_t x)
    return sqrt_reciprocal;
}

inline float32x4_t vinvsqrtq_f32(float32x4_t x)
static inline float32x4_t vinvsqrtq_f32(float32x4_t x)
{
    float32x4_t sqrt_reciprocal = vrsqrteq_f32(x);
    sqrt_reciprocal             = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
@@ -241,7 +241,7 @@ inline float32x4_t vinvsqrtq_f32(float32x4_t x)
    return sqrt_reciprocal;
}

inline float32x2_t vinv_f32(float32x2_t x)
static inline float32x2_t vinv_f32(float32x2_t x)
{
    float32x2_t recip = vrecpe_f32(x);
    recip             = vmul_f32(vrecps_f32(x, recip), recip);
@@ -249,7 +249,7 @@ inline float32x2_t vinv_f32(float32x2_t x)
    return recip;
}

inline float32x4_t vinvq_f32(float32x4_t x)
static inline float32x4_t vinvq_f32(float32x4_t x)
{
    float32x4_t recip = vrecpeq_f32(x);
    recip             = vmulq_f32(vrecpsq_f32(x, recip), recip);
@@ -259,7 +259,7 @@ inline float32x4_t vinvq_f32(float32x4_t x)

#if defined(__aarch64__)

inline float64x2_t vinvq_f64(float64x2_t x)
static inline float64x2_t vinvq_f64(float64x2_t x)
{
    float64x2_t recip = vrecpeq_f64(x);
    recip             = vmulq_f64(vrecpsq_f64(x, recip), recip);
@@ -269,7 +269,7 @@ inline float64x2_t vinvq_f64(float64x2_t x)

#endif /* #if defined(__aarch64__) */

inline float32x4_t vtaylor_polyq_f32(float32x4_t x, const float32_t *coeffs)
static inline float32x4_t vtaylor_polyq_f32(float32x4_t x, const float32_t *coeffs)
{
    float32x4_t A   = vmlaq_f32(vld1q_f32(&coeffs[4*0]), vld1q_f32(&coeffs[4*4]), x);
    float32x4_t B   = vmlaq_f32(vld1q_f32(&coeffs[4*2]), vld1q_f32(&coeffs[4*6]), x);
@@ -283,7 +283,7 @@ inline float32x4_t vtaylor_polyq_f32(float32x4_t x, const float32_t *coeffs)

#if defined(__aarch64__)

inline float64x2_t vtaylor_polyq_f64(float64x2_t x, const float64_t *coeffs)
static inline float64x2_t vtaylor_polyq_f64(float64x2_t x, const float64_t *coeffs)
{
    float64x2_t A   = vmlaq_f64(vld1q_f64(&coeffs[2*0]), vld1q_f64(&coeffs[2*4]), x);
    float64x2_t B   = vmlaq_f64(vld1q_f64(&coeffs[2*2]), vld1q_f64(&coeffs[2*6]), x);
@@ -298,7 +298,7 @@ inline float64x2_t vtaylor_polyq_f64(float64x2_t x, const float64_t *coeffs)
#endif /* #if defined(__aarch64__) */


inline float32x4_t vexpq_f32(float32x4_t x)
static inline float32x4_t vexpq_f32(float32x4_t x)
{
    static const float32_t CONST_LN2[4]          = {0.6931471805f,0.6931471805f,0.6931471805f,0.6931471805f}; // ln(2)
    static const float32_t CONST_INV_LN2[4]      = {1.4426950408f,1.4426950408f,1.4426950408f,1.4426950408f}; // 1/ln(2)
@@ -320,7 +320,7 @@ inline float32x4_t vexpq_f32(float32x4_t x)
    return poly;
}

inline float32x4_t vlogq_f32(float32x4_t x)
static inline float32x4_t vlogq_f32(float32x4_t x)
{
    static const int32_t   CONST_127[4] = {127,127,127,127};           // 127
    static const float32_t CONST_LN2[4] = {0.6931471805f,0.6931471805f,0.6931471805f,0.6931471805f}; // ln(2)
@@ -339,7 +339,7 @@ inline float32x4_t vlogq_f32(float32x4_t x)
}


inline float32x4_t vtanhq_f32(float32x4_t val)
static inline float32x4_t vtanhq_f32(float32x4_t val)
{
    static const float32_t CONST_1[4]        = {1.f,1.f,1.f,1.f};
    static const float32_t CONST_2[4]        = {2.f,2.f,2.f,2.f};
@@ -354,7 +354,7 @@ inline float32x4_t vtanhq_f32(float32x4_t val)
    return tanh;
}

inline float32x4_t vpowq_f32(float32x4_t val, float32x4_t n)
static inline float32x4_t vpowq_f32(float32x4_t val, float32x4_t n)
{
    return vexpq_f32(vmulq_f32(n, vlogq_f32(val)));
}
@@ -364,7 +364,7 @@ inline float32x4_t vpowq_f32(float32x4_t val, float32x4_t n)
/** Exponent polynomial coefficients */
/** Logarithm polynomial coefficients */
#ifndef DOXYGEN_SKIP_THIS
inline float16x8_t vfloorq_f16(float16x8_t val)
static inline float16x8_t vfloorq_f16(float16x8_t val)
{
    static const float16_t CONST_1[8] = {1.f,1.f,1.f,1.f,1.f,1.f,1.f,1.f};

@@ -373,7 +373,7 @@ inline float16x8_t vfloorq_f16(float16x8_t val)

    return vbslq_f16(vcgtq_f16(r, val), vsubq_f16(r, vld1q_f16(CONST_1)), r);
}
inline float16x4_t vinvsqrt_f16(float16x4_t x)
static inline float16x4_t vinvsqrt_f16(float16x4_t x)
{
    float16x4_t sqrt_reciprocal = vrsqrte_f16(x);
    sqrt_reciprocal             = vmul_f16(vrsqrts_f16(vmul_f16(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
@@ -381,7 +381,7 @@ inline float16x4_t vinvsqrt_f16(float16x4_t x)
    return sqrt_reciprocal;
}

inline float16x8_t vinvsqrtq_f16(float16x8_t x)
static inline float16x8_t vinvsqrtq_f16(float16x8_t x)
{
    float16x8_t sqrt_reciprocal = vrsqrteq_f16(x);
    sqrt_reciprocal             = vmulq_f16(vrsqrtsq_f16(vmulq_f16(x, sqrt_reciprocal), sqrt_reciprocal), sqrt_reciprocal);
@@ -389,7 +389,7 @@ inline float16x8_t vinvsqrtq_f16(float16x8_t x)
    return sqrt_reciprocal;
}

inline float16x4_t vinv_f16(float16x4_t x)
static inline float16x4_t vinv_f16(float16x4_t x)
{
    float16x4_t recip = vrecpe_f16(x);
    recip             = vmul_f16(vrecps_f16(x, recip), recip);
@@ -397,7 +397,7 @@ inline float16x4_t vinv_f16(float16x4_t x)
    return recip;
}

inline float16x8_t vinvq_f16(float16x8_t x)
static inline float16x8_t vinvq_f16(float16x8_t x)
{
    float16x8_t recip = vrecpeq_f16(x);
    recip             = vmulq_f16(vrecpsq_f16(x, recip), recip);
@@ -405,7 +405,7 @@ inline float16x8_t vinvq_f16(float16x8_t x)
    return recip;
}

inline float16x8_t vtanhq_f16(float16x8_t val)
static inline float16x8_t vtanhq_f16(float16x8_t val)
{
    const float16_t CONST_1[8]        = {1.f,1.f,1.f,1.f,1.f,1.f,1.f,1.f};
    const float16_t CONST_2[8]        = {2.f,2.f,2.f,2.f,2.f,2.f,2.f,2.f};
@@ -420,7 +420,7 @@ inline float16x8_t vtanhq_f16(float16x8_t val)
    return tanh;
}

inline float16x8_t vtaylor_polyq_f16(float16x8_t x, const float16_t *coeffs)
static inline float16x8_t vtaylor_polyq_f16(float16x8_t x, const float16_t *coeffs)
{
    const float16x8_t A   = vaddq_f16(vld1q_f16(&coeffs[8*0]), vmulq_f16(vld1q_f16(&coeffs[8*4]), x));
    const float16x8_t B   = vaddq_f16(vld1q_f16(&coeffs[8*2]), vmulq_f16(vld1q_f16(&coeffs[8*6]), x));
@@ -432,7 +432,7 @@ inline float16x8_t vtaylor_polyq_f16(float16x8_t x, const float16_t *coeffs)
    return res;
}

inline float16x8_t vexpq_f16(float16x8_t x)
static inline float16x8_t vexpq_f16(float16x8_t x)
{
    // TODO (COMPMID-1535) : Revisit FP16 approximations
    const float32x4_t x_high = vcvt_f32_f16(vget_high_f16(x));
@@ -442,7 +442,7 @@ inline float16x8_t vexpq_f16(float16x8_t x)
    return res;
}

inline float16x8_t vlogq_f16(float16x8_t x)
static inline float16x8_t vlogq_f16(float16x8_t x)
{
    // TODO (COMPMID-1535) : Revisit FP16 approximations
    const float32x4_t x_high = vcvt_f32_f16(vget_high_f16(x));
@@ -452,7 +452,7 @@ inline float16x8_t vlogq_f16(float16x8_t x)
    return res;
}

inline float16x8_t vpowq_f16(float16x8_t val, float16x8_t n)
static inline float16x8_t vpowq_f16(float16x8_t val, float16x8_t n)
{
    // TODO (giaiod01) - COMPMID-1535
    float32x4_t n0_f32   = vcvt_f32_f16(vget_low_f16(n));
+1 −0
Original line number Diff line number Diff line
@@ -53,6 +53,7 @@ For tests of the error value of the Levinson Durbin algorithm
          inpB, inputB.nbSamples(),
          outp);

        ASSERT_EMPTY_TAIL(output);
        ASSERT_SNR(ref,output,(float32_t)SNR_THRESHOLD);
        ASSERT_CLOSE_ERROR(ref,output,ABS_ERROR,REL_ERROR);

+1 −1
Original line number Diff line number Diff line
@@ -196,7 +196,7 @@ for t in tests:
#allSuites=[("StatsTestsQ7","../Output.pickle")]

allSuites=[
("UnaryTestsF32","../Output.pickle"),
("MISCF32","../Output.pickle"),
#("MISCQ15","../Output.pickle"),
#("MISCQ7","../Output.pickle"),
#("FIRF16","../Output_f16.pickle")
+1 −1
Original line number Diff line number Diff line
<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
<cprj schemaVersion="2.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="CPRJ.xsd">
  <created timestamp="2023-11-27T06:32:22" tool="csolution 2.0.0"/>
  <created timestamp="2023-11-27T06:42:48" tool="csolution 2.0.0"/>

  <info isLayer="false">
    <description>Automatically generated project</description>
+1 −1
Original line number Diff line number Diff line
<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
<cprj schemaVersion="2.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="CPRJ.xsd">
  <created timestamp="2023-11-27T06:32:22" tool="csolution 2.0.0"/>
  <created timestamp="2023-11-27T06:42:48" tool="csolution 2.0.0"/>

  <info isLayer="false">
    <description>Automatically generated project</description>
Loading