Commit 602828c1, authored by Nicolas Pitre, committed by Arnd Bergmann
Browse files

__div64_const32(): improve the generic C version



Let's rework that code to avoid large immediate values and convert some
64-bit variables to 32-bit ones when possible. This allows gcc to
produce smaller and better code. This even produces optimal code on
RISC-V.

Signed-off-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
parent 3940ba8e
Loading
Loading
Loading
Loading
+10 −6
Original line number Diff line number Diff line
@@ -178,7 +178,8 @@ static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
	uint32_t m_hi = m >> 32;
	uint32_t n_lo = n;
	uint32_t n_hi = n >> 32;
	uint64_t res, tmp;
	uint64_t res;
	uint32_t res_lo, res_hi, tmp;

	if (!bias) {
		res = ((uint64_t)m_lo * n_lo) >> 32;
@@ -187,8 +188,9 @@ static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
		res = (m + (uint64_t)m_lo * n_lo) >> 32;
	} else {
		res = m + (uint64_t)m_lo * n_lo;
		tmp = (res < m) ? (1ULL << 32) : 0;
		res = (res >> 32) + tmp;
		res_lo = res >> 32;
		res_hi = (res_lo < m_hi);
		res = res_lo | ((uint64_t)res_hi << 32);
	}

	if (!(m & ((1ULL << 63) | (1ULL << 31)))) {
@@ -197,10 +199,12 @@ static inline uint64_t __arch_xprod_64(const uint64_t m, uint64_t n, bool bias)
		res += (uint64_t)m_hi * n_lo;
		res >>= 32;
	} else {
		tmp = res += (uint64_t)m_lo * n_hi;
		res += (uint64_t)m_lo * n_hi;
		tmp = res >> 32;
		res += (uint64_t)m_hi * n_lo;
		tmp = (res < tmp) ? (1ULL << 32) : 0;
		res = (res >> 32) + tmp;
		res_lo = res >> 32;
		res_hi = (res_lo < tmp);
		res = res_lo | ((uint64_t)res_hi << 32);
	}

	res += (uint64_t)m_hi * n_hi;