Commit 7e0fb73c authored by Linus Torvalds's avatar Linus Torvalds
Browse files

Merge branch 'hash' of git://ftp.sciencehorizons.net/linux

Pull string hash improvements from George Spelvin:
 "This series does several related things:

   - Makes the dcache hash (fs/namei.c) useful for general kernel use.

     (Thanks to Bruce for noticing the zero-length corner case)

   - Converts the string hashes in <linux/sunrpc/svcauth.h> to use the
     above.

   - Avoids 64-bit multiplies in hash_64() on 32-bit platforms.  Two
     32-bit multiplies will do well enough.

   - Rids the world of the bad hash multipliers in hash_32.

     This finishes the job started in commit 689de1d6 ("Minimal
     fix-up of bad hashing behavior of hash_64()")

     The vast majority of Linux architectures have hardware support for
     32x32-bit multiply and so derive no benefit from "simplified"
     multipliers.

     The few processors that do not (68000, h8/300 and some models of
     Microblaze) have arch-specific implementations added.  Those
     patches are last in the series.

   - Overhauls the dcache hash mixing.

     The patch in commit 0fed3ac8 ("namei: Improve hash mixing if
     CONFIG_DCACHE_WORD_ACCESS") was an off-the-cuff suggestion.
     Replaced with a much more careful design that's simultaneously
     faster and better.  (My own invention, as there was nothing suitable
     in the literature I could find.  Comments welcome!)

   - Modify the hash_name() loop to skip the initial HASH_MIX().  This
     would let us salt the hash if we ever wanted to.

   - Sort out partial_name_hash().

     The hash function is declared as using a long state, even though
     it's truncated to 32 bits at the end and the extra internal state
     contributes nothing to the result.  And some callers do odd things:

      - fs/hfs/string.c only allocates 32 bits of state
      - fs/hfsplus/unicode.c uses it to hash 16-bit unicode symbols not bytes

   - Modify bytemask_from_count to handle inputs of 1..sizeof(long)
     rather than 0..sizeof(long)-1.  This would simplify users other
     than full_name_hash"

  Special thanks to Bruce Fields for testing and finding bugs in v1.  (I
  learned some humbling lessons about "obviously correct" code.)

  On the arch-specific front, the m68k assembly has been tested in a
  standalone test harness, I've been in contact with the Microblaze
  maintainers who mostly don't care, as the hardware multiplier is never
  omitted in real-world applications, and I haven't heard anything from
  the H8/300 world"

* 'hash' of git://ftp.sciencehorizons.net/linux:
  h8300: Add <asm/hash.h>
  microblaze: Add <asm/hash.h>
  m68k: Add <asm/hash.h>
  <linux/hash.h>: Add support for architecture-specific functions
  fs/namei.c: Improve dcache hash function
  Eliminate bad hash multipliers from hash_32() and  hash_64()
  Change hash_64() return value to 32 bits
  <linux/sunrpc/svcauth.h>: Define hash_str() in terms of hashlen_string()
  fs/namei.c: Add hashlen_string() function
  Pull out string hash to <linux/stringhash.h>
parents 4e8440b3 4684fe95
Loading
Loading
Loading
Loading
+8 −0
Original line number Diff line number Diff line
@@ -598,6 +598,14 @@ config HAVE_STACK_VALIDATION
	  Architecture supports the 'objtool check' host tool command, which
	  performs compile-time stack metadata validation.

# No prompt string, so this symbol is not user-selectable; an architecture
# opts in from its own Kconfig entry with "select HAVE_ARCH_HASH".
config HAVE_ARCH_HASH
	bool
	default n
	help
	  If this is set, the architecture provides an <asm/hash.h>
	  file which provides platform-specific implementations of some
	  functions in <linux/hash.h> or fs/namei.c.

#
# ABI hall of shame
#
+1 −0
Original line number Diff line number Diff line
@@ -20,6 +20,7 @@ config H8300
	select HAVE_KERNEL_GZIP
	select HAVE_KERNEL_LZO
	select HAVE_ARCH_KGDB
	select HAVE_ARCH_HASH
	select CPU_NO_EFFICIENT_FFS

config RWSEM_GENERIC_SPINLOCK
+53 −0
Original line number Diff line number Diff line
#ifndef _ASM_HASH_H
#define _ASM_HASH_H

/*
 * The later H8SX models have a 32x32-bit multiply, but the H8/300H
 * and H8S have only 16x16->32.  Since it's tolerably compact, this is
 * basically an inlined version of the __mulsi3 code.  Since the inputs
 * are not expected to be small, it's also simplified by skipping the
 * early-out checks.
 *
 * (Since neither CPU has any multi-bit shift instructions, a
 * shift-and-add version is a non-starter.)
 *
 * TODO: come up with an arch-specific version of the hashing in fs/namei.c,
 * since that is heavily dependent on rotates.  Which, as mentioned, suck
 * horribly on H8.
 */

#if defined(CONFIG_CPU_H300H) || defined(CONFIG_CPU_H8S)

#define HAVE_ARCH__HASH_32 1

/*
 * Multiply by k = 0x61C88647.  Fitting this into three registers requires
 * one extra instruction, but reducing register pressure will probably
 * make that back and then some.
 *
 * GCC asm note: %e1 is the high half of operand %1, while %f1 is the
 * low half.  So if %1 is er4, then %e1 is e4 and %f1 is r4.
 *
 * This has been designed to modify x in place, since that's the most
 * common usage, but preserve k, since hash_64() makes two calls in
 * quick succession.
 */
static inline u32 __attribute_const__ __hash_32(u32 x)
{
	u32 temp;

	/*
	 * Return the low 32 bits of x * k (k = GOLDEN_RATIO_32), assembled
	 * from three 16x16->32 mulxu.w partial products.  Writing
	 * x = xhigh:xlow and k = khigh:klow:
	 *
	 *   x * k mod 2^32 = klow*xlow + ((klow*xhigh + khigh*xlow) << 16)
	 *
	 * The xhigh*khigh term lies entirely above bit 31 and is dropped,
	 * and only the low 16 bits of each cross term can reach the result.
	 *
	 * Operands: %0 = temp (early-clobber scratch), %1 = x (tied to the
	 * input, updated in place), %2 = k (read only, never written).
	 */
	asm(   "mov.w	%e1,%f0"	/* temp.low = xhigh */
	"\n	mulxu.w	%f2,%0"		/* klow * xhigh */
	"\n	mov.w	%f0,%e1"	/* The extra instruction: park low16(klow * xhigh) in x.high */
	"\n	mov.w	%f1,%f0"	/* temp.low = xlow */
	"\n	mulxu.w	%e2,%0"		/* khigh * xlow */
	"\n	add.w	%e1,%f0"	/* temp.low = sum of both cross terms, mod 2^16 */
	"\n	mulxu.w	%f2,%1"		/* klow * xlow */
	"\n	add.w	%f0,%e1"	/* add the cross terms into the high half of the result */
	: "=&r" (temp), "=r" (x)
	: "%r" (GOLDEN_RATIO_32), "1" (x));
	return x;
}

#endif
#endif /* _ASM_HASH_H */
+1 −0
Original line number Diff line number Diff line
@@ -41,6 +41,7 @@ config M68000
	select CPU_HAS_NO_UNALIGNED
	select GENERIC_CSUM
	select CPU_NO_EFFICIENT_FFS
	select HAVE_ARCH_HASH
	help
	  The Freescale (was Motorola) 68000 CPU is the first generation of
	  the well known M68K family of processors. The CPU core as well as
+59 −0
Original line number Diff line number Diff line
#ifndef _ASM_HASH_H
#define _ASM_HASH_H

/*
 * If CONFIG_M68000=y (original mc68000/010), this file is #included
 * to work around the lack of a MULU.L instruction.
 */

#define HAVE_ARCH__HASH_32 1
/*
 * While it would be legal to substitute a different hash operation
 * entirely, let's keep it simple and just use an optimized multiply
 * by GOLDEN_RATIO_32 = 0x61C88647.
 *
 * The best way to do that appears to be to multiply by 0x8647 with
 * shifts and adds, and use mulu.w to multiply the high half by 0x61C8.
 *
 * Because the 68000 has multi-cycle shifts, this addition chain is
 * chosen to minimise the shift distances.
 *
 * Despite every attempt to spoon-feed it simple operations, GCC
 * 6.1.1 doggedly insists on doing annoying things like converting
 * "lsl.l #2,<reg>" (12 cycles) to two adds (8+8 cycles).
 *
 * It also likes to notice two shifts in a row, like "a = x << 2" and
 * "a <<= 7", and convert that to "a = x << 9".  But shifts longer
 * than 8 bits are extra-slow on m68k, so that's a lose.
 *
 * Since the 68000 is a very simple in-order processor with no
 * instruction scheduling effects on execution time, we can safely
 * take it out of GCC's hands and write one big asm() block.
 *
 * Without calling overhead, this operation is 30 bytes (14 instructions
 * plus one immediate constant) and 166 cycles.
 *
 * (Because %2 is fetched twice, it can't be postincrement, and thus it
 * can't be a fully general "g" or "m".  Register is preferred, but
 * offsettable memory or immediate will work.)
 */
static inline u32 __attribute_const__ __hash_32(u32 x)
{
	u32 a, b;

	/*
	 * Return the low 32 bits of x * 0x61C88647, split as
	 *
	 *   x * 0x8647  +  ((x * 0x61c8) << 16)
	 *
	 * The asm block produces the first term as a + b using only short
	 * shifts and adds; the second term is left to the compiler below.
	 *
	 * Operands: %0 = a (data register, early-clobber in the first
	 * alternative), %1 = b, %2 = x (read twice, never written).
	 */
	asm(   "move.l %2,%0"	/* a = x * 0x0001 */
	"\n	lsl.l #2,%0"	/* a = x * 0x0004 */
	"\n	move.l %0,%1"	/* b = x * 0x0004 */
	"\n	lsl.l #7,%0"	/* a = x * 0x0200 */
	"\n	add.l %2,%0"	/* a = x * 0x0201 */
	"\n	add.l %0,%1"	/* b = x * 0x0205 */
	"\n	add.l %0,%0"	/* a = x * 0x0402 */
	"\n	add.l %0,%1"	/* b = x * 0x0607 */
	"\n	lsl.l #5,%0"	/* a = x * 0x8040 */
	: "=&d,d" (a), "=&r,r" (b)
	: "r,roi?" (x));	/* a+b = x*0x8647 */

	/*
	 * Only the low 16 bits of x * 0x61c8 survive the shift into the
	 * high half, hence the (u16) truncation.  A bare u16 would be
	 * promoted to signed int, and shifting a value with bit 15 set
	 * left by 16 would move a 1 into the sign bit, which ISO C leaves
	 * undefined.  Widen to u32 first so the shift is unsigned and
	 * well defined; the resulting bits are unchanged.
	 */
	return ((u32)(u16)(x*0x61c8) << 16) + a + b;
}

#endif	/* _ASM_HASH_H */
Loading