[PATCH] SLAB: use a multiply instead of a divide in obj_to_index() (6a2d7a95) · Commits · 戴 / test

include/linux/reciprocal_div.h

0 → 100644

+32 −0

Original line number	Diff line number	Diff line
		#ifndef _LINUX_RECIPROCAL_DIV_H
		#define _LINUX_RECIPROCAL_DIV_H

		#include <linux/types.h>

		/*
		* This file describes reciprocal division.
		*
		* This optimizes the (A/B) problem, when A and B are two u32
		* and B is a known value (but not known at compile time)
		*
		* The math principle used is :
		* Let R = RECIPROCAL_VALUE(B) = (((1LL << 32) + (B - 1)) / B)
		* Then A / B = (u32)(((u64)(A) * (R)) >> 32)
		*
		* This replaces a divide by a multiply (and a shift), and
		* is generally less expensive in CPU cycles.
		*/

		/*
		* Computes the reciprocal value (R) for the value B of the divisor.
		* Should not be called before each reciprocal_divide(),
		* or else the performance is slower than a normal divide.
		*/
		extern u32 reciprocal_value(u32 B);


		/*
		 * Divide A by a divisor whose reciprocal R was precomputed with
		 * reciprocal_value(): widen A to 64 bits, multiply by R, and keep
		 * the upper 32 bits of the product as the quotient.
		 *
		 * A: dividend
		 * R: value previously returned by reciprocal_value() for the divisor
		 *
		 * Returns the upper 32 bits of the 64-bit product A * R.
		 */
		static inline u32 reciprocal_divide(u32 A, u32 R)
		{
			/* Widen first so the multiplication cannot overflow 32 bits. */
			u64 product = (u64)A * R;

			return (u32)(product >> 32);
		}
		#endif

lib/Makefile

+1 −1

Original line number	Diff line number	Diff line
		@@ -5,7 +5,7 @@
		lib-y := ctype.o string.o vsprintf.o cmdline.o \
		bust_spinlocks.o rbtree.o radix-tree.o dump_stack.o \
		idr.o div64.o int_sqrt.o bitmap.o extable.o prio_tree.o \
		sha1.o irq_regs.o
		sha1.o irq_regs.o reciprocal_div.o

		lib-$(CONFIG_MMU) += ioremap.o
		lib-$(CONFIG_SMP) += cpumask.o

lib/reciprocal_div.c

0 → 100644

+9 −0

Original line number	Diff line number	Diff line
		#include <asm/div64.h>
		#include <linux/reciprocal_div.h>

		/*
		 * Precompute the 32-bit reciprocal of divisor k for later use with
		 * reciprocal_divide(): ((1 << 32) + (k - 1)) / k, i.e. 2^32 / k
		 * rounded up, truncated to 32 bits.
		 *
		 * NOTE(review): for k == 1 the quotient is 2^32, which does not fit
		 * in a u32 and truncates to 0; k == 0 would divide by zero. Callers
		 * presumably only pass k >= 2 -- confirm.
		 */
		u32 reciprocal_value(u32 k)
		{
			u64 scaled = 1LL << 32;

			/* Bias by (k - 1) so the division below rounds up. */
			scaled += k - 1;
			/* do_div() divides in place; the quotient ends up in 'scaled'. */
			do_div(scaled, k);

			return (u32)scaled;
		}

mm/slab.c

+15 −3

Original line number	Diff line number	Diff line
		@@ -109,6 +109,7 @@
		#include <linux/mutex.h>
		#include <linux/fault-inject.h>
		#include <linux/rtmutex.h>
		#include <linux/reciprocal_div.h>

		#include <asm/cacheflush.h>
		#include <asm/tlbflush.h>
		@@ -386,6 +387,7 @@ struct kmem_cache {
		unsigned int shared;

		unsigned int buffer_size;
		u32 reciprocal_buffer_size;
		/* 3) touched by every alloc & free from the backend */
		struct kmem_list3 *nodelists[MAX_NUMNODES];

		@@ -627,10 +629,17 @@ static inline void index_to_obj(struct kmem_cache cache, struct slab *slab,
		return slab->s_mem + cache->buffer_size * idx;
		}

		static inline unsigned int obj_to_index(struct kmem_cache *cache,
		struct slab slab, void obj)
		/*
		* We want to avoid an expensive divide : (offset / cache->buffer_size)
		* Using the fact that buffer_size is a constant for a particular cache,
		* we can replace (offset / cache->buffer_size) by
		* reciprocal_divide(offset, cache->reciprocal_buffer_size)
		*/
		static inline unsigned int obj_to_index(const struct kmem_cache *cache,
		const struct slab slab, void obj)
		{
		return (unsigned)(obj - slab->s_mem) / cache->buffer_size;
		u32 offset = (obj - slab->s_mem);
		return reciprocal_divide(offset, cache->reciprocal_buffer_size);
		}

		/*
		@@ -1427,6 +1436,8 @@ void __init kmem_cache_init(void)

		cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
		cache_line_size());
		cache_cache.reciprocal_buffer_size =
		reciprocal_value(cache_cache.buffer_size);

		for (order = 0; order < MAX_ORDER; order++) {
		cache_estimate(order, cache_cache.buffer_size,
		@@ -2313,6 +2324,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
		if (flags & SLAB_CACHE_DMA)
		cachep->gfpflags \|= GFP_DMA;
		cachep->buffer_size = size;
		cachep->reciprocal_buffer_size = reciprocal_value(size);

		if (flags & CFLGS_OFF_SLAB) {
		cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u);

Admin message