drm/i915: Use SSE4.1 movntdqa to accelerate reads from WC memory (0b1de5d5) · Commits · 戴 / test

drivers/gpu/drm/i915/Makefile

+3 −0

Original line number	Diff line number	Diff line
		@@ -3,12 +3,15 @@
		# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.

		subdir-ccflags-$(CONFIG_DRM_I915_WERROR) := -Werror
		# Probe the assembler with a movntdqa instruction via as-instr; the
		# -DCONFIG_AS_MOVNTDQA define is what the C code tests with #ifdef.
		subdir-ccflags-y += \
		$(call as-instr,movntdqa (%eax)$(comma)%xmm0,-DCONFIG_AS_MOVNTDQA)

		# Please keep these build lists sorted!

		# core driver code
		i915-y := i915_drv.o \
		i915_irq.o \
		i915_memcpy.o \
		i915_params.o \
		i915_pci.o \
		i915_suspend.o \

drivers/gpu/drm/i915/i915_drv.c

+2 −0

Original line number	Diff line number	Diff line
		@@ -827,6 +827,8 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv,
		mutex_init(&dev_priv->wm.wm_mutex);
		mutex_init(&dev_priv->pps_mutex);

		i915_memcpy_init_early(dev_priv);

		ret = i915_workqueues_init(dev_priv);
		if (ret < 0)
		return ret;

drivers/gpu/drm/i915/i915_drv.h

+3 −0

Original line number	Diff line number	Diff line
		@@ -3907,6 +3907,9 @@ static inline bool __i915_request_irq_complete(struct drm_i915_gem_request *req)
		return false;
		}

		/* i915_memcpy.c */
		void i915_memcpy_init_early(struct drm_i915_private *dev_priv);
		bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len);

		#define ptr_unpack_bits(ptr, bits) ({ \
		unsigned long __v = (unsigned long)(ptr); \
		(bits) = __v & ~PAGE_MASK; \

drivers/gpu/drm/i915/i915_memcpy.c

0 → 100644

+101 −0

Original line number	Diff line number	Diff line
		/*
		* Copyright © 2016 Intel Corporation
		*
		* Permission is hereby granted, free of charge, to any person obtaining a
		* copy of this software and associated documentation files (the "Software"),
		* to deal in the Software without restriction, including without limitation
		* the rights to use, copy, modify, merge, publish, distribute, sublicense,
		* and/or sell copies of the Software, and to permit persons to whom the
		* Software is furnished to do so, subject to the following conditions:
		*
		* The above copyright notice and this permission notice (including the next
		* paragraph) shall be included in all copies or substantial portions of the
		* Software.
		*
		* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
		* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
		* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
		* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
		* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
		* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
		* IN THE SOFTWARE.
		*
		*/

		#include <linux/kernel.h>
		#include <asm/fpu/api.h>

		#include "i915_drv.h"

		/*
		 * Starts out false; i915_memcpy_init_early() flips it on when the CPU
		 * reports SSE4.1. Only referenced from this file, hence static.
		 */
		static DEFINE_STATIC_KEY_FALSE(has_movntdqa);

		#ifdef CONFIG_AS_MOVNTDQA
		/*
		 * Copy @len bytes from @src to @dst, loading with movntdqa and storing
		 * with movaps.
		 *
		 * @dst: destination pointer
		 * @src: source pointer
		 * @len: number of bytes to copy; only whole 16-byte chunks are copied
		 *
		 * The caller in this file, i915_memcpy_from_wc(), has already checked
		 * that @dst, @src and @len are multiples of 16 and that @len is non-zero.
		 */
		static void __memcpy_ntdqa(void *dst, const void *src, unsigned long len)
		{
			/* The copy goes through xmm0-xmm3, so bracket it with the FPU helpers. */
			kernel_fpu_begin();

			/* From here on, len counts 16-byte chunks rather than bytes. */
			len >>= 4;

			/* Bulk of the copy: four chunks (64 bytes) per iteration. */
			while (len >= 4) {
				asm("movntdqa (%0), %%xmm0\n"
				    "movntdqa 16(%0), %%xmm1\n"
				    "movntdqa 32(%0), %%xmm2\n"
				    "movntdqa 48(%0), %%xmm3\n"
				    "movaps %%xmm0, (%1)\n"
				    "movaps %%xmm1, 16(%1)\n"
				    "movaps %%xmm2, 32(%1)\n"
				    "movaps %%xmm3, 48(%1)\n"
				    :: "r" (src), "r" (dst) : "memory");
				src += 64;
				dst += 64;
				len -= 4;
			}

			/* Tail: the remaining chunks (at most three), one at a time. */
			while (len--) {
				asm("movntdqa (%0), %%xmm0\n"
				    "movaps %%xmm0, (%1)\n"
				    :: "r" (src), "r" (dst) : "memory");
				src += 16;
				dst += 16;
			}

			kernel_fpu_end();
		}
		#endif

		/**
		 * i915_memcpy_from_wc: perform an accelerated aligned read from WC
		 * @dst: destination pointer
		 * @src: source pointer
		 * @len: how many bytes to copy
		 *
		 * i915_memcpy_from_wc copies @len bytes from @src to @dst using
		 * non-temporal instructions where available. Note that all arguments
		 * (@src, @dst) must be aligned to 16 bytes and @len must be a multiple
		 * of 16.
		 *
		 * To test whether accelerated reads from WC are supported, use
		 * i915_memcpy_from_wc(NULL, NULL, 0);
		 *
		 * Returns true if the accelerated path is available (the copy has been
		 * performed, or @len was 0 and there was nothing to do). Returns false,
		 * without copying anything, if @dst, @src or @len is not a multiple of
		 * 16, if the driver was built without CONFIG_AS_MOVNTDQA, or if the
		 * has_movntdqa key has not been enabled.
		 */
		bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len)
		{
			/* One test covers all three values: any low bit set means misaligned. */
			if (unlikely(((unsigned long)dst | (unsigned long)src | len) & 15))
				return false;

		#ifdef CONFIG_AS_MOVNTDQA
			if (static_branch_likely(&has_movntdqa)) {
				/* len == 0 is the "is it supported?" probe: report true, copy nothing. */
				if (likely(len))
					__memcpy_ntdqa(dst, src, len);
				return true;
			}
		#endif

			return false;
		}

		/*
		 * Turn on the has_movntdqa key when the CPU reports the SSE4.1 feature
		 * bit (X86_FEATURE_XMM4_1); otherwise leave the key untouched.
		 *
		 * @dev_priv: not read by this function.
		 */
		void i915_memcpy_init_early(struct drm_i915_private *dev_priv)
		{
			/* Without SSE4.1 there is nothing to enable. */
			if (!static_cpu_has(X86_FEATURE_XMM4_1))
				return;

			static_branch_enable(&has_movntdqa);
		}

Admin message