Commit e8e00f5a authored by Ard Biesheuvel's avatar Ard Biesheuvel
Browse files

ARM: p2v: switch to MOVW for Thumb2 and ARM/LPAE



In preparation for reducing the phys-to-virt minimum relative alignment
from 16 MiB to 2 MiB, switch to patchable sequences involving MOVW
instructions that can more easily be manipulated to carry a 12-bit
immediate. Note that the non-LPAE ARM sequence is not updated: MOVW
may not be supported on non-LPAE platforms, and the sequence itself
can be updated more easily to apply the 12 bits of displacement.

For Thumb2, which has many more versions of opcodes, switch to a sequence
that can be patched by the same patching code for both versions. Note
that the Thumb2 opcodes for MOVW and MVN are unambiguous, and have no
rotation bits in their immediate fields, so there is no need to use
placeholder constants in the asm blocks.

While at it, drop the 'volatile' qualifiers from the asm blocks: the
code does not have any side effects that are invisible to the compiler,
so it is free to omit these sequences if the outputs are not used.

Suggested-by: default avatarRussell King <linux@armlinux.org.uk>
Acked-by: default avatarNicolas Pitre <nico@fluxnic.net>
Reviewed-by: default avatarLinus Walleij <linus.walleij@linaro.org>
Signed-off-by: default avatarArd Biesheuvel <ardb@kernel.org>
parent 0e3db6c9
Loading
Loading
Loading
Loading
+32 −12
Original line number Diff line number Diff line
@@ -183,6 +183,7 @@ extern const void *__pv_table_begin, *__pv_table_end;
#define PHYS_OFFSET	((phys_addr_t)__pv_phys_pfn_offset << PAGE_SHIFT)
#define PHYS_PFN_OFFSET	(__pv_phys_pfn_offset)

#ifndef CONFIG_THUMB2_KERNEL
#define __pv_stub(from,to,instr)			\
	__asm__("@ __pv_stub\n"				\
	"1:	" instr "	%0, %1, %2\n"		\
@@ -192,25 +193,45 @@ extern const void *__pv_table_begin, *__pv_table_end;
	: "=r" (to)					\
	: "r" (from), "I" (__PV_BITS_31_24))

#define __pv_stub_mov_hi(t)				\
	__asm__ volatile("@ __pv_stub_mov\n"		\
	"1:	mov	%R0, %1\n"			\
#define __pv_add_carry_stub(x, y)			\
	__asm__("@ __pv_add_carry_stub\n"		\
	"0:	movw	%R0, #0\n"			\
	"	adds	%Q0, %1, %R0, lsl #24\n"	\
	"1:	mov	%R0, %2\n"			\
	"	adc	%R0, %R0, #0\n"			\
	"	.pushsection .pv_table,\"a\"\n"		\
	"	.long	1b - .\n"			\
	"	.long	0b - ., 1b - .\n"		\
	"	.popsection\n"				\
	: "=r" (t)					\
	: "I" (__PV_BITS_7_0))
	: "=&r" (y)					\
	: "r" (x), "I" (__PV_BITS_7_0)			\
	: "cc")

#else
#define __pv_stub(from,to,instr)			\
	__asm__("@ __pv_stub\n"				\
	"0:	movw	%0, #0\n"			\
	"	lsl	%0, #24\n"			\
	"	" instr " %0, %1, %0\n"			\
	"	.pushsection .pv_table,\"a\"\n"		\
	"	.long	0b - .\n"			\
	"	.popsection\n"				\
	: "=&r" (to)					\
	: "r" (from))

#define __pv_add_carry_stub(x, y)			\
	__asm__ volatile("@ __pv_add_carry_stub\n"	\
	"1:	adds	%Q0, %1, %2\n"			\
	__asm__("@ __pv_add_carry_stub\n"		\
	"0:	movw	%R0, #0\n"			\
	"	lsls	%R0, #24\n"			\
	"	adds	%Q0, %1, %R0\n"			\
	"1:	mvn	%R0, #0\n"			\
	"	adc	%R0, %R0, #0\n"			\
	"	.pushsection .pv_table,\"a\"\n"		\
	"	.long	1b - .\n"			\
	"	.long	0b - ., 1b - .\n"		\
	"	.popsection\n"				\
	: "+r" (y)					\
	: "r" (x), "I" (__PV_BITS_31_24)		\
	: "=&r" (y)					\
	: "r" (x)					\
	: "cc")
#endif

static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x)
{
@@ -219,7 +240,6 @@ static inline phys_addr_t __virt_to_phys_nodebug(unsigned long x)
	if (sizeof(phys_addr_t) == 4) {
		__pv_stub(x, t, "add");
	} else {
		__pv_stub_mov_hi(t);
		__pv_add_carry_stub(x, t);
	}
	return t;
+116 −31
Original line number Diff line number Diff line
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 *  Copyright (C) 1994-2002 Russell King
 *  Copyright (c) 2003 ARM Limited
 *  Copyright (c) 2003, 2020 ARM Limited
 *  All Rights Reserved
 */

@@ -58,55 +58,140 @@ __fixup_a_pv_table:
	mov	r6, r6, lsr #24
	cmn	r0, #1
#ifdef CONFIG_THUMB2_KERNEL
	@
	@ The Thumb-2 versions of the patchable sequences are
	@
	@ phys-to-virt:			movw	<reg>, #offset<31:24>
	@				lsl	<reg>, #24
	@				sub	<VA>, <PA>, <reg>
	@
	@ virt-to-phys (non-LPAE):	movw	<reg>, #offset<31:24>
	@				lsl	<reg>, #24
	@				add	<PA>, <VA>, <reg>
	@
	@ virt-to-phys (LPAE):		movw	<reg>, #offset<31:24>
	@				lsl	<reg>, #24
	@				adds	<PAlo>, <VA>, <reg>
	@				mov	<PAhi>, #offset<39:32>
	@				adc	<PAhi>, <PAhi>, #0
	@
	@ In the non-LPAE case, all patchable instructions are MOVW
	@ instructions, where we need to patch in the offset into the
	@ second halfword of the opcode (the 16-bit immediate is encoded
	@ as imm4:i:imm3:imm8)
	@
	@       15       11 10  9           4 3    0  15  14  12 11 8 7    0
	@      +-----------+---+-------------+------++---+------+----+------+
	@ MOVW | 1 1 1 1 0 | i | 1 0 0 1 0 0 | imm4 || 0 | imm3 | Rd | imm8 |
	@      +-----------+---+-------------+------++---+------+----+------+
	@
	@ In the LPAE case, we also need to patch in the high word of the
	@ offset into the immediate field of the MOV instruction, or patch it
	@ to a MVN instruction if the offset is negative. In this case, we
	@ need to inspect the first halfword of the opcode, to check whether
	@ it is MOVW or MOV/MVN, and to perform the MOV to MVN patching if
	@ needed. The encoding of the immediate is rather complex for values
	@ of i:imm3 != 0b0000, but fortunately, we never need more than 8 lower
	@ order bits, which can be patched into imm8 directly (and i:imm3
	@ cleared)
	@
	@      15       11 10  9        5         0  15  14  12 11 8 7    0
	@     +-----------+---+---------------------++---+------+----+------+
	@ MOV | 1 1 1 1 0 | i | 0 0 0 1 0 0 1 1 1 1 || 0 | imm3 | Rd | imm8 |
	@ MVN | 1 1 1 1 0 | i | 0 0 0 1 1 0 1 1 1 1 || 0 | imm3 | Rd | imm8 |
	@     +-----------+---+---------------------++---+------+----+------+
	@
	moveq	r0, #0x200000		@ set bit 21, mov to mvn instruction
	lsls	r6, #24
	beq	.Lnext
	clz	r7, r6
	lsr	r6, #24
	lsl	r6, r7
	bic	r6, #0x0080
	lsrs	r7, #1
	orrcs	r6, #0x0080
	orr	r6, r6, r7, lsl #12
	orr	r6, #0x4000
	b	.Lnext
.Lloop:	add	r7, r4
	adds	r4, #4
	ldrh	ip, [r7, #2]
ARM_BE8(rev16	ip, ip)
	tst	ip, #0x4000
	and	ip, #0x8f00
	orrne	ip, r6			@ mask in offset bits 31-24
	orreq	ip, r0			@ mask in offset bits 7-0
ARM_BE8(rev16	ip, ip)
	strh	ip, [r7, #2]
	bne	.Lnext
	adds	r4, #4			@ clears Z flag
#ifdef CONFIG_ARM_LPAE
	ldrh	ip, [r7]
ARM_BE8(rev16	ip, ip)
	bic	ip, #0x20
	orr	ip, ip, r0, lsr #16
	tst	ip, #0x200		@ MOVW has bit 9 set, MVN has it clear
	bne	0f			@ skip to MOVW handling (Z flag is clear)
	bic	ip, #0x20		@ clear bit 5 (MVN -> MOV)
	orr	ip, ip, r0, lsr #16	@ MOV -> MVN if offset < 0
ARM_BE8(rev16	ip, ip)
	strh	ip, [r7]
	@ Z flag is set
0:
#endif
	ldrh	ip, [r7, #2]
ARM_BE8(rev16	ip, ip)
	and	ip, #0xf00		@ clear everything except Rd field
	orreq	ip, r0			@ Z flag set -> MOV/MVN -> patch in high bits
	orrne	ip, r6			@ Z flag clear -> MOVW -> patch in low bits
ARM_BE8(rev16	ip, ip)
	strh	ip, [r7, #2]
#else
#ifdef CONFIG_CPU_ENDIAN_BE8
@ in BE8, we load data in BE, but instructions still in LE
#define PV_BIT22	0x00004000
#define PV_BIT24	0x00000001
#define PV_IMM8_MASK	0xff000000
#define PV_ROT_MASK	0x000f0000
#else
#define PV_BIT22	0x00400000
#define PV_BIT24	0x01000000
#define PV_IMM8_MASK	0x000000ff
#define PV_ROT_MASK	0xf00
#endif

	@
	@ The ARM versions of the patchable sequences are
	@
	@ phys-to-virt:			sub	<VA>, <PA>, #offset<31:24>, lsl #24
	@
	@ virt-to-phys (non-LPAE):	add	<PA>, <VA>, #offset<31:24>, lsl #24
	@
	@ virt-to-phys (LPAE):		movw	<reg>, #offset<31:24>
	@				adds	<PAlo>, <VA>, <reg>, lsl #24
	@				mov	<PAhi>, #offset<39:32>
	@				adc	<PAhi>, <PAhi>, #0
	@
	@ In the non-LPAE case, all patchable instructions are ADD or SUB
	@ instructions, where we need to patch in the offset into the
	@ immediate field of the opcode, which is emitted with the correct
	@ rotation value. (The effective value of the immediate is imm12<7:0>
	@ rotated right by [2 * imm12<11:8>] bits)
	@
	@      31   28 27      23 22  20 19  16 15  12 11    0
	@      +------+-----------------+------+------+-------+
	@  ADD | cond | 0 0 1 0 1 0 0 0 |  Rn  |  Rd  | imm12 |
	@  SUB | cond | 0 0 1 0 0 1 0 0 |  Rn  |  Rd  | imm12 |
	@  MOV | cond | 0 0 1 1 1 0 1 0 |  Rn  |  Rd  | imm12 |
	@  MVN | cond | 0 0 1 1 1 1 1 0 |  Rn  |  Rd  | imm12 |
	@      +------+-----------------+------+------+-------+
	@
	@ In the LPAE case, we use a MOVW instruction to carry the low offset
	@ word, and patch in the high word of the offset into the immediate
	@ field of the subsequent MOV instruction, or patch it to a MVN
	@ instruction if the offset is negative. We can distinguish MOVW
	@ instructions based on bits 23:22 of the opcode, and ADD/SUB can be
	@ distinguished from MOV/MVN (all using the encodings above) using
	@ bit 24.
	@
	@      31   28 27      23 22  20 19  16 15  12 11    0
	@      +------+-----------------+------+------+-------+
	@ MOVW | cond | 0 0 1 1 0 0 0 0 | imm4 |  Rd  | imm12 |
	@      +------+-----------------+------+------+-------+
	@
	moveq	r0, #0x400000		@ set bit 22, mov to mvn instruction
	b	.Lnext
.Lloop:	ldr	ip, [r7, r4]
#ifdef CONFIG_ARM_LPAE
	tst	ip, #PV_BIT24		@ ADD/SUB have bit 24 clear
	beq	1f
ARM_BE8(rev	ip, ip)
	tst	ip, #0xc00000		@ MOVW has bits 23:22 clear
	bic	ip, ip, #0x400000	@ clear bit 22
	bfc	ip, #0, #12		@ clear imm12 field of MOV[W] instruction
	orreq	ip, ip, r6		@ MOVW -> mask in offset bits 31-24
	orrne	ip, ip, r0		@ MOV  -> mask in offset bits 7-0 (or bit 22)
ARM_BE8(rev	ip, ip)
	b	2f
1:
#endif
	bic	ip, ip, #PV_IMM8_MASK
	tst	ip, #PV_ROT_MASK		@ check the rotation field
	orrne	ip, ip, r6 ARM_BE8(, lsl #24)	@ mask in offset bits 31-24
	biceq	ip, ip, #PV_BIT22		@ clear bit 22
	orreq	ip, ip, r0 ARM_BE8(, ror #8)	@ mask in offset bits 7-0 (or bit 22)
	orr	ip, ip, r6 ARM_BE8(, lsl #24)	@ mask in offset bits 31-24
2:
	str	ip, [r7, r4]
	add	r4, r4, #4
#endif