Commit 3e5d8f97 authored by Tejun Heo, committed by Ingo Molnar
Browse files

x86: make percpu symbols zerobased on SMP



[ Based on original patch from Christoph Lameter and Mike Travis. ]

This patch makes percpu symbols zerobased on x86_64 SMP by adding
PERCPU_VADDR() to vmlinux.lds.h which helps setting explicit vaddr on
the percpu output section and using it in vmlinux_64.lds.S.  A new
PHDR is added as existing ones cannot contain sections near address
zero.  PERCPU_VADDR() also adds a new symbol __per_cpu_load which
always points to the vaddr of the loaded percpu data.init region.

The following adjustments have been made to accommodate the address
change.

* code to locate percpu gdt_page in head_64.S is updated to add the
  load address to the gdt_page offset.

* __per_cpu_load is used in places where access to the init data area
  is necessary.

* pda->data_offset is initialized soon after C code is entered, as a
  zero value doesn't work anymore.

This patch is mostly taken from Mike Travis' "x86_64: Base percpu
variables at zero" patch.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
parent a698c823
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -44,6 +44,8 @@ void __init x86_64_init_pda(void)
{
	_cpu_pda = __cpu_pda;
	cpu_pda(0) = &_boot_cpu_pda;
	cpu_pda(0)->data_offset =
		(unsigned long)(__per_cpu_load - __per_cpu_start);
	pda_init(0);
}

+23 −1
Original line number Diff line number Diff line
@@ -204,6 +204,23 @@ ENTRY(secondary_startup_64)
	pushq $0
	popfq

#ifdef CONFIG_SMP
	/*
	 * early_gdt_descr_base should point to the gdt_page in the static
	 * percpu init data area.  Computing this requires two symbols -
	 * __per_cpu_load and per_cpu__gdt_page.  As the linker cannot
	 * perform such a relocation, do it by hand.  As early_gdt_descr is
	 * manipulated by C code for secondary CPUs, this should be done
	 * only once for the boot CPU, when early_gdt_descr_base contains
	 * zero.
	 */
	movq	early_gdt_descr_base(%rip), %rax
	testq	%rax, %rax
	/* A non-zero base has already been set up: leave it untouched. */
	jnz	1f
	/* %rax = __per_cpu_load + per_cpu__gdt_page, then store it back. */
	movq	$__per_cpu_load, %rax
	addq	$per_cpu__gdt_page, %rax
	movq	%rax, early_gdt_descr_base(%rip)
1:
#endif
	/*
	 * We must switch to a new descriptor in kernel space for the GDT
	 * because soon the kernel won't have access anymore to the userspace
@@ -401,7 +418,12 @@ NEXT_PAGE(level2_spare_pgt)
	.globl early_gdt_descr
early_gdt_descr:
	.word	GDT_ENTRIES*8-1
#ifdef CONFIG_SMP
early_gdt_descr_base:
	.quad   0x0000000000000000
#else
	.quad	per_cpu__gdt_page
#endif

ENTRY(phys_base)
	/* This must match the first entry in level2_kernel_pgt */
+1 −1
Original line number Diff line number Diff line
@@ -213,7 +213,7 @@ void __init setup_per_cpu_areas(void)
		}
#endif
		per_cpu_offset(cpu) = ptr - __per_cpu_start;
		memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start);
		memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start);

		DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
	}
+16 −1
Original line number Diff line number Diff line
@@ -19,6 +19,9 @@ PHDRS {
	data PT_LOAD FLAGS(7);	/* RWE */
	user PT_LOAD FLAGS(7);	/* RWE */
	data.init PT_LOAD FLAGS(7);	/* RWE */
#ifdef CONFIG_SMP
	percpu PT_LOAD FLAGS(7);	/* RWE */
#endif
	note PT_NOTE FLAGS(0);	/* ___ */
}
SECTIONS
@@ -208,14 +211,26 @@ SECTIONS
  __initramfs_end = .;
#endif

#ifdef CONFIG_SMP
  /*
   * percpu offsets are zero-based on SMP.  PERCPU_VADDR() changes the
   * output PHDR, so the next output section - __data_nosave - should
   * switch it back to data.init.
   */
  . = ALIGN(PAGE_SIZE);
  PERCPU_VADDR(0, :percpu)
#else
  PERCPU(PAGE_SIZE)
#endif

  . = ALIGN(PAGE_SIZE);
  __init_end = .;

  . = ALIGN(PAGE_SIZE);
  __nosave_begin = .;
  .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) }
  .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
      *(.data.nosave)
  } :data.init	/* switch back to data.init, see PERCPU_VADDR() above */
  . = ALIGN(PAGE_SIZE);
  __nosave_end = .;

+1 −1
Original line number Diff line number Diff line
@@ -9,7 +9,7 @@ extern char __bss_start[], __bss_stop[];
extern char __init_begin[], __init_end[];
extern char _sinittext[], _einittext[];
extern char _end[];
extern char __per_cpu_start[], __per_cpu_end[];
extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[];
extern char __kprobes_text_start[], __kprobes_text_end[];
extern char __initdata_begin[], __initdata_end[];
extern char __start_rodata[], __end_rodata[];
Loading