Commit a7956113 authored by Zou Nan hai's avatar Zou Nan hai Committed by Tony Luck
Browse files

[IA64] IA64 Kexec/kdump



Changes and updates.

1. Remove fake rendz path and related code, following discussion with Khalid Aziz.
2. fc.i offset fix in relocate_kernel.S.
3. iosapic shutdown code EOI and mask race fix from Fujitsu.
4. Warm boot hook in machine_kexec to SN SAL code from Jack Steiner.
5. Send slave to SAL slave loop patch from Jay Lan.
6. Kdump on non-recoverable MCA event patch from Jay Lan
7. Use CTL_UNNUMBERED in kdump_on_init sysctl.

Signed-off-by: default avatarZou Nan hai <nanhai.zou@intel.com>
Signed-off-by: default avatarTony Luck <tony.luck@intel.com>
parent 620034c8
Loading
Loading
Loading
Loading
+23 −0
Original line number Diff line number Diff line
@@ -434,6 +434,29 @@ config IA64_ESI

source "drivers/sn/Kconfig"

config KEXEC
	bool "kexec system call (EXPERIMENTAL)"
	depends on EXPERIMENTAL && !IA64_HP_SIM && (!SMP || HOTPLUG_CPU)
	help
	  kexec is a system call that implements the ability to shutdown your
	  current kernel, and to start another kernel.  It is like a reboot
	  but it is independent of the system firmware.   And like a reboot
	  you can start any kernel with it, not just Linux.

	  The name comes from the similarity to the exec system call.

	  It is an ongoing process to be certain the hardware in a machine
	  is properly shutdown, so do not be surprised if this code does not
	  initially work for you.  It may help to enable device hotplugging
	  support.  As of this writing the exact hardware interface is
	  strongly in flux, so no good recommendation can be made.

config CRASH_DUMP
	  bool "kernel crash dumps (EXPERIMENTAL)"
	  depends on EXPERIMENTAL && IA64_MCA_RECOVERY && !IA64_HP_SIM && (!SMP || HOTPLUG_CPU)
	  help
	    Generate crash dump after being started by kexec.

source "drivers/firmware/Kconfig"

source "fs/Kconfig.binfmt"
+1 −0
Original line number Diff line number Diff line
@@ -28,6 +28,7 @@ obj-$(CONFIG_IA64_CYCLONE) += cyclone.o
obj-$(CONFIG_CPU_FREQ)		+= cpufreq/
obj-$(CONFIG_IA64_MCA_RECOVERY)	+= mca_recovery.o
obj-$(CONFIG_KPROBES)		+= kprobes.o jprobes.o
obj-$(CONFIG_KEXEC)		+= machine_kexec.o relocate_kernel.o crash.o
obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR)	+= uncached.o
obj-$(CONFIG_AUDIT)		+= audit.o
obj-$(CONFIG_PCI_MSI)		+= msi_ia64.o
+245 −0
Original line number Diff line number Diff line
/*
 * arch/ia64/kernel/crash.c
 *
 * Architecture specific (ia64) functions for kexec based crash dumps.
 *
 * Created by: Khalid Aziz <khalid.aziz@hp.com>
 * Copyright (C) 2005 Hewlett-Packard Development Company, L.P.
 * Copyright (C) 2005 Intel Corp	Zou Nan hai <nanhai.zou@intel.com>
 *
 */
#include <linux/smp.h>
#include <linux/delay.h>
#include <linux/crash_dump.h>
#include <linux/bootmem.h>
#include <linux/kexec.h>
#include <linux/elfcore.h>
#include <linux/sysctl.h>
#include <linux/init.h>

#include <asm/kdebug.h>
#include <asm/mca.h>
#include <asm/uaccess.h>

/* Per-CPU flag; set to 1 by kdump_cpu_freeze() once that CPU has been parked. */
int kdump_status[NR_CPUS];
/*
 * Count of CPUs that have gone through kdump_cpu_freeze();
 * polled by kdump_wait_cpu_freeze().
 */
atomic_t kdump_cpu_freezed;
/*
 * Read by the die notifier on DIE_MCA_RENDZVOUS_LEAVE to decide whether the
 * CPU should be frozen.  It is not written anywhere in this file.
 */
atomic_t kdump_in_progress;
/*
 * Tunable (exposed through the "kdump_on_init" sysctl below, default 1):
 * when non-zero the INIT/MCA die notifier acts, and machine_crash_shutdown()
 * may fall back to sending INIT to CPUs that did not answer the stop IPI.
 */
int kdump_on_init = 1;
/*
 * Copy @csize bytes, starting @offset bytes into the page with frame
 * number @pfn, into @buf.
 *
 * @userbuf selects the destination kind: non-zero means @buf is a user
 * space pointer (copied with copy_to_user()), zero means a kernel buffer
 * (copied with memcpy()).
 *
 * Returns the number of bytes copied (@csize), 0 when @csize is 0, or
 * -EFAULT if the copy to user space fails.
 */
ssize_t
copy_oldmem_page(unsigned long pfn, char *buf,
		size_t csize, unsigned long offset, int userbuf)
{
	void *src;

	if (csize == 0)
		return 0;

	/* Kernel virtual address of the requested byte within the page. */
	src = __va(pfn << PAGE_SHIFT);
	src += offset;

	if (!userbuf) {
		memcpy(buf, src, csize);
		return csize;
	}

	if (copy_to_user(buf, src, csize))
		return -EFAULT;

	return csize;
}

/*
 * Write one ELF note at @buf: the note header, then @name (including its
 * terminating NUL), then @data_len bytes of @data.  After each of the three
 * parts the write position advances by that part's size rounded up to a
 * whole number of 4-byte words.
 *
 * Returns the position immediately after the note, i.e. where the next
 * note (or the terminating empty note) should be written.
 */
static inline Elf64_Word
*append_elf_note(Elf64_Word *buf, char *name, unsigned type, void *data,
		size_t data_len)
{
	struct elf_note *hdr = (struct elf_note *)buf;
	Elf64_Word *cursor = buf;

	hdr->n_namesz = strlen(name) + 1;
	hdr->n_descsz = data_len;
	hdr->n_type   = type;

	/* Step over the header, then lay down name and payload in turn. */
	cursor += (sizeof(*hdr) + 3) / 4;
	memcpy(cursor, name, hdr->n_namesz);

	cursor += (hdr->n_namesz + 3) / 4;
	memcpy(cursor, data, data_len);

	cursor += (data_len + 3) / 4;
	return cursor;
}

/*
 * Terminate a sequence of ELF notes by writing an all-zero note header
 * at @buf.
 */
static void
final_note(void *buf)
{
	struct elf_note *terminator = buf;

	memset(terminator, 0, sizeof(*terminator));
}

/*
 * Defined outside this file; crash_save_this_cpu() passes it the pr_reg
 * area of a struct elf_prstatus to be filled with the current registers.
 */
extern void ia64_dump_cpu_regs(void *);

/* Per-CPU scratch prstatus that crash_save_this_cpu() fills and copies out. */
static DEFINE_PER_CPU(struct elf_prstatus, elf_prstatus);

/*
 * Capture the calling CPU's register state and store it as an ELF
 * NT_PRSTATUS note (name "CORE") in this CPU's crash_notes buffer,
 * followed by an empty terminating note.
 *
 * The per-CPU prstatus is always filled in; the note itself is only
 * written when a crash_notes buffer exists for this CPU.
 */
void
crash_save_this_cpu(void)
{
	void *buf;
	unsigned long cfm, sof, sol;

	int cpu = smp_processor_id();
	struct elf_prstatus *prstatus = &per_cpu(elf_prstatus, cpu);

	elf_greg_t *dst = (elf_greg_t *)&(prstatus->pr_reg);
	memset(prstatus, 0, sizeof(*prstatus));
	prstatus->pr_pid = current->pid;

	ia64_dump_cpu_regs(dst);
	/*
	 * Slot 43 is read as the current frame marker: bits 0-6 give the
	 * frame size (sof), bits 7-13 the size of locals (sol).
	 * NOTE(review): slot 46 is presumably the RSE backing store pointer
	 * (ar.bsp) in the elf_gregset layout; confirm against asm/elf.h.
	 * It is advanced past the (sof - sol) output registers.
	 */
	cfm = dst[43];
	sol = (cfm >> 7) & 0x7f;
	sof = cfm & 0x7f;
	dst[46] = (unsigned long)ia64_rse_skip_regs((unsigned long *)dst[46],
			sof - sol);

	buf = (u64 *) per_cpu_ptr(crash_notes, cpu);
	if (!buf)
		return;
	buf = append_elf_note(buf, "CORE", NT_PRSTATUS, prstatus,
			sizeof(*prstatus));
	final_note(buf);
}

/*
 * Poll until every other online CPU has been counted in
 * kdump_cpu_freezed, checking up to 1000 times with a 1000us delay
 * after each unsuccessful check.
 *
 * Returns 0 once the counter equals num_online_cpus() - 1, or 1 if the
 * polling budget is exhausted first.
 */
static int
kdump_wait_cpu_freeze(void)
{
	int expected = num_online_cpus() - 1;
	int tries;

	for (tries = 1000; tries > 0; tries--) {
		if (atomic_read(&kdump_cpu_freezed) == expected)
			return 0;
		udelay(1000);
	}
	return 1;
}

/*
 * Quiesce the machine before jumping into the crash kernel: disable the
 * IOSAPIC and, on SMP, stop the other CPUs.
 *
 * @pt: register state passed by the caller; not used in this function.
 */
void
machine_crash_shutdown(struct pt_regs *pt)
{
	/* This function is only called after the system
	 * has panicked or is otherwise in a critical state.
	 * The minimum amount of code to allow a kexec'd kernel
	 * to run successfully needs to happen here.
	 *
	 * In practice this means shooting down the other cpus in
	 * an SMP system.
	 */
	kexec_disable_iosapic();
#ifdef CONFIG_SMP
	kdump_smp_send_stop();
	if (kdump_wait_cpu_freeze() && kdump_on_init) 	{
		/*
		 * Not all CPUs responded to the stop IPI within the wait
		 * budget; send INIT to freeze them (only when the
		 * kdump_on_init tunable is enabled).
		 */
		kdump_smp_send_init();
	}
#endif
}

/*
 * Start the crash kernel from the INIT/MCA notifier path: disable local
 * interrupts, disable the IOSAPIC, then hand over to machine_kexec() with
 * the loaded image (ia64_kimage).
 */
static void
machine_kdump_on_init(void)
{
	local_irq_disable();
	kexec_disable_iosapic();
	machine_kexec(ia64_kimage);
}

/*
 * Park the calling CPU for a crash dump.  Used as the callback passed to
 * unw_init_running() by the die notifier in this file.
 *
 * @info: unwind frame info supplied by unw_init_running(); its switch-stack
 *        pointer (info->sw) is used to set the saved kernel stack pointer.
 * @arg:  unused.
 *
 * This function does not return on CPU 0, which spins forever.
 * NOTE(review): on other CPUs it presumably does not return either, assuming
 * ia64_jump_to_sal() hands the CPU to SAL for good; confirm.
 */
void
kdump_cpu_freeze(struct unw_frame_info *info, void *arg)
{
	int cpuid;
	local_irq_disable();
	cpuid = smp_processor_id();
	/* Record this CPU's registers in its crash note before parking. */
	crash_save_this_cpu();
	current->thread.ksp = (__u64)info->sw - 16;
	/* Publish that this CPU is frozen; kdump_wait_cpu_freeze() polls the counter. */
	atomic_inc(&kdump_cpu_freezed);
	kdump_status[cpuid] = 1;
	/* Make the updates above visible before this CPU stops making progress. */
	mb();
	if (cpuid == 0) {
		/*
		 * NOTE(review): CPU 0 spins here instead of being handed to
		 * SAL; presumably the boot CPU has no SAL rendezvous state to
		 * return to; confirm.
		 */
		for (;;)
			cpu_relax();
	} else
		ia64_jump_to_sal(&sal_boot_rendez_state[cpuid]);
}

/*
 * Die-chain notifier that triggers or assists a crash dump on INIT and
 * MCA events.
 *
 * @self: the registered notifier block (unused).
 * @val:  die event code; only DIE_INIT_MONARCH_ENTER, DIE_INIT_SLAVE_ENTER,
 *        DIE_MCA_RENDZVOUS_LEAVE and DIE_MCA_MONARCH_LEAVE are handled.
 * @data: struct die_args for the event; args->err carries a pointer to a
 *        struct ia64_mca_notify_die.
 *
 * Does nothing when the kdump_on_init tunable is 0.  Always returns
 * NOTIFY_DONE; the monarch paths call machine_kdump_on_init() to start
 * the crash kernel.
 */
static int
kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
{
	struct ia64_mca_notify_die *nd;
	struct die_args *args = data;

	if (!kdump_on_init)
		return NOTIFY_DONE;

	if (val != DIE_INIT_MONARCH_ENTER &&
	    val != DIE_INIT_SLAVE_ENTER &&
	    val != DIE_MCA_RENDZVOUS_LEAVE &&
	    val != DIE_MCA_MONARCH_LEAVE)
		return NOTIFY_DONE;

	nd = (struct ia64_mca_notify_die *)args->err;
	/*
	 * Reason code 1 means machine check rendezvous: an INIT delivered
	 * for that purpose must not start a dump.  Both operands must be
	 * compared against val; a bare DIE_INIT_SLAVE_ENTER constant is
	 * always true and would apply this filter to the MCA events too.
	 */
	if ((val == DIE_INIT_MONARCH_ENTER || val == DIE_INIT_SLAVE_ENTER) &&
	    nd->sos->rv_rc == 1)
		return NOTIFY_DONE;

	switch (val) {
	case DIE_INIT_MONARCH_ENTER:
		machine_kdump_on_init();
		break;
	case DIE_INIT_SLAVE_ENTER:
		unw_init_running(kdump_cpu_freeze, NULL);
		break;
	case DIE_MCA_RENDZVOUS_LEAVE:
		/* Only park rendezvoused CPUs when a dump is under way. */
		if (atomic_read(&kdump_in_progress))
			unw_init_running(kdump_cpu_freeze, NULL);
		break;
	case DIE_MCA_MONARCH_LEAVE:
		/* args->signr indicates whether the MCA is recoverable */
		if (!args->signr)
			machine_kdump_on_init();
		break;
	}
	return NOTIFY_DONE;
}

#ifdef CONFIG_SYSCTL
/*
 * Sysctl leaf exposing the kdump_on_init integer, read/write for the owner
 * (mode 0644), handled by proc_dointvec.  It uses CTL_UNNUMBERED rather
 * than a fixed binary sysctl number.
 */
static ctl_table kdump_on_init_table[] = {
	{
		.ctl_name = CTL_UNNUMBERED,
		.procname = "kdump_on_init",
		.data = &kdump_on_init,
		.maxlen = sizeof(int),
		.mode = 0644,
		.proc_handler = &proc_dointvec,
	},
	{ .ctl_name = 0 }
};

/*
 * Parent "kernel" directory entry (CTL_KERN) under which the leaf above is
 * attached; this is the table registered by machine_crash_setup().
 */
static ctl_table sys_table[] = {
	{
	  .ctl_name = CTL_KERN,
	  .procname = "kernel",
	  .mode = 0555,
	  .child = kdump_on_init_table,
	},
	{ .ctl_name = 0 }
};
#endif

/*
 * Boot-time initialisation for crash dump support:
 *  - parse an "elfcorehdr=" value from the kernel command line into
 *    elfcorehdr_addr, if present;
 *  - set saved_max_pfn to the maximum unsigned long value;
 *  - register the INIT/MCA die notifier;
 *  - register the kdump_on_init sysctl when CONFIG_SYSCTL is enabled.
 *
 * Returns 0 on success, or the error from register_die_notifier().
 */
static int
machine_crash_setup(void)
{
	static struct notifier_block kdump_init_notifier_nb = {
		.notifier_call = kdump_init_notifier,
	};
	static const char key[] = "elfcorehdr=";
	char *from;
	int ret;

	from = strstr(saved_command_line, key);
	if (from) {
		/* The value starts right after the "elfcorehdr=" prefix. */
		elfcorehdr_addr = memparse(from + (sizeof(key) - 1), &from);
	}

	saved_max_pfn = (unsigned long)-1;

	ret = register_die_notifier(&kdump_init_notifier_nb);
	if (ret != 0)
		return ret;

#ifdef CONFIG_SYSCTL
	register_sysctl_table(sys_table, 0);
#endif
	return 0;
}

__initcall(machine_crash_setup);
+62 −3
Original line number Diff line number Diff line
@@ -26,6 +26,7 @@
#include <linux/types.h>
#include <linux/time.h>
#include <linux/efi.h>
#include <linux/kexec.h>

#include <asm/io.h>
#include <asm/kregs.h>
@@ -41,7 +42,7 @@ extern efi_status_t efi_call_phys (void *, ...);
struct efi efi;
EXPORT_SYMBOL(efi);
static efi_runtime_services_t *runtime;
static unsigned long mem_limit = ~0UL, max_addr = ~0UL;
static unsigned long mem_limit = ~0UL, max_addr = ~0UL, min_addr = 0UL;

#define efi_call_virt(f, args...)	(*(f))(args)

@@ -421,6 +422,8 @@ efi_init (void)
			mem_limit = memparse(cp + 4, &cp);
		} else if (memcmp(cp, "max_addr=", 9) == 0) {
			max_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp));
		} else if (memcmp(cp, "min_addr=", 9) == 0) {
			min_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp));
		} else {
			while (*cp != ' ' && *cp)
				++cp;
@@ -428,6 +431,8 @@ efi_init (void)
				++cp;
		}
	}
	if (min_addr != 0UL)
		printk(KERN_INFO "Ignoring memory below %luMB\n", min_addr >> 20);
	if (max_addr != ~0UL)
		printk(KERN_INFO "Ignoring memory above %luMB\n", max_addr >> 20);

@@ -894,7 +899,8 @@ find_memmap_space (void)
		as = max(contig_low, md->phys_addr);
		ae = min(contig_high, efi_md_end(md));

		/* keep within max_addr= command line arg */
		/* keep within max_addr= and min_addr= command line arg */
		as = max(as, min_addr);
		ae = min(ae, max_addr);
		if (ae <= as)
			continue;
@@ -1004,7 +1010,8 @@ efi_memmap_init(unsigned long *s, unsigned long *e)
		} else
			ae = efi_md_end(md);

		/* keep within max_addr= command line arg */
		/* keep within max_addr= and min_addr= command line arg */
		as = max(as, min_addr);
		ae = min(ae, max_addr);
		if (ae <= as)
			continue;
@@ -1116,6 +1123,58 @@ efi_initialize_iomem_resources(struct resource *code_resource,
			 */
			insert_resource(res, code_resource);
			insert_resource(res, data_resource);
#ifdef CONFIG_KEXEC
                        insert_resource(res, &efi_memmap_res);
                        insert_resource(res, &boot_param_res);
			if (crashk_res.end > crashk_res.start)
				insert_resource(res, &crashk_res);
#endif
		}
	}
}

#ifdef CONFIG_KEXEC
/*
 * Find a block of memory, aligned to 64M, that does not overlap any of
 * the reserved regions, for use by the crash dump kernel.
 *
 * @size: number of bytes wanted.
 * @r:    array of reserved regions; must be sorted.  The start/end fields
 *        are converted with __pa() before being compared against the
 *        physical addresses from the EFI map.
 * @n:    number of entries in @r.
 *
 * Walks the EFI memory map passed in ia64_boot_param and considers only
 * descriptors for which efi_wb() is true.
 *
 * Returns the start address of a suitable block, or ~0UL (after printing
 * a warning) when none is found.
 */
unsigned long
kdump_find_rsvd_region (unsigned long size,
		struct rsvd_region *r, int n)
{
  int i;
  u64 start, end;
  u64 alignment = 1UL << _PAGE_SIZE_64M;
  void *efi_map_start, *efi_map_end, *p;
  efi_memory_desc_t *md;
  u64 efi_desc_size;

  efi_map_start = __va(ia64_boot_param->efi_memmap);
  efi_map_end   = efi_map_start + ia64_boot_param->efi_memmap_size;
  efi_desc_size = ia64_boot_param->efi_memdesc_size;

  for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
	  md = p;
	  if (!efi_wb(md))
		  continue;
	  /* Candidate window: aligned start of this descriptor up to its end. */
	  start = ALIGN(md->phys_addr, alignment);
	  end = efi_md_end(md);
	  for (i = 0; i < n; i++) {
		/* Only reserved regions lying inside the current window matter. */
		if (__pa(r[i].start) >= start && __pa(r[i].end) < end) {
			/* Enough room before this reserved region: done. */
			if (__pa(r[i].start) > start + size)
				return start;
			/* Otherwise restart the candidate just past it, re-aligned. */
			start = ALIGN(__pa(r[i].end), alignment);
			/*
			 * Keep scanning only if the next reserved region would
			 * also collide with the new candidate; otherwise stop
			 * and test the candidate against the window end below.
			 */
			if (i < n-1 && __pa(r[i+1].start) < start + size)
				continue;
			else
				break;
		}
	  }
	  if (end > start + size)
		return start;
  }

  printk(KERN_WARNING "Cannot reserve 0x%lx byte of memory for crashdump\n",
	size);
  return ~0UL;
}
#endif
+1 −1
Original line number Diff line number Diff line
@@ -1575,7 +1575,7 @@ sys_call_table:
	data8 sys_mq_timedreceive		// 1265
	data8 sys_mq_notify
	data8 sys_mq_getsetattr
	data8 sys_ni_syscall			// reserved for kexec_load
	data8 sys_kexec_load
	data8 sys_ni_syscall			// reserved for vserver
	data8 sys_waitid			// 1270
	data8 sys_add_key
Loading