Commit 42cbd8ef authored by Linus Torvalds

Merge branch 'x86-amd-nb-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'x86-amd-nb-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86, cacheinfo: Cleanup L3 cache index disable support
  x86, amd-nb: Cleanup AMD northbridge caching code
  x86, amd-nb: Complete the rename of AMD NB and related code
parents dda5f0a3 f658bcfb
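
For orientation, the series is a pure rename plus restructuring: the old k8_* interface (k8_nb_ids, cache_k8_northbridges(), k8_flush_garts(), node_to_k8_nb_misc(), struct k8_northbridge_info) becomes an AMD-generic amd_* interface, and the open-coded num/gart_supported/nb_misc[] bookkeeping is folded into a flags word plus a per-node struct amd_northbridge. A minimal sketch of how a hypothetical in-kernel caller migrates (the function below is illustrative, not part of the commit):

#include <linux/pci.h>
#include <asm/amd_nb.h>		/* formerly <asm/k8.h> */

static void example_walk_northbridges(void)
{
	int i;

	/* was: for (i = 0; i < k8_northbridges.num; i++) */
	for (i = 0; i < amd_nb_num(); i++) {
		/* was: k8_northbridges.nb_misc[i] */
		struct pci_dev *misc = node_to_amd_nb(i)->misc;
		u32 val;

		pci_read_config_dword(misc, 0x9c, &val);
	}

	/* was: if (k8_northbridges.gart_supported) */
	if (amd_nb_has_feature(AMD_NB_GART))
		amd_flush_garts();	/* was: k8_flush_garts() */
}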
arch/x86/Kconfig  +6 −6
@@ -1141,16 +1141,16 @@ config NUMA
comment "NUMA (Summit) requires SMP, 64GB highmem support, ACPI"
	depends on X86_32 && X86_SUMMIT && (!HIGHMEM64G || !ACPI)

config K8_NUMA
config AMD_NUMA
	def_bool y
	prompt "Old style AMD Opteron NUMA detection"
	depends on X86_64 && NUMA && PCI
	---help---
	  Enable K8 NUMA node topology detection.  You should say Y here if
	  you have a multi processor AMD K8 system. This uses an old
	  method to read the NUMA configuration directly from the builtin
	  Northbridge of Opteron. It is recommended to use X86_64_ACPI_NUMA
	  instead, which also takes priority if both are compiled in.
	  Enable AMD NUMA node topology detection.  You should say Y here if
	  you have a multi processor AMD system. This uses an old method to
	  read the NUMA configuration directly from the builtin Northbridge
	  of Opteron. It is recommended to use X86_64_ACPI_NUMA instead,
	  which also takes priority if both are compiled in.

config X86_64_ACPI_NUMA
	def_bool y
arch/x86/include/asm/amd_nb.h  +33 −16
@@ -3,36 +3,53 @@

#include <linux/pci.h>

extern struct pci_device_id k8_nb_ids[];
extern struct pci_device_id amd_nb_misc_ids[];
struct bootnode;

extern int early_is_k8_nb(u32 value);
extern int cache_k8_northbridges(void);
extern void k8_flush_garts(void);
extern int k8_get_nodes(struct bootnode *nodes);
extern int k8_numa_init(unsigned long start_pfn, unsigned long end_pfn);
extern int k8_scan_nodes(void);
extern int early_is_amd_nb(u32 value);
extern int amd_cache_northbridges(void);
extern void amd_flush_garts(void);
extern int amd_get_nodes(struct bootnode *nodes);
extern int amd_numa_init(unsigned long start_pfn, unsigned long end_pfn);
extern int amd_scan_nodes(void);

struct k8_northbridge_info {
struct amd_northbridge {
	struct pci_dev *misc;
};

struct amd_northbridge_info {
	u16 num;
	u8 gart_supported;
	struct pci_dev **nb_misc;
	u64 flags;
	struct amd_northbridge *nb;
};
extern struct k8_northbridge_info k8_northbridges;
extern struct amd_northbridge_info amd_northbridges;

#define AMD_NB_GART			0x1
#define AMD_NB_L3_INDEX_DISABLE		0x2

#ifdef CONFIG_AMD_NB

static inline struct pci_dev *node_to_k8_nb_misc(int node)
static inline int amd_nb_num(void)
{
	return (node < k8_northbridges.num) ? k8_northbridges.nb_misc[node] : NULL;
	return amd_northbridges.num;
}

#else
static inline int amd_nb_has_feature(int feature)
{
	return ((amd_northbridges.flags & feature) == feature);
}

static inline struct pci_dev *node_to_k8_nb_misc(int node)
static inline struct amd_northbridge *node_to_amd_nb(int node)
{
	return NULL;
	return (node < amd_northbridges.num) ? &amd_northbridges.nb[node] : NULL;
}

#else

#define amd_nb_num(x)		0
#define amd_nb_has_feature(x)	false
#define node_to_amd_nb(x)	NULL

#endif
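
A note on the new helpers: amd_nb_has_feature() evaluates ((flags & feature) == feature), so every bit of the mask must be set, and a combined mask asks whether all of those features were detected. Illustrative values, not from the commit:

	amd_northbridges.flags = AMD_NB_GART;	/* assume only GART detected */

	amd_nb_has_feature(AMD_NB_GART);		/* 1 */
	amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE);	/* 0 */
	amd_nb_has_feature(AMD_NB_GART |
			   AMD_NB_L3_INDEX_DISABLE);	/* 0: not all bits set */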


arch/x86/kernel/amd_nb.c  +80 −55
@@ -12,95 +12,116 @@

static u32 *flush_words;

struct pci_device_id k8_nb_ids[] = {
struct pci_device_id amd_nb_misc_ids[] = {
	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB_MISC) },
	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_10H_NB_MISC) },
	{ PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_15H_NB_MISC) },
	{}
};
EXPORT_SYMBOL(k8_nb_ids);
EXPORT_SYMBOL(amd_nb_misc_ids);

struct k8_northbridge_info k8_northbridges;
EXPORT_SYMBOL(k8_northbridges);
struct amd_northbridge_info amd_northbridges;
EXPORT_SYMBOL(amd_northbridges);

static struct pci_dev *next_k8_northbridge(struct pci_dev *dev)
static struct pci_dev *next_northbridge(struct pci_dev *dev,
					struct pci_device_id *ids)
{
	do {
		dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
		if (!dev)
			break;
	} while (!pci_match_id(&k8_nb_ids[0], dev));
	} while (!pci_match_id(ids, dev));
	return dev;
}

int cache_k8_northbridges(void)
int amd_cache_northbridges(void)
{
	int i;
	struct pci_dev *dev;
	int i = 0;
	struct amd_northbridge *nb;
	struct pci_dev *misc;

	if (k8_northbridges.num)
	if (amd_nb_num())
		return 0;

	dev = NULL;
	while ((dev = next_k8_northbridge(dev)) != NULL)
		k8_northbridges.num++;
	misc = NULL;
	while ((misc = next_northbridge(misc, amd_nb_misc_ids)) != NULL)
		i++;

	/* some CPU families (e.g. family 0x11) do not support GART */
	if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
	    boot_cpu_data.x86 == 0x15)
		k8_northbridges.gart_supported = 1;
	if (i == 0)
		return 0;

	k8_northbridges.nb_misc = kmalloc((k8_northbridges.num + 1) *
					  sizeof(void *), GFP_KERNEL);
	if (!k8_northbridges.nb_misc)
	nb = kzalloc(i * sizeof(struct amd_northbridge), GFP_KERNEL);
	if (!nb)
		return -ENOMEM;

	if (!k8_northbridges.num) {
		k8_northbridges.nb_misc[0] = NULL;
		return 0;
	}
	amd_northbridges.nb = nb;
	amd_northbridges.num = i;

	if (k8_northbridges.gart_supported) {
		flush_words = kmalloc(k8_northbridges.num * sizeof(u32),
				      GFP_KERNEL);
		if (!flush_words) {
			kfree(k8_northbridges.nb_misc);
			return -ENOMEM;
		}
	misc = NULL;
	for (i = 0; i != amd_nb_num(); i++) {
		node_to_amd_nb(i)->misc = misc =
			next_northbridge(misc, amd_nb_misc_ids);
        }

	dev = NULL;
	i = 0;
	while ((dev = next_k8_northbridge(dev)) != NULL) {
		k8_northbridges.nb_misc[i] = dev;
		if (k8_northbridges.gart_supported)
			pci_read_config_dword(dev, 0x9c, &flush_words[i++]);
	}
	k8_northbridges.nb_misc[i] = NULL;
	/* some CPU families (e.g. family 0x11) do not support GART */
	if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10 ||
	    boot_cpu_data.x86 == 0x15)
		amd_northbridges.flags |= AMD_NB_GART;

	/*
	 * Some CPU families support L3 Cache Index Disable. There are some
	 * limitations because of E382 and E388 on family 0x10.
	 */
	if (boot_cpu_data.x86 == 0x10 &&
	    boot_cpu_data.x86_model >= 0x8 &&
	    (boot_cpu_data.x86_model > 0x9 ||
	     boot_cpu_data.x86_mask >= 0x1))
		amd_northbridges.flags |= AMD_NB_L3_INDEX_DISABLE;

	return 0;
}
EXPORT_SYMBOL_GPL(cache_k8_northbridges);
EXPORT_SYMBOL_GPL(amd_cache_northbridges);

/* Ignores subdevice/subvendor but as far as I can figure out
   they're useless anyways */
int __init early_is_k8_nb(u32 device)
int __init early_is_amd_nb(u32 device)
{
	struct pci_device_id *id;
	u32 vendor = device & 0xffff;
	device >>= 16;
	for (id = k8_nb_ids; id->vendor; id++)
	for (id = amd_nb_misc_ids; id->vendor; id++)
		if (vendor == id->vendor && device == id->device)
			return 1;
	return 0;
}

void k8_flush_garts(void)
int amd_cache_gart(void)
{
       int i;

       if (!amd_nb_has_feature(AMD_NB_GART))
               return 0;

       flush_words = kmalloc(amd_nb_num() * sizeof(u32), GFP_KERNEL);
       if (!flush_words) {
               amd_northbridges.flags &= ~AMD_NB_GART;
               return -ENOMEM;
       }

       for (i = 0; i != amd_nb_num(); i++)
               pci_read_config_dword(node_to_amd_nb(i)->misc, 0x9c,
                                     &flush_words[i]);

       return 0;
}

void amd_flush_garts(void)
{
	int flushed, i;
	unsigned long flags;
	static DEFINE_SPINLOCK(gart_lock);

	if (!k8_northbridges.gart_supported)
	if (!amd_nb_has_feature(AMD_NB_GART))
		return;

	/* Avoid races between AGP and IOMMU. In theory it's not needed
@@ -109,16 +130,16 @@ void k8_flush_garts(void)
	   that it doesn't matter to serialize more. -AK */
	spin_lock_irqsave(&gart_lock, flags);
	flushed = 0;
	for (i = 0; i < k8_northbridges.num; i++) {
		pci_write_config_dword(k8_northbridges.nb_misc[i], 0x9c,
	for (i = 0; i < amd_nb_num(); i++) {
		pci_write_config_dword(node_to_amd_nb(i)->misc, 0x9c,
				       flush_words[i] | 1);
		flushed++;
	}
	for (i = 0; i < k8_northbridges.num; i++) {
	for (i = 0; i < amd_nb_num(); i++) {
		u32 w;
		/* Make sure the hardware actually executed the flush*/
		for (;;) {
			pci_read_config_dword(k8_northbridges.nb_misc[i],
			pci_read_config_dword(node_to_amd_nb(i)->misc,
					      0x9c, &w);
			if (!(w & 1))
				break;
@@ -129,19 +150,23 @@ void k8_flush_garts(void)
	if (!flushed)
		printk("nothing to flush?\n");
}
EXPORT_SYMBOL_GPL(k8_flush_garts);
EXPORT_SYMBOL_GPL(amd_flush_garts);

static __init int init_k8_nbs(void)
static __init int init_amd_nbs(void)
{
	int err = 0;

	err = cache_k8_northbridges();
	err = amd_cache_northbridges();

	if (err < 0)
		printk(KERN_NOTICE "K8 NB: Cannot enumerate AMD northbridges.\n");
		printk(KERN_NOTICE "AMD NB: Cannot enumerate AMD northbridges.\n");

	if (amd_cache_gart() < 0)
		printk(KERN_NOTICE "AMD NB: Cannot initialize GART flush words, "
		       "GART support disabled.\n");

	return err;
}

/* This has to go after the PCI subsystem */
fs_initcall(init_k8_nbs);
fs_initcall(init_amd_nbs);
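
On the GART side: amd_cache_gart() now caches each northbridge's flush word (config offset 0x9c) once at init, and on allocation failure clears AMD_NB_GART so that amd_flush_garts() degrades to a no-op rather than dereferencing a NULL flush_words. The flush itself sets bit 0 of that register on every northbridge and polls until the hardware clears it. A hypothetical caller, sketching why the flush exists (identifiers below are illustrative):

/* After a GART translation entry changes, stale entries must be flushed
 * from every northbridge's GART TLB before the old page can be reused. */
static void example_gart_remap(u32 *gatt, unsigned long page, u32 new_pte)
{
	gatt[page] = new_pte;	/* update the in-memory GART table */
	amd_flush_garts();	/* invalidate cached translations */
}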
arch/x86/kernel/aperture_64.c  +5 −5
@@ -206,7 +206,7 @@ static u32 __init read_agp(int bus, int slot, int func, int cap, u32 *order)
 * Do an PCI bus scan by hand because we're running before the PCI
 * subsystem.
 *
 * All K8 AGP bridges are AGPv3 compliant, so we can do this scan
 * All AMD AGP bridges are AGPv3 compliant, so we can do this scan
 * generically. It's probably overkill to always scan all slots because
 * the AGP bridges should be always an own bus on the HT hierarchy,
 * but do it here for future safety.
@@ -303,7 +303,7 @@ void __init early_gart_iommu_check(void)
		dev_limit = bus_dev_ranges[i].dev_limit;

		for (slot = dev_base; slot < dev_limit; slot++) {
			if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
			if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
				continue;

			ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
@@ -358,7 +358,7 @@ void __init early_gart_iommu_check(void)
		dev_limit = bus_dev_ranges[i].dev_limit;

		for (slot = dev_base; slot < dev_limit; slot++) {
			if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
			if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
				continue;

			ctl = read_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL);
@@ -400,7 +400,7 @@ int __init gart_iommu_hole_init(void)
		dev_limit = bus_dev_ranges[i].dev_limit;

		for (slot = dev_base; slot < dev_limit; slot++) {
			if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
			if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
				continue;

			iommu_detected = 1;
@@ -518,7 +518,7 @@ out:
		dev_base = bus_dev_ranges[i].dev_base;
		dev_limit = bus_dev_ranges[i].dev_limit;
		for (slot = dev_base; slot < dev_limit; slot++) {
			if (!early_is_k8_nb(read_pci_config(bus, slot, 3, 0x00)))
			if (!early_is_amd_nb(read_pci_config(bus, slot, 3, 0x00)))
				continue;

			write_pci_config(bus, slot, 3, AMD64_GARTAPERTURECTL, ctl);
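
The scan above runs before the PCI subsystem is up, so it reads config space by hand: dword 0 of function 3 packs the IDs as (device << 16) | vendor, which early_is_amd_nb() splits back apart and matches against amd_nb_misc_ids[]. A sketch of that decoding (the values in the comments are examples):

	u32 id = read_pci_config(bus, slot, 3, 0x00);
	u16 vendor = id & 0xffff;	/* e.g. PCI_VENDOR_ID_AMD (0x1022) */
	u16 device = id >> 16;		/* e.g. PCI_DEVICE_ID_AMD_K8_NB_MISC */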
arch/x86/kernel/cpu/intel_cacheinfo.c  +63 −84
@@ -149,8 +149,7 @@ union _cpuid4_leaf_ecx {
};

struct amd_l3_cache {
	struct	 pci_dev *dev;
	bool	 can_disable;
	struct	 amd_northbridge *nb;
	unsigned indices;
	u8	 subcaches[4];
};
@@ -311,14 +310,12 @@ struct _cache_attr {
/*
 * L3 cache descriptors
 */
static struct amd_l3_cache **__cpuinitdata l3_caches;

static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
{
	unsigned int sc0, sc1, sc2, sc3;
	u32 val = 0;

	pci_read_config_dword(l3->dev, 0x1C4, &val);
	pci_read_config_dword(l3->nb->misc, 0x1C4, &val);

	/* calculate subcache sizes */
	l3->subcaches[0] = sc0 = !(val & BIT(0));
@@ -330,47 +327,14 @@ static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
}

static struct amd_l3_cache * __cpuinit amd_init_l3_cache(int node)
{
	struct amd_l3_cache *l3;
	struct pci_dev *dev = node_to_k8_nb_misc(node);

	l3 = kzalloc(sizeof(struct amd_l3_cache), GFP_ATOMIC);
	if (!l3) {
		printk(KERN_WARNING "Error allocating L3 struct\n");
		return NULL;
	}

	l3->dev = dev;

	amd_calc_l3_indices(l3);

	return l3;
}

static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf,
					int index)
{
	static struct amd_l3_cache *__cpuinitdata l3_caches;
	int node;

	if (boot_cpu_data.x86 != 0x10)
		return;

	if (index < 3)
		return;

	/* see errata #382 and #388 */
	if (boot_cpu_data.x86_model < 0x8)
		return;

	if ((boot_cpu_data.x86_model == 0x8 ||
	     boot_cpu_data.x86_model == 0x9)
		&&
	     boot_cpu_data.x86_mask < 0x1)
			return;

	/* not in virtualized environments */
	if (k8_northbridges.num == 0)
	/* only for L3, and not in virtualized environments */
	if (index < 3 || amd_nb_num() == 0)
		return;

	/*
@@ -378,7 +342,7 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,
	 * never freed but this is done only on shutdown so it doesn't matter.
	 */
	if (!l3_caches) {
		int size = k8_northbridges.num * sizeof(struct amd_l3_cache *);
		int size = amd_nb_num() * sizeof(struct amd_l3_cache);

		l3_caches = kzalloc(size, GFP_ATOMIC);
		if (!l3_caches)
@@ -387,14 +351,12 @@ static void __cpuinit amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf,

	node = amd_get_nb_id(smp_processor_id());

	if (!l3_caches[node]) {
		l3_caches[node] = amd_init_l3_cache(node);
		l3_caches[node]->can_disable = true;
	if (!l3_caches[node].nb) {
		l3_caches[node].nb = node_to_amd_nb(node);
		amd_calc_l3_indices(&l3_caches[node]);
	}

	WARN_ON(!l3_caches[node]);

	this_leaf->l3 = l3_caches[node];
	this_leaf->l3 = &l3_caches[node];
}

/*
@@ -408,7 +370,7 @@ int amd_get_l3_disable_slot(struct amd_l3_cache *l3, unsigned slot)
{
	unsigned int reg = 0;

	pci_read_config_dword(l3->dev, 0x1BC + slot * 4, &reg);
	pci_read_config_dword(l3->nb->misc, 0x1BC + slot * 4, &reg);

	/* check whether this slot is activated already */
	if (reg & (3UL << 30))
@@ -422,7 +384,8 @@ static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
{
	int index;

	if (!this_leaf->l3 || !this_leaf->l3->can_disable)
	if (!this_leaf->l3 ||
	    !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
		return -EINVAL;

	index = amd_get_l3_disable_slot(this_leaf->l3, slot);
@@ -457,7 +420,7 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
		if (!l3->subcaches[i])
			continue;

		pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg);
		pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);

		/*
		 * We need to WBINVD on a core on the node containing the L3
@@ -467,7 +430,7 @@ static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
		wbinvd_on_cpu(cpu);

		reg |= BIT(31);
		pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg);
		pci_write_config_dword(l3->nb->misc, 0x1BC + slot * 4, reg);
	}
}

@@ -524,7 +487,8 @@ static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (!this_leaf->l3 || !this_leaf->l3->can_disable)
	if (!this_leaf->l3 ||
	    !amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
		return -EINVAL;

	cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
@@ -558,10 +522,7 @@ static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
		show_cache_disable_1, store_cache_disable_1);

#else	/* CONFIG_AMD_NB */
static void __cpuinit
amd_check_l3_disable(struct _cpuid4_info_regs *this_leaf, int index)
{
};
#define amd_init_l3_cache(x, y)
#endif /* CONFIG_AMD_NB */

static int
@@ -575,7 +536,7 @@ __cpuinit cpuid4_cache_lookup_regs(int index,

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		amd_cpuid4(index, &eax, &ebx, &ecx);
		amd_check_l3_disable(this_leaf, index);
		amd_init_l3_cache(this_leaf, index);
	} else {
		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
	}
@@ -983,30 +944,48 @@ define_one_ro(size);
define_one_ro(shared_cpu_map);
define_one_ro(shared_cpu_list);

#define DEFAULT_SYSFS_CACHE_ATTRS	\
	&type.attr,			\
	&level.attr,			\
	&coherency_line_size.attr,	\
	&physical_line_partition.attr,	\
	&ways_of_associativity.attr,	\
	&number_of_sets.attr,		\
	&size.attr,			\
	&shared_cpu_map.attr,		\
	&shared_cpu_list.attr

static struct attribute *default_attrs[] = {
	DEFAULT_SYSFS_CACHE_ATTRS,
	&type.attr,
	&level.attr,
	&coherency_line_size.attr,
	&physical_line_partition.attr,
	&ways_of_associativity.attr,
	&number_of_sets.attr,
	&size.attr,
	&shared_cpu_map.attr,
	&shared_cpu_list.attr,
	NULL
};

static struct attribute *default_l3_attrs[] = {
	DEFAULT_SYSFS_CACHE_ATTRS,
#ifdef CONFIG_AMD_NB
	&cache_disable_0.attr,
	&cache_disable_1.attr,
static struct attribute ** __cpuinit amd_l3_attrs(void)
{
	static struct attribute **attrs;
	int n;

	if (attrs)
		return attrs;

	n = sizeof (default_attrs) / sizeof (struct attribute *);

	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
		n += 2;

	attrs = kzalloc(n * sizeof (struct attribute *), GFP_KERNEL);
	if (attrs == NULL)
		return attrs = default_attrs;

	for (n = 0; default_attrs[n]; n++)
		attrs[n] = default_attrs[n];

	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
		attrs[n++] = &cache_disable_0.attr;
		attrs[n++] = &cache_disable_1.attr;
	}

	return attrs;
}
#endif
	NULL
};

static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
@@ -1117,11 +1096,11 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)

		this_leaf = CPUID4_INFO_IDX(cpu, i);

		if (this_leaf->l3 && this_leaf->l3->can_disable)
			ktype_cache.default_attrs = default_l3_attrs;
		else
		ktype_cache.default_attrs = default_attrs;

#ifdef CONFIG_AMD_NB
		if (this_leaf->l3)
			ktype_cache.default_attrs = amd_l3_attrs();
#endif
		retval = kobject_init_and_add(&(this_object->kobj),
					      &ktype_cache,
					      per_cpu(ici_cache_kobject, cpu),
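
With the static default_l3_attrs[] gone, amd_l3_attrs() builds the attribute array once at runtime and appends cache_disable_0/1 only when AMD_NB_L3_INDEX_DISABLE was detected, so the sysfs files simply do not exist on unaffected systems. A hypothetical userspace probe (the path follows the existing cacheinfo sysfs layout, where index3 is the L3 leaf):

#include <stdio.h>

int main(void)
{
	/* Present only when the kernel set AMD_NB_L3_INDEX_DISABLE. */
	FILE *f = fopen("/sys/devices/system/cpu/cpu0/cache/index3/"
			"cache_disable_0", "r");
	char buf[32];

	if (!f)
		return 1;	/* feature not available on this CPU */
	if (fgets(buf, sizeof(buf), f))
		printf("cache_disable_0: %s", buf);
	fclose(f);
	return 0;
}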