Commit d75586ad authored by Shaohua Li and committed by Ingo Molnar

x86, pageattr: introduce APIs to change pageattr of a page array



Add array-interface APIs to pageattr. A page-by-page cache flush is
quite slow when many pages are involved, so for more than 1024 pages
(4MB) the patch falls back to wbinvd(). In a simple test (running a
3D game, OpenArena), nearly all AGP memory allocations were small
(< 1MB), so this should not impact runtime performance.

Signed-off-by: Dave Airlie <airlied@gmail.com>
Signed-off-by: Shaohua Li <shaohua.li@intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
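For context, a minimal sketch (not part of the patch) of how a caller such as the AGP/DRM code might use the new array API; the helper and variable names (agp_map_pages_uc, agp_pages, nr_pages) are illustrative assumptions, and the pages are assumed to be lowmem pages with a valid kernel mapping:

#include <linux/mm.h>
#include <linux/slab.h>
#include <asm/cacheflush.h>

/*
 * Hypothetical driver helper: switch a batch of 4k pages to
 * uncached with a single CPA call instead of looping over
 * set_memory_uc(), so the cache/TLB flushing is done once for
 * the whole batch.
 */
static int agp_map_pages_uc(struct page **agp_pages, int nr_pages)
{
	unsigned long *addrs;
	int i, ret;

	addrs = kmalloc(nr_pages * sizeof(unsigned long), GFP_KERNEL);
	if (!addrs)
		return -ENOMEM;

	/* collect the kernel virtual address of each page */
	for (i = 0; i < nr_pages; i++)
		addrs[i] = (unsigned long) page_address(agp_pages[i]);

	ret = set_memory_array_uc(addrs, nr_pages);

	kfree(addrs);
	return ret;
}

On release, the same array of addresses would be passed to set_memory_array_wb() to restore write-back caching and drop the memtype reservations taken by set_memory_array_uc().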
parent cacf8906
+166 −50
@@ -25,15 +25,19 @@
 * The current flushing context - we pass it instead of 5 arguments:
 */
struct cpa_data {
	unsigned long	vaddr;
	unsigned long	*vaddr;
	pgprot_t	mask_set;
	pgprot_t	mask_clr;
	int		numpages;
	int		flushtlb;
	int		flags;
	unsigned long	pfn;
	unsigned	force_split : 1;
	int		curpage;
};

#define CPA_FLUSHTLB 1
#define CPA_ARRAY 2

#ifdef CONFIG_PROC_FS
static unsigned long direct_pages_count[PG_LEVEL_NUM];

@@ -184,6 +188,41 @@ static void cpa_flush_range(unsigned long start, int numpages, int cache)
	}
}

static void cpa_flush_array(unsigned long *start, int numpages, int cache)
{
	unsigned int i, level;
	unsigned long *addr;

	BUG_ON(irqs_disabled());

	on_each_cpu(__cpa_flush_range, NULL, 1);

	if (!cache)
		return;

	/* 4M threshold */
	if (numpages >= 1024) {
		if (boot_cpu_data.x86_model >= 4)
			wbinvd();
		return;
	}
	/*
	 * We only need to flush on one CPU,
	 * clflush is a MESI-coherent instruction that
	 * will cause all other CPUs to flush the same
	 * cachelines:
	 */
	for (i = 0, addr = start; i < numpages; i++, addr++) {
		pte_t *pte = lookup_address(*addr, &level);

		/*
		 * Only flush present addresses:
		 */
		if (pte && (pte_val(*pte) & _PAGE_PRESENT))
			clflush_cache_range((void *) *addr, PAGE_SIZE);
	}
}

/*
 * Certain areas of memory on x86 require very specific protection flags,
 * for example the BIOS area or kernel text. Callers don't always get this
@@ -392,7 +431,7 @@ try_preserve_large_page(pte_t *kpte, unsigned long address,
		 */
		new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
		__set_pmd_pte(kpte, address, new_pte);
		cpa->flushtlb = 1;
		cpa->flags |= CPA_FLUSHTLB;
		do_split = 0;
	}

@@ -578,11 +617,16 @@ out_unlock:

static int __change_page_attr(struct cpa_data *cpa, int primary)
{
	unsigned long address = cpa->vaddr;
	unsigned long address;
	int do_split, err;
	unsigned int level;
	pte_t *kpte, old_pte;

	if (cpa->flags & CPA_ARRAY)
		address = cpa->vaddr[cpa->curpage];
	else
		address = *cpa->vaddr;

repeat:
	kpte = lookup_address(address, &level);
	if (!kpte)
@@ -594,8 +638,8 @@ repeat:
			return 0;
		printk(KERN_WARNING "CPA: called for zero pte. "
		       "vaddr = %lx cpa->vaddr = %lx\n", address,
		       cpa->vaddr);
		       *cpa->vaddr);
		WARN_ON(1);
		return -EINVAL;
	}

@@ -621,7 +665,7 @@ repeat:
		 */
		if (pte_val(old_pte) != pte_val(new_pte)) {
			set_pte_atomic(kpte, new_pte);
			cpa->flushtlb = 1;
			cpa->flags |= CPA_FLUSHTLB;
		}
		cpa->numpages = 1;
		return 0;
@@ -645,7 +689,7 @@ repeat:
	 */
	err = split_large_page(kpte, address);
	if (!err) {
		cpa->flushtlb = 1;
		cpa->flags |= CPA_FLUSHTLB;
		goto repeat;
	}

@@ -658,6 +702,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
{
	struct cpa_data alias_cpa;
	int ret = 0;
	unsigned long temp_cpa_vaddr, vaddr;

	if (cpa->pfn >= max_pfn_mapped)
		return 0;
@@ -670,16 +715,24 @@ static int cpa_process_alias(struct cpa_data *cpa)
	 * No need to redo, when the primary call touched the direct
	 * mapping already:
	 */
	if (!(within(cpa->vaddr, PAGE_OFFSET,
	if (cpa->flags & CPA_ARRAY)
		vaddr = cpa->vaddr[cpa->curpage];
	else
		vaddr = *cpa->vaddr;

	if (!(within(vaddr, PAGE_OFFSET,
		    PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT))
#ifdef CONFIG_X86_64
		|| within(cpa->vaddr, PAGE_OFFSET + (1UL<<32),
		|| within(vaddr, PAGE_OFFSET + (1UL<<32),
		    PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))
#endif
	)) {

		alias_cpa = *cpa;
		alias_cpa.vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
		temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
		alias_cpa.vaddr = &temp_cpa_vaddr;
		alias_cpa.flags &= ~CPA_ARRAY;
		ret = __change_page_attr_set_clr(&alias_cpa, 0);
	}
@@ -691,7 +744,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
	 * No need to redo, when the primary call touched the high
	 * mapping already:
	 */
	if (within(cpa->vaddr, (unsigned long) _text, (unsigned long) _end))
	if (within(vaddr, (unsigned long) _text, (unsigned long) _end))
		return 0;

	/*
@@ -702,8 +755,9 @@ static int cpa_process_alias(struct cpa_data *cpa)
		return 0;

	alias_cpa = *cpa;
	alias_cpa.vaddr =
		(cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base;
	temp_cpa_vaddr = (cpa->pfn << PAGE_SHIFT) + __START_KERNEL_map - phys_base;
	alias_cpa.vaddr = &temp_cpa_vaddr;
	alias_cpa.flags &= ~CPA_ARRAY;

	/*
	 * The high mapping range is imprecise, so ignore the return value.
@@ -723,6 +777,9 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
		 * preservation check.
		 */
		cpa->numpages = numpages;
		/* for array changes, we can't use large page */
		if (cpa->flags & CPA_ARRAY)
			cpa->numpages = 1;

		ret = __change_page_attr(cpa, checkalias);
		if (ret)
@@ -741,7 +798,11 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
		 */
		BUG_ON(cpa->numpages > numpages);
		numpages -= cpa->numpages;
		cpa->vaddr += cpa->numpages * PAGE_SIZE;
		if (cpa->flags & CPA_ARRAY)
			cpa->curpage++;
		else
			*cpa->vaddr += cpa->numpages * PAGE_SIZE;

	}
	return 0;
}
@@ -752,9 +813,9 @@ static inline int cache_attr(pgprot_t attr)
		(_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD);
}

static int change_page_attr_set_clr(unsigned long addr, int numpages,
static int change_page_attr_set_clr(unsigned long *addr, int numpages,
				    pgprot_t mask_set, pgprot_t mask_clr,
				    int force_split)
				    int force_split, int array)
{
	struct cpa_data cpa;
	int ret, cache, checkalias;
@@ -769,13 +830,23 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
		return 0;

	/* Ensure we are PAGE_SIZE aligned */
	if (addr & ~PAGE_MASK) {
		addr &= PAGE_MASK;
	if (!array) {
		if (*addr & ~PAGE_MASK) {
			*addr &= PAGE_MASK;
			/*
			 * People should not be passing in unaligned addresses:
			 */
			WARN_ON_ONCE(1);
		}
	} else {
		int i;
		for (i = 0; i < numpages; i++) {
			if (addr[i] & ~PAGE_MASK) {
				addr[i] &= PAGE_MASK;
				WARN_ON_ONCE(1);
			}
		}
	}

	/* Must avoid aliasing mappings in the highmem code */
	kmap_flush_unused();
@@ -784,9 +855,13 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
	cpa.numpages = numpages;
	cpa.mask_set = mask_set;
	cpa.mask_clr = mask_clr;
	cpa.flushtlb = 0;
	cpa.flags = 0;
	cpa.curpage = 0;
	cpa.force_split = force_split;

	if (array)
		cpa.flags |= CPA_ARRAY;

	/* No alias checking for _NX bit modifications */
	checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;

@@ -795,7 +870,7 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
	/*
	 * Check whether we really changed something:
	 */
	if (!cpa.flushtlb)
	if (!(cpa.flags & CPA_FLUSHTLB))
		goto out;

	/*
@@ -810,9 +885,12 @@ static int change_page_attr_set_clr(unsigned long addr, int numpages,
	 * error case we fall back to cpa_flush_all (which uses
	 * wbindv):
	 */
	if (!ret && cpu_has_clflush)
		cpa_flush_range(addr, numpages, cache);
	if (!ret && cpu_has_clflush) {
		if (cpa.flags & CPA_ARRAY)
			cpa_flush_array(addr, numpages, cache);
		else
			cpa_flush_range(*addr, numpages, cache);
	} else
		cpa_flush_all(cache);

out:
@@ -821,16 +899,18 @@ out:
	return ret;
}

static inline int change_page_attr_set(unsigned long addr, int numpages,
				       pgprot_t mask)
static inline int change_page_attr_set(unsigned long *addr, int numpages,
				       pgprot_t mask, int array)
{
	return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0);
	return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0), 0,
		array);
}

static inline int change_page_attr_clear(unsigned long addr, int numpages,
					 pgprot_t mask)
static inline int change_page_attr_clear(unsigned long *addr, int numpages,
					 pgprot_t mask, int array)
{
	return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0);
	return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask, 0,
		array);
}

int _set_memory_uc(unsigned long addr, int numpages)
@@ -838,8 +918,8 @@ int _set_memory_uc(unsigned long addr, int numpages)
	/*
	 * for now UC MINUS. see comments in ioremap_nocache()
	 */
	return change_page_attr_set(addr, numpages,
				    __pgprot(_PAGE_CACHE_UC_MINUS));
	return change_page_attr_set(&addr, numpages,
				    __pgprot(_PAGE_CACHE_UC_MINUS), 0);
}

int set_memory_uc(unsigned long addr, int numpages)
@@ -855,10 +935,31 @@ int set_memory_uc(unsigned long addr, int numpages)
}
EXPORT_SYMBOL(set_memory_uc);

int set_memory_array_uc(unsigned long *addr, int addrinarray)
{
	int i;
	/*
	 * for now UC MINUS. see comments in ioremap_nocache()
	 */
	for (i = 0; i < addrinarray; i++) {
		if (reserve_memtype(addr[i], addr[i] + PAGE_SIZE,
			    _PAGE_CACHE_UC_MINUS, NULL))
			goto out;
	}

	return change_page_attr_set(addr, addrinarray,
				    __pgprot(_PAGE_CACHE_UC_MINUS), 1);
out:
	while (--i >= 0)
		free_memtype(addr[i], addr[i] + PAGE_SIZE);
	return -EINVAL;
}
EXPORT_SYMBOL(set_memory_array_uc);

int _set_memory_wc(unsigned long addr, int numpages)
{
	return change_page_attr_set(addr, numpages,
				    __pgprot(_PAGE_CACHE_WC));
	return change_page_attr_set(&addr, numpages,
				    __pgprot(_PAGE_CACHE_WC), 0);
}

int set_memory_wc(unsigned long addr, int numpages)
@@ -876,8 +977,8 @@ EXPORT_SYMBOL(set_memory_wc);

int _set_memory_wb(unsigned long addr, int numpages)
{
	return change_page_attr_clear(addr, numpages,
				      __pgprot(_PAGE_CACHE_MASK));
	return change_page_attr_clear(&addr, numpages,
				      __pgprot(_PAGE_CACHE_MASK), 0);
}

int set_memory_wb(unsigned long addr, int numpages)
@@ -888,37 +989,48 @@ int set_memory_wb(unsigned long addr, int numpages)
}
EXPORT_SYMBOL(set_memory_wb);

int set_memory_array_wb(unsigned long *addr, int addrinarray)
{
	int i;
	for (i = 0; i < addrinarray; i++)
		free_memtype(addr[i], addr[i] + PAGE_SIZE);

	return change_page_attr_clear(addr, addrinarray,
				      __pgprot(_PAGE_CACHE_MASK), 1);
}
EXPORT_SYMBOL(set_memory_array_wb);

int set_memory_x(unsigned long addr, int numpages)
{
	return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_NX));
	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_NX), 0);
}
EXPORT_SYMBOL(set_memory_x);

int set_memory_nx(unsigned long addr, int numpages)
{
	return change_page_attr_set(addr, numpages, __pgprot(_PAGE_NX));
	return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_NX), 0);
}
EXPORT_SYMBOL(set_memory_nx);

int set_memory_ro(unsigned long addr, int numpages)
{
	return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_RW));
	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0);
}

int set_memory_rw(unsigned long addr, int numpages)
{
	return change_page_attr_set(addr, numpages, __pgprot(_PAGE_RW));
	return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0);
}

int set_memory_np(unsigned long addr, int numpages)
{
	return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT));
	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_PRESENT), 0);
}

int set_memory_4k(unsigned long addr, int numpages)
{
	return change_page_attr_set_clr(addr, numpages, __pgprot(0),
					__pgprot(0), 1);
	return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
					__pgprot(0), 1, 0);
}

int set_pages_uc(struct page *page, int numpages)
@@ -971,20 +1083,24 @@ int set_pages_rw(struct page *page, int numpages)

static int __set_pages_p(struct page *page, int numpages)
{
	struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
	unsigned long tempaddr = (unsigned long) page_address(page);
	struct cpa_data cpa = { .vaddr = &tempaddr,
				.numpages = numpages,
				.mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
				.mask_clr = __pgprot(0)};
				.mask_clr = __pgprot(0),
				.flags = 0};

	return __change_page_attr_set_clr(&cpa, 1);
}

static int __set_pages_np(struct page *page, int numpages)
{
	struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
	unsigned long tempaddr = (unsigned long) page_address(page);
	struct cpa_data cpa = { .vaddr = &tempaddr,
				.numpages = numpages,
				.mask_set = __pgprot(0),
				.mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW)};
				.mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
				.flags = 0};

	return __change_page_attr_set_clr(&cpa, 1);
}
+3 −0
@@ -66,6 +66,9 @@ int set_memory_rw(unsigned long addr, int numpages);
int set_memory_np(unsigned long addr, int numpages);
int set_memory_4k(unsigned long addr, int numpages);

int set_memory_array_uc(unsigned long *addr, int addrinarray);
int set_memory_array_wb(unsigned long *addr, int addrinarray);

/*
 * For legacy compatibility with the old APIs, a few functions
 * are provided that work on a "struct page".