Commit 82d8f4c2, authored by Nicholas Piggin, committed by Michael Ellerman
Browse files

powerpc/64s/hash: Use POWER9 SLBIA IH=3 variant in switch_slb



POWER9 introduces SLBIA IH=3, which invalidates all SLB entries and
associated lookaside information that have a class value of 1, which
Linux assigns to user addresses. This matches what switch_slb wants,
and allows a simple fast implementation that avoids the slb_cache
complexity.

As a side-effect, the POWER5 < DD2.1 SLB invalidation workaround is
also avoided on POWER9.

Process context switching rate is improved by about 2.2% for a small
process that hits the slb cache, which is the best case for the current
code.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
parent 5141c182
Loading
Loading
Loading
Loading
+49 −36
Original line number Diff line number Diff line
@@ -279,7 +279,6 @@ static inline int esids_match(unsigned long addr1, unsigned long addr2)
/* Flush all user entries from the segment table of the current processor. */
void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
{
	unsigned long offset;
	unsigned long pc = KSTK_EIP(tsk);
	unsigned long stack = KSTK_ESP(tsk);
	unsigned long exec_base;
@@ -291,7 +290,17 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
	 * which would update the slb_cache/slb_cache_ptr fields in the PACA.
	 */
	hard_irq_disable();
	offset = get_paca()->slb_cache_ptr;
	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
		/*
		 * SLBIA IH=3 invalidates all Class=1 SLBEs and their
		 * associated lookaside structures, which matches what
		 * switch_slb wants. So ARCH_300 does not use the slb
		 * cache.
		 */
		asm volatile("isync ; " PPC_SLBIA(3)" ; isync");
	} else {
		unsigned long offset = get_paca()->slb_cache_ptr;

		if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
		    offset <= SLB_CACHE_ENTRIES) {
			unsigned long slbie_data = 0;
@@ -299,11 +308,12 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)

			asm volatile("isync" : : : "memory");
			for (i = 0; i < offset; i++) {
			slbie_data = (unsigned long)get_paca()->slb_cache[i]
				<< SID_SHIFT; /* EA */
				/* EA */
				slbie_data = (unsigned long)
					get_paca()->slb_cache[i] << SID_SHIFT;
				slbie_data |= user_segment_size(slbie_data)
						<< SLBIE_SSIZE_SHIFT;
			slbie_data |= SLBIE_C; /* C set for user addresses */
				slbie_data |= SLBIE_C; /* user slbs have C=1 */
				asm volatile("slbie %0" : : "r" (slbie_data));
			}

@@ -325,11 +335,11 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
				     "isync"
				     :: "r"(ksp_vsid_data),
					"r"(ksp_esid_data));

		asm volatile("isync" : : : "memory");
		}

		get_paca()->slb_cache_ptr = 0;
	}

	copy_mm_to_paca(mm);

	/*
@@ -455,6 +465,9 @@ static void insert_slb_entry(unsigned long vsid, unsigned long ea,
	enum slb_index index;
	int slb_cache_index;

	if (cpu_has_feature(CPU_FTR_ARCH_300))
		return; /* ISAv3.0B and later does not use slb_cache */

	/*
	 * We are irq disabled, hence should be safe to access PACA.
	 */
+7 −4
Original line number Diff line number Diff line
@@ -2393,10 +2393,13 @@ static void dump_one_paca(int cpu)
		}
	}
	DUMP(p, vmalloc_sllp, "%#-*x");

	if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
		DUMP(p, slb_cache_ptr, "%#-*x");
		for (i = 0; i < SLB_CACHE_ENTRIES; i++)
			printf(" %-*s[%d] = 0x%016x\n",
			       22, "slb_cache", i, p->slb_cache[i]);
	}

	DUMP(p, rfi_flush_fallback_area, "%-*px");
#endif