Commit 145f573b authored by Rik van Riel, committed by Peter Zijlstra

x86/mm/tlb: Make lazy TLB mode lazier



Lazy TLB mode can result in an idle CPU being woken up by a TLB flush,
when all it really needs to do is reload %CR3 at the next context switch,
assuming no page table pages got freed.

Memory ordering is used to prevent race conditions between switch_mm_irqs_off,
which checks whether .tlb_gen changed, and the TLB invalidation code, which
increments .tlb_gen whenever page table entries get invalidated.

The atomic increment in inc_mm_tlb_gen() is its own barrier; the context
switch code adds an explicit barrier between reading tlbstate.is_lazy and
reading next->context.tlb_gen.
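
A userspace C11 sketch of that pairing (illustrative only, not the kernel
code; tlb_gen, cpu_is_lazy and local_tlb_gen are stand-ins for
mm->context.tlb_gen, cpu_tlbstate.is_lazy and the per-CPU
ctxs[prev_asid].tlb_gen copy):

    /* Illustrative userspace analogue of the ordering described above. */
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdint.h>

    static _Atomic uint64_t tlb_gen;       /* stand-in for mm->context.tlb_gen     */
    static _Atomic bool     cpu_is_lazy;   /* stand-in for cpu_tlbstate.is_lazy    */
    static uint64_t         local_tlb_gen; /* stand-in for ctxs[prev_asid].tlb_gen */

    /*
     * Flush side: bump the generation, then decide whether an IPI is
     * needed.  The seq_cst read-modify-write is a full barrier, like the
     * atomic increment in inc_mm_tlb_gen().
     */
    static bool flusher_needs_ipi(void)
    {
            atomic_fetch_add(&tlb_gen, 1);
            return !atomic_load_explicit(&cpu_is_lazy, memory_order_relaxed);
    }

    /*
     * Context-switch side: skip the flush unless the generation moved on.
     * The full fence between the two reads pairs with the increment above,
     * mirroring the smp_mb() added to switch_mm_irqs_off().
     */
    static bool switcher_needs_flush(void)
    {
            bool was_lazy = atomic_load_explicit(&cpu_is_lazy, memory_order_relaxed);

            if (!was_lazy)
                    return false;   /* thread-to-thread switch, nothing to do */

            atomic_thread_fence(memory_order_seq_cst);
            return atomic_load_explicit(&tlb_gen, memory_order_relaxed) != local_tlb_gen;
    }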

CPUs in lazy TLB mode remain part of the mm_cpumask(mm), both because that
allows TLB flush IPIs to be sent at page table freeing time, and because the
cache line bouncing caused by clearing and re-setting the bit in
mm_cpumask(mm) was responsible for about half the CPU use in
switch_mm_irqs_off().
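
A rough sketch of the resulting IPI policy, using hypothetical helpers
(in_mm_cpumask, is_lazy, send_flush_ipi) rather than the kernel's cpumask
and on_each_cpu_cond_mask() machinery; the real logic is the
native_flush_tlb_others() hunk in the diff below:

    /* Illustrative only: which CPUs get a flush IPI. */
    #include <stdbool.h>

    #define NR_CPUS 8

    static bool in_mm_cpumask[NR_CPUS]; /* stand-in for mm_cpumask(mm)       */
    static bool is_lazy[NR_CPUS];       /* stand-in for cpu_tlbstate.is_lazy */

    static void send_flush_ipi(int cpu)
    {
            (void)cpu;                  /* hypothetical IPI, elided here */
    }

    static void flush_others(bool freed_tables)
    {
            for (int cpu = 0; cpu < NR_CPUS; cpu++) {
                    if (!in_mm_cpumask[cpu])
                            continue;
                    /*
                     * Lazy CPUs can be skipped unless page tables were
                     * freed; they notice the new tlb_gen at their next
                     * context switch.
                     */
                    if (freed_tables || !is_lazy[cpu])
                            send_flush_ipi(cpu);
            }
    }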

We can change native_flush_tlb_others() without touching other
(paravirt) implementations of flush_tlb_others() because we'll be
flushing less. The existing implementations flush more and are
therefore still correct.

Cc: npiggin@gmail.com
Cc: mingo@kernel.org
Cc: will.deacon@arm.com
Cc: kernel-team@fb.com
Cc: luto@kernel.org
Cc: hpa@zytor.com
Tested-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Rik van Riel <riel@surriel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20180926035844.1420-8-riel@surriel.com
parent 97807813
+58 −9
@@ -185,6 +185,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
{
	struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
	u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
	bool was_lazy = this_cpu_read(cpu_tlbstate.is_lazy);
	unsigned cpu = smp_processor_id();
	u64 next_tlb_gen;
	bool need_flush;
@@ -242,17 +243,40 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
			   next->context.ctx_id);

		/*
		 * We don't currently support having a real mm loaded without
		 * our cpu set in mm_cpumask().  We have all the bookkeeping
		 * in place to figure out whether we would need to flush
		 * if our cpu were cleared in mm_cpumask(), but we don't
		 * currently use it.
		 * Even in lazy TLB mode, the CPU should stay set in the
		 * mm_cpumask. The TLB shootdown code can figure out from
		 * cpu_tlbstate.is_lazy whether or not to send an IPI.
		 */
		if (WARN_ON_ONCE(real_prev != &init_mm &&
				 !cpumask_test_cpu(cpu, mm_cpumask(next))))
			cpumask_set_cpu(cpu, mm_cpumask(next));

		/*
		 * If the CPU is not in lazy TLB mode, we are just switching
		 * from one thread in a process to another thread in the same
		 * process. No TLB flush required.
		 */
		if (!was_lazy)
			return;

		/*
		 * Read the tlb_gen to check whether a flush is needed.
		 * If the TLB is up to date, just use it.
		 * The barrier synchronizes with the tlb_gen increment in
		 * the TLB shootdown code.
		 */
		smp_mb();
		next_tlb_gen = atomic64_read(&next->context.tlb_gen);
		if (this_cpu_read(cpu_tlbstate.ctxs[prev_asid].tlb_gen) ==
				next_tlb_gen)
			return;

		/*
		 * TLB contents went out of date while we were in lazy
		 * mode. Fall through to the TLB switching code below.
		 */
		new_asid = prev_asid;
		need_flush = true;
	} else {
		u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id);

@@ -346,9 +370,11 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
	this_cpu_write(cpu_tlbstate.loaded_mm, next);
	this_cpu_write(cpu_tlbstate.loaded_mm_asid, new_asid);

	if (next != real_prev) {
		load_mm_cr4(next);
		switch_ldt(real_prev, next);
	}
}

/*
 * Please ignore the name of this function.  It should be called
@@ -455,6 +481,9 @@ static void flush_tlb_func_common(const struct flush_tlb_info *f,
		 * paging-structure cache to avoid speculatively reading
		 * garbage into our TLB.  Since switching to init_mm is barely
		 * slower than a minimal flush, just switch to init_mm.
		 *
		 * This should be rare, with native_flush_tlb_others skipping
		 * IPIs to lazy TLB mode CPUs.
		 */
		switch_mm_irqs_off(NULL, &init_mm, NULL);
		return;
@@ -557,6 +586,11 @@ static void flush_tlb_func_remote(void *info)
	flush_tlb_func_common(f, false, TLB_REMOTE_SHOOTDOWN);
}

static bool tlb_is_not_lazy(int cpu, void *data)
{
	return !per_cpu(cpu_tlbstate.is_lazy, cpu);
}

void native_flush_tlb_others(const struct cpumask *cpumask,
			     const struct flush_tlb_info *info)
{
@@ -592,8 +626,23 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
					       (void *)info, 1);
		return;
	}

	/*
	 * If no page tables were freed, we can skip sending IPIs to
	 * CPUs in lazy TLB mode. They will flush the TLB themselves
	 * at the next context switch.
	 *
	 * However, if page tables are getting freed, we need to send the
	 * IPI everywhere, to prevent CPUs in lazy TLB mode from tripping
	 * up on the new contents of what used to be page tables, while
	 * doing a speculative memory access.
	 */
	if (info->freed_tables)
		smp_call_function_many(cpumask, flush_tlb_func_remote,
			       (void *)info, 1);
	else
		on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func_remote,
				(void *)info, 1, GFP_ATOMIC, cpumask);
}

/*