rcu/tree: Defer kvfree_rcu() allocation to a clean context (56292e86) · Commits · 戴 / test

kernel/rcu/tree.c

+66 −43

Original line number	Diff line number	Diff line
		@@ -177,7 +177,7 @@ module_param(rcu_unlock_delay, int, 0444);
		* per-CPU. Object size is equal to one page. This value
		* can be changed at boot time.
		*/
		static int rcu_min_cached_objs = 2;
		static int rcu_min_cached_objs = 5;
		module_param(rcu_min_cached_objs, int, 0444);

		/* Retrieve RCU kthreads priority for rcutorture */
		@@ -3089,6 +3089,9 @@ struct kfree_rcu_cpu_work {
		* In order to save some per-cpu space the list is singular.
		* Even though it is lockless an access has to be protected by the
		* per-cpu lock.
		* @page_cache_work: A work to refill the cache when it is empty
		* @work_in_progress: Indicates that page_cache_work is running
		* @hrtimer: A hrtimer for scheduling a page_cache_work
		* @nr_bkv_objs: number of allocated objects at @bkvcache.
		*
		* This is a per-CPU structure. The reason that it is not included in
		@@ -3105,6 +3108,11 @@ struct kfree_rcu_cpu {
		bool monitor_todo;
		bool initialized;
		int count;

		struct work_struct page_cache_work;
		atomic_t work_in_progress;
		struct hrtimer hrtimer;

		struct llist_head bkvcache;
		int nr_bkv_objs;
		};
		@@ -3222,10 +3230,10 @@ static void kfree_rcu_work(struct work_struct *work)
		}
		rcu_lock_release(&rcu_callback_map);

		krcp = krc_this_cpu_lock(&flags);
		raw_spin_lock_irqsave(&krcp->lock, flags);
		if (put_cached_bnode(krcp, bkvhead[i]))
		bkvhead[i] = NULL;
		krc_this_cpu_unlock(krcp, flags);
		raw_spin_unlock_irqrestore(&krcp->lock, flags);

		if (bkvhead[i])
		free_page((unsigned long) bkvhead[i]);
		@@ -3352,6 +3360,57 @@ static void kfree_rcu_monitor(struct work_struct *work)
		raw_spin_unlock_irqrestore(&krcp->lock, flags);
		}

		/*
		 * hrtimer callback: recover the kfree_rcu_cpu that embeds timer @t and
		 * queue its page_cache_work on system_highpri_wq. The timer is not
		 * re-armed from here.
		 */
		static enum hrtimer_restart
		schedule_page_work_fn(struct hrtimer *t)
		{
			struct kfree_rcu_cpu *owner;

			/* @t is the hrtimer member embedded in a struct kfree_rcu_cpu. */
			owner = container_of(t, struct kfree_rcu_cpu, hrtimer);
			queue_work(system_highpri_wq, &owner->page_cache_work);

			return HRTIMER_NORESTART;
		}

		/*
		 * Work handler for krcp->page_cache_work: refill the per-CPU page cache.
		 *
		 * Makes up to rcu_min_cached_objs attempts to allocate one page each with
		 * GFP_KERNEL | __GFP_NOWARN. The allocation is done without holding
		 * krcp->lock; the lock is taken only around put_cached_bnode(). A failed
		 * allocation is skipped and the loop moves on to the next attempt. If
		 * put_cached_bnode() refuses a page (presumably because the cache already
		 * holds enough objects -- confirm against put_cached_bnode()), that page
		 * is freed and the refill stops.
		 *
		 * On exit work_in_progress is reset to 0 so that run_page_cache_worker()
		 * may arm the refill again.
		 */
		static void fill_page_cache_func(struct work_struct *work)
		{
			struct kvfree_rcu_bulk_data *bnode;
			struct kfree_rcu_cpu *krcp =
				container_of(work, struct kfree_rcu_cpu,
					page_cache_work);
			unsigned long flags;
			bool pushed;
			int i;

			for (i = 0; i < rcu_min_cached_objs; i++) {
				bnode = (struct kvfree_rcu_bulk_data *)
					__get_free_page(GFP_KERNEL | __GFP_NOWARN);

				if (bnode) {
					/* Only the cache insertion needs the per-CPU lock. */
					raw_spin_lock_irqsave(&krcp->lock, flags);
					pushed = put_cached_bnode(krcp, bnode);
					raw_spin_unlock_irqrestore(&krcp->lock, flags);

					if (!pushed) {
						/* Not accepted by the cache: give the page back and stop. */
						free_page((unsigned long) bnode);
						break;
					}
				}
			}

			/* Allow a new refill request to be scheduled. */
			atomic_set(&krcp->work_in_progress, 0);
		}

		/*
		 * Request an asynchronous refill of @krcp's page cache.
		 *
		 * Nothing is done unless rcu_scheduler_active equals
		 * RCU_SCHEDULER_RUNNING. work_in_progress is used as a "request pending"
		 * flag: only the caller that flips it from 0 to 1 arms the hrtimer whose
		 * callback, schedule_page_work_fn(), queues the refill work.
		 */
		static void
		run_page_cache_worker(struct kfree_rcu_cpu *krcp)
		{
			if (rcu_scheduler_active != RCU_SCHEDULER_RUNNING)
				return;

			/* A non-zero previous value means a refill is already pending. */
			if (atomic_xchg(&krcp->work_in_progress, 1))
				return;

			hrtimer_init(&krcp->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
			krcp->hrtimer.function = schedule_page_work_fn;
			hrtimer_start(&krcp->hrtimer, 0, HRTIMER_MODE_REL);
		}

		static inline bool
		kvfree_call_rcu_add_ptr_to_bulk(struct kfree_rcu_cpu *krcp, void *ptr)
		{
		@@ -3368,32 +3427,8 @@ kvfree_call_rcu_add_ptr_to_bulk(struct kfree_rcu_cpu *krcp, void *ptr)
		if (!krcp->bkvhead[idx] \|\|
		krcp->bkvhead[idx]->nr_records == KVFREE_BULK_MAX_ENTR) {
		bnode = get_cached_bnode(krcp);
		if (!bnode) {
		/*
		* To keep this path working on raw non-preemptible
		* sections, prevent the optional entry into the
		* allocator as it uses sleeping locks. In fact, even
		* if the caller of kfree_rcu() is preemptible, this
		* path still is not, as krcp->lock is a raw spinlock.
		* With additional page pre-allocation in the works,
		* hitting this return is going to be much less likely.
		*/
		if (IS_ENABLED(CONFIG_PREEMPT_RT))
		return false;

		/*
		* NOTE: For one argument of kvfree_rcu() we can
		* drop the lock and get the page in sleepable
		* context. That would allow to maintain an array
		* for the CONFIG_PREEMPT_RT as well if no cached
		* pages are available.
		*/
		bnode = (struct kvfree_rcu_bulk_data *)
		__get_free_page(GFP_NOWAIT \| __GFP_NOWARN);
		}

		/* Switch to emergency path. */
		if (unlikely(!bnode))
		if (!bnode)
		return false;

		/* Initialize the new block. */
		@@ -3457,12 +3492,10 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
		goto unlock_return;
		}

		/*
		* Under high memory pressure GFP_NOWAIT can fail,
		* in that case the emergency path is maintained.
		*/
		success = kvfree_call_rcu_add_ptr_to_bulk(krcp, ptr);
		if (!success) {
		run_page_cache_worker(krcp);

		if (head == NULL)
		// Inline if kvfree_rcu(one_arg) call.
		goto unlock_return;
		@@ -4482,24 +4515,14 @@ static void __init kfree_rcu_batch_init(void)

		for_each_possible_cpu(cpu) {
		struct kfree_rcu_cpu *krcp = per_cpu_ptr(&krc, cpu);
		struct kvfree_rcu_bulk_data *bnode;

		for (i = 0; i < KFREE_N_BATCHES; i++) {
		INIT_RCU_WORK(&krcp->krw_arr[i].rcu_work, kfree_rcu_work);
		krcp->krw_arr[i].krcp = krcp;
		}

		for (i = 0; i < rcu_min_cached_objs; i++) {
		bnode = (struct kvfree_rcu_bulk_data *)
		__get_free_page(GFP_NOWAIT \| __GFP_NOWARN);

		if (bnode)
		put_cached_bnode(krcp, bnode);
		else
		pr_err("Failed to preallocate for %d CPU!\n", cpu);
		}

		INIT_DELAYED_WORK(&krcp->monitor_work, kfree_rcu_monitor);
		INIT_WORK(&krcp->page_cache_work, fill_page_cache_func);
		krcp->initialized = true;
		}
		if (register_shrinker(&kfree_rcu_shrinker))

Admin message