Drivers: hv: vmbus: Implement NUMA aware CPU affinity for channels (1f656ff3) · Commits · 戴 / test

drivers/hv/channel_mgmt.c

+43 −29

Original line number	Diff line number	Diff line
		@@ -370,25 +370,27 @@ static const struct hv_vmbus_device_id hp_devs[] = {
		/*
		* We use this state to statically distribute the channel interrupt load.
		*/
		static u32 next_vp;
		static int next_numa_node_id;

		/*
		* Starting with Win8, we can statically distribute the incoming
		* channel interrupt load by binding a channel to VCPU. We
		* implement here a simple round robin scheme for distributing
		* the interrupt load.
		* We will bind channels that are not performance critical to cpu 0 and
		* performance critical channels (IDE, SCSI and Network) will be uniformly
		* distributed across all available CPUs.
		* channel interrupt load by binding a channel to VCPU.
		* We do this in a hierarchical fashion:
		* First distribute the primary channels across available NUMA nodes
		* and then distribute the subchannels amongst the CPUs in the NUMA
		* node assigned to the primary channel.
		*
		* For pre-Win8 hosts or non-performance-critical channels, we assign the
		* first CPU in the first NUMA node.
		*/
		static void init_vp_index(struct vmbus_channel channel, const uuid_le type_guid)
		{
		u32 cur_cpu;
		int i;
		bool perf_chn = false;
		u32 max_cpus = num_online_cpus();
		struct vmbus_channel primary = channel->primary_channel, prev;
		unsigned long flags;
		struct vmbus_channel *primary = channel->primary_channel;
		int next_node;
		struct cpumask available_mask;

		for (i = IDE; i < MAX_PERF_CHN; i++) {
		if (!memcmp(type_guid->b, hp_devs[i].guid,
		@@ -405,36 +407,48 @@ static void init_vp_index(struct vmbus_channel channel, const uuid_le type_gui
		* Also if the channel is not a performance critical
		* channel, bind it to cpu 0.
		*/
		channel->numa_node = 0;
		cpumask_set_cpu(0, &channel->alloced_cpus_in_node);
		channel->target_cpu = 0;
		channel->target_vp = hv_context.vp_index[0];
		return;
		}

		/*
		* Primary channels are distributed evenly across all vcpus we have.
		* When the host asks us to create subchannels it usually makes us
		* num_cpus-1 offers and we are supposed to distribute the work evenly
		* among the channel itself and all its subchannels. Make sure they are
		* all assigned to different vcpus.
		* We distribute primary channels evenly across all the available
		* NUMA nodes and within the assigned NUMA node we will assign the
		* first available CPU to the primary channel.
		* The sub-channels will be distributed evenly across the CPUs
		* available in that NUMA node.
		*/
		if (!primary)
		cur_cpu = (++next_vp % max_cpus);
		else {
		if (!primary) {
		while (true) {
		next_node = next_numa_node_id++;
		if (next_node == nr_node_ids)
		next_node = next_numa_node_id = 0;
		if (cpumask_empty(cpumask_of_node(next_node)))
		continue;
		break;
		}
		channel->numa_node = next_node;
		primary = channel;
		}

		if (cpumask_weight(&primary->alloced_cpus_in_node) ==
		cpumask_weight(cpumask_of_node(primary->numa_node))) {
		/*
		* Let's assign the first subchannel of a channel to the
		* primary->target_cpu+1 and all the subsequent channels to
		* the prev->target_cpu+1.
		* We have cycled through all the CPUs in the node;
		* reset the alloced map.
		*/
		spin_lock_irqsave(&primary->lock, flags);
		if (primary->num_sc == 1)
		cur_cpu = (primary->target_cpu + 1) % max_cpus;
		else {
		prev = list_prev_entry(channel, sc_list);
		cur_cpu = (prev->target_cpu + 1) % max_cpus;
		}
		spin_unlock_irqrestore(&primary->lock, flags);
		cpumask_clear(&primary->alloced_cpus_in_node);
		}

		cpumask_xor(&available_mask, &primary->alloced_cpus_in_node,
		cpumask_of_node(primary->numa_node));

		cur_cpu = cpumask_next(-1, &available_mask);
		cpumask_set_cpu(cur_cpu, &primary->alloced_cpus_in_node);

		channel->target_cpu = cur_cpu;
		channel->target_vp = hv_context.vp_index[cur_cpu];
		}

include/linux/hyperv.h

+5 −0

Original line number	Diff line number	Diff line
		@@ -696,6 +696,11 @@ struct vmbus_channel {
		u32 target_vp;
		/* The corresponding CPUID in the guest */
		u32 target_cpu;
		/*
		* State to manage the CPU affinity of channels.
		*/
		struct cpumask alloced_cpus_in_node;
		int numa_node;
		/*
		* Support for sub-channels. For high performance devices,
		* it will be useful to have multiple sub-channels to support

Admin message