Commit da361dd1 authored by shaoyunl's avatar shaoyunl Committed by Alex Deucher
Browse files

drm/amdgpu: Implement get num of hops between two xgmi device



KFD need to provide the info for upper level to determine the data path

Signed-off-by: default avatarshaoyunl <shaoyun.liu@amd.com>
Reviewed-by: default avatarFelix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: default avatarAlex Deucher <alexander.deucher@amd.com>
parent d8e408a8
Loading
Loading
Loading
Loading
+15 −0
Original line number Diff line number Diff line
@@ -27,6 +27,7 @@
#include "amdgpu_gfx.h"
#include <linux/module.h>
#include <linux/dma-buf.h>
#include "amdgpu_xgmi.h"

static const unsigned int compute_vmid_bitmap = 0xFF00;

@@ -518,6 +519,20 @@ uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd)

	return adev->gmc.xgmi.hive_id;
}
uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src)
{
	struct amdgpu_device *peer_adev = (struct amdgpu_device *)src;
	struct amdgpu_device *adev = (struct amdgpu_device *)dst;
	int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev);

	if (ret < 0) {
		DRM_ERROR("amdgpu: failed to get  xgmi hops count between node %d and %d. ret = %d\n",
			adev->gmc.xgmi.physical_node_id,
			peer_adev->gmc.xgmi.physical_node_id, ret);
		ret = 0;
	}
	return  (uint8_t)ret;
}

uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd)
{
+1 −0
Original line number Diff line number Diff line
@@ -170,6 +170,7 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd);
uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd);
uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src);

#define read_user_wptr(mmptr, wptr, dst)				\
	({								\
+14 −12
Original line number Diff line number Diff line
@@ -95,12 +95,26 @@ struct psp_funcs
	int (*ras_cure_posion)(struct psp_context *psp, uint64_t *mode_ptr);
};

#define AMDGPU_XGMI_MAX_CONNECTED_NODES		64
struct psp_xgmi_node_info {
	uint64_t				node_id;
	uint8_t					num_hops;
	uint8_t					is_sharing_enabled;
	enum ta_xgmi_assigned_sdma_engine	sdma_engine;
};

struct psp_xgmi_topology_info {
	uint32_t			num_nodes;
	struct psp_xgmi_node_info	nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
};

struct psp_xgmi_context {
	uint8_t				initialized;
	uint32_t			session_id;
	struct amdgpu_bo                *xgmi_shared_bo;
	uint64_t                        xgmi_shared_mc_addr;
	void                            *xgmi_shared_buf;
	struct psp_xgmi_topology_info	top_info;
};

struct psp_ras_context {
@@ -181,18 +195,6 @@ struct amdgpu_psp_funcs {
					enum AMDGPU_UCODE_ID);
};

#define AMDGPU_XGMI_MAX_CONNECTED_NODES		64
struct psp_xgmi_node_info {
	uint64_t				node_id;
	uint8_t					num_hops;
	uint8_t					is_sharing_enabled;
	enum ta_xgmi_assigned_sdma_engine	sdma_engine;
};

struct psp_xgmi_topology_info {
	uint32_t			num_nodes;
	struct psp_xgmi_node_info	nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
};

#define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type))
#define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))
+18 −5
Original line number Diff line number Diff line
@@ -238,7 +238,7 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
	/* Each psp need to set the latest topology */
	ret = psp_xgmi_set_topology_info(&adev->psp,
					 hive->number_devices,
					 &hive->topology_info);
					 &adev->psp.xgmi_context.top_info);
	if (ret)
		dev_err(adev->dev,
			"XGMI: Set topology failure on device %llx, hive %llx, ret %d",
@@ -248,9 +248,22 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
	return ret;
}


int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
		struct amdgpu_device *peer_adev)
{
	struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
	int i;

	for (i = 0 ; i < top->num_nodes; ++i)
		if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
			return top->nodes[i].num_hops;
	return	-EINVAL;
}

int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
{
	struct psp_xgmi_topology_info *hive_topology;
	struct psp_xgmi_topology_info *top_info;
	struct amdgpu_hive_info *hive;
	struct amdgpu_xgmi	*entry;
	struct amdgpu_device *tmp_adev = NULL;
@@ -283,16 +296,16 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
		goto exit;
	}

	hive_topology = &hive->topology_info;
	top_info = &adev->psp.xgmi_context.top_info;

	list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
	list_for_each_entry(entry, &hive->device_list, head)
		hive_topology->nodes[count++].node_id = entry->node_id;
		top_info->nodes[count++].node_id = entry->node_id;
	hive->number_devices = count;

	/* Each psp need to get the latest topology */
	list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
		ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, hive_topology);
		ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, top_info);
		if (ret) {
			dev_err(tmp_adev->dev,
				"XGMI: Get topology failure on device %llx, hive %llx, ret %d",
+2 −1
Original line number Diff line number Diff line
@@ -27,7 +27,6 @@
struct amdgpu_hive_info {
	uint64_t		hive_id;
	struct list_head	device_list;
	struct psp_xgmi_topology_info	topology_info;
	int number_devices;
	struct mutex hive_lock, reset_lock;
	struct kobject *kobj;
@@ -41,6 +40,8 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
		struct amdgpu_device *peer_adev);

static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
		struct amdgpu_device *bo_adev)