Commit 513126ae authored by Dave Airlie

Merge branch 'drm-next-4.21' of git://people.freedesktop.org/~agd5f/linux into drm-next



amdgpu and amdkfd:
- Freesync support
- ABM support in DC
- KFD support for vega12 and polaris12
- Add sdma paging queue support for vega
- Use ACPI to query backlight range on supported platforms
- Clean up doorbell handling
- KFD fix for pasid handling under non-HWS
- Misc cleanups and fixes

scheduler:
- Revert "fix timeout handling v2"

radeon:
- Fix possible overflow on 32 bit

ttm:
- Fix for LRU handling for ghost objects

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181130192505.2946-1-alexander.deucher@amd.com
parents 467e8a51 2c486cc4
+7 −0
@@ -575,6 +575,13 @@ Explicit Fencing Properties
.. kernel-doc:: drivers/gpu/drm/drm_atomic_uapi.c
   :doc: explicit fencing properties


Variable Refresh Properties
---------------------------

.. kernel-doc:: drivers/gpu/drm/drm_connector.c
   :doc: Variable refresh properties

Existing KMS Properties
-----------------------

+9 −133
@@ -81,6 +81,7 @@
#include "amdgpu_job.h"
#include "amdgpu_bo_list.h"
#include "amdgpu_gem.h"
#include "amdgpu_doorbell.h"

#define MAX_GPU_INSTANCE		16

@@ -360,123 +361,6 @@ struct amdgpu_sa_bo {
int amdgpu_fence_slab_init(void);
void amdgpu_fence_slab_fini(void);

/*
 * GPU doorbell structures, functions & helpers
 */
typedef enum _AMDGPU_DOORBELL_ASSIGNMENT
{
	AMDGPU_DOORBELL_KIQ                     = 0x000,
	AMDGPU_DOORBELL_HIQ                     = 0x001,
	AMDGPU_DOORBELL_DIQ                     = 0x002,
	AMDGPU_DOORBELL_MEC_RING0               = 0x010,
	AMDGPU_DOORBELL_MEC_RING1               = 0x011,
	AMDGPU_DOORBELL_MEC_RING2               = 0x012,
	AMDGPU_DOORBELL_MEC_RING3               = 0x013,
	AMDGPU_DOORBELL_MEC_RING4               = 0x014,
	AMDGPU_DOORBELL_MEC_RING5               = 0x015,
	AMDGPU_DOORBELL_MEC_RING6               = 0x016,
	AMDGPU_DOORBELL_MEC_RING7               = 0x017,
	AMDGPU_DOORBELL_GFX_RING0               = 0x020,
	AMDGPU_DOORBELL_sDMA_ENGINE0            = 0x1E0,
	AMDGPU_DOORBELL_sDMA_ENGINE1            = 0x1E1,
	AMDGPU_DOORBELL_IH                      = 0x1E8,
	AMDGPU_DOORBELL_MAX_ASSIGNMENT          = 0x3FF,
	AMDGPU_DOORBELL_INVALID                 = 0xFFFF
} AMDGPU_DOORBELL_ASSIGNMENT;

struct amdgpu_doorbell {
	/* doorbell mmio */
	resource_size_t		base;
	resource_size_t		size;
	u32 __iomem		*ptr;
	u32			num_doorbells;	/* Number of doorbells actually reserved for amdgpu. */
};

/*
 * 64-bit doorbells; offsets are in QWORDs and occupy 2KB of doorbell space
 */
typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
{
	/*
	 * All compute-related doorbells (kiq, hiq, diq, traditional compute
	 * queues, user queues) should sit in a contiguous range so that
	 * programming CP_MEC_DOORBELL_RANGE_LOWER/UPPER can cover it.
	 * Compute-related doorbells are allocated from 0x00 to 0x8a.
	 */


	/* kernel scheduling */
	AMDGPU_DOORBELL64_KIQ                     = 0x00,

	/* HSA interface queue and debug queue */
	AMDGPU_DOORBELL64_HIQ                     = 0x01,
	AMDGPU_DOORBELL64_DIQ                     = 0x02,

	/* Compute engines */
	AMDGPU_DOORBELL64_MEC_RING0               = 0x03,
	AMDGPU_DOORBELL64_MEC_RING1               = 0x04,
	AMDGPU_DOORBELL64_MEC_RING2               = 0x05,
	AMDGPU_DOORBELL64_MEC_RING3               = 0x06,
	AMDGPU_DOORBELL64_MEC_RING4               = 0x07,
	AMDGPU_DOORBELL64_MEC_RING5               = 0x08,
	AMDGPU_DOORBELL64_MEC_RING6               = 0x09,
	AMDGPU_DOORBELL64_MEC_RING7               = 0x0a,

	/* User queue doorbell range (128 doorbells) */
	AMDGPU_DOORBELL64_USERQUEUE_START         = 0x0b,
	AMDGPU_DOORBELL64_USERQUEUE_END           = 0x8a,

	/* Graphics engine */
	AMDGPU_DOORBELL64_GFX_RING0               = 0x8b,

	/*
	 * Other graphics doorbells can be allocated here: from 0x8c to 0xdf
	 * Graphics voltage island aperture 1
	 * default non-graphics QWORD index is 0xe0 - 0xFF inclusive
	 */

	/* sDMA engines reserved from 0xE0 to 0xEF */
	AMDGPU_DOORBELL64_sDMA_ENGINE0            = 0xE0,
	AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0     = 0xE1,
	AMDGPU_DOORBELL64_sDMA_ENGINE1            = 0xE8,
	AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE1     = 0xE9,

	/* For vega10 SR-IOV, the SDMA doorbells must be fixed as follows
	 * to keep the same layout as the host driver, or conflicts
	 * will occur.
	 */
	AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0            = 0xF0,
	AMDGPU_VEGA10_DOORBELL64_sDMA_HI_PRI_ENGINE0     = 0xF1,
	AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1            = 0xF2,
	AMDGPU_VEGA10_DOORBELL64_sDMA_HI_PRI_ENGINE1     = 0xF3,

	/* Interrupt handler */
	AMDGPU_DOORBELL64_IH                      = 0xF4,  /* For legacy interrupt ring buffer */
	AMDGPU_DOORBELL64_IH_RING1                = 0xF5,  /* For page migration request log */
	AMDGPU_DOORBELL64_IH_RING2                = 0xF6,  /* For page migration translation/invalidation log */

	/* VCN engines use 32-bit doorbells */
	AMDGPU_DOORBELL64_VCN0_1                  = 0xF8, /* lower 32 bits for VCN0 and upper 32 bits for VCN1 */
	AMDGPU_DOORBELL64_VCN2_3                  = 0xF9,
	AMDGPU_DOORBELL64_VCN4_5                  = 0xFA,
	AMDGPU_DOORBELL64_VCN6_7                  = 0xFB,

	/* Overlap the doorbell assignment with VCN as they are mutually exclusive.
	 * VCE doorbells are 32-bit and two VCE rings share one QWORD.
	 */
	AMDGPU_DOORBELL64_UVD_RING0_1             = 0xF8,
	AMDGPU_DOORBELL64_UVD_RING2_3             = 0xF9,
	AMDGPU_DOORBELL64_UVD_RING4_5             = 0xFA,
	AMDGPU_DOORBELL64_UVD_RING6_7             = 0xFB,

	AMDGPU_DOORBELL64_VCE_RING0_1             = 0xFC,
	AMDGPU_DOORBELL64_VCE_RING2_3             = 0xFD,
	AMDGPU_DOORBELL64_VCE_RING4_5             = 0xFE,
	AMDGPU_DOORBELL64_VCE_RING6_7             = 0xFF,

	AMDGPU_DOORBELL64_MAX_ASSIGNMENT          = 0xFF,
	AMDGPU_DOORBELL64_INVALID                 = 0xFFFF
} AMDGPU_DOORBELL64_ASSIGNMENT;
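(For orientation: these enum values index 32-bit slots in the doorbell BAR described by struct amdgpu_doorbell above. A minimal sketch of a doorbell write against that mapping, simplified from the in-tree amdgpu_mm_wdoorbell helper, error path trimmed:)

	/* Sketch: write a 32-bit doorbell. 'index' is a dword index into the
	 * doorbell aperture mapped at adev->doorbell.ptr. */
	static void doorbell_write32(struct amdgpu_device *adev, u32 index, u32 v)
	{
		if (index < adev->doorbell.num_doorbells)
			writel(v, adev->doorbell.ptr + index);
		else
			DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
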

/*
 * IRQS.
 */
@@ -654,6 +538,8 @@ struct amdgpu_asic_funcs {
			       struct amdgpu_ring *ring);
	/* check if the asic needs a full reset or if soft reset will work */
	bool (*need_full_reset)(struct amdgpu_device *adev);
	/* initialize doorbell layout for a specific ASIC */
	void (*init_doorbell_index)(struct amdgpu_device *adev);
};
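(The new hook replaces the static enums removed above: each ASIC fills a per-device doorbell_index table instead. An illustrative sketch of the shape such a callback takes; the kiq and ih field names are assumptions, only sdma_engine0/1 are visible later in this diff:)

	/* Illustrative only: a per-ASIC init_doorbell_index callback populating
	 * the new amdgpu_doorbell_index struct. Values mirror the non-SR-IOV
	 * layout in the enum above; field names beyond sdma_engine0/1 assumed. */
	static void example_doorbell_index_init(struct amdgpu_device *adev)
	{
		adev->doorbell_index.kiq = 0x00;
		adev->doorbell_index.sdma_engine0 = 0xE0;
		adev->doorbell_index.sdma_engine1 = 0xE8;
		adev->doorbell_index.ih = 0xF4;
	}
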

/*
@@ -1023,6 +909,8 @@ struct amdgpu_device {
	unsigned long last_mm_index;
	bool                            in_gpu_reset;
	struct mutex  lock_reset;
	struct amdgpu_doorbell_index doorbell_index;
	int asic_reset_res;
};

static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
@@ -1047,11 +935,6 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset);
u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg);
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v);

u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index);
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index);
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v);

bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type);
bool amdgpu_device_has_dc_support(struct amdgpu_device *adev);

@@ -1113,11 +996,6 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define RREG32_IO(reg) amdgpu_io_rreg(adev, (reg))
#define WREG32_IO(reg, v) amdgpu_io_wreg(adev, (reg), (v))

#define RDOORBELL32(index) amdgpu_mm_rdoorbell(adev, (index))
#define WDOORBELL32(index, v) amdgpu_mm_wdoorbell(adev, (index), (v))
#define RDOORBELL64(index) amdgpu_mm_rdoorbell64(adev, (index))
#define WDOORBELL64(index, v) amdgpu_mm_wdoorbell64(adev, (index), (v))

#define REG_FIELD_SHIFT(reg, field) reg##__##field##__SHIFT
#define REG_FIELD_MASK(reg, field) reg##__##field##_MASK

@@ -1159,6 +1037,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define amdgpu_asic_flush_hdp(adev, r) (adev)->asic_funcs->flush_hdp((adev), (r))
#define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r))
#define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev))
#define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev))

/* Common functions */
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
@@ -1219,12 +1098,6 @@ void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe);
long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd,
			     unsigned long arg);


/*
 * functions used by amdgpu_xgmi.c
 */
int amdgpu_xgmi_add_device(struct amdgpu_device *adev);

/*
 * functions used by amdgpu_encoder.c
 */
@@ -1252,6 +1125,9 @@ bool amdgpu_acpi_is_pcie_performance_request_supported(struct amdgpu_device *ade
int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev,
						u8 perf_req, bool advertise);
int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev);

void amdgpu_acpi_get_backlight_caps(struct amdgpu_device *adev,
		struct amdgpu_dm_backlight_caps *caps);
#else
static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; }
static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { }
+92 −22
@@ -41,28 +41,21 @@ struct amdgpu_atif_notification_cfg {
};

struct amdgpu_atif_notifications {
	bool display_switch;
	bool expansion_mode_change;
	bool thermal_state;
	bool forced_power_state;
	bool system_power_state;
	bool display_conf_change;
	bool px_gfx_switch;
	bool brightness_change;
	bool dgpu_display_event;
	bool gpu_package_power_limit;
};

struct amdgpu_atif_functions {
	bool system_params;
	bool sbios_requests;
	bool select_active_disp;
	bool lid_state;
	bool get_tv_standard;
	bool set_tv_standard;
	bool get_panel_expansion_mode;
	bool set_panel_expansion_mode;
	bool temperature_change;
	bool graphics_device_types;
	bool query_backlight_transfer_characteristics;
	bool ready_to_undock;
	bool external_gpu_information;
};

struct amdgpu_atif {
@@ -72,6 +65,7 @@ struct amdgpu_atif {
	struct amdgpu_atif_functions functions;
	struct amdgpu_atif_notification_cfg notification_cfg;
	struct amdgpu_encoder *encoder_for_bl;
	struct amdgpu_dm_backlight_caps backlight_caps;
};

/* Call the ATIF method
@@ -137,15 +131,12 @@ static union acpi_object *amdgpu_atif_call(struct amdgpu_atif *atif,
 */
static void amdgpu_atif_parse_notification(struct amdgpu_atif_notifications *n, u32 mask)
{
	n->display_switch = mask & ATIF_DISPLAY_SWITCH_REQUEST_SUPPORTED;
	n->expansion_mode_change = mask & ATIF_EXPANSION_MODE_CHANGE_REQUEST_SUPPORTED;
	n->thermal_state = mask & ATIF_THERMAL_STATE_CHANGE_REQUEST_SUPPORTED;
	n->forced_power_state = mask & ATIF_FORCED_POWER_STATE_CHANGE_REQUEST_SUPPORTED;
	n->system_power_state = mask & ATIF_SYSTEM_POWER_SOURCE_CHANGE_REQUEST_SUPPORTED;
	n->display_conf_change = mask & ATIF_DISPLAY_CONF_CHANGE_REQUEST_SUPPORTED;
	n->px_gfx_switch = mask & ATIF_PX_GFX_SWITCH_REQUEST_SUPPORTED;
	n->brightness_change = mask & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST_SUPPORTED;
	n->dgpu_display_event = mask & ATIF_DGPU_DISPLAY_EVENT_SUPPORTED;
	n->gpu_package_power_limit = mask & ATIF_GPU_PACKAGE_POWER_LIMIT_REQUEST_SUPPORTED;
}

/**
@@ -162,14 +153,11 @@ static void amdgpu_atif_parse_functions(struct amdgpu_atif_functions *f, u32 mas
{
	f->system_params = mask & ATIF_GET_SYSTEM_PARAMETERS_SUPPORTED;
	f->sbios_requests = mask & ATIF_GET_SYSTEM_BIOS_REQUESTS_SUPPORTED;
	f->select_active_disp = mask & ATIF_SELECT_ACTIVE_DISPLAYS_SUPPORTED;
	f->lid_state = mask & ATIF_GET_LID_STATE_SUPPORTED;
	f->get_tv_standard = mask & ATIF_GET_TV_STANDARD_FROM_CMOS_SUPPORTED;
	f->set_tv_standard = mask & ATIF_SET_TV_STANDARD_IN_CMOS_SUPPORTED;
	f->get_panel_expansion_mode = mask & ATIF_GET_PANEL_EXPANSION_MODE_FROM_CMOS_SUPPORTED;
	f->set_panel_expansion_mode = mask & ATIF_SET_PANEL_EXPANSION_MODE_IN_CMOS_SUPPORTED;
	f->temperature_change = mask & ATIF_TEMPERATURE_CHANGE_NOTIFICATION_SUPPORTED;
	f->graphics_device_types = mask & ATIF_GET_GRAPHICS_DEVICE_TYPES_SUPPORTED;
	f->query_backlight_transfer_characteristics =
		mask & ATIF_QUERY_BACKLIGHT_TRANSFER_CHARACTERISTICS_SUPPORTED;
	f->ready_to_undock = mask & ATIF_READY_TO_UNDOCK_NOTIFICATION_SUPPORTED;
	f->external_gpu_information = mask & ATIF_GET_EXTERNAL_GPU_INFORMATION_SUPPORTED;
}

/**
@@ -310,6 +298,65 @@ out:
	return err;
}

/**
 * amdgpu_atif_query_backlight_caps - get min and max backlight input signal
 *
 * @atif: ATIF handle
 *
 * Execute the QUERY_BRIGHTNESS_TRANSFER_CHARACTERISTICS ATIF function
 * to determine the acceptable range of backlight values
 *
 * Backlight_caps.caps_valid will be set to true if the query is successful
 *
 * The input signals are in range 0-255
 *
 * This function assumes the display with backlight is the first LCD
 *
 * Returns 0 on success, error on failure.
 */
static int amdgpu_atif_query_backlight_caps(struct amdgpu_atif *atif)
{
	union acpi_object *info;
	struct atif_qbtc_output characteristics;
	struct atif_qbtc_arguments arguments;
	struct acpi_buffer params;
	size_t size;
	int err = 0;

	arguments.size = sizeof(arguments);
	arguments.requested_display = ATIF_QBTC_REQUEST_LCD1;

	params.length = sizeof(arguments);
	params.pointer = (void *)&arguments;

	info = amdgpu_atif_call(atif,
		ATIF_FUNCTION_QUERY_BRIGHTNESS_TRANSFER_CHARACTERISTICS,
		&params);
	if (!info) {
		err = -EIO;
		goto out;
	}

	size = *(u16 *) info->buffer.pointer;
	if (size < 10) {
		err = -EINVAL;
		goto out;
	}

	memset(&characteristics, 0, sizeof(characteristics));
	size = min(sizeof(characteristics), size);
	memcpy(&characteristics, info->buffer.pointer, size);

	atif->backlight_caps.caps_valid = true;
	atif->backlight_caps.min_input_signal =
			characteristics.min_input_signal;
	atif->backlight_caps.max_input_signal =
			characteristics.max_input_signal;
out:
	kfree(info);
	return err;
}

/**
 * amdgpu_atif_get_sbios_requests - get requested sbios event
 *
@@ -799,6 +846,17 @@ int amdgpu_acpi_init(struct amdgpu_device *adev)
		}
	}

	if (atif->functions.query_backlight_transfer_characteristics) {
		ret = amdgpu_atif_query_backlight_caps(atif);
		if (ret) {
			DRM_DEBUG_DRIVER("Call to QUERY_BACKLIGHT_TRANSFER_CHARACTERISTICS failed: %d\n",
					ret);
			atif->backlight_caps.caps_valid = false;
		}
	} else {
		atif->backlight_caps.caps_valid = false;
	}

out:
	adev->acpi_nb.notifier_call = amdgpu_acpi_event;
	register_acpi_notifier(&adev->acpi_nb);
@@ -806,6 +864,18 @@ out:
	return ret;
}

void amdgpu_acpi_get_backlight_caps(struct amdgpu_device *adev,
		struct amdgpu_dm_backlight_caps *caps)
{
	if (!adev->atif) {
		caps->caps_valid = false;
		return;
	}
	caps->caps_valid = adev->atif->backlight_caps.caps_valid;
	caps->min_input_signal = adev->atif->backlight_caps.min_input_signal;
	caps->max_input_signal = adev->atif->backlight_caps.max_input_signal;
}
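(A display-side consumer can use this accessor to remap a requested brightness onto the panel's usable input range. A hedged sketch; the linear scaling policy here is ours, not necessarily what amdgpu_dm does:)

	/* Illustrative: scale a 0-255 user level into the ACPI-reported
	 * [min_input_signal, max_input_signal] range when the caps are valid. */
	static u32 scale_backlight_level(struct amdgpu_device *adev, u32 level)
	{
		struct amdgpu_dm_backlight_caps caps;

		amdgpu_acpi_get_backlight_caps(adev, &caps);
		if (!caps.caps_valid)
			return level; /* no ACPI range; use the raw value */

		return caps.min_input_signal + level *
		       (caps.max_input_signal - caps.min_input_signal) / 255;
	}
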

/**
 * amdgpu_acpi_fini - tear down driver acpi support
 *
+10 −19
@@ -73,9 +73,11 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
		break;
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
		kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions();
@@ -179,25 +181,14 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
			 * process in case of 64-bit doorbells so we
			 * can use each doorbell assignment twice.
			 */
			if (adev->asic_type == CHIP_VEGA10) {
				gpu_resources.sdma_doorbell[0][i] =
					AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 + (i >> 1);
				gpu_resources.sdma_doorbell[0][i+1] =
					AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 + 0x200 + (i >> 1);
				gpu_resources.sdma_doorbell[1][i] =
					AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 + (i >> 1);
				gpu_resources.sdma_doorbell[1][i+1] =
					AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 + 0x200 + (i >> 1);
			} else {
				gpu_resources.sdma_doorbell[0][i] =
					AMDGPU_DOORBELL64_sDMA_ENGINE0 + (i >> 1);
				gpu_resources.sdma_doorbell[0][i+1] =
					AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200 + (i >> 1);
				gpu_resources.sdma_doorbell[1][i] =
					AMDGPU_DOORBELL64_sDMA_ENGINE1 + (i >> 1);
				gpu_resources.sdma_doorbell[1][i+1] =
					AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200 + (i >> 1);
			}
			gpu_resources.sdma_doorbell[0][i] =
				adev->doorbell_index.sdma_engine0 + (i >> 1);
			gpu_resources.sdma_doorbell[0][i+1] =
				adev->doorbell_index.sdma_engine0 + 0x200 + (i >> 1);
			gpu_resources.sdma_doorbell[1][i] =
				adev->doorbell_index.sdma_engine1 + (i >> 1);
			gpu_resources.sdma_doorbell[1][i+1] =
				adev->doorbell_index.sdma_engine1 + 0x200 + (i >> 1);
		}
		/* Doorbells 0x0e0-0x0ff and 0x2e0-0x2ff are reserved for
		 * SDMA, IH and VCN. So don't use them for the CP.
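(The pairing above packs two KFD queues into each 64-bit SDMA doorbell assignment, with the odd queue placed 0x200 dwords higher. A compact, illustrative restatement of that rule, not driver code:)

	/* Sketch of the assignment rule used above: queues 2k and 2k+1 share
	 * QWORD slot base + k; the odd queue sits 0x200 dwords higher. */
	static u32 kfd_sdma_doorbell_index(u32 base, unsigned int queue)
	{
		return base + (queue >> 1) + ((queue & 1) ? 0x200 : 0);
	}
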
+96 −75
@@ -46,9 +46,9 @@
/* Impose limit on how much memory KFD can use */
static struct {
	uint64_t max_system_mem_limit;
	uint64_t max_userptr_mem_limit;
	uint64_t max_ttm_mem_limit;
	int64_t system_mem_used;
	int64_t userptr_mem_used;
	int64_t ttm_mem_used;
	spinlock_t mem_limit_lock;
} kfd_mem_limit;

@@ -90,8 +90,8 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
}

/* Set memory usage limits. Currently, the limits are
 *  System (kernel) memory - 3/8th System RAM
 *  Userptr memory - 3/4th System RAM
 *  System (TTM + userptr) memory - 3/4th System RAM
 *  TTM memory - 3/8th System RAM
 */
void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
{
@@ -103,48 +103,54 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
	mem *= si.mem_unit;

	spin_lock_init(&kfd_mem_limit.mem_limit_lock);
	kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
	kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2);
	pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n",
		(kfd_mem_limit.max_system_mem_limit >> 20),
		(kfd_mem_limit.max_userptr_mem_limit >> 20));
	kfd_mem_limit.max_system_mem_limit = (mem >> 1) + (mem >> 2);
	kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
	pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
		(kfd_mem_limit.max_system_mem_limit >> 20),
		(kfd_mem_limit.max_ttm_mem_limit >> 20));
}
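(The shifts above are simple fractions of system RAM: (mem >> 1) + (mem >> 2) is 3/4, and (mem >> 1) - (mem >> 3) is 3/8. A worked check for a hypothetical 16 GiB machine:)

	/* Worked example of the limit arithmetic, not driver code:
	 * for mem = 16 GiB,
	 *   system limit = 8 GiB + 4 GiB = 12 GiB  (3/4 of RAM)
	 *   TTM limit    = 8 GiB - 2 GiB =  6 GiB  (3/8 of RAM) */
	uint64_t mem = 16ULL << 30;
	uint64_t system_limit = (mem >> 1) + (mem >> 2);	/* 12 GiB */
	uint64_t ttm_limit    = (mem >> 1) - (mem >> 3);	/*  6 GiB */
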

static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
					      uint64_t size, u32 domain)
		uint64_t size, u32 domain, bool sg)
{
	size_t acc_size;
	size_t acc_size, system_mem_needed, ttm_mem_needed;
	int ret = 0;

	acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
				       sizeof(struct amdgpu_bo));

	spin_lock(&kfd_mem_limit.mem_limit_lock);

	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
		if (kfd_mem_limit.system_mem_used + (acc_size + size) >
			kfd_mem_limit.max_system_mem_limit) {
			ret = -ENOMEM;
			goto err_no_mem;
		}
		kfd_mem_limit.system_mem_used += (acc_size + size);
	} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
		if ((kfd_mem_limit.system_mem_used + acc_size >
			kfd_mem_limit.max_system_mem_limit) ||
			(kfd_mem_limit.userptr_mem_used + (size + acc_size) >
			kfd_mem_limit.max_userptr_mem_limit)) {
			ret = -ENOMEM;
			goto err_no_mem;
		}
		kfd_mem_limit.system_mem_used += acc_size;
		kfd_mem_limit.userptr_mem_used += size;
	}
err_no_mem:
	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
		/* TTM GTT memory */
		system_mem_needed = acc_size + size;
		ttm_mem_needed = acc_size + size;
	} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
		/* Userptr */
		system_mem_needed = acc_size + size;
		ttm_mem_needed = acc_size;
	} else {
		/* VRAM and SG */
		system_mem_needed = acc_size;
		ttm_mem_needed = acc_size;
	}

	if ((kfd_mem_limit.system_mem_used + system_mem_needed >
		kfd_mem_limit.max_system_mem_limit) ||
	    (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
		kfd_mem_limit.max_ttm_mem_limit))
		ret = -ENOMEM;
	else {
		kfd_mem_limit.system_mem_used += system_mem_needed;
		kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
	}

	spin_unlock(&kfd_mem_limit.mem_limit_lock);
	return ret;
}

static void unreserve_system_mem_limit(struct amdgpu_device *adev,
				       uint64_t size, u32 domain)
		uint64_t size, u32 domain, bool sg)
{
	size_t acc_size;

@@ -154,14 +160,18 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev,
	spin_lock(&kfd_mem_limit.mem_limit_lock);
	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
		kfd_mem_limit.system_mem_used -= (acc_size + size);
	} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
		kfd_mem_limit.system_mem_used -= acc_size;
		kfd_mem_limit.userptr_mem_used -= size;
	}
	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
		kfd_mem_limit.system_mem_used -= (acc_size + size);
		kfd_mem_limit.ttm_mem_used -= (acc_size + size);
	} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
		kfd_mem_limit.system_mem_used -= (acc_size + size);
		kfd_mem_limit.ttm_mem_used -= acc_size;
	} else {
		kfd_mem_limit.system_mem_used -= acc_size;
		kfd_mem_limit.ttm_mem_used -= acc_size;
	}
	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
		  "kfd system memory accounting unbalanced");
	WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
		  "kfd userptr memory accounting unbalanced");
	WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
		  "kfd TTM memory accounting unbalanced");

	spin_unlock(&kfd_mem_limit.mem_limit_lock);
}
@@ -171,16 +181,22 @@ void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
	spin_lock(&kfd_mem_limit.mem_limit_lock);

	if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
		kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
		kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo);
		kfd_mem_limit.system_mem_used -=
			(bo->tbo.acc_size + amdgpu_bo_size(bo));
		kfd_mem_limit.ttm_mem_used -= bo->tbo.acc_size;
	} else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
		kfd_mem_limit.system_mem_used -=
			(bo->tbo.acc_size + amdgpu_bo_size(bo));
		kfd_mem_limit.ttm_mem_used -=
			(bo->tbo.acc_size + amdgpu_bo_size(bo));
	} else {
		kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
		kfd_mem_limit.ttm_mem_used -= bo->tbo.acc_size;
	}
	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
		  "kfd system memory accounting unbalanced");
	WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
		  "kfd userptr memory accounting unbalanced");
	WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
		  "kfd TTM memory accounting unbalanced");

	spin_unlock(&kfd_mem_limit.mem_limit_lock);
}
@@ -395,23 +411,6 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
	return 0;
}

static int sync_vm_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
			 struct dma_fence *f)
{
	int ret = amdgpu_sync_fence(adev, sync, f, false);

	/* Sync objects can't handle multiple GPUs (contexts) updating
	 * sync->last_vm_update. Fortunately we don't need it for
	 * KFD's purposes, so we can just drop that fence.
	 */
	if (sync->last_vm_update) {
		dma_fence_put(sync->last_vm_update);
		sync->last_vm_update = NULL;
	}

	return ret;
}

static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
{
	struct amdgpu_bo *pd = vm->root.base.bo;
@@ -422,7 +421,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
	if (ret)
		return ret;

	return sync_vm_fence(adev, sync, vm->last_update);
	return amdgpu_sync_fence(NULL, sync, vm->last_update, false);
}

/* add_bo_to_vm - Add a BO to a VM
@@ -826,7 +825,7 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
	/* Add the eviction fence back */
	amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);

	sync_vm_fence(adev, sync, bo_va->last_pt_update);
	amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false);

	return 0;
}
@@ -851,7 +850,7 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
		return ret;
	}

	return sync_vm_fence(adev, sync, bo_va->last_pt_update);
	return amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false);
}

static int map_bo_to_gpuvm(struct amdgpu_device *adev,
@@ -901,6 +900,26 @@ static int process_validate_vms(struct amdkfd_process_info *process_info)
	return 0;
}

static int process_sync_pds_resv(struct amdkfd_process_info *process_info,
				 struct amdgpu_sync *sync)
{
	struct amdgpu_vm *peer_vm;
	int ret;

	list_for_each_entry(peer_vm, &process_info->vm_list_head,
			    vm_list_node) {
		struct amdgpu_bo *pd = peer_vm->root.base.bo;

		ret = amdgpu_sync_resv(NULL,
					sync, pd->tbo.resv,
					AMDGPU_FENCE_OWNER_UNDEFINED, false);
		if (ret)
			return ret;
	}

	return 0;
}

static int process_update_pds(struct amdkfd_process_info *process_info,
			      struct amdgpu_sync *sync)
{
@@ -1199,7 +1218,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
	byte_align = (adev->family == AMDGPU_FAMILY_VI &&
			adev->asic_type != CHIP_FIJI &&
			adev->asic_type != CHIP_POLARIS10 &&
			adev->asic_type != CHIP_POLARIS11) ?
			adev->asic_type != CHIP_POLARIS11 &&
			adev->asic_type != CHIP_POLARIS12) ?
			VI_BO_SIZE_ALIGN : 1;

	mapping_flags = AMDGPU_VM_PAGE_READABLE;
@@ -1215,10 +1235,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(

	amdgpu_sync_create(&(*mem)->sync);

	ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain);
	ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size,
						     alloc_domain, false);
	if (ret) {
		pr_debug("Insufficient system memory\n");
		goto err_reserve_system_mem;
		goto err_reserve_limit;
	}

	pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
@@ -1266,10 +1287,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
allocate_init_user_pages_failed:
	amdgpu_bo_unref(&bo);
	/* Don't unreserve system mem limit twice */
	goto err_reserve_system_mem;
	goto err_reserve_limit;
err_bo_create:
	unreserve_system_mem_limit(adev, size, alloc_domain);
err_reserve_system_mem:
	unreserve_system_mem_limit(adev, size, alloc_domain, false);
err_reserve_limit:
	mutex_destroy(&(*mem)->lock);
	kfree(*mem);
	return ret;
@@ -1405,7 +1426,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
	 * the queues are still stopped and we can leave mapping for
	 * the next restore worker
	 */
	if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) &&
	    bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
		is_invalid_userptr = true;

	if (check_if_add_bo_to_vm(avm, mem)) {
@@ -2044,13 +2066,10 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
	if (ret)
		goto validate_map_fail;

	/* Wait for PD/PTs validate to finish */
	/* FIXME: I think this isn't needed */
	list_for_each_entry(peer_vm, &process_info->vm_list_head,
			    vm_list_node) {
		struct amdgpu_bo *bo = peer_vm->root.base.bo;

		ttm_bo_wait(&bo->tbo, false, false);
	ret = process_sync_pds_resv(process_info, &sync_obj);
	if (ret) {
		pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
		goto validate_map_fail;
	}

	/* Validate BOs and map them to GPUVM (update VM page tables). */
@@ -2066,7 +2085,11 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
			pr_debug("Memory eviction: Validate BOs failed. Try again\n");
			goto validate_map_fail;
		}

		ret = amdgpu_sync_fence(NULL, &sync_obj, bo->tbo.moving, false);
		if (ret) {
			pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
			goto validate_map_fail;
		}
		list_for_each_entry(bo_va_entry, &mem->bo_va_list,
				    bo_list) {
			ret = update_gpuvm_pte((struct amdgpu_device *)
@@ -2087,6 +2110,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
		goto validate_map_fail;
	}

	/* Wait for validate and PT updates to finish */
	amdgpu_sync_wait(&sync_obj, false);

	/* Release old eviction fence and create new one, because fence only
@@ -2105,10 +2129,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
	process_info->eviction_fence = new_fence;
	*ef = dma_fence_get(&new_fence->base);

	/* Wait for validate to finish and attach new eviction fence */
	list_for_each_entry(mem, &process_info->kfd_bo_list,
		validate_list.head)
		ttm_bo_wait(&mem->bo->tbo, false, false);
	/* Attach new eviction fence to all BOs */
	list_for_each_entry(mem, &process_info->kfd_bo_list,
		validate_list.head)
		amdgpu_bo_fence(mem->bo,