Commit 513126ae authored by Dave Airlie

Merge branch 'drm-next-4.21' of git://people.freedesktop.org/~agd5f/linux into drm-next



amdgpu and amdkfd:
- Freesync support
- ABM support in DC
- KFD support for vega12 and polaris12
- Add sdma paging queue support for vega
- Use ACPI to query backlight range on supported platforms
- Clean up doorbell handling
- KFD fix for pasid handling under non-HWS
- Misc cleanups and fixes

scheduler:
- Revert "fix timeout handling v2"

radeon:
- Fix possible overflow on 32 bit

ttm:
- Fix for LRU handling for ghost objects

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20181130192505.2946-1-alexander.deucher@amd.com
parents 467e8a51 2c486cc4
+7 −0
@@ -575,6 +575,13 @@ Explicit Fencing Properties
.. kernel-doc:: drivers/gpu/drm/drm_atomic_uapi.c
   :doc: explicit fencing properties


Variable Refresh Properties
---------------------------

.. kernel-doc:: drivers/gpu/drm/drm_connector.c
   :doc: Variable refresh properties

Existing KMS Properties
-----------------------

+9 −133
@@ -81,6 +81,7 @@
#include "amdgpu_job.h"
#include "amdgpu_bo_list.h"
#include "amdgpu_gem.h"
#include "amdgpu_doorbell.h"

#define MAX_GPU_INSTANCE		16

@@ -360,123 +361,6 @@ struct amdgpu_sa_bo {
int amdgpu_fence_slab_init(void);
void amdgpu_fence_slab_fini(void);

/*
 * GPU doorbell structures, functions & helpers
 */
typedef enum _AMDGPU_DOORBELL_ASSIGNMENT
{
	AMDGPU_DOORBELL_KIQ                     = 0x000,
	AMDGPU_DOORBELL_HIQ                     = 0x001,
	AMDGPU_DOORBELL_DIQ                     = 0x002,
	AMDGPU_DOORBELL_MEC_RING0               = 0x010,
	AMDGPU_DOORBELL_MEC_RING1               = 0x011,
	AMDGPU_DOORBELL_MEC_RING2               = 0x012,
	AMDGPU_DOORBELL_MEC_RING3               = 0x013,
	AMDGPU_DOORBELL_MEC_RING4               = 0x014,
	AMDGPU_DOORBELL_MEC_RING5               = 0x015,
	AMDGPU_DOORBELL_MEC_RING6               = 0x016,
	AMDGPU_DOORBELL_MEC_RING7               = 0x017,
	AMDGPU_DOORBELL_GFX_RING0               = 0x020,
	AMDGPU_DOORBELL_sDMA_ENGINE0            = 0x1E0,
	AMDGPU_DOORBELL_sDMA_ENGINE1            = 0x1E1,
	AMDGPU_DOORBELL_IH                      = 0x1E8,
	AMDGPU_DOORBELL_MAX_ASSIGNMENT          = 0x3FF,
	AMDGPU_DOORBELL_INVALID                 = 0xFFFF
} AMDGPU_DOORBELL_ASSIGNMENT;

struct amdgpu_doorbell {
	/* doorbell mmio */
	resource_size_t		base;
	resource_size_t		size;
	u32 __iomem		*ptr;
	u32			num_doorbells;	/* Number of doorbells actually reserved for amdgpu. */
};

/*
 * 64-bit doorbells; offsets are in QWORDs and occupy 2KB of doorbell space
 */
typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
{
	/*
	 * All compute-related doorbells (kiq, hiq, diq, traditional compute
	 * queues, user queues) should sit in a contiguous range so that
	 * programming CP_MEC_DOORBELL_RANGE_LOWER/UPPER can cover it.
	 * Compute-related doorbells are allocated from 0x00 to 0x8a.
	 */


	/* kernel scheduling */
	AMDGPU_DOORBELL64_KIQ                     = 0x00,

	/* HSA interface queue and debug queue */
	AMDGPU_DOORBELL64_HIQ                     = 0x01,
	AMDGPU_DOORBELL64_DIQ                     = 0x02,

	/* Compute engines */
	AMDGPU_DOORBELL64_MEC_RING0               = 0x03,
	AMDGPU_DOORBELL64_MEC_RING1               = 0x04,
	AMDGPU_DOORBELL64_MEC_RING2               = 0x05,
	AMDGPU_DOORBELL64_MEC_RING3               = 0x06,
	AMDGPU_DOORBELL64_MEC_RING4               = 0x07,
	AMDGPU_DOORBELL64_MEC_RING5               = 0x08,
	AMDGPU_DOORBELL64_MEC_RING6               = 0x09,
	AMDGPU_DOORBELL64_MEC_RING7               = 0x0a,

	/* User queue doorbell range (128 doorbells) */
	AMDGPU_DOORBELL64_USERQUEUE_START         = 0x0b,
	AMDGPU_DOORBELL64_USERQUEUE_END           = 0x8a,

	/* Graphics engine */
	AMDGPU_DOORBELL64_GFX_RING0               = 0x8b,

	/*
	 * Other graphics doorbells can be allocated here: from 0x8c to 0xdf
	 * Graphics voltage island aperture 1
	 * default non-graphics QWORD index is 0xe0 - 0xFF inclusive
	 */

	/* sDMA engines reserved from 0xE0 to 0xEF */
	AMDGPU_DOORBELL64_sDMA_ENGINE0            = 0xE0,
	AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0     = 0xE1,
	AMDGPU_DOORBELL64_sDMA_ENGINE1            = 0xE8,
	AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE1     = 0xE9,

	/* For vega10 SR-IOV, the SDMA doorbells must be fixed as follows
	 * to keep the same layout as the host driver, or conflicts
	 * will occur.
	 */
	AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0            = 0xF0,
	AMDGPU_VEGA10_DOORBELL64_sDMA_HI_PRI_ENGINE0     = 0xF1,
	AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1            = 0xF2,
	AMDGPU_VEGA10_DOORBELL64_sDMA_HI_PRI_ENGINE1     = 0xF3,

	/* Interrupt handler */
	AMDGPU_DOORBELL64_IH                      = 0xF4,  /* For legacy interrupt ring buffer */
	AMDGPU_DOORBELL64_IH_RING1                = 0xF5,  /* For page migration request log */
	AMDGPU_DOORBELL64_IH_RING2                = 0xF6,  /* For page migration translation/invalidation log */

	/* VCN engines use 32-bit doorbells */
	AMDGPU_DOORBELL64_VCN0_1                  = 0xF8, /* lower 32 bits for VCN0 and upper 32 bits for VCN1 */
	AMDGPU_DOORBELL64_VCN2_3                  = 0xF9,
	AMDGPU_DOORBELL64_VCN4_5                  = 0xFA,
	AMDGPU_DOORBELL64_VCN6_7                  = 0xFB,

	/* Overlap the doorbell assignment with VCN as they are mutually exclusive.
	 * VCE doorbells are 32-bit and two VCE rings share one QWORD.
	 */
	AMDGPU_DOORBELL64_UVD_RING0_1             = 0xF8,
	AMDGPU_DOORBELL64_UVD_RING2_3             = 0xF9,
	AMDGPU_DOORBELL64_UVD_RING4_5             = 0xFA,
	AMDGPU_DOORBELL64_UVD_RING6_7             = 0xFB,

	AMDGPU_DOORBELL64_VCE_RING0_1             = 0xFC,
	AMDGPU_DOORBELL64_VCE_RING2_3             = 0xFD,
	AMDGPU_DOORBELL64_VCE_RING4_5             = 0xFE,
	AMDGPU_DOORBELL64_VCE_RING6_7             = 0xFF,

	AMDGPU_DOORBELL64_MAX_ASSIGNMENT          = 0xFF,
	AMDGPU_DOORBELL64_INVALID                 = 0xFFFF
} AMDGPU_DOORBELL64_ASSIGNMENT;
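(For orientation: these enum values index 32-bit slots in the doorbell BAR described by struct amdgpu_doorbell above. A minimal sketch of a doorbell write against that mapping, simplified from the in-tree amdgpu_mm_wdoorbell helper, error path trimmed:)

	/* Sketch: write a 32-bit doorbell. 'index' is a dword index into the
	 * doorbell aperture mapped at adev->doorbell.ptr. */
	static void doorbell_write32(struct amdgpu_device *adev, u32 index, u32 v)
	{
		if (index < adev->doorbell.num_doorbells)
			writel(v, adev->doorbell.ptr + index);
		else
			DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
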

/*
 * IRQS.
 */
@@ -654,6 +538,8 @@ struct amdgpu_asic_funcs {
			       struct amdgpu_ring *ring);
	/* check if the asic needs a full reset or if soft reset will work */
	bool (*need_full_reset)(struct amdgpu_device *adev);
	/* initialize doorbell layout for a specific ASIC */
	void (*init_doorbell_index)(struct amdgpu_device *adev);
};
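(The new hook replaces the static enums removed above: each ASIC fills a per-device doorbell_index table instead. An illustrative sketch of the shape such a callback takes; the kiq and ih field names are assumptions, only sdma_engine0/1 are visible later in this diff:)

	/* Illustrative only: a per-ASIC init_doorbell_index callback populating
	 * the new amdgpu_doorbell_index struct. Values mirror the non-SR-IOV
	 * layout in the enum above; field names beyond sdma_engine0/1 assumed. */
	static void example_doorbell_index_init(struct amdgpu_device *adev)
	{
		adev->doorbell_index.kiq = 0x00;
		adev->doorbell_index.sdma_engine0 = 0xE0;
		adev->doorbell_index.sdma_engine1 = 0xE8;
		adev->doorbell_index.ih = 0xF4;
	}
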

/*
@@ -1023,6 +909,8 @@ struct amdgpu_device {
	unsigned long last_mm_index;
	bool                            in_gpu_reset;
	struct mutex  lock_reset;
	struct amdgpu_doorbell_index doorbell_index;
	int asic_reset_res;
};

static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
@@ -1047,11 +935,6 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset);
u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg);
void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v);

u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index);
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index);
void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v);

bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type);
bool amdgpu_device_has_dc_support(struct amdgpu_device *adev);

@@ -1113,11 +996,6 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define RREG32_IO(reg) amdgpu_io_rreg(adev, (reg))
#define WREG32_IO(reg, v) amdgpu_io_wreg(adev, (reg), (v))

#define RDOORBELL32(index) amdgpu_mm_rdoorbell(adev, (index))
#define WDOORBELL32(index, v) amdgpu_mm_wdoorbell(adev, (index), (v))
#define RDOORBELL64(index) amdgpu_mm_rdoorbell64(adev, (index))
#define WDOORBELL64(index, v) amdgpu_mm_wdoorbell64(adev, (index), (v))

#define REG_FIELD_SHIFT(reg, field) reg##__##field##__SHIFT
#define REG_FIELD_MASK(reg, field) reg##__##field##_MASK

@@ -1159,6 +1037,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define amdgpu_asic_flush_hdp(adev, r) (adev)->asic_funcs->flush_hdp((adev), (r))
#define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r))
#define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev))
#define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev))

/* Common functions */
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
@@ -1219,12 +1098,6 @@ void amdgpu_disable_vblank_kms(struct drm_device *dev, unsigned int pipe);
long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd,
			     unsigned long arg);


/*
 * functions used by amdgpu_xgmi.c
 */
int amdgpu_xgmi_add_device(struct amdgpu_device *adev);

/*
 * functions used by amdgpu_encoder.c
 */
@@ -1252,6 +1125,9 @@ bool amdgpu_acpi_is_pcie_performance_request_supported(struct amdgpu_device *ade
int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev,
						u8 perf_req, bool advertise);
int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev);

void amdgpu_acpi_get_backlight_caps(struct amdgpu_device *adev,
		struct amdgpu_dm_backlight_caps *caps);
#else
static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; }
static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { }
+92 −22
@@ -41,28 +41,21 @@ struct amdgpu_atif_notification_cfg {
};

struct amdgpu_atif_notifications {
	bool display_switch;
	bool expansion_mode_change;
	bool thermal_state;
	bool forced_power_state;
	bool system_power_state;
	bool display_conf_change;
	bool px_gfx_switch;
	bool brightness_change;
	bool dgpu_display_event;
	bool gpu_package_power_limit;
};

struct amdgpu_atif_functions {
	bool system_params;
	bool sbios_requests;
	bool select_active_disp;
	bool lid_state;
	bool get_tv_standard;
	bool set_tv_standard;
	bool get_panel_expansion_mode;
	bool set_panel_expansion_mode;
	bool temperature_change;
	bool graphics_device_types;
	bool query_backlight_transfer_characteristics;
	bool ready_to_undock;
	bool external_gpu_information;
};

struct amdgpu_atif {
@@ -72,6 +65,7 @@ struct amdgpu_atif {
	struct amdgpu_atif_functions functions;
	struct amdgpu_atif_notification_cfg notification_cfg;
	struct amdgpu_encoder *encoder_for_bl;
	struct amdgpu_dm_backlight_caps backlight_caps;
};

/* Call the ATIF method
@@ -137,15 +131,12 @@ static union acpi_object *amdgpu_atif_call(struct amdgpu_atif *atif,
 */
static void amdgpu_atif_parse_notification(struct amdgpu_atif_notifications *n, u32 mask)
{
	n->display_switch = mask & ATIF_DISPLAY_SWITCH_REQUEST_SUPPORTED;
	n->expansion_mode_change = mask & ATIF_EXPANSION_MODE_CHANGE_REQUEST_SUPPORTED;
	n->thermal_state = mask & ATIF_THERMAL_STATE_CHANGE_REQUEST_SUPPORTED;
	n->forced_power_state = mask & ATIF_FORCED_POWER_STATE_CHANGE_REQUEST_SUPPORTED;
	n->system_power_state = mask & ATIF_SYSTEM_POWER_SOURCE_CHANGE_REQUEST_SUPPORTED;
	n->display_conf_change = mask & ATIF_DISPLAY_CONF_CHANGE_REQUEST_SUPPORTED;
	n->px_gfx_switch = mask & ATIF_PX_GFX_SWITCH_REQUEST_SUPPORTED;
	n->brightness_change = mask & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST_SUPPORTED;
	n->dgpu_display_event = mask & ATIF_DGPU_DISPLAY_EVENT_SUPPORTED;
	n->gpu_package_power_limit = mask & ATIF_GPU_PACKAGE_POWER_LIMIT_REQUEST_SUPPORTED;
}

/**
@@ -162,14 +153,11 @@ static void amdgpu_atif_parse_functions(struct amdgpu_atif_functions *f, u32 mas
{
	f->system_params = mask & ATIF_GET_SYSTEM_PARAMETERS_SUPPORTED;
	f->sbios_requests = mask & ATIF_GET_SYSTEM_BIOS_REQUESTS_SUPPORTED;
	f->select_active_disp = mask & ATIF_SELECT_ACTIVE_DISPLAYS_SUPPORTED;
	f->lid_state = mask & ATIF_GET_LID_STATE_SUPPORTED;
	f->get_tv_standard = mask & ATIF_GET_TV_STANDARD_FROM_CMOS_SUPPORTED;
	f->set_tv_standard = mask & ATIF_SET_TV_STANDARD_IN_CMOS_SUPPORTED;
	f->get_panel_expansion_mode = mask & ATIF_GET_PANEL_EXPANSION_MODE_FROM_CMOS_SUPPORTED;
	f->set_panel_expansion_mode = mask & ATIF_SET_PANEL_EXPANSION_MODE_IN_CMOS_SUPPORTED;
	f->temperature_change = mask & ATIF_TEMPERATURE_CHANGE_NOTIFICATION_SUPPORTED;
	f->graphics_device_types = mask & ATIF_GET_GRAPHICS_DEVICE_TYPES_SUPPORTED;
	f->query_backlight_transfer_characteristics =
		mask & ATIF_QUERY_BACKLIGHT_TRANSFER_CHARACTERISTICS_SUPPORTED;
	f->ready_to_undock = mask & ATIF_READY_TO_UNDOCK_NOTIFICATION_SUPPORTED;
	f->external_gpu_information = mask & ATIF_GET_EXTERNAL_GPU_INFORMATION_SUPPORTED;
}

/**
@@ -310,6 +298,65 @@ out:
	return err;
}

/**
 * amdgpu_atif_query_backlight_caps - get min and max backlight input signal
 *
 * @atif: ATIF handle
 *
 * Execute the QUERY_BRIGHTNESS_TRANSFER_CHARACTERISTICS ATIF function
 * to determine the acceptable range of backlight values
 *
 * Backlight_caps.caps_valid will be set to true if the query is successful
 *
 * The input signals are in range 0-255
 *
 * This function assumes the display with backlight is the first LCD
 *
 * Returns 0 on success, error on failure.
 */
static int amdgpu_atif_query_backlight_caps(struct amdgpu_atif *atif)
{
	union acpi_object *info;
	struct atif_qbtc_output characteristics;
	struct atif_qbtc_arguments arguments;
	struct acpi_buffer params;
	size_t size;
	int err = 0;

	arguments.size = sizeof(arguments);
	arguments.requested_display = ATIF_QBTC_REQUEST_LCD1;

	params.length = sizeof(arguments);
	params.pointer = (void *)&arguments;

	info = amdgpu_atif_call(atif,
		ATIF_FUNCTION_QUERY_BRIGHTNESS_TRANSFER_CHARACTERISTICS,
		&params);
	if (!info) {
		err = -EIO;
		goto out;
	}

	size = *(u16 *) info->buffer.pointer;
	if (size < 10) {
		err = -EINVAL;
		goto out;
	}

	memset(&characteristics, 0, sizeof(characteristics));
	size = min(sizeof(characteristics), size);
	memcpy(&characteristics, info->buffer.pointer, size);

	atif->backlight_caps.caps_valid = true;
	atif->backlight_caps.min_input_signal =
			characteristics.min_input_signal;
	atif->backlight_caps.max_input_signal =
			characteristics.max_input_signal;
out:
	kfree(info);
	return err;
}

/**
 * amdgpu_atif_get_sbios_requests - get requested sbios event
 *
@@ -799,6 +846,17 @@ int amdgpu_acpi_init(struct amdgpu_device *adev)
		}
	}

	if (atif->functions.query_backlight_transfer_characteristics) {
		ret = amdgpu_atif_query_backlight_caps(atif);
		if (ret) {
			DRM_DEBUG_DRIVER("Call to QUERY_BACKLIGHT_TRANSFER_CHARACTERISTICS failed: %d\n",
					ret);
			atif->backlight_caps.caps_valid = false;
		}
	} else {
		atif->backlight_caps.caps_valid = false;
	}

out:
	adev->acpi_nb.notifier_call = amdgpu_acpi_event;
	register_acpi_notifier(&adev->acpi_nb);
@@ -806,6 +864,18 @@ out:
	return ret;
}

void amdgpu_acpi_get_backlight_caps(struct amdgpu_device *adev,
		struct amdgpu_dm_backlight_caps *caps)
{
	if (!adev->atif) {
		caps->caps_valid = false;
		return;
	}
	caps->caps_valid = adev->atif->backlight_caps.caps_valid;
	caps->min_input_signal = adev->atif->backlight_caps.min_input_signal;
	caps->max_input_signal = adev->atif->backlight_caps.max_input_signal;
}
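(A display-side consumer can use this accessor to remap a requested brightness onto the panel's usable input range. A hedged sketch; the linear scaling policy here is ours, not necessarily what amdgpu_dm does:)

	/* Illustrative: scale a 0-255 user level into the ACPI-reported
	 * [min_input_signal, max_input_signal] range when the caps are valid. */
	static u32 scale_backlight_level(struct amdgpu_device *adev, u32 level)
	{
		struct amdgpu_dm_backlight_caps caps;

		amdgpu_acpi_get_backlight_caps(adev, &caps);
		if (!caps.caps_valid)
			return level; /* no ACPI range; use the raw value */

		return caps.min_input_signal + level *
		       (caps.max_input_signal - caps.min_input_signal) / 255;
	}
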

/**
 * amdgpu_acpi_fini - tear down driver acpi support
 *
+10 −19
@@ -73,9 +73,11 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
		break;
	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
		kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions();
@@ -179,25 +181,14 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
			 * process in case of 64-bit doorbells so we
			 * can use each doorbell assignment twice.
			 */
			if (adev->asic_type == CHIP_VEGA10) {
				gpu_resources.sdma_doorbell[0][i] =
					AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 + (i >> 1);
				gpu_resources.sdma_doorbell[0][i+1] =
					AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 + 0x200 + (i >> 1);
				gpu_resources.sdma_doorbell[1][i] =
					AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 + (i >> 1);
				gpu_resources.sdma_doorbell[1][i+1] =
					AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 + 0x200 + (i >> 1);
			} else {
				gpu_resources.sdma_doorbell[0][i] =
					AMDGPU_DOORBELL64_sDMA_ENGINE0 + (i >> 1);
				gpu_resources.sdma_doorbell[0][i+1] =
					AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200 + (i >> 1);
				gpu_resources.sdma_doorbell[1][i] =
					AMDGPU_DOORBELL64_sDMA_ENGINE1 + (i >> 1);
				gpu_resources.sdma_doorbell[1][i+1] =
					AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200 + (i >> 1);
			}
			gpu_resources.sdma_doorbell[0][i] =
				adev->doorbell_index.sdma_engine0 + (i >> 1);
			gpu_resources.sdma_doorbell[0][i+1] =
				adev->doorbell_index.sdma_engine0 + 0x200 + (i >> 1);
			gpu_resources.sdma_doorbell[1][i] =
				adev->doorbell_index.sdma_engine1 + (i >> 1);
			gpu_resources.sdma_doorbell[1][i+1] =
				adev->doorbell_index.sdma_engine1 + 0x200 + (i >> 1);
		}
		/* Doorbells 0x0e0-0x0ff and 0x2e0-0x2ff are reserved for
		 * SDMA, IH and VCN. So don't use them for the CP.
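(The pairing above packs two KFD queues into each 64-bit SDMA doorbell assignment, with the odd queue placed 0x200 dwords higher. A compact, illustrative restatement of that rule, not driver code:)

	/* Sketch of the assignment rule used above: queues 2k and 2k+1 share
	 * QWORD slot base + k; the odd queue sits 0x200 dwords higher. */
	static u32 kfd_sdma_doorbell_index(u32 base, unsigned int queue)
	{
		return base + (queue >> 1) + ((queue & 1) ? 0x200 : 0);
	}
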
+96 −75
@@ -46,9 +46,9 @@
/* Impose limit on how much memory KFD can use */
static struct {
	uint64_t max_system_mem_limit;
	uint64_t max_userptr_mem_limit;
	uint64_t max_ttm_mem_limit;
	int64_t system_mem_used;
	int64_t userptr_mem_used;
	int64_t ttm_mem_used;
	spinlock_t mem_limit_lock;
} kfd_mem_limit;

@@ -90,8 +90,8 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm,
}

/* Set memory usage limits. Currently, the limits are
 *  System (kernel) memory - 3/8th System RAM
 *  Userptr memory - 3/4th System RAM
 *  System (TTM + userptr) memory - 3/4th System RAM
 *  TTM memory - 3/8th System RAM
 */
void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
{
@@ -103,48 +103,54 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
	mem *= si.mem_unit;

	spin_lock_init(&kfd_mem_limit.mem_limit_lock);
	kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3);
	kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2);
	pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n",
		(kfd_mem_limit.max_system_mem_limit >> 20),
		(kfd_mem_limit.max_userptr_mem_limit >> 20));
	kfd_mem_limit.max_system_mem_limit = (mem >> 1) + (mem >> 2);
	kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3);
	pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n",
		(kfd_mem_limit.max_system_mem_limit >> 20),
		(kfd_mem_limit.max_ttm_mem_limit >> 20));
}
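(The shifts above are simple fractions of system RAM: (mem >> 1) + (mem >> 2) is 3/4, and (mem >> 1) - (mem >> 3) is 3/8. A worked check for a hypothetical 16 GiB machine:)

	/* Worked example of the limit arithmetic, not driver code:
	 * for mem = 16 GiB,
	 *   system limit = 8 GiB + 4 GiB = 12 GiB  (3/4 of RAM)
	 *   TTM limit    = 8 GiB - 2 GiB =  6 GiB  (3/8 of RAM) */
	uint64_t mem = 16ULL << 30;
	uint64_t system_limit = (mem >> 1) + (mem >> 2);	/* 12 GiB */
	uint64_t ttm_limit    = (mem >> 1) - (mem >> 3);	/*  6 GiB */
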

static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
					      uint64_t size, u32 domain)
		uint64_t size, u32 domain, bool sg)
{
	size_t acc_size;
	size_t acc_size, system_mem_needed, ttm_mem_needed;
	int ret = 0;

	acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
				       sizeof(struct amdgpu_bo));

	spin_lock(&kfd_mem_limit.mem_limit_lock);

	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
		if (kfd_mem_limit.system_mem_used + (acc_size + size) >
			kfd_mem_limit.max_system_mem_limit) {
			ret = -ENOMEM;
			goto err_no_mem;
		}
		kfd_mem_limit.system_mem_used += (acc_size + size);
	} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
		if ((kfd_mem_limit.system_mem_used + acc_size >
			kfd_mem_limit.max_system_mem_limit) ||
			(kfd_mem_limit.userptr_mem_used + (size + acc_size) >
			kfd_mem_limit.max_userptr_mem_limit)) {
			ret = -ENOMEM;
			goto err_no_mem;
		}
		kfd_mem_limit.system_mem_used += acc_size;
		kfd_mem_limit.userptr_mem_used += size;
	}
err_no_mem:
	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
		/* TTM GTT memory */
		system_mem_needed = acc_size + size;
		ttm_mem_needed = acc_size + size;
	} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
		/* Userptr */
		system_mem_needed = acc_size + size;
		ttm_mem_needed = acc_size;
	} else {
		/* VRAM and SG */
		system_mem_needed = acc_size;
		ttm_mem_needed = acc_size;
	}

	if ((kfd_mem_limit.system_mem_used + system_mem_needed >
		kfd_mem_limit.max_system_mem_limit) ||
	    (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
		kfd_mem_limit.max_ttm_mem_limit))
		ret = -ENOMEM;
	else {
		kfd_mem_limit.system_mem_used += system_mem_needed;
		kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
	}

	spin_unlock(&kfd_mem_limit.mem_limit_lock);
	return ret;
}

static void unreserve_system_mem_limit(struct amdgpu_device *adev,
				       uint64_t size, u32 domain)
		uint64_t size, u32 domain, bool sg)
{
	size_t acc_size;

@@ -154,14 +160,18 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev,
	spin_lock(&kfd_mem_limit.mem_limit_lock);
	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
		kfd_mem_limit.system_mem_used -= (acc_size + size);
	} else if (domain == AMDGPU_GEM_DOMAIN_CPU) {
		kfd_mem_limit.system_mem_used -= acc_size;
		kfd_mem_limit.userptr_mem_used -= size;
	}
	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
		kfd_mem_limit.system_mem_used -= (acc_size + size);
		kfd_mem_limit.ttm_mem_used -= (acc_size + size);
	} else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) {
		kfd_mem_limit.system_mem_used -= (acc_size + size);
		kfd_mem_limit.ttm_mem_used -= acc_size;
	} else {
		kfd_mem_limit.system_mem_used -= acc_size;
		kfd_mem_limit.ttm_mem_used -= acc_size;
	}
	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
		  "kfd system memory accounting unbalanced");
	WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
		  "kfd userptr memory accounting unbalanced");
	WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
		  "kfd TTM memory accounting unbalanced");

	spin_unlock(&kfd_mem_limit.mem_limit_lock);
}
@@ -171,16 +181,22 @@ void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
	spin_lock(&kfd_mem_limit.mem_limit_lock);

	if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
		kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
		kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo);
		kfd_mem_limit.system_mem_used -=
			(bo->tbo.acc_size + amdgpu_bo_size(bo));
		kfd_mem_limit.ttm_mem_used -= bo->tbo.acc_size;
	} else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
		kfd_mem_limit.system_mem_used -=
			(bo->tbo.acc_size + amdgpu_bo_size(bo));
		kfd_mem_limit.ttm_mem_used -=
			(bo->tbo.acc_size + amdgpu_bo_size(bo));
	} else {
		kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
		kfd_mem_limit.ttm_mem_used -= bo->tbo.acc_size;
	}
	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
		  "kfd system memory accounting unbalanced");
	WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0,
		  "kfd userptr memory accounting unbalanced");
	WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
		  "kfd TTM memory accounting unbalanced");

	spin_unlock(&kfd_mem_limit.mem_limit_lock);
}
@@ -395,23 +411,6 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
	return 0;
}

static int sync_vm_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
			 struct dma_fence *f)
{
	int ret = amdgpu_sync_fence(adev, sync, f, false);

	/* Sync objects can't handle multiple GPUs (contexts) updating
	 * sync->last_vm_update. Fortunately we don't need it for
	 * KFD's purposes, so we can just drop that fence.
	 */
	if (sync->last_vm_update) {
		dma_fence_put(sync->last_vm_update);
		sync->last_vm_update = NULL;
	}

	return ret;
}

static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
{
	struct amdgpu_bo *pd = vm->root.base.bo;
@@ -422,7 +421,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
	if (ret)
		return ret;

	return sync_vm_fence(adev, sync, vm->last_update);
	return amdgpu_sync_fence(NULL, sync, vm->last_update, false);
}

/* add_bo_to_vm - Add a BO to a VM
@@ -826,7 +825,7 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
	/* Add the eviction fence back */
	amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);

	sync_vm_fence(adev, sync, bo_va->last_pt_update);
	amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false);

	return 0;
}
@@ -851,7 +850,7 @@ static int update_gpuvm_pte(struct amdgpu_device *adev,
		return ret;
	}

	return sync_vm_fence(adev, sync, bo_va->last_pt_update);
	return amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false);
}

static int map_bo_to_gpuvm(struct amdgpu_device *adev,
@@ -901,6 +900,26 @@ static int process_validate_vms(struct amdkfd_process_info *process_info)
	return 0;
}

static int process_sync_pds_resv(struct amdkfd_process_info *process_info,
				 struct amdgpu_sync *sync)
{
	struct amdgpu_vm *peer_vm;
	int ret;

	list_for_each_entry(peer_vm, &process_info->vm_list_head,
			    vm_list_node) {
		struct amdgpu_bo *pd = peer_vm->root.base.bo;

		ret = amdgpu_sync_resv(NULL,
					sync, pd->tbo.resv,
					AMDGPU_FENCE_OWNER_UNDEFINED, false);
		if (ret)
			return ret;
	}

	return 0;
}

static int process_update_pds(struct amdkfd_process_info *process_info,
			      struct amdgpu_sync *sync)
{
@@ -1199,7 +1218,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
	byte_align = (adev->family == AMDGPU_FAMILY_VI &&
			adev->asic_type != CHIP_FIJI &&
			adev->asic_type != CHIP_POLARIS10 &&
			adev->asic_type != CHIP_POLARIS11) ?
			adev->asic_type != CHIP_POLARIS11 &&
			adev->asic_type != CHIP_POLARIS12) ?
			VI_BO_SIZE_ALIGN : 1;

	mapping_flags = AMDGPU_VM_PAGE_READABLE;
@@ -1215,10 +1235,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(

	amdgpu_sync_create(&(*mem)->sync);

	ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain);
	ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size,
						     alloc_domain, false);
	if (ret) {
		pr_debug("Insufficient system memory\n");
		goto err_reserve_system_mem;
		goto err_reserve_limit;
	}

	pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n",
@@ -1266,10 +1287,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
allocate_init_user_pages_failed:
	amdgpu_bo_unref(&bo);
	/* Don't unreserve system mem limit twice */
	goto err_reserve_system_mem;
	goto err_reserve_limit;
err_bo_create:
	unreserve_system_mem_limit(adev, size, alloc_domain);
err_reserve_system_mem:
	unreserve_system_mem_limit(adev, size, alloc_domain, false);
err_reserve_limit:
	mutex_destroy(&(*mem)->lock);
	kfree(*mem);
	return ret;
@@ -1405,7 +1426,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
	 * the queues are still stopped and we can leave mapping for
	 * the next restore worker
	 */
	if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) &&
	    bo->tbo.mem.mem_type == TTM_PL_SYSTEM)
		is_invalid_userptr = true;

	if (check_if_add_bo_to_vm(avm, mem)) {
@@ -2044,13 +2066,10 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
	if (ret)
		goto validate_map_fail;

	/* Wait for PD/PTs validate to finish */
	/* FIXME: I think this isn't needed */
	list_for_each_entry(peer_vm, &process_info->vm_list_head,
			    vm_list_node) {
		struct amdgpu_bo *bo = peer_vm->root.base.bo;

		ttm_bo_wait(&bo->tbo, false, false);
	ret = process_sync_pds_resv(process_info, &sync_obj);
	if (ret) {
		pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n");
		goto validate_map_fail;
	}

	/* Validate BOs and map them to GPUVM (update VM page tables). */
@@ -2066,7 +2085,11 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
			pr_debug("Memory eviction: Validate BOs failed. Try again\n");
			goto validate_map_fail;
		}

		ret = amdgpu_sync_fence(NULL, &sync_obj, bo->tbo.moving, false);
		if (ret) {
			pr_debug("Memory eviction: Sync BO fence failed. Try again\n");
			goto validate_map_fail;
		}
		list_for_each_entry(bo_va_entry, &mem->bo_va_list,
				    bo_list) {
			ret = update_gpuvm_pte((struct amdgpu_device *)
@@ -2087,6 +2110,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
		goto validate_map_fail;
	}

	/* Wait for validate and PT updates to finish */
	amdgpu_sync_wait(&sync_obj, false);

	/* Release old eviction fence and create new one, because fence only
@@ -2105,10 +2129,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
	process_info->eviction_fence = new_fence;
	*ef = dma_fence_get(&new_fence->base);

	/* Wait for validate to finish and attach new eviction fence */
	list_for_each_entry(mem, &process_info->kfd_bo_list,
		validate_list.head)
		ttm_bo_wait(&mem->bo->tbo, false, false);
	/* Attach new eviction fence to all BOs */
	list_for_each_entry(mem, &process_info->kfd_bo_list,
		validate_list.head)
		amdgpu_bo_fence(mem->bo,