Commit 1eca5a53 authored by Harish Kasiviswanathan, committed by Alex Deucher

drm/amdgpu: Refactor amdgpu_move_blit



Add a more generic function amdgpu_copy_ttm_mem_to_mem() that supports
arbitrary copy sizes, offsets and two BOs (source & dest.).

This is useful for the KFD Cross Memory Attach feature, where data
needs to be copied between BOs belonging to different processes.

v2: Add struct amdgpu_copy_mem and change amdgpu_copy_ttm_mem_to_mem()
function parameters to use the struct

v3: Minor function name change
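
For illustration, a cross-process caller (as envisioned for Cross
Memory Attach) might drive the helper roughly like this. This is a
hypothetical sketch, not code from this patch: src_tbo/dst_tbo are
placeholder ttm_buffer_object pointers and error handling is trimmed.

	struct amdgpu_copy_mem src, dst;
	struct dma_fence *fence = NULL;
	int r;

	src.bo = src_tbo;		/* BO exported by one process */
	src.mem = &src_tbo->mem;
	src.offset = 0;

	dst.bo = dst_tbo;		/* BO owned by another process */
	dst.mem = &dst_tbo->mem;
	dst.offset = 4096;		/* copy into the second page */

	r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst, size,
				       dst_tbo->resv, &fence);
	if (!r && fence) {
		/* non-interruptible wait for the last SDMA job */
		dma_fence_wait(fence, false);
		dma_fence_put(fence);
	}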

Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent 26eedf6d
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +120 −49
@@ -290,97 +290,168 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
 	return addr;
 }
 
-static int amdgpu_move_blit(struct ttm_buffer_object *bo,
-			    bool evict, bool no_wait_gpu,
-			    struct ttm_mem_reg *new_mem,
-			    struct ttm_mem_reg *old_mem)
+/**
+ * amdgpu_ttm_copy_mem_to_mem - Helper function for copy
+ *
+ * The function copies @size bytes from {src->mem + src->offset} to
+ * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a
+ * move and different for a BO to BO copy.
+ *
+ * @f: Returns the last fence if multiple jobs are submitted.
+ */
+int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
+			       struct amdgpu_copy_mem *src,
+			       struct amdgpu_copy_mem *dst,
+			       uint64_t size,
+			       struct reservation_object *resv,
+			       struct dma_fence **f)
 {
-	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
 	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
-
-	struct drm_mm_node *old_mm, *new_mm;
-	uint64_t old_start, old_size, new_start, new_size;
-	unsigned long num_pages;
+	struct drm_mm_node *src_mm, *dst_mm;
+	uint64_t src_node_start, dst_node_start, src_node_size,
+		 dst_node_size, src_page_offset, dst_page_offset;
 	struct dma_fence *fence = NULL;
-	int r;
-
-	BUILD_BUG_ON((PAGE_SIZE % AMDGPU_GPU_PAGE_SIZE) != 0);
+	int r = 0;
+	const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
+					AMDGPU_GPU_PAGE_SIZE);
 
 	if (!ring->ready) {
 		DRM_ERROR("Trying to move memory with ring turned off.\n");
 		return -EINVAL;
 	}
 
-	old_mm = old_mem->mm_node;
-	old_size = old_mm->size;
-	old_start = amdgpu_mm_node_addr(bo, old_mm, old_mem);
+	src_mm = src->mem->mm_node;
+	while (src->offset >= (src_mm->size << PAGE_SHIFT)) {
+		src->offset -= (src_mm->size << PAGE_SHIFT);
+		++src_mm;
+	}
+	src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
+					     src->offset;
+	src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
+	src_page_offset = src_node_start & (PAGE_SIZE - 1);
 
-	new_mm = new_mem->mm_node;
-	new_size = new_mm->size;
-	new_start = amdgpu_mm_node_addr(bo, new_mm, new_mem);
+	dst_mm = dst->mem->mm_node;
+	while (dst->offset >= (dst_mm->size << PAGE_SHIFT)) {
+		dst->offset -= (dst_mm->size << PAGE_SHIFT);
+		++dst_mm;
+	}
+	dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
+					     dst->offset;
+	dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
+	dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
 
-	num_pages = new_mem->num_pages;
 	mutex_lock(&adev->mman.gtt_window_lock);
-	while (num_pages) {
-		unsigned long cur_pages = min(min(old_size, new_size),
-					      (u64)AMDGPU_GTT_MAX_TRANSFER_SIZE);
-		uint64_t from = old_start, to = new_start;
+
+	while (size) {
+		unsigned long cur_size;
+		uint64_t from = src_node_start, to = dst_node_start;
 		struct dma_fence *next;
 
-		if (old_mem->mem_type == TTM_PL_TT &&
-		    !amdgpu_gtt_mgr_is_allocated(old_mem)) {
-			r = amdgpu_map_buffer(bo, old_mem, cur_pages,
-					      old_start, 0, ring, &from);
+		/* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
+		 * begins at an offset, then adjust the size accordingly
+		 */
+		cur_size = min3(min(src_node_size, dst_node_size), size,
+				GTT_MAX_BYTES);
+		if (cur_size + src_page_offset > GTT_MAX_BYTES ||
+		    cur_size + dst_page_offset > GTT_MAX_BYTES)
+			cur_size -= max(src_page_offset, dst_page_offset);
+
+		/* Map only what needs to be accessed. Map src to window 0 and
+		 * dst to window 1
+		 */
+		if (src->mem->mem_type == TTM_PL_TT &&
+		    !amdgpu_gtt_mgr_is_allocated(src->mem)) {
+			r = amdgpu_map_buffer(src->bo, src->mem,
+					PFN_UP(cur_size + src_page_offset),
+					src_node_start, 0, ring,
+					&from);
 			if (r)
 				goto error;
+			/* Adjust the offset because amdgpu_map_buffer returns
+			 * start of mapped page
+			 */
+			from += src_page_offset;
 		}
 
-		if (new_mem->mem_type == TTM_PL_TT &&
-		    !amdgpu_gtt_mgr_is_allocated(new_mem)) {
-			r = amdgpu_map_buffer(bo, new_mem, cur_pages,
-					      new_start, 1, ring, &to);
+		if (dst->mem->mem_type == TTM_PL_TT &&
+		    !amdgpu_gtt_mgr_is_allocated(dst->mem)) {
+			r = amdgpu_map_buffer(dst->bo, dst->mem,
+					PFN_UP(cur_size + dst_page_offset),
+					dst_node_start, 1, ring,
+					&to);
 			if (r)
 				goto error;
+			to += dst_page_offset;
 		}
 
-		r = amdgpu_copy_buffer(ring, from, to,
-				       cur_pages * PAGE_SIZE,
-				       bo->resv, &next, false, true);
+		r = amdgpu_copy_buffer(ring, from, to, cur_size,
+				       resv, &next, false, true);
 		if (r)
 			goto error;
 
 		dma_fence_put(fence);
 		fence = next;
 
-		num_pages -= cur_pages;
-		if (!num_pages)
+		size -= cur_size;
+		if (!size)
 			break;
 
-		old_size -= cur_pages;
-		if (!old_size) {
-			old_start = amdgpu_mm_node_addr(bo, ++old_mm, old_mem);
-			old_size = old_mm->size;
+		src_node_size -= cur_size;
+		if (!src_node_size) {
+			src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm,
+							     src->mem);
+			src_node_size = (src_mm->size << PAGE_SHIFT);
 		} else {
-			old_start += cur_pages * PAGE_SIZE;
+			src_node_start += cur_size;
+			src_page_offset = src_node_start & (PAGE_SIZE - 1);
 		}
-
-		new_size -= cur_pages;
-		if (!new_size) {
-			new_start = amdgpu_mm_node_addr(bo, ++new_mm, new_mem);
-			new_size = new_mm->size;
+		dst_node_size -= cur_size;
+		if (!dst_node_size) {
+			dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm,
+							     dst->mem);
+			dst_node_size = (dst_mm->size << PAGE_SHIFT);
 		} else {
-			new_start += cur_pages * PAGE_SIZE;
+			dst_node_start += cur_size;
+			dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
 		}
 	}
+error:
 	mutex_unlock(&adev->mman.gtt_window_lock);
+	if (f)
+		*f = dma_fence_get(fence);
+	dma_fence_put(fence);
+	return r;
+}
+
+
+static int amdgpu_move_blit(struct ttm_buffer_object *bo,
+			    bool evict, bool no_wait_gpu,
+			    struct ttm_mem_reg *new_mem,
+			    struct ttm_mem_reg *old_mem)
+{
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+	struct amdgpu_copy_mem src, dst;
+	struct dma_fence *fence = NULL;
+	int r;
+
+	src.bo = bo;
+	dst.bo = bo;
+	src.mem = old_mem;
+	dst.mem = new_mem;
+	src.offset = 0;
+	dst.offset = 0;
+
+	r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
+				       new_mem->num_pages << PAGE_SHIFT,
+				       bo->resv, &fence);
+	if (r)
+		goto error;
 
 	r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
 	dma_fence_put(fence);
 	return r;
 
 error:
-	mutex_unlock(&adev->mman.gtt_window_lock);
-
 	if (fence)
 		dma_fence_wait(fence, false);
 	dma_fence_put(fence);
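
The per-iteration clamp above is the heart of the new helper: each SDMA
job is staged through a fixed-size GTT window, so cur_size is bounded by
the bytes left in the current source and destination nodes, the bytes
still to copy, and GTT_MAX_BYTES, then shrunk when a cursor is not page
aligned. A standalone model of that math (an illustration only; the
512-GPU-page value of AMDGPU_GTT_MAX_TRANSFER_SIZE is an assumption
taken from this era's amdgpu_ttm.h):

	#include <stdint.h>
	#include <stdio.h>

	#define GPU_PAGE_SIZE		4096ULL
	#define GTT_MAX_TRANSFER_SIZE	512ULL	/* assumed kernel value */
	#define GTT_MAX_BYTES		(GTT_MAX_TRANSFER_SIZE * GPU_PAGE_SIZE)

	static uint64_t min_u64(uint64_t a, uint64_t b) { return a < b ? a : b; }
	static uint64_t max_u64(uint64_t a, uint64_t b) { return a > b ? a : b; }

	/* Bytes the next job may copy, given what is left in the current
	 * src/dst nodes, the remaining total and the two page offsets.
	 */
	static uint64_t chunk_size(uint64_t src_node_size, uint64_t dst_node_size,
				   uint64_t size, uint64_t src_page_offset,
				   uint64_t dst_page_offset)
	{
		uint64_t cur_size = min_u64(min_u64(src_node_size, dst_node_size),
					    min_u64(size, GTT_MAX_BYTES));

		/* The window mapping starts on a page boundary, so an
		 * unaligned cursor shrinks the usable span.
		 */
		if (cur_size + src_page_offset > GTT_MAX_BYTES ||
		    cur_size + dst_page_offset > GTT_MAX_BYTES)
			cur_size -= max_u64(src_page_offset, dst_page_offset);

		return cur_size;
	}

	int main(void)
	{
		/* 8 MiB left in both nodes, 3 MiB still to copy, src cursor
		 * 0x300 bytes into a page: the 2 MiB window caps the job,
		 * minus 0x300 so the mapping still fits; prints 2096384.
		 */
		uint64_t cur = chunk_size(8ULL << 20, 8ULL << 20, 3ULL << 20,
					  0x300, 0);
		printf("next chunk: %llu bytes\n", (unsigned long long)cur);
		return 0;
	}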
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +12 −0
@@ -58,6 +58,12 @@ struct amdgpu_mman {
 	struct amd_sched_entity			entity;
 };
 
+struct amdgpu_copy_mem {
+	struct ttm_buffer_object	*bo;
+	struct ttm_mem_reg		*mem;
+	unsigned long			offset;
+};
+
 extern const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func;
 extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func;
 
@@ -72,6 +78,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
 		       struct reservation_object *resv,
 		       struct dma_fence **fence, bool direct_submit,
 		       bool vm_needs_flush);
+int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
+			       struct amdgpu_copy_mem *src,
+			       struct amdgpu_copy_mem *dst,
+			       uint64_t size,
+			       struct reservation_object *resv,
+			       struct dma_fence **f);
 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 			uint64_t src_data,
 			struct reservation_object *resv,
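
The offset seek at the top of amdgpu_ttm_copy_mem_to_mem() also deserves a
note: a TTM BO may be backed by several discontiguous drm_mm nodes, so the
helper first walks the node array until the requested byte offset falls
inside a node. A minimal userspace model of that walk (hypothetical node
sizes; PAGE_SHIFT = 12 assumed):

	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_SHIFT 12

	/* Stand-in for drm_mm_node: only the size (in pages) matters here. */
	struct node {
		uint64_t size;
	};

	/* Mirrors the while-loop in amdgpu_ttm_copy_mem_to_mem(): skip whole
	 * nodes until *offset lands inside the returned node, reducing
	 * *offset to a node-relative byte offset along the way.
	 */
	static struct node *seek_node(struct node *n, uint64_t *offset)
	{
		while (*offset >= (n->size << PAGE_SHIFT)) {
			*offset -= (n->size << PAGE_SHIFT);
			++n;
		}
		return n;
	}

	int main(void)
	{
		struct node nodes[] = { { 4 }, { 2 }, { 8 } };	/* pages */
		uint64_t offset = 5ULL << PAGE_SHIFT;	/* 5 pages in */
		struct node *n = seek_node(nodes, &offset);

		/* Lands one page into nodes[1]: prints "node 1, offset 0x1000" */
		printf("node %td, offset 0x%llx\n", n - nodes,
		       (unsigned long long)offset);
		return 0;
	}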