Commit 7fc40bca authored by Pawel Piskorski's avatar Pawel Piskorski Committed by Oded Gabbay
Browse files

habanalabs: flush only at the end of the map/unmap



Optimize hl_mmu_map and hl_mmu_unmap by not calling flush(ctx)
within per-page loop.

Signed-off-by: default avatarPawel Piskorski <ppiskorski@habana.ai>
Reviewed-by: default avatarOded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: default avatarOded Gabbay <oded.gabbay@gmail.com>
parent bbde5709
Loading
Loading
Loading
Loading
+12 −8
Original line number Diff line number Diff line
@@ -4776,7 +4776,8 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)

	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
		rc = hl_mmu_map(hdev->kernel_ctx, prop->dram_base_address + off,
				prop->dram_base_address + off, PAGE_SIZE_2MB);
				prop->dram_base_address + off, PAGE_SIZE_2MB,
				(off + PAGE_SIZE_2MB) == CPU_FW_IMAGE_SIZE);
		if (rc) {
			dev_err(hdev->dev, "Map failed for address 0x%llx\n",
				prop->dram_base_address + off);
@@ -4786,7 +4787,7 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)

	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
		rc = hl_mmu_map(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
			hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB);
			hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB, true);

		if (rc) {
			dev_err(hdev->dev,
@@ -4799,7 +4800,7 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
			rc = hl_mmu_map(hdev->kernel_ctx,
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
				hdev->cpu_accessible_dma_address + cpu_off,
				PAGE_SIZE_4KB);
				PAGE_SIZE_4KB, true);
			if (rc) {
				dev_err(hdev->dev,
					"Map failed for CPU accessible memory\n");
@@ -4825,14 +4826,15 @@ unmap_cpu:
	for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
		if (hl_mmu_unmap(hdev->kernel_ctx,
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
				PAGE_SIZE_4KB))
				PAGE_SIZE_4KB, true))
			dev_warn_ratelimited(hdev->dev,
				"failed to unmap address 0x%llx\n",
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
unmap:
	for (; off >= 0 ; off -= PAGE_SIZE_2MB)
		if (hl_mmu_unmap(hdev->kernel_ctx,
				prop->dram_base_address + off, PAGE_SIZE_2MB))
				prop->dram_base_address + off, PAGE_SIZE_2MB,
				true))
			dev_warn_ratelimited(hdev->dev,
				"failed to unmap address 0x%llx\n",
				prop->dram_base_address + off);
@@ -4857,14 +4859,15 @@ void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)

	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
		if (hl_mmu_unmap(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
				PAGE_SIZE_2MB))
				PAGE_SIZE_2MB, true))
			dev_warn(hdev->dev,
				"Failed to unmap CPU accessible memory\n");
	} else {
		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
			if (hl_mmu_unmap(hdev->kernel_ctx,
					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
					PAGE_SIZE_4KB))
					PAGE_SIZE_4KB,
					(cpu_off + PAGE_SIZE_4KB) >= SZ_2M))
				dev_warn_ratelimited(hdev->dev,
					"failed to unmap address 0x%llx\n",
					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
@@ -4872,7 +4875,8 @@ void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)

	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
		if (hl_mmu_unmap(hdev->kernel_ctx,
				prop->dram_base_address + off, PAGE_SIZE_2MB))
				prop->dram_base_address + off, PAGE_SIZE_2MB,
				(off + PAGE_SIZE_2MB) >= CPU_FW_IMAGE_SIZE))
			dev_warn_ratelimited(hdev->dev,
					"Failed to unmap address 0x%llx\n",
					prop->dram_base_address + off);
+4 −2
Original line number Diff line number Diff line
@@ -1573,8 +1573,10 @@ int hl_mmu_init(struct hl_device *hdev);
void hl_mmu_fini(struct hl_device *hdev);
int hl_mmu_ctx_init(struct hl_ctx *ctx);
void hl_mmu_ctx_fini(struct hl_ctx *ctx);
int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size);
int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size);
int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
		u32 page_size, bool flush_pte);
int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
		bool flush_pte);
void hl_mmu_swap_out(struct hl_ctx *ctx);
void hl_mmu_swap_in(struct hl_ctx *ctx);

+6 −3
Original line number Diff line number Diff line
@@ -747,7 +747,8 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
	for (i = 0 ; i < phys_pg_pack->npages ; i++) {
		paddr = phys_pg_pack->pages[i];

		rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size);
		rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size,
				(i + 1) == phys_pg_pack->npages);
		if (rc) {
			dev_err(hdev->dev,
				"map failed for handle %u, npages: %llu, mapped: %llu",
@@ -765,7 +766,8 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
err:
	next_vaddr = vaddr;
	for (i = 0 ; i < mapped_pg_cnt ; i++) {
		if (hl_mmu_unmap(ctx, next_vaddr, page_size))
		if (hl_mmu_unmap(ctx, next_vaddr, page_size,
					(i + 1) == mapped_pg_cnt))
			dev_warn_ratelimited(hdev->dev,
				"failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n",
					phys_pg_pack->handle, next_vaddr,
@@ -794,7 +796,8 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
	next_vaddr = vaddr;

	for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) {
		if (hl_mmu_unmap(ctx, next_vaddr, page_size))
		if (hl_mmu_unmap(ctx, next_vaddr, page_size,
				       (i + 1) == phys_pg_pack->npages))
			dev_warn_ratelimited(hdev->dev,
			"unmap failed for vaddr: 0x%llx\n", next_vaddr);

+28 −14
Original line number Diff line number Diff line
@@ -637,29 +637,27 @@ static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
			clear_hop3 = true;

		if (!clear_hop3)
			goto flush;
			goto mapped;

		clear_pte(ctx, hop3_pte_addr);

		if (put_pte(ctx, hop3_addr))
			goto flush;
			goto mapped;

		clear_pte(ctx, hop2_pte_addr);

		if (put_pte(ctx, hop2_addr))
			goto flush;
			goto mapped;

		clear_pte(ctx, hop1_pte_addr);

		if (put_pte(ctx, hop1_addr))
			goto flush;
			goto mapped;

		clear_pte(ctx, hop0_pte_addr);
	}

flush:
	flush(ctx);

mapped:
	return 0;

not_mapped:
@@ -675,6 +673,7 @@ not_mapped:
 * @ctx: pointer to the context structure
 * @virt_addr: virt addr to map from
 * @page_size: size of the page to unmap
 * @flush_pte: whether to do a PCI flush
 *
 * This function does the following:
 * - Check that the virt addr is mapped
@@ -685,15 +684,19 @@ not_mapped:
 * changes the MMU hash, it must be protected by a lock.
 * However, because it maps only a single page, the lock should be implemented
 * in a higher level in order to protect the entire mapping of the memory area
 *
 * For optimization reasons PCI flush may be requested once after unmapping of
 * large area.
 */
int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
		bool flush_pte)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	u64 real_virt_addr;
	u32 real_page_size, npages;
	int i, rc;
	int i, rc = 0;
	bool is_dram_addr;

	if (!hdev->mmu_enable)
@@ -729,12 +732,15 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
	for (i = 0 ; i < npages ; i++) {
		rc = _hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr);
		if (rc)
			return rc;
			break;

		real_virt_addr += real_page_size;
	}

	return 0;
	if (flush_pte)
		flush(ctx);

	return rc;
}

static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
@@ -885,8 +891,6 @@ static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
		get_pte(ctx, hop3_addr);
	}

	flush(ctx);

	return 0;

err:
@@ -909,6 +913,7 @@ err:
 * @virt_addr: virt addr to map from
 * @phys_addr: phys addr to map to
 * @page_size: physical page size
 * @flush_pte: whether to do a PCI flush
 *
 * This function does the following:
 * - Check that the virt addr is not mapped
@@ -919,8 +924,12 @@ err:
 * changes the MMU hash, it must be protected by a lock.
 * However, because it maps only a single page, the lock should be implemented
 * in a higher level in order to protect the entire mapping of the memory area
 *
 * For optimization reasons PCI flush may be requested once after mapping of
 * large area.
 */
int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
		bool flush_pte)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
@@ -976,6 +985,9 @@ int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
		mapped_cnt++;
	}

	if (flush_pte)
		flush(ctx);

	return 0;

err:
@@ -988,6 +1000,8 @@ err:
		real_virt_addr += real_page_size;
	}

	flush(ctx);

	return rc;
}