Merge tag 'misc-habanalabs-next-2020-03-24' of... (9d20328d) · Commits · 戴 / test

Documentation/ABI/testing/debugfs-driver-habanalabs

+14 −0

Original line number	Diff line number	Diff line
		@@ -43,6 +43,20 @@ Description: Allows the root user to read or write directly through the
		If the IOMMU is disabled, it also allows the root user to read
		or write from the host a device VA of a host mapped memory

		What: /sys/kernel/debug/habanalabs/hl<n>/data64
		Date: Jan 2020
		KernelVersion: 5.6
		Contact: oded.gabbay@gmail.com
		Description: Allows the root user to read or write 64 bit data directly
		through the device's PCI bar. Writing to this file generates a
		write transaction while reading from the file generates a read
		transaction. This custom interface is needed (instead of using
		the generic Linux user-space PCI mapping) because the DDR bar
		is very small compared to the DDR memory and only the driver can
		move the bar before and after the transaction.
		If the IOMMU is disabled, it also allows the root user to read
		or write, from the host, a device VA of host-mapped memory.

		What: /sys/kernel/debug/habanalabs/hl<n>/device
		Date: Jan 2019
		KernelVersion: 5.1

drivers/misc/habanalabs/command_submission.c

+25 −26

Original line number	Diff line number	Diff line
		@@ -129,6 +129,8 @@ static int cs_parser(struct hl_fpriv hpriv, struct hl_cs_job job)
		spin_unlock(&job->user_cb->lock);
		hl_cb_put(job->user_cb);
		job->user_cb = NULL;
		} else if (!rc) {
		job->job_cb_size = job->user_cb_size;
		}

		return rc;
		@@ -507,7 +509,7 @@ static int _hl_cs_ioctl(struct hl_fpriv hpriv, void __user chunks,
		struct hl_cb *cb;
		bool int_queues_only = true;
		u32 size_to_copy;
		int rc, i, parse_cnt;
		int rc, i;

		*cs_seq = ULLONG_MAX;

		@@ -547,7 +549,7 @@ static int _hl_cs_ioctl(struct hl_fpriv hpriv, void __user chunks,
		hl_debugfs_add_cs(cs);

		/* Validate ALL the CS chunks before submitting the CS */
		for (i = 0, parse_cnt = 0 ; i < num_chunks ; i++, parse_cnt++) {
		for (i = 0 ; i < num_chunks ; i++) {
		struct hl_cs_chunk *chunk = &cs_chunk_array[i];
		enum hl_queue_type queue_type;
		bool is_kernel_allocated_cb;
		@@ -585,10 +587,6 @@ static int _hl_cs_ioctl(struct hl_fpriv hpriv, void __user chunks,
		job->cs = cs;
		job->user_cb = cb;
		job->user_cb_size = chunk->cb_size;
		if (is_kernel_allocated_cb)
		job->job_cb_size = cb->size;
		else
		job->job_cb_size = chunk->cb_size;
		job->hw_queue_id = chunk->queue_index;

		cs->jobs_in_queue_cnt[job->hw_queue_id]++;
		@@ -659,8 +657,8 @@ int hl_cs_ioctl(struct hl_fpriv hpriv, void data)
		struct hl_device *hdev = hpriv->hdev;
		union hl_cs_args *args = data;
		struct hl_ctx *ctx = hpriv->ctx;
		void __user *chunks;
		u32 num_chunks;
		void __user chunks_execute, chunks_restore;
		u32 num_chunks_execute, num_chunks_restore;
		u64 cs_seq = ULONG_MAX;
		int rc, do_ctx_switch;
		bool need_soft_reset = false;
		@@ -673,13 +671,25 @@ int hl_cs_ioctl(struct hl_fpriv hpriv, void data)
		goto out;
		}

		chunks_execute = (void __user *) (uintptr_t) args->in.chunks_execute;
		num_chunks_execute = args->in.num_chunks_execute;

		if (!num_chunks_execute) {
		dev_err(hdev->dev,
		"Got execute CS with 0 chunks, context %d\n",
		ctx->asid);
		rc = -EINVAL;
		goto out;
		}

		do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);

		if (do_ctx_switch \|\| (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
		long ret;

		chunks = (void __user *)(uintptr_t)args->in.chunks_restore;
		num_chunks = args->in.num_chunks_restore;
		chunks_restore =
		(void __user *) (uintptr_t) args->in.chunks_restore;
		num_chunks_restore = args->in.num_chunks_restore;

		mutex_lock(&hpriv->restore_phase_mutex);

		@@ -707,13 +717,13 @@ int hl_cs_ioctl(struct hl_fpriv hpriv, void data)

		hdev->asic_funcs->restore_phase_topology(hdev);

		if (num_chunks == 0) {
		if (!num_chunks_restore) {
		dev_dbg(hdev->dev,
		"Need to run restore phase but restore CS is empty\n");
		rc = 0;
		} else {
		rc = _hl_cs_ioctl(hpriv, chunks, num_chunks,
		&cs_seq);
		rc = _hl_cs_ioctl(hpriv, chunks_restore,
		num_chunks_restore, &cs_seq);
		}

		mutex_unlock(&hpriv->restore_phase_mutex);
		@@ -726,7 +736,7 @@ int hl_cs_ioctl(struct hl_fpriv hpriv, void data)
		}

		/* Need to wait for restore completion before execution phase */
		if (num_chunks > 0) {
		if (num_chunks_restore) {
		ret = _hl_cs_wait_ioctl(hdev, ctx,
		jiffies_to_usecs(hdev->timeout_jiffies),
		cs_seq);
		@@ -754,18 +764,7 @@ int hl_cs_ioctl(struct hl_fpriv hpriv, void data)
		}
		}

		chunks = (void __user *)(uintptr_t)args->in.chunks_execute;
		num_chunks = args->in.num_chunks_execute;

		if (num_chunks == 0) {
		dev_err(hdev->dev,
		"Got execute CS with 0 chunks, context %d\n",
		ctx->asid);
		rc = -EINVAL;
		goto out;
		}

		rc = _hl_cs_ioctl(hpriv, chunks, num_chunks, &cs_seq);
		rc = _hl_cs_ioctl(hpriv, chunks_execute, num_chunks_execute, &cs_seq);

		out:
		if (rc != -EAGAIN) {

drivers/misc/habanalabs/debugfs.c

+84 −8

Original line number	Diff line number	Diff line
		@@ -393,9 +393,10 @@ static int mmu_show(struct seq_file s, void data)
		}

		is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
		prop->va_space_dram_start_address,
		prop->va_space_dram_end_address);
		prop->dmmu.start_addr,
		prop->dmmu.end_addr);

		/* shifts and masks are the same in PMMU and HPMMU, use one of them */
		mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;

		mutex_lock(&ctx->mmu_lock);
		@@ -547,12 +548,15 @@ static bool hl_is_device_va(struct hl_device *hdev, u64 addr)
		goto out;

		if (hdev->dram_supports_virtual_memory &&
		addr >= prop->va_space_dram_start_address &&
		addr < prop->va_space_dram_end_address)
		(addr >= prop->dmmu.start_addr && addr < prop->dmmu.end_addr))
		return true;

		if (addr >= prop->va_space_host_start_address &&
		addr < prop->va_space_host_end_address)
		if (addr >= prop->pmmu.start_addr &&
		addr < prop->pmmu.end_addr)
		return true;

		if (addr >= prop->pmmu_huge.start_addr &&
		addr < prop->pmmu_huge.end_addr)
		return true;
		out:
		return false;
		@@ -575,9 +579,10 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
		}

		is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
		prop->va_space_dram_start_address,
		prop->va_space_dram_end_address);
		prop->dmmu.start_addr,
		prop->dmmu.end_addr);

		/* shifts and masks are the same in PMMU and HPMMU, use one of them */
		mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;

		mutex_lock(&ctx->mmu_lock);
		@@ -705,6 +710,65 @@ static ssize_t hl_data_write32(struct file f, const char __user buf,
		return count;
		}

		static ssize_t hl_data_read64(struct file f, char __user buf,
		size_t count, loff_t *ppos)
		{
		struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
		struct hl_device *hdev = entry->hdev;
		char tmp_buf[32];
		u64 addr = entry->addr;
		u64 val;
		ssize_t rc;

		if (*ppos)
		return 0;

		if (hl_is_device_va(hdev, addr)) {
		rc = device_va_to_pa(hdev, addr, &addr);
		if (rc)
		return rc;
		}

		rc = hdev->asic_funcs->debugfs_read64(hdev, addr, &val);
		if (rc) {
		dev_err(hdev->dev, "Failed to read from 0x%010llx\n", addr);
		return rc;
		}

		sprintf(tmp_buf, "0x%016llx\n", val);
		return simple_read_from_buffer(buf, count, ppos, tmp_buf,
		strlen(tmp_buf));
		}

		static ssize_t hl_data_write64(struct file f, const char __user buf,
		size_t count, loff_t *ppos)
		{
		struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
		struct hl_device *hdev = entry->hdev;
		u64 addr = entry->addr;
		u64 value;
		ssize_t rc;

		rc = kstrtoull_from_user(buf, count, 16, &value);
		if (rc)
		return rc;

		if (hl_is_device_va(hdev, addr)) {
		rc = device_va_to_pa(hdev, addr, &addr);
		if (rc)
		return rc;
		}

		rc = hdev->asic_funcs->debugfs_write64(hdev, addr, value);
		if (rc) {
		dev_err(hdev->dev, "Failed to write 0x%016llx to 0x%010llx\n",
		value, addr);
		return rc;
		}

		return count;
		}

		static ssize_t hl_get_power_state(struct file f, char __user buf,
		size_t count, loff_t *ppos)
		{
		@@ -912,6 +976,12 @@ static const struct file_operations hl_data32b_fops = {
		.write = hl_data_write32
		};

		/* File operations of the 64-bit data access node ("data64") */
		static const struct file_operations hl_data64b_fops = {
			.owner	= THIS_MODULE,
			.read	= hl_data_read64,
			.write	= hl_data_write64,
		};

		static const struct file_operations hl_i2c_data_fops = {
		.owner = THIS_MODULE,
		.read = hl_i2c_data_read,
		@@ -1025,6 +1095,12 @@ void hl_debugfs_add_device(struct hl_device *hdev)
		dev_entry,
		&hl_data32b_fops);

		debugfs_create_file("data64",
		0644,
		dev_entry->root,
		dev_entry,
		&hl_data64b_fops);

		debugfs_create_file("set_power_state",
		0200,
		dev_entry->root,

drivers/misc/habanalabs/device.c

+1 −1

Original line number	Diff line number	Diff line
		@@ -36,7 +36,7 @@ enum hl_device_status hl_device_status(struct hl_device *hdev)
		status = HL_DEVICE_STATUS_OPERATIONAL;

		return status;
		};
		}

		static void hpriv_release(struct kref *ref)
		{

drivers/misc/habanalabs/goya/goya.c

+176 −28

Original line number	Diff line number	Diff line
		@@ -324,7 +324,11 @@ static u32 goya_all_events[] = {
		GOYA_ASYNC_EVENT_ID_DMA_BM_CH1,
		GOYA_ASYNC_EVENT_ID_DMA_BM_CH2,
		GOYA_ASYNC_EVENT_ID_DMA_BM_CH3,
		GOYA_ASYNC_EVENT_ID_DMA_BM_CH4
		GOYA_ASYNC_EVENT_ID_DMA_BM_CH4,
		GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S,
		GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E,
		GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S,
		GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E
		};

		static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
		@@ -393,19 +397,21 @@ void goya_get_fixed_properties(struct hl_device *hdev)
		prop->dmmu.hop2_mask = HOP2_MASK;
		prop->dmmu.hop3_mask = HOP3_MASK;
		prop->dmmu.hop4_mask = HOP4_MASK;
		prop->dmmu.huge_page_size = PAGE_SIZE_2MB;
		prop->dmmu.start_addr = VA_DDR_SPACE_START;
		prop->dmmu.end_addr = VA_DDR_SPACE_END;
		prop->dmmu.page_size = PAGE_SIZE_2MB;

		/* No difference between PMMU and DMMU except of page size */
		/* shifts and masks are the same in PMMU and DMMU */
		memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
		prop->dmmu.page_size = PAGE_SIZE_2MB;
		prop->pmmu.start_addr = VA_HOST_SPACE_START;
		prop->pmmu.end_addr = VA_HOST_SPACE_END;
		prop->pmmu.page_size = PAGE_SIZE_4KB;

		prop->va_space_host_start_address = VA_HOST_SPACE_START;
		prop->va_space_host_end_address = VA_HOST_SPACE_END;
		prop->va_space_dram_start_address = VA_DDR_SPACE_START;
		prop->va_space_dram_end_address = VA_DDR_SPACE_END;
		prop->dram_size_for_default_page_mapping =
		prop->va_space_dram_end_address;
		/* PMMU and HPMMU are the same except of page size */
		memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
		prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

		prop->dram_size_for_default_page_mapping = VA_DDR_SPACE_END;
		prop->cfg_size = CFG_SIZE;
		prop->max_asid = MAX_ASID;
		prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE;
		@@ -2573,8 +2579,7 @@ static int goya_hw_init(struct hl_device *hdev)
		* After CPU initialization is finished, change DDR bar mapping inside
		* iATU to point to the start address of the MMU page tables
		*/
		if (goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE +
		(MMU_PAGE_TABLES_ADDR &
		if (goya_set_ddr_bar_base(hdev, (MMU_PAGE_TABLES_ADDR &
		~(prop->dram_pci_bar_size - 0x1ull))) == U64_MAX) {
		dev_err(hdev->dev,
		"failed to map DDR bar to MMU page tables\n");
		@@ -3443,12 +3448,13 @@ static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
		/*
		* WA for HW-23.
		* We can't allow user to read from Host using QMANs other than 1.
		* PMMU and HPMMU addresses are equal, check only one of them.
		*/
		if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 &&
		hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr),
		le32_to_cpu(user_dma_pkt->tsize),
		hdev->asic_prop.va_space_host_start_address,
		hdev->asic_prop.va_space_host_end_address)) {
		hdev->asic_prop.pmmu.start_addr,
		hdev->asic_prop.pmmu.end_addr)) {
		dev_err(hdev->dev,
		"Can't DMA from host on queue other then 1\n");
		return -EFAULT;
		@@ -4178,6 +4184,96 @@ static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
		return rc;
		}

		/*
		 * goya_debugfs_read64 - read 64 bits from a device or host address.
		 *
		 * @hdev: habanalabs device structure.
		 * @addr: address to read from.
		 * @val: output, receives the 64-bit value that was read.
		 *
		 * The address is dispatched by range:
		 *  - configuration space: two 32-bit register reads, low word first;
		 *  - SRAM: a single readq() through the SRAM/CFG PCI bar;
		 *  - DRAM: the DDR bar is moved to cover the address, the value is read
		 *    and the bar is then restored to its previous base;
		 *  - host memory: only when no IOMMU is present on the PCI bus, a direct
		 *    load through phys_to_virt().
		 *
		 * Return: 0 on success, -EIO if moving the DDR bar failed, -EFAULT if the
		 * address is in none of the supported ranges.
		 */
		static int goya_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
		{
			struct asic_fixed_properties *prop = &hdev->asic_prop;
			u64 ddr_bar_addr;
			int rc = 0;

			if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
				u32 val_l = RREG32(addr - CFG_BASE);
				u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);

				*val = (((u64) val_h) << 32) | val_l;

			} else if ((addr >= SRAM_BASE_ADDR) &&
					(addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) {

				*val = readq(hdev->pcie_bar[SRAM_CFG_BAR_ID] +
						(addr - SRAM_BASE_ADDR));

			} else if ((addr >= DRAM_PHYS_BASE) &&
					(addr <=
					DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64))) {

				/* Bar-size aligned window that contains the requested address */
				u64 bar_base_addr = DRAM_PHYS_BASE +
						(addr & ~(prop->dram_pci_bar_size - 0x1ull));

				ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
				if (ddr_bar_addr != U64_MAX) {
					*val = readq(hdev->pcie_bar[DDR_BAR_ID] +
							(addr - bar_base_addr));

					/* Put the bar back where it was before the access */
					ddr_bar_addr = goya_set_ddr_bar_base(hdev,
									ddr_bar_addr);
				}
				if (ddr_bar_addr == U64_MAX)
					rc = -EIO;

			} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
				/* Store the value through the out-pointer, not into the pointer */
				*val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);

			} else {
				rc = -EFAULT;
			}

			return rc;
		}

		/*
		 * goya_debugfs_write64 - write 64 bits to a device or host address.
		 *
		 * @hdev: habanalabs device structure.
		 * @addr: address to write to.
		 * @val: 64-bit value to write.
		 *
		 * The address is dispatched by range:
		 *  - configuration space: two 32-bit register writes, low word first;
		 *  - SRAM: a single writeq() through the SRAM/CFG PCI bar;
		 *  - DRAM: the DDR bar is moved to cover the address, the value is
		 *    written and the bar is then restored to its previous base;
		 *  - host memory: only when no IOMMU is present on the PCI bus, a direct
		 *    store through phys_to_virt().
		 *
		 * Return: 0 on success, -EIO if moving the DDR bar failed, -EFAULT if the
		 * address is in none of the supported ranges.
		 */
		static int goya_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
		{
			struct asic_fixed_properties *prop = &hdev->asic_prop;
			u64 ddr_bar_addr;
			int rc = 0;

			if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
				WREG32(addr - CFG_BASE, lower_32_bits(val));
				WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val));

			} else if ((addr >= SRAM_BASE_ADDR) &&
					(addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) {

				writeq(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] +
						(addr - SRAM_BASE_ADDR));

			} else if ((addr >= DRAM_PHYS_BASE) &&
					(addr <=
					DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64))) {

				/* Bar-size aligned window that contains the requested address */
				u64 bar_base_addr = DRAM_PHYS_BASE +
						(addr & ~(prop->dram_pci_bar_size - 0x1ull));

				ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
				if (ddr_bar_addr != U64_MAX) {
					writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
							(addr - bar_base_addr));

					/* Put the bar back where it was before the access */
					ddr_bar_addr = goya_set_ddr_bar_base(hdev,
									ddr_bar_addr);
				}
				if (ddr_bar_addr == U64_MAX)
					rc = -EIO;

			} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
				/* Dereference the mapped address so the store is a valid lvalue */
				*(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;

			} else {
				rc = -EFAULT;
			}

			return rc;
		}

		static u64 goya_read_pte(struct hl_device *hdev, u64 addr)
		{
		struct goya_device *goya = hdev->asic_specific;
		@@ -4297,6 +4393,14 @@ static const char *_goya_get_event_desc(u16 event_type)
		return "TPC%d_bmon_spmu";
		case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
		return "DMA_bm_ch%d";
		case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
		return "POWER_ENV_S";
		case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
		return "POWER_ENV_E";
		case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
		return "THERMAL_ENV_S";
		case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
		return "THERMAL_ENV_E";
		default:
		return "N/A";
		}
		@@ -4388,22 +4492,22 @@ static void goya_get_event_desc(u16 event_type, char *desc, size_t size)
		/*
		 * goya_print_razwi_info - report and clear pending RAZWI indications.
		 *
		 * @hdev: habanalabs device structure.
		 *
		 * Checks the four DMA macro RAZWI "valid" registers (LBW/HBW, write/read).
		 * For every register that reads non-zero, a rate-limited error is logged
		 * once and the register is cleared by writing 0 to it.
		 */
		static void goya_print_razwi_info(struct hl_device *hdev)
		{
			if (RREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD)) {
				dev_err_ratelimited(hdev->dev, "Illegal write to LBW\n");
				WREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD, 0);
			}

			if (RREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD)) {
				dev_err_ratelimited(hdev->dev, "Illegal read from LBW\n");
				WREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD, 0);
			}

			if (RREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD)) {
				dev_err_ratelimited(hdev->dev, "Illegal write to HBW\n");
				WREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD, 0);
			}

			if (RREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD)) {
				dev_err_ratelimited(hdev->dev, "Illegal read from HBW\n");
				WREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD, 0);
			}
		}
		@@ -4423,7 +4527,8 @@ static void goya_print_mmu_error_info(struct hl_device *hdev)
		addr <<= 32;
		addr \|= RREG32(mmMMU_PAGE_ERROR_CAPTURE_VA);

		dev_err(hdev->dev, "MMU page fault on va 0x%llx\n", addr);
		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
		addr);

		WREG32(mmMMU_PAGE_ERROR_CAPTURE, 0);
		}
		@@ -4435,7 +4540,7 @@ static void goya_print_irq_info(struct hl_device *hdev, u16 event_type,
		char desc[20] = "";

		goya_get_event_desc(event_type, desc, sizeof(desc));
		dev_err(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
		dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
		event_type, desc);

		if (razwi) {
		@@ -4526,6 +4631,33 @@ static int goya_unmask_irq(struct hl_device *hdev, u16 event_type)
		return rc;
		}

		/*
		 * goya_print_clk_change_info - log a clock change event.
		 *
		 * @hdev: habanalabs device structure.
		 * @event_type: one of the GOYA_ASYNC_EVENT_ID_FIX_{POWER,THERMAL}_ENV_{S,E}
		 *              event ids.
		 *
		 * Emits a rate-limited info message for each of the four known events and
		 * an error message for any other event id.
		 */
		static void goya_print_clk_change_info(struct hl_device *hdev, u16 event_type)
		{
			switch (event_type) {
			/* Power envelope events */
			case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
				dev_info_ratelimited(hdev->dev,
					"Clock throttling due to power consumption\n");
				return;
			case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
				dev_info_ratelimited(hdev->dev,
					"Power envelop is safe, back to optimal clock\n");
				return;

			/* Thermal envelope events */
			case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
				dev_info_ratelimited(hdev->dev,
					"Clock throttling due to overheating\n");
				return;
			case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
				dev_info_ratelimited(hdev->dev,
					"Thermal envelop is safe, back to optimal clock\n");
				return;

			/* Anything else is not a clock change event */
			default:
				dev_err(hdev->dev, "Received invalid clock change event %d\n",
					event_type);
				return;
			}
		}

		void goya_handle_eqe(struct hl_device hdev, struct hl_eq_entry eq_entry)
		{
		u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
		@@ -4609,6 +4741,14 @@ void goya_handle_eqe(struct hl_device hdev, struct hl_eq_entry eq_entry)
		goya_unmask_irq(hdev, event_type);
		break;

		case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
		case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
		case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
		case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
		goya_print_clk_change_info(hdev, event_type);
		goya_unmask_irq(hdev, event_type);
		break;

		default:
		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
		event_type);
		@@ -4776,7 +4916,8 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)

		for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
		rc = hl_mmu_map(hdev->kernel_ctx, prop->dram_base_address + off,
		prop->dram_base_address + off, PAGE_SIZE_2MB);
		prop->dram_base_address + off, PAGE_SIZE_2MB,
		(off + PAGE_SIZE_2MB) == CPU_FW_IMAGE_SIZE);
		if (rc) {
		dev_err(hdev->dev, "Map failed for address 0x%llx\n",
		prop->dram_base_address + off);
		@@ -4786,7 +4927,7 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)

		if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
		rc = hl_mmu_map(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
		hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB);
		hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB, true);

		if (rc) {
		dev_err(hdev->dev,
		@@ -4799,7 +4940,7 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
		rc = hl_mmu_map(hdev->kernel_ctx,
		VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
		hdev->cpu_accessible_dma_address + cpu_off,
		PAGE_SIZE_4KB);
		PAGE_SIZE_4KB, true);
		if (rc) {
		dev_err(hdev->dev,
		"Map failed for CPU accessible memory\n");
		@@ -4825,14 +4966,15 @@ unmap_cpu:
		for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
		if (hl_mmu_unmap(hdev->kernel_ctx,
		VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
		PAGE_SIZE_4KB))
		PAGE_SIZE_4KB, true))
		dev_warn_ratelimited(hdev->dev,
		"failed to unmap address 0x%llx\n",
		VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
		unmap:
		for (; off >= 0 ; off -= PAGE_SIZE_2MB)
		if (hl_mmu_unmap(hdev->kernel_ctx,
		prop->dram_base_address + off, PAGE_SIZE_2MB))
		prop->dram_base_address + off, PAGE_SIZE_2MB,
		true))
		dev_warn_ratelimited(hdev->dev,
		"failed to unmap address 0x%llx\n",
		prop->dram_base_address + off);
		@@ -4857,14 +4999,15 @@ void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)

		if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
		if (hl_mmu_unmap(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
		PAGE_SIZE_2MB))
		PAGE_SIZE_2MB, true))
		dev_warn(hdev->dev,
		"Failed to unmap CPU accessible memory\n");
		} else {
		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
		if (hl_mmu_unmap(hdev->kernel_ctx,
		VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
		PAGE_SIZE_4KB))
		PAGE_SIZE_4KB,
		(cpu_off + PAGE_SIZE_4KB) >= SZ_2M))
		dev_warn_ratelimited(hdev->dev,
		"failed to unmap address 0x%llx\n",
		VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
		@@ -4872,7 +5015,8 @@ void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)

		for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
		if (hl_mmu_unmap(hdev->kernel_ctx,
		prop->dram_base_address + off, PAGE_SIZE_2MB))
		prop->dram_base_address + off, PAGE_SIZE_2MB,
		(off + PAGE_SIZE_2MB) >= CPU_FW_IMAGE_SIZE))
		dev_warn_ratelimited(hdev->dev,
		"Failed to unmap address 0x%llx\n",
		prop->dram_base_address + off);
		@@ -5113,6 +5257,7 @@ static bool goya_is_device_idle(struct hl_device hdev, u32 mask,
		}

		static void goya_hw_queues_lock(struct hl_device *hdev)
		__acquires(&goya->hw_queues_lock)
		{
		struct goya_device *goya = hdev->asic_specific;

		@@ -5120,6 +5265,7 @@ static void goya_hw_queues_lock(struct hl_device *hdev)
		}

		static void goya_hw_queues_unlock(struct hl_device *hdev)
		__releases(&goya->hw_queues_lock)
		{
		struct goya_device *goya = hdev->asic_specific;

		@@ -5180,6 +5326,8 @@ static const struct hl_asic_funcs goya_funcs = {
		.restore_phase_topology = goya_restore_phase_topology,
		.debugfs_read32 = goya_debugfs_read32,
		.debugfs_write32 = goya_debugfs_write32,
		.debugfs_read64 = goya_debugfs_read64,
		.debugfs_write64 = goya_debugfs_write64,
		.add_device_attr = goya_add_device_attr,
		.handle_eqe = goya_handle_eqe,
		.set_pll_profile = goya_set_pll_profile,

Admin message