Commit 9d20328d authored by Greg Kroah-Hartman's avatar Greg Kroah-Hartman
Browse files

Merge tag 'misc-habanalabs-next-2020-03-24' of...

Merge tag 'misc-habanalabs-next-2020-03-24' of git://people.freedesktop.org/~gabbayo/linux into char-misc-next

Oded writes:

This tag contains the following changes for kernel 5.7:

- MMU code improvements that include:
  - Flush MMU TLB cache only once, at the end of mapping/unmapping
    function, instead of flushing after mapping of every page.
  - Add future ASIC support by splitting properties of ASIC capabilities
    regarding mapping of host memory to regular and huge pages.

- Add debugfs interface to write and read 64-bit values from the device's
  memory/registers. Previously the driver provided an interface only for
  32-bit values; the new interface allows the user to debug much more
  quickly. We saw it gives a boost of around 1.5x - 1.7x when reading
  internal memories.

- Support temperature offset via sysfs as defined in
  https://www.kernel.org/doc/Documentation/hwmon/sysfs-interface

- Display historical maximum of various sensors.

- Print to kernel log when clock throttling occurs due to a breach of the
  power or thermal envelope. Also print when clock throttling is finished
  (clock is back to optimal).

- Fix bug when moving from manual to auto power-management mode.

- Print a message ("unsupported device") to kernel log in case a GAUDI device
  is recognized.

- Small bug fixes and minor improvements to code.

* tag 'misc-habanalabs-next-2020-03-24' of git://people.freedesktop.org/~gabbayo/linux:
  habanalabs: fix pm manual->auto in GOYA
  habanalabs: show unsupported message for GAUDI
  habanalabs: add print upon clock change
  habanalabs: update goya firmware register map
  habanalabs: Add missing annotation for goya_hw_queues_unlock()
  habanalabs: Add missing annotation for goya_hw_queues_lock()
  habanalabs: Remove unused parse_cnt variable
  habanalabs: provide historical maximum of various sensors
  habanalabs: modify the return values of hl_read/write routines
  habanalabs: support temperature offset via sysfs
  habanalabs: ratelimit error prints of IRQs
  habanalabs: add debugfs write64/read64
  habanalabs: fix DDR bar address setting
  habanalabs: removing extra ;
  habanalabs: Avoid running restore chunks if no execute chunks
  habanalabs: Modify CS jobs counter to u16
  habanalabs: split the host MMU properties
  habanalabs: use the user CB size as a default job size
  habanalabs: flush only at the end of the map/unmap
parents bbde5709 11845501
Loading
Loading
Loading
Loading
+14 −0
Original line number Diff line number Diff line
@@ -43,6 +43,20 @@ Description: Allows the root user to read or write directly through the
                If the IOMMU is disabled, it also allows the root user to read
                or write from the host a device VA of a host mapped memory

What:           /sys/kernel/debug/habanalabs/hl<n>/data64
Date:           Jan 2020
KernelVersion:  5.6
Contact:        oded.gabbay@gmail.com
Description:    Allows the root user to read or write 64 bit data directly
                through the device's PCI bar. Writing to this file generates a
                write transaction while reading from the file generates a read
                transaction. This custom interface is needed (instead of using
                the generic Linux user-space PCI mapping) because the DDR bar
                is very small compared to the DDR memory and only the driver can
                move the bar before and after the transaction.
                If the IOMMU is disabled, it also allows the root user to read
                or write from the host a device VA of a host mapped memory

What:           /sys/kernel/debug/habanalabs/hl<n>/device
Date:           Jan 2019
KernelVersion:  5.1
+25 −26
Original line number Diff line number Diff line
@@ -129,6 +129,8 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
		spin_unlock(&job->user_cb->lock);
		hl_cb_put(job->user_cb);
		job->user_cb = NULL;
	} else if (!rc) {
		job->job_cb_size = job->user_cb_size;
	}

	return rc;
@@ -507,7 +509,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
	struct hl_cb *cb;
	bool int_queues_only = true;
	u32 size_to_copy;
	int rc, i, parse_cnt;
	int rc, i;

	*cs_seq = ULLONG_MAX;

@@ -547,7 +549,7 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
	hl_debugfs_add_cs(cs);

	/* Validate ALL the CS chunks before submitting the CS */
	for (i = 0, parse_cnt = 0 ; i < num_chunks ; i++, parse_cnt++) {
	for (i = 0 ; i < num_chunks ; i++) {
		struct hl_cs_chunk *chunk = &cs_chunk_array[i];
		enum hl_queue_type queue_type;
		bool is_kernel_allocated_cb;
@@ -585,10 +587,6 @@ static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
		job->cs = cs;
		job->user_cb = cb;
		job->user_cb_size = chunk->cb_size;
		if (is_kernel_allocated_cb)
			job->job_cb_size = cb->size;
		else
			job->job_cb_size = chunk->cb_size;
		job->hw_queue_id = chunk->queue_index;

		cs->jobs_in_queue_cnt[job->hw_queue_id]++;
@@ -659,8 +657,8 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
	struct hl_device *hdev = hpriv->hdev;
	union hl_cs_args *args = data;
	struct hl_ctx *ctx = hpriv->ctx;
	void __user *chunks;
	u32 num_chunks;
	void __user *chunks_execute, *chunks_restore;
	u32 num_chunks_execute, num_chunks_restore;
	u64 cs_seq = ULONG_MAX;
	int rc, do_ctx_switch;
	bool need_soft_reset = false;
@@ -673,13 +671,25 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
		goto out;
	}

	chunks_execute = (void __user *) (uintptr_t) args->in.chunks_execute;
	num_chunks_execute = args->in.num_chunks_execute;

	if (!num_chunks_execute) {
		dev_err(hdev->dev,
			"Got execute CS with 0 chunks, context %d\n",
			ctx->asid);
		rc = -EINVAL;
		goto out;
	}

	do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);

	if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
		long ret;

		chunks = (void __user *)(uintptr_t)args->in.chunks_restore;
		num_chunks = args->in.num_chunks_restore;
		chunks_restore =
			(void __user *) (uintptr_t) args->in.chunks_restore;
		num_chunks_restore = args->in.num_chunks_restore;

		mutex_lock(&hpriv->restore_phase_mutex);

@@ -707,13 +717,13 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)

		hdev->asic_funcs->restore_phase_topology(hdev);

		if (num_chunks == 0) {
		if (!num_chunks_restore) {
			dev_dbg(hdev->dev,
			"Need to run restore phase but restore CS is empty\n");
			rc = 0;
		} else {
			rc = _hl_cs_ioctl(hpriv, chunks, num_chunks,
						&cs_seq);
			rc = _hl_cs_ioctl(hpriv, chunks_restore,
						num_chunks_restore, &cs_seq);
		}

		mutex_unlock(&hpriv->restore_phase_mutex);
@@ -726,7 +736,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
		}

		/* Need to wait for restore completion before execution phase */
		if (num_chunks > 0) {
		if (num_chunks_restore) {
			ret = _hl_cs_wait_ioctl(hdev, ctx,
					jiffies_to_usecs(hdev->timeout_jiffies),
					cs_seq);
@@ -754,18 +764,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
		}
	}

	chunks = (void __user *)(uintptr_t)args->in.chunks_execute;
	num_chunks = args->in.num_chunks_execute;

	if (num_chunks == 0) {
		dev_err(hdev->dev,
			"Got execute CS with 0 chunks, context %d\n",
			ctx->asid);
		rc = -EINVAL;
		goto out;
	}

	rc = _hl_cs_ioctl(hpriv, chunks, num_chunks, &cs_seq);
	rc = _hl_cs_ioctl(hpriv, chunks_execute, num_chunks_execute, &cs_seq);

out:
	if (rc != -EAGAIN) {
+84 −8
Original line number Diff line number Diff line
@@ -393,9 +393,10 @@ static int mmu_show(struct seq_file *s, void *data)
	}

	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
				prop->va_space_dram_start_address,
				prop->va_space_dram_end_address);
						prop->dmmu.start_addr,
						prop->dmmu.end_addr);

	/* shifts and masks are the same in PMMU and HPMMU, use one of them */
	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;

	mutex_lock(&ctx->mmu_lock);
@@ -547,12 +548,15 @@ static bool hl_is_device_va(struct hl_device *hdev, u64 addr)
		goto out;

	if (hdev->dram_supports_virtual_memory &&
			addr >= prop->va_space_dram_start_address &&
			addr < prop->va_space_dram_end_address)
		(addr >= prop->dmmu.start_addr && addr < prop->dmmu.end_addr))
		return true;

	if (addr >= prop->va_space_host_start_address &&
			addr < prop->va_space_host_end_address)
	if (addr >= prop->pmmu.start_addr &&
		addr < prop->pmmu.end_addr)
		return true;

	if (addr >= prop->pmmu_huge.start_addr &&
		addr < prop->pmmu_huge.end_addr)
		return true;
out:
	return false;
@@ -575,9 +579,10 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr,
	}

	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
				prop->va_space_dram_start_address,
				prop->va_space_dram_end_address);
						prop->dmmu.start_addr,
						prop->dmmu.end_addr);

	/* shifts and masks are the same in PMMU and HPMMU, use one of them */
	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;

	mutex_lock(&ctx->mmu_lock);
@@ -705,6 +710,65 @@ static ssize_t hl_data_write32(struct file *f, const char __user *buf,
	return count;
}

/*
 * hl_data_read64 - debugfs read handler for the 64-bit data node
 *
 * @f: debugfs file; its inode private data is the hl_dbg_device_entry
 * @buf: user buffer that receives the formatted value
 * @count: size of the user buffer
 * @ppos: file position; any non-zero position is treated as end of file
 *
 * Reads a 64-bit value from the address stored in the debugfs entry through
 * the ASIC-specific debugfs_read64 callback and hands it to user space as a
 * "0x%016llx\n" string.
 *
 * Return: number of bytes copied to user space, 0 when *ppos is non-zero,
 * or the error code returned by device_va_to_pa() / debugfs_read64().
 */
static ssize_t hl_data_read64(struct file *f, char __user *buf,
					size_t count, loff_t *ppos)
{
	struct hl_dbg_device_entry *dbg = file_inode(f)->i_private;
	struct hl_device *hdev = dbg->hdev;
	u64 target = dbg->addr;
	char text[32];
	u64 data;
	ssize_t ret;
	int len;

	/* The value is produced in one shot; a follow-up read returns EOF */
	if (*ppos != 0)
		return 0;

	/* Addresses recognized as device VA are converted in place */
	if (hl_is_device_va(hdev, target)) {
		ret = device_va_to_pa(hdev, target, &target);
		if (ret != 0)
			return ret;
	}

	ret = hdev->asic_funcs->debugfs_read64(hdev, target, &data);
	if (ret != 0) {
		dev_err(hdev->dev, "Failed to read from 0x%010llx\n", target);
		return ret;
	}

	/* sprintf() returns the number of characters written (w/o the NUL) */
	len = sprintf(text, "0x%016llx\n", data);

	return simple_read_from_buffer(buf, count, ppos, text, len);
}

/*
 * hl_data_write64 - debugfs write handler for the 64-bit data node
 *
 * @f: debugfs file; its inode private data is the hl_dbg_device_entry
 * @buf: user buffer holding the value as a base-16 string
 * @count: number of bytes in the user buffer
 * @ppos: file position (not used by this handler)
 *
 * Parses a 64-bit hexadecimal value from user space and writes it to the
 * address stored in the debugfs entry through the ASIC-specific
 * debugfs_write64 callback.
 *
 * Return: @count on success, or the error code returned by
 * kstrtoull_from_user() / device_va_to_pa() / debugfs_write64().
 */
static ssize_t hl_data_write64(struct file *f, const char __user *buf,
					size_t count, loff_t *ppos)
{
	struct hl_dbg_device_entry *dbg = file_inode(f)->i_private;
	struct hl_device *hdev = dbg->hdev;
	u64 target = dbg->addr;
	u64 data;
	ssize_t ret;

	/* Input is always interpreted as base 16 */
	ret = kstrtoull_from_user(buf, count, 16, &data);
	if (ret != 0)
		return ret;

	/* Addresses recognized as device VA are converted in place */
	if (hl_is_device_va(hdev, target)) {
		ret = device_va_to_pa(hdev, target, &target);
		if (ret != 0)
			return ret;
	}

	ret = hdev->asic_funcs->debugfs_write64(hdev, target, data);
	if (ret != 0) {
		dev_err(hdev->dev, "Failed to write 0x%016llx to 0x%010llx\n",
			data, target);
		return ret;
	}

	/* Report the whole user buffer as consumed */
	return count;
}

static ssize_t hl_get_power_state(struct file *f, char __user *buf,
		size_t count, loff_t *ppos)
{
@@ -912,6 +976,12 @@ static const struct file_operations hl_data32b_fops = {
	.write = hl_data_write32
};

/*
 * File operations for the "data64" debugfs node: reads and writes are
 * served by the 64-bit handlers hl_data_read64() / hl_data_write64().
 */
static const struct file_operations hl_data64b_fops = {
	.owner = THIS_MODULE,
	.read = hl_data_read64,
	.write = hl_data_write64
};

static const struct file_operations hl_i2c_data_fops = {
	.owner = THIS_MODULE,
	.read = hl_i2c_data_read,
@@ -1025,6 +1095,12 @@ void hl_debugfs_add_device(struct hl_device *hdev)
				dev_entry,
				&hl_data32b_fops);

	debugfs_create_file("data64",
				0644,
				dev_entry->root,
				dev_entry,
				&hl_data64b_fops);

	debugfs_create_file("set_power_state",
				0200,
				dev_entry->root,
+1 −1
Original line number Diff line number Diff line
@@ -36,7 +36,7 @@ enum hl_device_status hl_device_status(struct hl_device *hdev)
		status = HL_DEVICE_STATUS_OPERATIONAL;

	return status;
};
}

static void hpriv_release(struct kref *ref)
{
+176 −28
Original line number Diff line number Diff line
@@ -324,7 +324,11 @@ static u32 goya_all_events[] = {
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH1,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH2,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH3,
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH4
	GOYA_ASYNC_EVENT_ID_DMA_BM_CH4,
	GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S,
	GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E,
	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S,
	GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E
};

static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
@@ -393,19 +397,21 @@ void goya_get_fixed_properties(struct hl_device *hdev)
	prop->dmmu.hop2_mask = HOP2_MASK;
	prop->dmmu.hop3_mask = HOP3_MASK;
	prop->dmmu.hop4_mask = HOP4_MASK;
	prop->dmmu.huge_page_size = PAGE_SIZE_2MB;
	prop->dmmu.start_addr = VA_DDR_SPACE_START;
	prop->dmmu.end_addr = VA_DDR_SPACE_END;
	prop->dmmu.page_size = PAGE_SIZE_2MB;

	/* No difference between PMMU and DMMU except of page size */
	/* shifts and masks are the same in PMMU and DMMU */
	memcpy(&prop->pmmu, &prop->dmmu, sizeof(prop->dmmu));
	prop->dmmu.page_size = PAGE_SIZE_2MB;
	prop->pmmu.start_addr = VA_HOST_SPACE_START;
	prop->pmmu.end_addr = VA_HOST_SPACE_END;
	prop->pmmu.page_size = PAGE_SIZE_4KB;

	prop->va_space_host_start_address = VA_HOST_SPACE_START;
	prop->va_space_host_end_address = VA_HOST_SPACE_END;
	prop->va_space_dram_start_address = VA_DDR_SPACE_START;
	prop->va_space_dram_end_address = VA_DDR_SPACE_END;
	prop->dram_size_for_default_page_mapping =
			prop->va_space_dram_end_address;
	/* PMMU and HPMMU are the same except of page size */
	memcpy(&prop->pmmu_huge, &prop->pmmu, sizeof(prop->pmmu));
	prop->pmmu_huge.page_size = PAGE_SIZE_2MB;

	prop->dram_size_for_default_page_mapping = VA_DDR_SPACE_END;
	prop->cfg_size = CFG_SIZE;
	prop->max_asid = MAX_ASID;
	prop->num_of_events = GOYA_ASYNC_EVENT_ID_SIZE;
@@ -2573,8 +2579,7 @@ static int goya_hw_init(struct hl_device *hdev)
	 * After CPU initialization is finished, change DDR bar mapping inside
	 * iATU to point to the start address of the MMU page tables
	 */
	if (goya_set_ddr_bar_base(hdev, DRAM_PHYS_BASE +
			(MMU_PAGE_TABLES_ADDR &
	if (goya_set_ddr_bar_base(hdev, (MMU_PAGE_TABLES_ADDR &
			~(prop->dram_pci_bar_size - 0x1ull))) == U64_MAX) {
		dev_err(hdev->dev,
			"failed to map DDR bar to MMU page tables\n");
@@ -3443,12 +3448,13 @@ static int goya_validate_dma_pkt_mmu(struct hl_device *hdev,
	/*
	 * WA for HW-23.
	 * We can't allow user to read from Host using QMANs other than 1.
	 * PMMU and HPMMU addresses are equal, check only one of them.
	 */
	if (parser->hw_queue_id != GOYA_QUEUE_ID_DMA_1 &&
		hl_mem_area_inside_range(le64_to_cpu(user_dma_pkt->src_addr),
				le32_to_cpu(user_dma_pkt->tsize),
				hdev->asic_prop.va_space_host_start_address,
				hdev->asic_prop.va_space_host_end_address)) {
				hdev->asic_prop.pmmu.start_addr,
				hdev->asic_prop.pmmu.end_addr)) {
		dev_err(hdev->dev,
			"Can't DMA from host on queue other then 1\n");
		return -EFAULT;
@@ -4178,6 +4184,96 @@ static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
	return rc;
}

/*
 * goya_debugfs_read64 - read a 64-bit value from a device/host address
 *
 * @hdev: habanalabs device structure
 * @addr: address to read from
 * @val: output, receives the value that was read
 *
 * The access method is selected by the range @addr falls in:
 *  - configuration space: two 32-bit register reads combined into one u64
 *  - SRAM: a single readq() through the SRAM/CFG PCI BAR
 *  - DRAM: the DDR BAR is moved to cover @addr, read with readq(), and then
 *    moved back
 *  - host physical memory: direct dereference, only when no IOMMU is present
 *    on the PCI bus
 *
 * Every range check leaves room for a full 8-byte access (hence the
 * "- sizeof(u64)" on the upper bound).
 *
 * Return: 0 on success, -EIO if moving the DDR BAR failed, -EFAULT if @addr
 * does not fall in any supported range.
 */
static int goya_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 ddr_bar_addr;
	int rc = 0;

	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
		/* Low word first, then the high word at addr + 4 */
		u32 val_l = RREG32(addr - CFG_BASE);
		u32 val_h = RREG32(addr + sizeof(u32) - CFG_BASE);

		*val = (((u64) val_h) << 32) | val_l;

	} else if ((addr >= SRAM_BASE_ADDR) &&
			(addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) {

		*val = readq(hdev->pcie_bar[SRAM_CFG_BAR_ID] +
				(addr - SRAM_BASE_ADDR));

	} else if ((addr >= DRAM_PHYS_BASE) &&
		   (addr <=
		    DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64))) {

		/*
		 * Round addr down to a BAR-size boundary.
		 * NOTE(review): addr is already >= DRAM_PHYS_BASE here, so
		 * adding DRAM_PHYS_BASE again is only correct if that base is
		 * 0 (or otherwise cancels out) - confirm against its
		 * definition.
		 */
		u64 bar_base_addr = DRAM_PHYS_BASE +
				(addr & ~(prop->dram_pci_bar_size - 0x1ull));

		/*
		 * goya_set_ddr_bar_base() signals failure with U64_MAX; its
		 * successful return value is passed back below to restore the
		 * BAR, so it is presumably the previous base - TODO confirm.
		 */
		ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
		if (ddr_bar_addr != U64_MAX) {
			*val = readq(hdev->pcie_bar[DDR_BAR_ID] +
						(addr - bar_base_addr));

			/* Put the BAR back where it was before the access */
			ddr_bar_addr = goya_set_ddr_bar_base(hdev,
							ddr_bar_addr);
		}
		/* Covers a failure of either the move or the restore */
		if (ddr_bar_addr == U64_MAX)
			rc = -EIO;

	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
		/* Host memory is read through the kernel's direct mapping */
		*val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);

	} else {
		rc = -EFAULT;
	}

	return rc;
}

/*
 * goya_debugfs_write64 - write a 64-bit value to a device/host address
 *
 * @hdev: habanalabs device structure
 * @addr: address to write to
 * @val: value to write
 *
 * The access method is selected by the range @addr falls in:
 *  - configuration space: two 32-bit register writes (low word, then high)
 *  - SRAM: a single writeq() through the SRAM/CFG PCI BAR
 *  - DRAM: the DDR BAR is moved to cover @addr, written with writeq(), and
 *    then moved back
 *  - host physical memory: direct store, only when no IOMMU is present on
 *    the PCI bus
 *
 * Every range check leaves room for a full 8-byte access (hence the
 * "- sizeof(u64)" on the upper bound).
 *
 * Return: 0 on success, -EIO if moving the DDR BAR failed, -EFAULT if @addr
 * does not fall in any supported range.
 */
static int goya_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 ddr_bar_addr;
	int rc = 0;

	if ((addr >= CFG_BASE) && (addr <= CFG_BASE + CFG_SIZE - sizeof(u64))) {
		/* Low word at addr, high word at addr + 4 */
		WREG32(addr - CFG_BASE, lower_32_bits(val));
		WREG32(addr + sizeof(u32) - CFG_BASE, upper_32_bits(val));

	} else if ((addr >= SRAM_BASE_ADDR) &&
			(addr <= SRAM_BASE_ADDR + SRAM_SIZE - sizeof(u64))) {

		writeq(val, hdev->pcie_bar[SRAM_CFG_BAR_ID] +
					(addr - SRAM_BASE_ADDR));

	} else if ((addr >= DRAM_PHYS_BASE) &&
		   (addr <=
		    DRAM_PHYS_BASE + hdev->asic_prop.dram_size - sizeof(u64))) {

		/*
		 * Round addr down to a BAR-size boundary.
		 * NOTE(review): addr is already >= DRAM_PHYS_BASE here, so
		 * adding DRAM_PHYS_BASE again is only correct if that base is
		 * 0 (or otherwise cancels out) - confirm against its
		 * definition.
		 */
		u64 bar_base_addr = DRAM_PHYS_BASE +
				(addr & ~(prop->dram_pci_bar_size - 0x1ull));

		/*
		 * goya_set_ddr_bar_base() signals failure with U64_MAX; its
		 * successful return value is passed back below to restore the
		 * BAR, so it is presumably the previous base - TODO confirm.
		 */
		ddr_bar_addr = goya_set_ddr_bar_base(hdev, bar_base_addr);
		if (ddr_bar_addr != U64_MAX) {
			writeq(val, hdev->pcie_bar[DDR_BAR_ID] +
						(addr - bar_base_addr));

			/* Put the BAR back where it was before the access */
			ddr_bar_addr = goya_set_ddr_bar_base(hdev,
							ddr_bar_addr);
		}
		/* Covers a failure of either the move or the restore */
		if (ddr_bar_addr == U64_MAX)
			rc = -EIO;

	} else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
		/* Host memory is written through the kernel's direct mapping */
		*(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;

	} else {
		rc = -EFAULT;
	}

	return rc;
}

static u64 goya_read_pte(struct hl_device *hdev, u64 addr)
{
	struct goya_device *goya = hdev->asic_specific;
@@ -4297,6 +4393,14 @@ static const char *_goya_get_event_desc(u16 event_type)
		return "TPC%d_bmon_spmu";
	case GOYA_ASYNC_EVENT_ID_DMA_BM_CH0 ... GOYA_ASYNC_EVENT_ID_DMA_BM_CH4:
		return "DMA_bm_ch%d";
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
		return "POWER_ENV_S";
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
		return "POWER_ENV_E";
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
		return "THERMAL_ENV_S";
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
		return "THERMAL_ENV_E";
	default:
		return "N/A";
	}
@@ -4388,22 +4492,22 @@ static void goya_get_event_desc(u16 event_type, char *desc, size_t size)
static void goya_print_razwi_info(struct hl_device *hdev)
{
	if (RREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD)) {
		dev_err(hdev->dev, "Illegal write to LBW\n");
		dev_err_ratelimited(hdev->dev, "Illegal write to LBW\n");
		WREG32(mmDMA_MACRO_RAZWI_LBW_WT_VLD, 0);
	}

	if (RREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD)) {
		dev_err(hdev->dev, "Illegal read from LBW\n");
		dev_err_ratelimited(hdev->dev, "Illegal read from LBW\n");
		WREG32(mmDMA_MACRO_RAZWI_LBW_RD_VLD, 0);
	}

	if (RREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD)) {
		dev_err(hdev->dev, "Illegal write to HBW\n");
		dev_err_ratelimited(hdev->dev, "Illegal write to HBW\n");
		WREG32(mmDMA_MACRO_RAZWI_HBW_WT_VLD, 0);
	}

	if (RREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD)) {
		dev_err(hdev->dev, "Illegal read from HBW\n");
		dev_err_ratelimited(hdev->dev, "Illegal read from HBW\n");
		WREG32(mmDMA_MACRO_RAZWI_HBW_RD_VLD, 0);
	}
}
@@ -4423,7 +4527,8 @@ static void goya_print_mmu_error_info(struct hl_device *hdev)
		addr <<= 32;
		addr |= RREG32(mmMMU_PAGE_ERROR_CAPTURE_VA);

		dev_err(hdev->dev, "MMU page fault on va 0x%llx\n", addr);
		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n",
					addr);

		WREG32(mmMMU_PAGE_ERROR_CAPTURE, 0);
	}
@@ -4435,7 +4540,7 @@ static void goya_print_irq_info(struct hl_device *hdev, u16 event_type,
	char desc[20] = "";

	goya_get_event_desc(event_type, desc, sizeof(desc));
	dev_err(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
		event_type, desc);

	if (razwi) {
@@ -4526,6 +4631,33 @@ static int goya_unmask_irq(struct hl_device *hdev, u16 event_type)
	return rc;
}

/*
 * goya_print_clk_change_info - log a clock-throttling start/end event
 *
 * @hdev: habanalabs device structure
 * @event_type: one of the GOYA_ASYNC_EVENT_ID_FIX_{POWER,THERMAL}_ENV_{S,E}
 *              event IDs
 *
 * Prints a rate-limited informational message for each of the four known
 * events. Any other event ID is reported as an error.
 */
static void goya_print_clk_change_info(struct hl_device *hdev, u16 event_type)
{
	/*
	 * Each message keeps its own dev_info_ratelimited() call so that the
	 * messages are rate-limited independently of each other.
	 */
	if (event_type == GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S) {
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to power consumption\n");
	} else if (event_type == GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E) {
		dev_info_ratelimited(hdev->dev,
			"Power envelop is safe, back to optimal clock\n");
	} else if (event_type == GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S) {
		dev_info_ratelimited(hdev->dev,
			"Clock throttling due to overheating\n");
	} else if (event_type == GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E) {
		dev_info_ratelimited(hdev->dev,
			"Thermal envelop is safe, back to optimal clock\n");
	} else {
		dev_err(hdev->dev, "Received invalid clock change event %d\n",
			event_type);
	}
}

void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
{
	u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
@@ -4609,6 +4741,14 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
		goya_unmask_irq(hdev, event_type);
		break;

	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_S:
	case GOYA_ASYNC_EVENT_ID_FIX_POWER_ENV_E:
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_S:
	case GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E:
		goya_print_clk_change_info(hdev, event_type);
		goya_unmask_irq(hdev, event_type);
		break;

	default:
		dev_err(hdev->dev, "Received invalid H/W interrupt %d\n",
				event_type);
@@ -4776,7 +4916,8 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)

	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
		rc = hl_mmu_map(hdev->kernel_ctx, prop->dram_base_address + off,
				prop->dram_base_address + off, PAGE_SIZE_2MB);
				prop->dram_base_address + off, PAGE_SIZE_2MB,
				(off + PAGE_SIZE_2MB) == CPU_FW_IMAGE_SIZE);
		if (rc) {
			dev_err(hdev->dev, "Map failed for address 0x%llx\n",
				prop->dram_base_address + off);
@@ -4786,7 +4927,7 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)

	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
		rc = hl_mmu_map(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
			hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB);
			hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB, true);

		if (rc) {
			dev_err(hdev->dev,
@@ -4799,7 +4940,7 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
			rc = hl_mmu_map(hdev->kernel_ctx,
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
				hdev->cpu_accessible_dma_address + cpu_off,
				PAGE_SIZE_4KB);
				PAGE_SIZE_4KB, true);
			if (rc) {
				dev_err(hdev->dev,
					"Map failed for CPU accessible memory\n");
@@ -4825,14 +4966,15 @@ unmap_cpu:
	for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
		if (hl_mmu_unmap(hdev->kernel_ctx,
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
				PAGE_SIZE_4KB))
				PAGE_SIZE_4KB, true))
			dev_warn_ratelimited(hdev->dev,
				"failed to unmap address 0x%llx\n",
				VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
unmap:
	for (; off >= 0 ; off -= PAGE_SIZE_2MB)
		if (hl_mmu_unmap(hdev->kernel_ctx,
				prop->dram_base_address + off, PAGE_SIZE_2MB))
				prop->dram_base_address + off, PAGE_SIZE_2MB,
				true))
			dev_warn_ratelimited(hdev->dev,
				"failed to unmap address 0x%llx\n",
				prop->dram_base_address + off);
@@ -4857,14 +4999,15 @@ void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)

	if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
		if (hl_mmu_unmap(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
				PAGE_SIZE_2MB))
				PAGE_SIZE_2MB, true))
			dev_warn(hdev->dev,
				"Failed to unmap CPU accessible memory\n");
	} else {
		for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
			if (hl_mmu_unmap(hdev->kernel_ctx,
					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
					PAGE_SIZE_4KB))
					PAGE_SIZE_4KB,
					(cpu_off + PAGE_SIZE_4KB) >= SZ_2M))
				dev_warn_ratelimited(hdev->dev,
					"failed to unmap address 0x%llx\n",
					VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
@@ -4872,7 +5015,8 @@ void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)

	for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
		if (hl_mmu_unmap(hdev->kernel_ctx,
				prop->dram_base_address + off, PAGE_SIZE_2MB))
				prop->dram_base_address + off, PAGE_SIZE_2MB,
				(off + PAGE_SIZE_2MB) >= CPU_FW_IMAGE_SIZE))
			dev_warn_ratelimited(hdev->dev,
					"Failed to unmap address 0x%llx\n",
					prop->dram_base_address + off);
@@ -5113,6 +5257,7 @@ static bool goya_is_device_idle(struct hl_device *hdev, u32 *mask,
}

static void goya_hw_queues_lock(struct hl_device *hdev)
	__acquires(&goya->hw_queues_lock)
{
	struct goya_device *goya = hdev->asic_specific;

@@ -5120,6 +5265,7 @@ static void goya_hw_queues_lock(struct hl_device *hdev)
}

static void goya_hw_queues_unlock(struct hl_device *hdev)
	__releases(&goya->hw_queues_lock)
{
	struct goya_device *goya = hdev->asic_specific;

@@ -5180,6 +5326,8 @@ static const struct hl_asic_funcs goya_funcs = {
	.restore_phase_topology = goya_restore_phase_topology,
	.debugfs_read32 = goya_debugfs_read32,
	.debugfs_write32 = goya_debugfs_write32,
	.debugfs_read64 = goya_debugfs_read64,
	.debugfs_write64 = goya_debugfs_write64,
	.add_device_attr = goya_add_device_attr,
	.handle_eqe = goya_handle_eqe,
	.set_pll_profile = goya_set_pll_profile,
Loading