Commit f4cbfd24 authored by Ofir Bitton's avatar Ofir Bitton Committed by Oded Gabbay
Browse files

habanalabs: PCIe iATU refactoring



Divide iATU initialization into inbound/outbound methods.
We must separate it in order to enable different match mode
per PCIe region.
In addition, added support for PCI address match mode.

Signed-off-by: default avatarOfir Bitton <obitton@habana.ai>
Reviewed-by: default avatarOded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: default avatarOded Gabbay <oded.gabbay@gmail.com>
parent fcc6a4e6
Loading
Loading
Loading
Loading
+37 −22
Original line number Diff line number Diff line
@@ -465,6 +465,7 @@ static int gaudi_pci_bars_map(struct hl_device *hdev)
static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
{
	struct gaudi_device *gaudi = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;
	int rc;

@@ -472,7 +473,10 @@ static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)
		return old_addr;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	rc = hl_pci_set_dram_bar_base(hdev, 2, 4, addr);
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = HBM_BAR_ID;
	pci_region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 2, &pci_region);
	if (rc)
		return U64_MAX;

@@ -486,22 +490,43 @@ static u64 gaudi_set_hbm_bar_base(struct hl_device *hdev, u64 addr)

static int gaudi_init_iatu(struct hl_device *hdev)
{
	int rc = 0;
	struct hl_inbound_pci_region inbound_region;
	struct hl_outbound_pci_region outbound_region;
	int rc;

	/* Inbound Region 0 - Bar 0 - Point to SRAM + CFG */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = SRAM_BAR_ID;
	inbound_region.addr = SRAM_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 1 - Bar 2 - Point to SPI FLASH */
	rc  = hl_pci_iatu_write(hdev, 0x314,
				lower_32_bits(SPI_FLASH_BASE_ADDR));
	rc |= hl_pci_iatu_write(hdev, 0x318,
				upper_32_bits(SPI_FLASH_BASE_ADDR));
	rc |= hl_pci_iatu_write(hdev, 0x300, 0);
	/* Enable + Bar match + match enable */
	rc |= hl_pci_iatu_write(hdev, 0x304, 0xC0080200);
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = CFG_BAR_ID;
	inbound_region.addr = SPI_FLASH_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 2 - Bar 4 - Point to HBM */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = HBM_BAR_ID;
	inbound_region.addr = DRAM_PHYS_BASE;
	rc = hl_pci_set_inbound_region(hdev, 2, &inbound_region);
	if (rc)
		return -EIO;
		goto done;

	hdev->asic_funcs->set_dma_mask_from_fw(hdev);

	return hl_pci_init_iatu(hdev, SRAM_BASE_ADDR, DRAM_PHYS_BASE,
				HOST_PHYS_BASE, HOST_PHYS_SIZE);
	/* Outbound Region 0 - Point to Host */
	outbound_region.addr = HOST_PHYS_BASE;
	outbound_region.size = HOST_PHYS_SIZE;
	rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
	return rc;
}

static int gaudi_early_init(struct hl_device *hdev)
@@ -2884,16 +2909,6 @@ static int gaudi_hw_init(struct hl_device *hdev)

	gaudi_init_hbm_dma_qmans(hdev);

	/*
	 * Before pushing u-boot/linux to device, need to set the hbm bar to
	 * base address of dram
	 */
	if (gaudi_set_hbm_bar_base(hdev, DRAM_PHYS_BASE) == U64_MAX) {
		dev_err(hdev->dev,
			"failed to map HBM bar to DRAM base address\n");
		return -EIO;
	}

	rc = gaudi_init_cpu(hdev);
	if (rc) {
		dev_err(hdev->dev, "failed to initialize CPU\n");
+34 −3
Original line number Diff line number Diff line
@@ -458,6 +458,7 @@ static int goya_pci_bars_map(struct hl_device *hdev)
static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
{
	struct goya_device *goya = hdev->asic_specific;
	struct hl_inbound_pci_region pci_region;
	u64 old_addr = addr;
	int rc;

@@ -465,7 +466,10 @@ static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
		return old_addr;

	/* Inbound Region 1 - Bar 4 - Point to DDR */
	rc = hl_pci_set_dram_bar_base(hdev, 1, 4, addr);
	pci_region.mode = PCI_BAR_MATCH_MODE;
	pci_region.bar = DDR_BAR_ID;
	pci_region.addr = addr;
	rc = hl_pci_set_inbound_region(hdev, 1, &pci_region);
	if (rc)
		return U64_MAX;

@@ -487,8 +491,35 @@ static u64 goya_set_ddr_bar_base(struct hl_device *hdev, u64 addr)
 */
static int goya_init_iatu(struct hl_device *hdev)
{
	return hl_pci_init_iatu(hdev, SRAM_BASE_ADDR, DRAM_PHYS_BASE,
				HOST_PHYS_BASE, HOST_PHYS_SIZE);
	struct hl_inbound_pci_region inbound_region;
	struct hl_outbound_pci_region outbound_region;
	int rc;

	/* Inbound Region 0 - Bar 0 - Point to SRAM and CFG */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = SRAM_CFG_BAR_ID;
	inbound_region.addr = SRAM_BASE_ADDR;
	rc = hl_pci_set_inbound_region(hdev, 0, &inbound_region);
	if (rc)
		goto done;

	/* Inbound Region 1 - Bar 4 - Point to DDR */
	inbound_region.mode = PCI_BAR_MATCH_MODE;
	inbound_region.bar = DDR_BAR_ID;
	inbound_region.addr = DRAM_PHYS_BASE;
	rc = hl_pci_set_inbound_region(hdev, 1, &inbound_region);
	if (rc)
		goto done;

	hdev->asic_funcs->set_dma_mask_from_fw(hdev);

	/* Outbound Region 0 - Point to Host  */
	outbound_region.addr = HOST_PHYS_BASE;
	outbound_region.size = HOST_PHYS_SIZE;
	rc = hl_pci_set_outbound_region(hdev, &outbound_region);

done:
	return rc;
}

/*
+47 −5
Original line number Diff line number Diff line
@@ -66,6 +66,8 @@
#define IS_POWER_OF_2(n)		(n != 0 && ((n & (n - 1)) == 0))
#define IS_MAX_PENDING_CS_VALID(n)	(IS_POWER_OF_2(n) && (n > 1))

#define HL_PCI_NUM_BARS			6

/**
 * struct pgt_info - MMU hop page info.
 * @node: hash linked-list node for the pgts shadow hash of pgts.
@@ -90,6 +92,16 @@ struct pgt_info {
struct hl_device;
struct hl_fpriv;

/**
 * enum hl_pci_match_mode - pci match mode per region
 * @PCI_ADDRESS_MATCH_MODE: address match mode
 * @PCI_BAR_MATCH_MODE: bar match mode
 */
enum hl_pci_match_mode {
	PCI_ADDRESS_MATCH_MODE,
	PCI_BAR_MATCH_MODE
};

/**
 * enum hl_fw_component - F/W components to read version through registers.
 * @FW_COMP_UBOOT: u-boot.
@@ -125,6 +137,32 @@ enum hl_cs_type {
	CS_TYPE_WAIT
};

/*
 * struct hl_inbound_pci_region - inbound region descriptor
 * @mode: pci match mode for this region
 * @addr: region target address
 * @size: region size in bytes
 * @offset_in_bar: offset within bar (address match mode)
 * @bar: bar id
 */
struct hl_inbound_pci_region {
	enum hl_pci_match_mode	mode;
	u64			addr;
	u64			size;
	u64			offset_in_bar;
	u8			bar;
};

/*
 * struct hl_outbound_pci_region - outbound region descriptor
 * @addr: region target address
 * @size: region size in bytes
 */
struct hl_outbound_pci_region {
	u64	addr;
	u64	size;
};

/*
 * struct hl_hw_sob - H/W SOB info.
 * @hdev: habanalabs device structure.
@@ -1347,7 +1385,9 @@ struct hl_device_idle_busy_ts {
/**
 * struct hl_device - habanalabs device structure.
 * @pdev: pointer to PCI device, can be NULL in case of simulator device.
 * @pcie_bar: array of available PCIe bars.
 * @pcie_bar_phys: array of available PCIe bars physical addresses.
 *		   (required only for PCI address match mode)
 * @pcie_bar: array of available PCIe bars virtual addresses.
 * @rmmio: configuration area address on SRAM.
 * @cdev: related char device.
 * @cdev_ctrl: char device for control operations only (INFO IOCTL)
@@ -1442,7 +1482,8 @@ struct hl_device_idle_busy_ts {
 */
struct hl_device {
	struct pci_dev			*pdev;
	void __iomem			*pcie_bar[6];
	u64				pcie_bar_phys[HL_PCI_NUM_BARS];
	void __iomem			*pcie_bar[HL_PCI_NUM_BARS];
	void __iomem			*rmmio;
	struct cdev			cdev;
	struct cdev			cdev_ctrl;
@@ -1767,9 +1808,10 @@ int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
int hl_pci_iatu_write(struct hl_device *hdev, u32 addr, u32 data);
int hl_pci_set_dram_bar_base(struct hl_device *hdev, u8 inbound_region, u8 bar,
				u64 addr);
int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address,
			u64 dram_base_address, u64 host_phys_base_address,
			u64 host_phys_size);
int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
		struct hl_inbound_pci_region *pci_region);
int hl_pci_set_outbound_region(struct hl_device *hdev,
		struct hl_outbound_pci_region *pci_region);
int hl_pci_init(struct hl_device *hdev);
void hl_pci_fini(struct hl_device *hdev);

+62 −74
Original line number Diff line number Diff line
@@ -9,9 +9,15 @@
#include "include/hw_ip/pci/pci_general.h"

#include <linux/pci.h>
#include <linux/bitfield.h>

#define HL_PLDM_PCI_ELBI_TIMEOUT_MSEC	(HL_PCI_ELBI_TIMEOUT_MSEC * 10)

#define IATU_REGION_CTRL_REGION_EN_MASK		BIT(31)
#define IATU_REGION_CTRL_MATCH_MODE_MASK	BIT(30)
#define IATU_REGION_CTRL_NUM_MATCH_EN_MASK	BIT(19)
#define IATU_REGION_CTRL_BAR_NUM_MASK		GENMASK(10, 8)

/**
 * hl_pci_bars_map() - Map PCI BARs.
 * @hdev: Pointer to hl_device structure.
@@ -187,110 +193,94 @@ static void hl_pci_reset_link_through_bridge(struct hl_device *hdev)
}

/**
 * hl_pci_set_dram_bar_base() - Set DDR BAR to map specific device address.
 * hl_pci_set_inbound_region() - Configure inbound region
 * @hdev: Pointer to hl_device structure.
 * @inbound_region: Inbound region number.
 * @bar: PCI BAR number.
 * @addr: Address in DRAM. Must be aligned to DRAM bar size.
 * @region: Inbound region number.
 * @pci_region: Inbound region parameters.
 *
 * Configure the iATU so that the DRAM bar will start at the specified address.
 * Configure the iATU inbound region.
 *
 * Return: 0 on success, negative value for failure.
 */
int hl_pci_set_dram_bar_base(struct hl_device *hdev, u8 inbound_region, u8 bar,
				u64 addr)
int hl_pci_set_inbound_region(struct hl_device *hdev, u8 region,
		struct hl_inbound_pci_region *pci_region)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u32 offset;
	int rc;
	u64 bar_phys_base, region_base, region_end_address;
	u32 offset, ctrl_reg_val;
	int rc = 0;

	switch (inbound_region) {
	case 0:
		offset = 0x100;
		break;
	case 1:
		offset = 0x300;
		break;
	case 2:
		offset = 0x500;
		break;
	default:
		dev_err(hdev->dev, "Invalid inbound region %d\n",
			inbound_region);
		return -EINVAL;
	}
	/* region offset */
	offset = (0x200 * region) + 0x100;

	if (pci_region->mode == PCI_ADDRESS_MATCH_MODE) {
		bar_phys_base = hdev->pcie_bar_phys[pci_region->bar];
		region_base = bar_phys_base + pci_region->offset_in_bar;
		region_end_address = region_base + pci_region->size - 1;

	if (bar != 0 && bar != 2 && bar != 4) {
		dev_err(hdev->dev, "Invalid PCI BAR %d\n", bar);
		return -EINVAL;
		rc |= hl_pci_iatu_write(hdev, offset + 0x8,
				lower_32_bits(region_base));
		rc |= hl_pci_iatu_write(hdev, offset + 0xC,
				upper_32_bits(region_base));
		rc |= hl_pci_iatu_write(hdev, offset + 0x10,
				lower_32_bits(region_end_address));
	}

	/* Point to the specified address */
	rc = hl_pci_iatu_write(hdev, offset + 0x14, lower_32_bits(addr));
	rc |= hl_pci_iatu_write(hdev, offset + 0x18, upper_32_bits(addr));
	rc = hl_pci_iatu_write(hdev, offset + 0x14,
			lower_32_bits(pci_region->addr));
	rc |= hl_pci_iatu_write(hdev, offset + 0x18,
			upper_32_bits(pci_region->addr));
	rc |= hl_pci_iatu_write(hdev, offset + 0x0, 0);
	/* Enable + BAR match + match enable + BAR number */
	rc |= hl_pci_iatu_write(hdev, offset + 0x4, 0xC0080000 | (bar << 8));

	/* Enable + bar/address match + match enable + bar number */
	ctrl_reg_val = FIELD_PREP(IATU_REGION_CTRL_REGION_EN_MASK, 1);
	ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_MATCH_MODE_MASK,
			pci_region->mode);
	ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_NUM_MATCH_EN_MASK, 1);

	if (pci_region->mode == PCI_BAR_MATCH_MODE)
		ctrl_reg_val |= FIELD_PREP(IATU_REGION_CTRL_BAR_NUM_MASK,
				pci_region->bar);

	rc |= hl_pci_iatu_write(hdev, offset + 0x4, ctrl_reg_val);

	/* Return the DBI window to the default location */
	rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
	rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0);

	if (rc)
		dev_err(hdev->dev, "failed to map DRAM bar to 0x%08llx\n",
			addr);
		dev_err(hdev->dev, "failed to map bar %u to 0x%08llx\n",
				pci_region->bar, pci_region->addr);

	return rc;
}

/**
 * hl_pci_init_iatu() - Initialize the iATU unit inside the PCI controller.
 * hl_pci_set_outbound_region() - Configure outbound region 0
 * @hdev: Pointer to hl_device structure.
 * @sram_base_address: SRAM base address.
 * @dram_base_address: DRAM base address.
 * @host_phys_base_address: Base physical address of host memory for device
 *                          transactions.
 * @host_phys_size: Size of host memory for device transactions.
 * @pci_region: Outbound region parameters.
 *
 * This is needed in case the firmware doesn't initialize the iATU.
 * Configure the iATU outbound region 0.
 *
 * Return: 0 on success, negative value for failure.
 */
int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address,
			u64 dram_base_address, u64 host_phys_base_address,
			u64 host_phys_size)
int hl_pci_set_outbound_region(struct hl_device *hdev,
		struct hl_outbound_pci_region *pci_region)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 host_phys_end_addr;
	u64 outbound_region_end_address;
	int rc = 0;

	/* Inbound Region 0 - Bar 0 - Point to SRAM base address */
	rc  = hl_pci_iatu_write(hdev, 0x114, lower_32_bits(sram_base_address));
	rc |= hl_pci_iatu_write(hdev, 0x118, upper_32_bits(sram_base_address));
	rc |= hl_pci_iatu_write(hdev, 0x100, 0);
	/* Enable + Bar match + match enable */
	rc |= hl_pci_iatu_write(hdev, 0x104, 0xC0080000);

	/* Return the DBI window to the default location */
	rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
	rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0);

	hdev->asic_funcs->set_dma_mask_from_fw(hdev);

	/* Point to DRAM */
	if (!hdev->asic_funcs->set_dram_bar_base)
		return -EINVAL;
	if (hdev->asic_funcs->set_dram_bar_base(hdev, dram_base_address) ==
								U64_MAX)
		return -EIO;

	/* Outbound Region 0 - Point to Host */
	host_phys_end_addr = host_phys_base_address + host_phys_size - 1;
	/* Outbound Region 0 */
	outbound_region_end_address =
			pci_region->addr + pci_region->size - 1;
	rc |= hl_pci_iatu_write(hdev, 0x008,
				lower_32_bits(host_phys_base_address));
				lower_32_bits(pci_region->addr));
	rc |= hl_pci_iatu_write(hdev, 0x00C,
				upper_32_bits(host_phys_base_address));
	rc |= hl_pci_iatu_write(hdev, 0x010, lower_32_bits(host_phys_end_addr));
				upper_32_bits(pci_region->addr));
	rc |= hl_pci_iatu_write(hdev, 0x010,
				lower_32_bits(outbound_region_end_address));
	rc |= hl_pci_iatu_write(hdev, 0x014, 0);

	if ((hdev->power9_64bit_dma_enable) && (hdev->dma_mask == 64))
@@ -298,7 +288,8 @@ int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address,
	else
		rc |= hl_pci_iatu_write(hdev, 0x018, 0);

	rc |= hl_pci_iatu_write(hdev, 0x020, upper_32_bits(host_phys_end_addr));
	rc |= hl_pci_iatu_write(hdev, 0x020,
				upper_32_bits(outbound_region_end_address));
	/* Increase region size */
	rc |= hl_pci_iatu_write(hdev, 0x000, 0x00002000);
	/* Enable */
@@ -308,10 +299,7 @@ int hl_pci_init_iatu(struct hl_device *hdev, u64 sram_base_address,
	rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr, 0);
	rc |= hl_pci_elbi_write(hdev, prop->pcie_aux_dbi_reg_addr + 4, 0);

	if (rc)
		return -EIO;

	return 0;
	return rc;
}

/**