Commit 49b6ed1a authored by Joerg Roedel's avatar Joerg Roedel
Browse files

Merge tag 'arm-smmu-updates' of...

Merge tag 'arm-smmu-updates' of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux into arm/smmu

Arm SMMU updates for 5.7

- Support for the TLB range invalidation command in SMMUv3.2

- Introduction of command batching helpers...

- ... which are then used to batch up CD and ATC invalidation

- Support for PCI PASID, along with necessary PCI symbol exports

- MAINTAINERS update to include DT binding docs
parents f8788d86 6a481a95
Loading
Loading
Loading
Loading
+1 −0
Original line number Diff line number Diff line
@@ -1426,6 +1426,7 @@ M: Will Deacon <will@kernel.org>
R:	Robin Murphy <robin.murphy@arm.com>
L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S:	Maintained
F:	Documentation/devicetree/bindings/iommu/arm,smmu*
F:	drivers/iommu/arm-smmu*
F:	drivers/iommu/io-pgtable-arm.c
F:	drivers/iommu/io-pgtable-arm-v7s.c
+176 −28
Original line number Diff line number Diff line
@@ -69,6 +69,9 @@
#define IDR1_SSIDSIZE			GENMASK(10, 6)
#define IDR1_SIDSIZE			GENMASK(5, 0)

#define ARM_SMMU_IDR3			0xc
#define IDR3_RIL			(1 << 10)

#define ARM_SMMU_IDR5			0x14
#define IDR5_STALL_MAX			GENMASK(31, 16)
#define IDR5_GRAN64K			(1 << 6)
@@ -346,9 +349,14 @@
#define CMDQ_CFGI_1_LEAF		(1UL << 0)
#define CMDQ_CFGI_1_RANGE		GENMASK_ULL(4, 0)

#define CMDQ_TLBI_0_NUM			GENMASK_ULL(16, 12)
#define CMDQ_TLBI_RANGE_NUM_MAX		31
#define CMDQ_TLBI_0_SCALE		GENMASK_ULL(24, 20)
#define CMDQ_TLBI_0_VMID		GENMASK_ULL(47, 32)
#define CMDQ_TLBI_0_ASID		GENMASK_ULL(63, 48)
#define CMDQ_TLBI_1_LEAF		(1UL << 0)
#define CMDQ_TLBI_1_TTL			GENMASK_ULL(9, 8)
#define CMDQ_TLBI_1_TG			GENMASK_ULL(11, 10)
#define CMDQ_TLBI_1_VA_MASK		GENMASK_ULL(63, 12)
#define CMDQ_TLBI_1_IPA_MASK		GENMASK_ULL(51, 12)

@@ -473,9 +481,13 @@ struct arm_smmu_cmdq_ent {
		#define CMDQ_OP_TLBI_S2_IPA	0x2a
		#define CMDQ_OP_TLBI_NSNH_ALL	0x30
		struct {
			u8			num;
			u8			scale;
			u16			asid;
			u16			vmid;
			bool			leaf;
			u8			ttl;
			u8			tg;
			u64			addr;
		} tlbi;

@@ -548,6 +560,11 @@ struct arm_smmu_cmdq {
	atomic_t			lock;
};

struct arm_smmu_cmdq_batch {
	u64				cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS];
	int				num;
};

struct arm_smmu_evtq {
	struct arm_smmu_queue		q;
	u32				max_stalls;
@@ -627,6 +644,7 @@ struct arm_smmu_device {
#define ARM_SMMU_FEAT_HYP		(1 << 12)
#define ARM_SMMU_FEAT_STALL_FORCE	(1 << 13)
#define ARM_SMMU_FEAT_VAX		(1 << 14)
#define ARM_SMMU_FEAT_RANGE_INV		(1 << 15)
	u32				features;

#define ARM_SMMU_OPT_SKIP_PREFETCH	(1 << 0)
@@ -895,14 +913,22 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent)
		cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31);
		break;
	case CMDQ_OP_TLBI_NH_VA:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_ASID, ent->tlbi.asid);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_VA_MASK;
		break;
	case CMDQ_OP_TLBI_S2_IPA:
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_NUM, ent->tlbi.num);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_SCALE, ent->tlbi.scale);
		cmd[0] |= FIELD_PREP(CMDQ_TLBI_0_VMID, ent->tlbi.vmid);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_LEAF, ent->tlbi.leaf);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TTL, ent->tlbi.ttl);
		cmd[1] |= FIELD_PREP(CMDQ_TLBI_1_TG, ent->tlbi.tg);
		cmd[1] |= ent->tlbi.addr & CMDQ_TLBI_1_IPA_MASK;
		break;
	case CMDQ_OP_TLBI_NH_ASID:
@@ -1482,6 +1508,24 @@ static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu)
	return arm_smmu_cmdq_issue_cmdlist(smmu, NULL, 0, true);
}

static void arm_smmu_cmdq_batch_add(struct arm_smmu_device *smmu,
				    struct arm_smmu_cmdq_batch *cmds,
				    struct arm_smmu_cmdq_ent *cmd)
{
	if (cmds->num == CMDQ_BATCH_ENTRIES) {
		arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, false);
		cmds->num = 0;
	}
	arm_smmu_cmdq_build_cmd(&cmds->cmds[cmds->num * CMDQ_ENT_DWORDS], cmd);
	cmds->num++;
}

static int arm_smmu_cmdq_batch_submit(struct arm_smmu_device *smmu,
				      struct arm_smmu_cmdq_batch *cmds)
{
	return arm_smmu_cmdq_issue_cmdlist(smmu, cmds->cmds, cmds->num, true);
}

/* Context descriptor manipulation functions */
static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
			     int ssid, bool leaf)
@@ -1489,6 +1533,7 @@ static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
	size_t i;
	unsigned long flags;
	struct arm_smmu_master *master;
	struct arm_smmu_cmdq_batch cmds = {};
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	struct arm_smmu_cmdq_ent cmd = {
		.opcode	= CMDQ_OP_CFGI_CD,
@@ -1502,12 +1547,12 @@ static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
		for (i = 0; i < master->num_sids; i++) {
			cmd.cfgi.sid = master->sids[i];
			arm_smmu_cmdq_issue_cmd(smmu, &cmd);
			arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
		}
	}
	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);

	arm_smmu_cmdq_issue_sync(smmu);
	arm_smmu_cmdq_batch_submit(smmu, &cmds);
}

static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu,
@@ -1531,6 +1576,7 @@ static void arm_smmu_write_cd_l1_desc(__le64 *dst,
	u64 val = (l1_desc->l2ptr_dma & CTXDESC_L1_DESC_L2PTR_MASK) |
		  CTXDESC_L1_DESC_V;

	/* See comment in arm_smmu_write_ctx_desc() */
	WRITE_ONCE(*dst, cpu_to_le64(val));
}

@@ -1726,7 +1772,8 @@ arm_smmu_write_strtab_l1_desc(__le64 *dst, struct arm_smmu_strtab_l1_desc *desc)
	val |= FIELD_PREP(STRTAB_L1_DESC_SPAN, desc->span);
	val |= desc->l2ptr_dma & STRTAB_L1_DESC_L2PTR_MASK;

	*dst = cpu_to_le64(val);
	/* See comment in arm_smmu_write_ctx_desc() */
	WRITE_ONCE(*dst, cpu_to_le64(val));
}

static void arm_smmu_sync_ste_for_sid(struct arm_smmu_device *smmu, u32 sid)
@@ -2132,17 +2179,16 @@ arm_smmu_atc_inv_to_cmd(int ssid, unsigned long iova, size_t size,
	cmd->atc.size	= log2_span;
}

static int arm_smmu_atc_inv_master(struct arm_smmu_master *master,
				   struct arm_smmu_cmdq_ent *cmd)
static int arm_smmu_atc_inv_master(struct arm_smmu_master *master)
{
	int i;
	struct arm_smmu_cmdq_ent cmd;

	if (!master->ats_enabled)
		return 0;
	arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);

	for (i = 0; i < master->num_sids; i++) {
		cmd->atc.sid = master->sids[i];
		arm_smmu_cmdq_issue_cmd(master->smmu, cmd);
		cmd.atc.sid = master->sids[i];
		arm_smmu_cmdq_issue_cmd(master->smmu, &cmd);
	}

	return arm_smmu_cmdq_issue_sync(master->smmu);
@@ -2151,10 +2197,11 @@ static int arm_smmu_atc_inv_master(struct arm_smmu_master *master,
static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
				   int ssid, unsigned long iova, size_t size)
{
	int ret = 0;
	int i;
	unsigned long flags;
	struct arm_smmu_cmdq_ent cmd;
	struct arm_smmu_master *master;
	struct arm_smmu_cmdq_batch cmds = {};

	if (!(smmu_domain->smmu->features & ARM_SMMU_FEAT_ATS))
		return 0;
@@ -2179,11 +2226,18 @@ static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain,
	arm_smmu_atc_inv_to_cmd(ssid, iova, size, &cmd);

	spin_lock_irqsave(&smmu_domain->devices_lock, flags);
	list_for_each_entry(master, &smmu_domain->devices, domain_head)
		ret |= arm_smmu_atc_inv_master(master, &cmd);
	list_for_each_entry(master, &smmu_domain->devices, domain_head) {
		if (!master->ats_enabled)
			continue;

		for (i = 0; i < master->num_sids; i++) {
			cmd.atc.sid = master->sids[i];
			arm_smmu_cmdq_batch_add(smmu_domain->smmu, &cmds, &cmd);
		}
	}
	spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);

	return ret ? -ETIMEDOUT : 0;
	return arm_smmu_cmdq_batch_submit(smmu_domain->smmu, &cmds);
}

/* IO_PGTABLE API */
@@ -2218,10 +2272,10 @@ static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size,
				   size_t granule, bool leaf,
				   struct arm_smmu_domain *smmu_domain)
{
	u64 cmds[CMDQ_BATCH_ENTRIES * CMDQ_ENT_DWORDS];
	struct arm_smmu_device *smmu = smmu_domain->smmu;
	unsigned long start = iova, end = iova + size;
	int i = 0;
	unsigned long start = iova, end = iova + size, num_pages = 0, tg = 0;
	size_t inv_range = granule;
	struct arm_smmu_cmdq_batch cmds = {};
	struct arm_smmu_cmdq_ent cmd = {
		.tlbi = {
			.leaf	= leaf,
@@ -2239,19 +2293,50 @@ static void arm_smmu_tlb_inv_range(unsigned long iova, size_t size,
		cmd.tlbi.vmid	= smmu_domain->s2_cfg.vmid;
	}

	if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
		/* Get the leaf page size */
		tg = __ffs(smmu_domain->domain.pgsize_bitmap);

		/* Convert page size of 12,14,16 (log2) to 1,2,3 */
		cmd.tlbi.tg = (tg - 10) / 2;

		/* Determine what level the granule is at */
		cmd.tlbi.ttl = 4 - ((ilog2(granule) - 3) / (tg - 3));

		num_pages = size >> tg;
	}

	while (iova < end) {
		if (i == CMDQ_BATCH_ENTRIES) {
			arm_smmu_cmdq_issue_cmdlist(smmu, cmds, i, false);
			i = 0;
		if (smmu->features & ARM_SMMU_FEAT_RANGE_INV) {
			/*
			 * On each iteration of the loop, the range is 5 bits
			 * worth of the aligned size remaining.
			 * The range in pages is:
			 *
			 * range = (num_pages & (0x1f << __ffs(num_pages)))
			 */
			unsigned long scale, num;

			/* Determine the power of 2 multiple number of pages */
			scale = __ffs(num_pages);
			cmd.tlbi.scale = scale;

			/* Determine how many chunks of 2^scale size we have */
			num = (num_pages >> scale) & CMDQ_TLBI_RANGE_NUM_MAX;
			cmd.tlbi.num = num - 1;

			/* range is num * 2^scale * pgsize */
			inv_range = num << (scale + tg);

			/* Clear out the lower order bits for the next iteration */
			num_pages -= num << scale;
		}

		cmd.tlbi.addr = iova;
		arm_smmu_cmdq_build_cmd(&cmds[i * CMDQ_ENT_DWORDS], &cmd);
		iova += granule;
		i++;
		arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
		iova += inv_range;
	}

	arm_smmu_cmdq_issue_cmdlist(smmu, cmds, i, true);
	arm_smmu_cmdq_batch_submit(smmu, &cmds);

	/*
	 * Unfortunately, this can't be leaf-only since we may have
@@ -2611,7 +2696,6 @@ static void arm_smmu_enable_ats(struct arm_smmu_master *master)

static void arm_smmu_disable_ats(struct arm_smmu_master *master)
{
	struct arm_smmu_cmdq_ent cmd;
	struct arm_smmu_domain *smmu_domain = master->domain;

	if (!master->ats_enabled)
@@ -2623,11 +2707,57 @@ static void arm_smmu_disable_ats(struct arm_smmu_master *master)
	 * ATC invalidation via the SMMU.
	 */
	wmb();
	arm_smmu_atc_inv_to_cmd(0, 0, 0, &cmd);
	arm_smmu_atc_inv_master(master, &cmd);
	arm_smmu_atc_inv_master(master);
	atomic_dec(&smmu_domain->nr_ats_masters);
}

static int arm_smmu_enable_pasid(struct arm_smmu_master *master)
{
	int ret;
	int features;
	int num_pasids;
	struct pci_dev *pdev;

	if (!dev_is_pci(master->dev))
		return -ENODEV;

	pdev = to_pci_dev(master->dev);

	features = pci_pasid_features(pdev);
	if (features < 0)
		return features;

	num_pasids = pci_max_pasids(pdev);
	if (num_pasids <= 0)
		return num_pasids;

	ret = pci_enable_pasid(pdev, features);
	if (ret) {
		dev_err(&pdev->dev, "Failed to enable PASID\n");
		return ret;
	}

	master->ssid_bits = min_t(u8, ilog2(num_pasids),
				  master->smmu->ssid_bits);
	return 0;
}

static void arm_smmu_disable_pasid(struct arm_smmu_master *master)
{
	struct pci_dev *pdev;

	if (!dev_is_pci(master->dev))
		return;

	pdev = to_pci_dev(master->dev);

	if (!pdev->pasid_enabled)
		return;

	master->ssid_bits = 0;
	pci_disable_pasid(pdev);
}

static void arm_smmu_detach_dev(struct arm_smmu_master *master)
{
	unsigned long flags;
@@ -2831,13 +2961,23 @@ static int arm_smmu_add_device(struct device *dev)

	master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits);

	/*
	 * Note that PASID must be enabled before, and disabled after ATS:
	 * PCI Express Base 4.0r1.0 - 10.5.1.3 ATS Control Register
	 *
	 *   Behavior is undefined if this bit is Set and the value of the PASID
	 *   Enable, Execute Requested Enable, or Privileged Mode Requested bits
	 *   are changed.
	 */
	arm_smmu_enable_pasid(master);

	if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB))
		master->ssid_bits = min_t(u8, master->ssid_bits,
					  CTXDESC_LINEAR_CDMAX);

	ret = iommu_device_link(&smmu->iommu, dev);
	if (ret)
		goto err_free_master;
		goto err_disable_pasid;

	group = iommu_group_get_for_dev(dev);
	if (IS_ERR(group)) {
@@ -2850,6 +2990,8 @@ static int arm_smmu_add_device(struct device *dev)

err_unlink:
	iommu_device_unlink(&smmu->iommu, dev);
err_disable_pasid:
	arm_smmu_disable_pasid(master);
err_free_master:
	kfree(master);
	fwspec->iommu_priv = NULL;
@@ -2870,6 +3012,7 @@ static void arm_smmu_remove_device(struct device *dev)
	arm_smmu_detach_dev(master);
	iommu_group_remove_device(dev);
	iommu_device_unlink(&smmu->iommu, dev);
	arm_smmu_disable_pasid(master);
	kfree(master);
	iommu_fwspec_free(dev);
}
@@ -3700,6 +3843,11 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
	if (smmu->sid_bits <= STRTAB_SPLIT)
		smmu->features &= ~ARM_SMMU_FEAT_2_LVL_STRTAB;

	/* IDR3 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR3);
	if (FIELD_GET(IDR3_RIL, reg))
		smmu->features |= ARM_SMMU_FEAT_RANGE_INV;

	/* IDR5 */
	reg = readl_relaxed(smmu->base + ARM_SMMU_IDR5);

+4 −0
Original line number Diff line number Diff line
@@ -366,6 +366,7 @@ int pci_enable_pasid(struct pci_dev *pdev, int features)

	return 0;
}
EXPORT_SYMBOL_GPL(pci_enable_pasid);

/**
 * pci_disable_pasid - Disable the PASID capability
@@ -390,6 +391,7 @@ void pci_disable_pasid(struct pci_dev *pdev)

	pdev->pasid_enabled = 0;
}
EXPORT_SYMBOL_GPL(pci_disable_pasid);

/**
 * pci_restore_pasid_state - Restore PASID capabilities
@@ -441,6 +443,7 @@ int pci_pasid_features(struct pci_dev *pdev)

	return supported;
}
EXPORT_SYMBOL_GPL(pci_pasid_features);

#define PASID_NUMBER_SHIFT	8
#define PASID_NUMBER_MASK	(0x1f << PASID_NUMBER_SHIFT)
@@ -469,4 +472,5 @@ int pci_max_pasids(struct pci_dev *pdev)

	return (1 << supported);
}
EXPORT_SYMBOL_GPL(pci_max_pasids);
#endif /* CONFIG_PCI_PASID */