Commit 16102736 authored by Greg Kroah-Hartman
Browse files

Merge tag 'misc-habanalabs-next-2019-05-03' of...

Merge tag 'misc-habanalabs-next-2019-05-03' of git://people.freedesktop.org/~gabbayo/linux into char-misc-next

Oded writes:

This tag contains further changes for kernel 5.2.

The changes are either bug fixes or simple re-factoring of existing code.
The notable changes are:

- Add missing fields in the bmon structure that is passed in the debug
  IOCTL when the user wants to configure the bus monitor.

- Use the dedicated device-CPU accessible memory pool for all host memory
  allocations that are accessible directly by the embedded CPU. This is
  needed to enforce certain restrictions we have due to the embedded CPU's
  architecture.

- Manipulate DMA addresses only inside ASIC-specific files. This is needed
  to better support code for future ASICs.

Other minor changes include:

- Move pr_fmt() to .c files to avoid a dependency on include order.

- Remove call to CS parsing function for workloads that originate from
  the driver and remove dead code as a result of this change.

- Update names of structure members and labels to better reflect their
  usage.

- When moving the dram PCI bar aperture, return the old aperture address
  range instead of an error code. This will allow us to restore the old
  address range in a simpler fashion.

* tag 'misc-habanalabs-next-2019-05-03' of git://people.freedesktop.org/~gabbayo/linux:
  habanalabs: Update CPU DMA memory label name
  habanalabs: Update CPU DMA pool label name
  habanalabs: increase timeout if working with simulator
  habanalabs: remove condition that is always true
  habanalabs: remove redundant member from parser struct
  habanalabs: Manipulate DMA addresses in ASIC functions
  habanalabs: rename functions to improve code readability
  habanalabs: remove call to cs_parser()
  habanalabs: Use single pool for CPU accessible host memory
  habanalabs: return old dram bar address upon change
  habanalabs: rename restore to ctx_switch when appropriate
  habanalabs: use ASIC functions interface for rreg/wreg
  uapi/habanalabs: add missing fields in bmon params
  habanalabs: re-factor goya_parse_cb_no_ext_queue()
  habanalabs: Cancel pr_fmt() definition dependency on includes order
parents 78e6427b 9f832fda
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -13,7 +13,7 @@

static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)
{
	hdev->asic_funcs->dma_free_coherent(hdev, cb->size,
	hdev->asic_funcs->asic_dma_free_coherent(hdev, cb->size,
			(void *) (uintptr_t) cb->kernel_address,
			cb->bus_address);
	kfree(cb);
@@ -66,10 +66,10 @@ static struct hl_cb *hl_cb_alloc(struct hl_device *hdev, u32 cb_size,
		return NULL;

	if (ctx_id == HL_KERNEL_ASID_ID)
		p = hdev->asic_funcs->dma_alloc_coherent(hdev, cb_size,
		p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
						&cb->bus_address, GFP_ATOMIC);
	else
		p = hdev->asic_funcs->dma_alloc_coherent(hdev, cb_size,
		p = hdev->asic_funcs->asic_dma_alloc_coherent(hdev, cb_size,
						&cb->bus_address,
						GFP_USER | __GFP_ZERO);
	if (!p) {
+8 −9
Original line number Diff line number Diff line
@@ -93,7 +93,6 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
	parser.user_cb_size = job->user_cb_size;
	parser.ext_queue = job->ext_queue;
	job->patched_cb = NULL;
	parser.use_virt_addr = hdev->mmu_enable;

	rc = hdev->asic_funcs->cs_parser(hdev, &parser);
	if (job->ext_queue) {
@@ -601,7 +600,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
	void __user *chunks;
	u32 num_chunks;
	u64 cs_seq = ULONG_MAX;
	int rc, do_restore;
	int rc, do_ctx_switch;
	bool need_soft_reset = false;

	if (hl_device_disabled_or_in_reset(hdev)) {
@@ -612,9 +611,9 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
		goto out;
	}

	do_restore = atomic_cmpxchg(&ctx->thread_restore_token, 1, 0);
	do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);

	if (do_restore || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
	if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
		long ret;

		chunks = (void __user *)(uintptr_t)args->in.chunks_restore;
@@ -622,7 +621,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)

		mutex_lock(&hpriv->restore_phase_mutex);

		if (do_restore) {
		if (do_ctx_switch) {
			rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
			if (rc) {
				dev_err_ratelimited(hdev->dev,
@@ -678,18 +677,18 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
			}
		}

		ctx->thread_restore_wait_token = 1;
	} else if (!ctx->thread_restore_wait_token) {
		ctx->thread_ctx_switch_wait_token = 1;
	} else if (!ctx->thread_ctx_switch_wait_token) {
		u32 tmp;

		rc = hl_poll_timeout_memory(hdev,
			(u64) (uintptr_t) &ctx->thread_restore_wait_token,
			(u64) (uintptr_t) &ctx->thread_ctx_switch_wait_token,
			jiffies_to_usecs(hdev->timeout_jiffies),
			&tmp);

		if (rc || !tmp) {
			dev_err(hdev->dev,
				"restore phase hasn't finished in time\n");
				"context switch phase didn't finish in time\n");
			rc = -ETIMEDOUT;
			goto out;
		}
+2 −2
Original line number Diff line number Diff line
@@ -106,8 +106,8 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)

	ctx->cs_sequence = 1;
	spin_lock_init(&ctx->cs_lock);
	atomic_set(&ctx->thread_restore_token, 1);
	ctx->thread_restore_wait_token = 0;
	atomic_set(&ctx->thread_ctx_switch_token, 1);
	ctx->thread_ctx_switch_wait_token = 0;

	if (is_kernel_ctx) {
		ctx->asid = HL_KERNEL_ASID_ID; /* KMD gets ASID 0 */
+12 −4
Original line number Diff line number Diff line
@@ -5,6 +5,8 @@
 * All Rights Reserved.
 */

#define pr_fmt(fmt)			"habanalabs: " fmt

#include "habanalabs.h"

#include <linux/pci.h>
@@ -708,10 +710,10 @@ again:
	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
		hl_cq_reset(hdev, &hdev->completion_queue[i]);

	/* Make sure the setup phase for the user context will run again */
	/* Make sure the context switch phase will run again */
	if (hdev->user_ctx) {
		atomic_set(&hdev->user_ctx->thread_restore_token, 1);
		hdev->user_ctx->thread_restore_wait_token = 0;
		atomic_set(&hdev->user_ctx->thread_ctx_switch_token, 1);
		hdev->user_ctx->thread_ctx_switch_wait_token = 0;
	}

	/* Finished tear-down, starting to re-initialize */
@@ -1145,7 +1147,13 @@ int hl_poll_timeout_memory(struct hl_device *hdev, u64 addr,
	 * either by the direct access of the device or by another core
	 */
	u32 *paddr = (u32 *) (uintptr_t) addr;
	ktime_t timeout = ktime_add_us(ktime_get(), timeout_us);
	ktime_t timeout;

	/* timeout should be longer when working with simulator */
	if (!hdev->pdev)
		timeout_us *= 10;

	timeout = ktime_add_us(ktime_get(), timeout_us);

	might_sleep();

+2 −5
Original line number Diff line number Diff line
@@ -249,8 +249,7 @@ int hl_fw_armcp_info_get(struct hl_device *hdev)

	pkt.ctl = cpu_to_le32(ARMCP_PACKET_INFO_GET <<
				ARMCP_PKT_CTL_OPCODE_SHIFT);
	pkt.addr = cpu_to_le64(armcp_info_dma_addr +
				prop->host_phys_base_address);
	pkt.addr = cpu_to_le64(armcp_info_dma_addr);
	pkt.data_max_size = cpu_to_le32(sizeof(struct armcp_info));

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
@@ -281,7 +280,6 @@ out:

int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct armcp_packet pkt = {};
	void *eeprom_info_cpu_addr;
	dma_addr_t eeprom_info_dma_addr;
@@ -301,8 +299,7 @@ int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size)

	pkt.ctl = cpu_to_le32(ARMCP_PACKET_EEPROM_DATA_GET <<
				ARMCP_PKT_CTL_OPCODE_SHIFT);
	pkt.addr = cpu_to_le64(eeprom_info_dma_addr +
				prop->host_phys_base_address);
	pkt.addr = cpu_to_le64(eeprom_info_dma_addr);
	pkt.data_max_size = cpu_to_le32(max_size);

	rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
Loading