Kernel  |  4.4

下载     查看原文件
C++程序  |  812行  |  21.16 KB
/*
 * Intel MIC Platform Software Stack (MPSS)
 *
 * Copyright(c) 2013 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 * Intel MIC Host driver.
 *
 */
#include <linux/pci.h>
#include <linux/sched.h>
#include <linux/uaccess.h>
#include <linux/dmaengine.h>
#include <linux/mic_common.h>
#include "../common/mic_dev.h"
#include "mic_device.h"
#include "mic_smpt.h"
#include "mic_virtio.h"

/*
 * Size of the internal buffer used during DMA's as an intermediate buffer
 * for copy to/from user.
 */
#define MIC_INT_DMA_BUF_SIZE PAGE_ALIGN(64 * 1024ULL)

static int mic_sync_dma(struct mic_device *mdev, dma_addr_t dst,
			dma_addr_t src, size_t len)
{
	int err = 0;
	struct dma_async_tx_descriptor *tx;
	struct dma_chan *mic_ch = mdev->dma_ch[0];

	if (!mic_ch) {
		err = -EBUSY;
		goto error;
	}

	tx = mic_ch->device->device_prep_dma_memcpy(mic_ch, dst, src, len,
						    DMA_PREP_FENCE);
	if (!tx) {
		err = -ENOMEM;
		goto error;
	} else {
		dma_cookie_t cookie = tx->tx_submit(tx);

		err = dma_submit_error(cookie);
		if (err)
			goto error;
		err = dma_sync_wait(mic_ch, cookie);
	}
error:
	if (err)
		dev_err(&mdev->pdev->dev, "%s %d err %d\n",
			__func__, __LINE__, err);
	return err;
}

/*
 * Initiates the copies across the PCIe bus from card memory to a user
 * space buffer. When transfers are done using DMA, source/destination
 * addresses and transfer length must follow the alignment requirements of
 * the MIC DMA engine.
 */
static int mic_virtio_copy_to_user(struct mic_vdev *mvdev, void __user *ubuf,
				   size_t len, u64 daddr, size_t dlen,
				   int vr_idx)
{
	struct mic_device *mdev = mvdev->mdev;
	void __iomem *dbuf = mdev->aper.va + daddr;
	struct mic_vringh *mvr = &mvdev->mvr[vr_idx];
	size_t dma_alignment = 1 << mdev->dma_ch[0]->device->copy_align;
	size_t dma_offset;
	size_t partlen;
	int err;

	dma_offset = daddr - round_down(daddr, dma_alignment);
	daddr -= dma_offset;
	len += dma_offset;

	while (len) {
		partlen = min_t(size_t, len, MIC_INT_DMA_BUF_SIZE);

		err = mic_sync_dma(mdev, mvr->buf_da, daddr,
				   ALIGN(partlen, dma_alignment));
		if (err)
			goto err;

		if (copy_to_user(ubuf, mvr->buf + dma_offset,
				 partlen - dma_offset)) {
			err = -EFAULT;
			goto err;
		}
		daddr += partlen;
		ubuf += partlen;
		dbuf += partlen;
		mvdev->in_bytes_dma += partlen;
		mvdev->in_bytes += partlen;
		len -= partlen;
		dma_offset = 0;
	}
	return 0;
err:
	dev_err(mic_dev(mvdev), "%s %d err %d\n", __func__, __LINE__, err);
	return err;
}

/*
 * Initiates copies across the PCIe bus from a user space buffer to card
 * memory. When transfers are done using DMA, source/destination addresses
 * and transfer length must follow the alignment requirements of the MIC
 * DMA engine.
 */
static int mic_virtio_copy_from_user(struct mic_vdev *mvdev, void __user *ubuf,
				     size_t len, u64 daddr, size_t dlen,
				     int vr_idx)
{
	struct mic_device *mdev = mvdev->mdev;
	void __iomem *dbuf = mdev->aper.va + daddr;
	struct mic_vringh *mvr = &mvdev->mvr[vr_idx];
	size_t dma_alignment = 1 << mdev->dma_ch[0]->device->copy_align;
	size_t partlen;
	int err;

	if (daddr & (dma_alignment - 1)) {
		mvdev->tx_dst_unaligned += len;
		goto memcpy;
	} else if (ALIGN(len, dma_alignment) > dlen) {
		mvdev->tx_len_unaligned += len;
		goto memcpy;
	}

	while (len) {
		partlen = min_t(size_t, len, MIC_INT_DMA_BUF_SIZE);

		if (copy_from_user(mvr->buf, ubuf, partlen)) {
			err = -EFAULT;
			goto err;
		}
		err = mic_sync_dma(mdev, daddr, mvr->buf_da,
				   ALIGN(partlen, dma_alignment));
		if (err)
			goto err;
		daddr += partlen;
		ubuf += partlen;
		dbuf += partlen;
		mvdev->out_bytes_dma += partlen;
		mvdev->out_bytes += partlen;
		len -= partlen;
	}
memcpy:
	/*
	 * We are copying to IO below and should ideally use something
	 * like copy_from_user_toio(..) if it existed.
	 */
	if (copy_from_user((void __force *)dbuf, ubuf, len)) {
		err = -EFAULT;
		goto err;
	}
	mvdev->out_bytes += len;
	return 0;
err:
	dev_err(mic_dev(mvdev), "%s %d err %d\n", __func__, __LINE__, err);
	return err;
}

#define MIC_VRINGH_READ true

/* The function to call to notify the card about added buffers */
static void mic_notify(struct vringh *vrh)
{
	struct mic_vringh *mvrh = container_of(vrh, struct mic_vringh, vrh);
	struct mic_vdev *mvdev = mvrh->mvdev;
	s8 db = mvdev->dc->h2c_vdev_db;

	if (db != -1)
		mvdev->mdev->ops->send_intr(mvdev->mdev, db);
}

/* Determine the total number of bytes consumed in a VRINGH KIOV */
static inline u32 mic_vringh_iov_consumed(struct vringh_kiov *iov)
{
	int i;
	u32 total = iov->consumed;

	for (i = 0; i < iov->i; i++)
		total += iov->iov[i].iov_len;
	return total;
}

/*
 * Traverse the VRINGH KIOV and issue the APIs to trigger the copies.
 * This API is heavily based on the vringh_iov_xfer(..) implementation
 * in vringh.c. The reason we cannot reuse vringh_iov_pull_kern(..)
 * and vringh_iov_push_kern(..) directly is because there is no
 * way to override the VRINGH xfer(..) routines as of v3.10.
 */
static int mic_vringh_copy(struct mic_vdev *mvdev, struct vringh_kiov *iov,
			void __user *ubuf, size_t len, bool read, int vr_idx,
			size_t *out_len)
{
	int ret = 0;
	size_t partlen, tot_len = 0;

	while (len && iov->i < iov->used) {
		partlen = min(iov->iov[iov->i].iov_len, len);
		if (read)
			ret = mic_virtio_copy_to_user(mvdev, ubuf, partlen,
						(u64)iov->iov[iov->i].iov_base,
						iov->iov[iov->i].iov_len,
						vr_idx);
		else
			ret = mic_virtio_copy_from_user(mvdev, ubuf, partlen,
						(u64)iov->iov[iov->i].iov_base,
						iov->iov[iov->i].iov_len,
						vr_idx);
		if (ret) {
			dev_err(mic_dev(mvdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			break;
		}
		len -= partlen;
		ubuf += partlen;
		tot_len += partlen;
		iov->consumed += partlen;
		iov->iov[iov->i].iov_len -= partlen;
		iov->iov[iov->i].iov_base += partlen;
		if (!iov->iov[iov->i].iov_len) {
			/* Fix up old iov element then increment. */
			iov->iov[iov->i].iov_len = iov->consumed;
			iov->iov[iov->i].iov_base -= iov->consumed;

			iov->consumed = 0;
			iov->i++;
		}
	}
	*out_len = tot_len;
	return ret;
}

/*
 * Use the standard VRINGH infrastructure in the kernel to fetch new
 * descriptors, initiate the copies and update the used ring.
 */
static int _mic_virtio_copy(struct mic_vdev *mvdev,
	struct mic_copy_desc *copy)
{
	int ret = 0;
	u32 iovcnt = copy->iovcnt;
	struct iovec iov;
	struct iovec __user *u_iov = copy->iov;
	void __user *ubuf = NULL;
	struct mic_vringh *mvr = &mvdev->mvr[copy->vr_idx];
	struct vringh_kiov *riov = &mvr->riov;
	struct vringh_kiov *wiov = &mvr->wiov;
	struct vringh *vrh = &mvr->vrh;
	u16 *head = &mvr->head;
	struct mic_vring *vr = &mvr->vring;
	size_t len = 0, out_len;

	copy->out_len = 0;
	/* Fetch a new IOVEC if all previous elements have been processed */
	if (riov->i == riov->used && wiov->i == wiov->used) {
		ret = vringh_getdesc_kern(vrh, riov, wiov,
				head, GFP_KERNEL);
		/* Check if there are available descriptors */
		if (ret <= 0)
			return ret;
	}
	while (iovcnt) {
		if (!len) {
			/* Copy over a new iovec from user space. */
			ret = copy_from_user(&iov, u_iov, sizeof(*u_iov));
			if (ret) {
				ret = -EINVAL;
				dev_err(mic_dev(mvdev), "%s %d err %d\n",
					__func__, __LINE__, ret);
				break;
			}
			len = iov.iov_len;
			ubuf = iov.iov_base;
		}
		/* Issue all the read descriptors first */
		ret = mic_vringh_copy(mvdev, riov, ubuf, len, MIC_VRINGH_READ,
				      copy->vr_idx, &out_len);
		if (ret) {
			dev_err(mic_dev(mvdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			break;
		}
		len -= out_len;
		ubuf += out_len;
		copy->out_len += out_len;
		/* Issue the write descriptors next */
		ret = mic_vringh_copy(mvdev, wiov, ubuf, len, !MIC_VRINGH_READ,
				      copy->vr_idx, &out_len);
		if (ret) {
			dev_err(mic_dev(mvdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			break;
		}
		len -= out_len;
		ubuf += out_len;
		copy->out_len += out_len;
		if (!len) {
			/* One user space iovec is now completed */
			iovcnt--;
			u_iov++;
		}
		/* Exit loop if all elements in KIOVs have been processed. */
		if (riov->i == riov->used && wiov->i == wiov->used)
			break;
	}
	/*
	 * Update the used ring if a descriptor was available and some data was
	 * copied in/out and the user asked for a used ring update.
	 */
	if (*head != USHRT_MAX && copy->out_len && copy->update_used) {
		u32 total = 0;

		/* Determine the total data consumed */
		total += mic_vringh_iov_consumed(riov);
		total += mic_vringh_iov_consumed(wiov);
		vringh_complete_kern(vrh, *head, total);
		*head = USHRT_MAX;
		if (vringh_need_notify_kern(vrh) > 0)
			vringh_notify(vrh);
		vringh_kiov_cleanup(riov);
		vringh_kiov_cleanup(wiov);
		/* Update avail idx for user space */
		vr->info->avail_idx = vrh->last_avail_idx;
	}
	return ret;
}

static inline int mic_verify_copy_args(struct mic_vdev *mvdev,
		struct mic_copy_desc *copy)
{
	if (copy->vr_idx >= mvdev->dd->num_vq) {
		dev_err(mic_dev(mvdev), "%s %d err %d\n",
			__func__, __LINE__, -EINVAL);
		return -EINVAL;
	}
	return 0;
}

/* Copy a specified number of virtio descriptors in a chain */
int mic_virtio_copy_desc(struct mic_vdev *mvdev,
		struct mic_copy_desc *copy)
{
	int err;
	struct mic_vringh *mvr = &mvdev->mvr[copy->vr_idx];

	err = mic_verify_copy_args(mvdev, copy);
	if (err)
		return err;

	mutex_lock(&mvr->vr_mutex);
	if (!mic_vdevup(mvdev)) {
		err = -ENODEV;
		dev_err(mic_dev(mvdev), "%s %d err %d\n",
			__func__, __LINE__, err);
		goto err;
	}
	err = _mic_virtio_copy(mvdev, copy);
	if (err) {
		dev_err(mic_dev(mvdev), "%s %d err %d\n",
			__func__, __LINE__, err);
	}
err:
	mutex_unlock(&mvr->vr_mutex);
	return err;
}

static void mic_virtio_init_post(struct mic_vdev *mvdev)
{
	struct mic_vqconfig *vqconfig = mic_vq_config(mvdev->dd);
	int i;

	for (i = 0; i < mvdev->dd->num_vq; i++) {
		if (!le64_to_cpu(vqconfig[i].used_address)) {
			dev_warn(mic_dev(mvdev), "used_address zero??\n");
			continue;
		}
		mvdev->mvr[i].vrh.vring.used =
			(void __force *)mvdev->mdev->aper.va +
			le64_to_cpu(vqconfig[i].used_address);
	}

	mvdev->dc->used_address_updated = 0;

	dev_dbg(mic_dev(mvdev), "%s: device type %d LINKUP\n",
		__func__, mvdev->virtio_id);
}

static inline void mic_virtio_device_reset(struct mic_vdev *mvdev)
{
	int i;

	dev_dbg(mic_dev(mvdev), "%s: status %d device type %d RESET\n",
		__func__, mvdev->dd->status, mvdev->virtio_id);

	for (i = 0; i < mvdev->dd->num_vq; i++)
		/*
		 * Avoid lockdep false positive. The + 1 is for the mic
		 * mutex which is held in the reset devices code path.
		 */
		mutex_lock_nested(&mvdev->mvr[i].vr_mutex, i + 1);

	/* 0 status means "reset" */
	mvdev->dd->status = 0;
	mvdev->dc->vdev_reset = 0;
	mvdev->dc->host_ack = 1;

	for (i = 0; i < mvdev->dd->num_vq; i++) {
		struct vringh *vrh = &mvdev->mvr[i].vrh;
		mvdev->mvr[i].vring.info->avail_idx = 0;
		vrh->completed = 0;
		vrh->last_avail_idx = 0;
		vrh->last_used_idx = 0;
	}

	for (i = 0; i < mvdev->dd->num_vq; i++)
		mutex_unlock(&mvdev->mvr[i].vr_mutex);
}

void mic_virtio_reset_devices(struct mic_device *mdev)
{
	struct list_head *pos, *tmp;
	struct mic_vdev *mvdev;

	dev_dbg(&mdev->pdev->dev, "%s\n",  __func__);

	list_for_each_safe(pos, tmp, &mdev->vdev_list) {
		mvdev = list_entry(pos, struct mic_vdev, list);
		mic_virtio_device_reset(mvdev);
		mvdev->poll_wake = 1;
		wake_up(&mvdev->waitq);
	}
}

void mic_bh_handler(struct work_struct *work)
{
	struct mic_vdev *mvdev = container_of(work, struct mic_vdev,
			virtio_bh_work);

	if (mvdev->dc->used_address_updated)
		mic_virtio_init_post(mvdev);

	if (mvdev->dc->vdev_reset)
		mic_virtio_device_reset(mvdev);

	mvdev->poll_wake = 1;
	wake_up(&mvdev->waitq);
}

static irqreturn_t mic_virtio_intr_handler(int irq, void *data)
{
	struct mic_vdev *mvdev = data;
	struct mic_device *mdev = mvdev->mdev;

	mdev->ops->intr_workarounds(mdev);
	schedule_work(&mvdev->virtio_bh_work);
	return IRQ_HANDLED;
}

int mic_virtio_config_change(struct mic_vdev *mvdev,
			void __user *argp)
{
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
	int ret = 0, retry, i;
	struct mic_bootparam *bootparam = mvdev->mdev->dp;
	s8 db = bootparam->h2c_config_db;

	mutex_lock(&mvdev->mdev->mic_mutex);
	for (i = 0; i < mvdev->dd->num_vq; i++)
		mutex_lock_nested(&mvdev->mvr[i].vr_mutex, i + 1);

	if (db == -1 || mvdev->dd->type == -1) {
		ret = -EIO;
		goto exit;
	}

	if (copy_from_user(mic_vq_configspace(mvdev->dd),
			   argp, mvdev->dd->config_len)) {
		dev_err(mic_dev(mvdev), "%s %d err %d\n",
			__func__, __LINE__, -EFAULT);
		ret = -EFAULT;
		goto exit;
	}
	mvdev->dc->config_change = MIC_VIRTIO_PARAM_CONFIG_CHANGED;
	mvdev->mdev->ops->send_intr(mvdev->mdev, db);

	for (retry = 100; retry--;) {
		ret = wait_event_timeout(wake,
			mvdev->dc->guest_ack, msecs_to_jiffies(100));
		if (ret)
			break;
	}

	dev_dbg(mic_dev(mvdev),
		"%s %d retry: %d\n", __func__, __LINE__, retry);
	mvdev->dc->config_change = 0;
	mvdev->dc->guest_ack = 0;
exit:
	for (i = 0; i < mvdev->dd->num_vq; i++)
		mutex_unlock(&mvdev->mvr[i].vr_mutex);
	mutex_unlock(&mvdev->mdev->mic_mutex);
	return ret;
}

static int mic_copy_dp_entry(struct mic_vdev *mvdev,
					void __user *argp,
					__u8 *type,
					struct mic_device_desc **devpage)
{
	struct mic_device *mdev = mvdev->mdev;
	struct mic_device_desc dd, *dd_config, *devp;
	struct mic_vqconfig *vqconfig;
	int ret = 0, i;
	bool slot_found = false;

	if (copy_from_user(&dd, argp, sizeof(dd))) {
		dev_err(mic_dev(mvdev), "%s %d err %d\n",
			__func__, __LINE__, -EFAULT);
		return -EFAULT;
	}

	if (mic_aligned_desc_size(&dd) > MIC_MAX_DESC_BLK_SIZE ||
	    dd.num_vq > MIC_MAX_VRINGS) {
		dev_err(mic_dev(mvdev), "%s %d err %d\n",
			__func__, __LINE__, -EINVAL);
		return -EINVAL;
	}

	dd_config = kmalloc(mic_desc_size(&dd), GFP_KERNEL);
	if (dd_config == NULL) {
		dev_err(mic_dev(mvdev), "%s %d err %d\n",
			__func__, __LINE__, -ENOMEM);
		return -ENOMEM;
	}
	if (copy_from_user(dd_config, argp, mic_desc_size(&dd))) {
		ret = -EFAULT;
		dev_err(mic_dev(mvdev), "%s %d err %d\n",
			__func__, __LINE__, ret);
		goto exit;
	}

	vqconfig = mic_vq_config(dd_config);
	for (i = 0; i < dd.num_vq; i++) {
		if (le16_to_cpu(vqconfig[i].num) > MIC_MAX_VRING_ENTRIES) {
			ret =  -EINVAL;
			dev_err(mic_dev(mvdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			goto exit;
		}
	}

	/* Find the first free device page entry */
	for (i = sizeof(struct mic_bootparam);
		i < MIC_DP_SIZE - mic_total_desc_size(dd_config);
		i += mic_total_desc_size(devp)) {
		devp = mdev->dp + i;
		if (devp->type == 0 || devp->type == -1) {
			slot_found = true;
			break;
		}
	}
	if (!slot_found) {
		ret =  -EINVAL;
		dev_err(mic_dev(mvdev), "%s %d err %d\n",
			__func__, __LINE__, ret);
		goto exit;
	}
	/*
	 * Save off the type before doing the memcpy. Type will be set in the
	 * end after completing all initialization for the new device.
	 */
	*type = dd_config->type;
	dd_config->type = 0;
	memcpy(devp, dd_config, mic_desc_size(dd_config));

	*devpage = devp;
exit:
	kfree(dd_config);
	return ret;
}

static void mic_init_device_ctrl(struct mic_vdev *mvdev,
				struct mic_device_desc *devpage)
{
	struct mic_device_ctrl *dc;

	dc = (void *)devpage + mic_aligned_desc_size(devpage);

	dc->config_change = 0;
	dc->guest_ack = 0;
	dc->vdev_reset = 0;
	dc->host_ack = 0;
	dc->used_address_updated = 0;
	dc->c2h_vdev_db = -1;
	dc->h2c_vdev_db = -1;
	mvdev->dc = dc;
}

int mic_virtio_add_device(struct mic_vdev *mvdev,
			void __user *argp)
{
	struct mic_device *mdev = mvdev->mdev;
	struct mic_device_desc *dd = NULL;
	struct mic_vqconfig *vqconfig;
	int vr_size, i, j, ret;
	u8 type = 0;
	s8 db;
	char irqname[10];
	struct mic_bootparam *bootparam = mdev->dp;
	u16 num;
	dma_addr_t vr_addr;

	mutex_lock(&mdev->mic_mutex);

	ret = mic_copy_dp_entry(mvdev, argp, &type, &dd);
	if (ret) {
		mutex_unlock(&mdev->mic_mutex);
		return ret;
	}

	mic_init_device_ctrl(mvdev, dd);

	mvdev->dd = dd;
	mvdev->virtio_id = type;
	vqconfig = mic_vq_config(dd);
	INIT_WORK(&mvdev->virtio_bh_work, mic_bh_handler);

	for (i = 0; i < dd->num_vq; i++) {
		struct mic_vringh *mvr = &mvdev->mvr[i];
		struct mic_vring *vr = &mvdev->mvr[i].vring;
		num = le16_to_cpu(vqconfig[i].num);
		mutex_init(&mvr->vr_mutex);
		vr_size = PAGE_ALIGN(vring_size(num, MIC_VIRTIO_RING_ALIGN) +
			sizeof(struct _mic_vring_info));
		vr->va = (void *)
			__get_free_pages(GFP_KERNEL | __GFP_ZERO,
					 get_order(vr_size));
		if (!vr->va) {
			ret = -ENOMEM;
			dev_err(mic_dev(mvdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			goto err;
		}
		vr->len = vr_size;
		vr->info = vr->va + vring_size(num, MIC_VIRTIO_RING_ALIGN);
		vr->info->magic = cpu_to_le32(MIC_MAGIC + mvdev->virtio_id + i);
		vr_addr = mic_map_single(mdev, vr->va, vr_size);
		if (mic_map_error(vr_addr)) {
			free_pages((unsigned long)vr->va, get_order(vr_size));
			ret = -ENOMEM;
			dev_err(mic_dev(mvdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			goto err;
		}
		vqconfig[i].address = cpu_to_le64(vr_addr);

		vring_init(&vr->vr, num, vr->va, MIC_VIRTIO_RING_ALIGN);
		ret = vringh_init_kern(&mvr->vrh,
			*(u32 *)mic_vq_features(mvdev->dd), num, false,
			vr->vr.desc, vr->vr.avail, vr->vr.used);
		if (ret) {
			dev_err(mic_dev(mvdev), "%s %d err %d\n",
				__func__, __LINE__, ret);
			goto err;
		}
		vringh_kiov_init(&mvr->riov, NULL, 0);
		vringh_kiov_init(&mvr->wiov, NULL, 0);
		mvr->head = USHRT_MAX;
		mvr->mvdev = mvdev;
		mvr->vrh.notify = mic_notify;
		dev_dbg(&mdev->pdev->dev,
			"%s %d index %d va %p info %p vr_size 0x%x\n",
			__func__, __LINE__, i, vr->va, vr->info, vr_size);
		mvr->buf = (void *)__get_free_pages(GFP_KERNEL,
					get_order(MIC_INT_DMA_BUF_SIZE));
		mvr->buf_da = mic_map_single(mvdev->mdev, mvr->buf,
					  MIC_INT_DMA_BUF_SIZE);
	}

	snprintf(irqname, sizeof(irqname), "mic%dvirtio%d", mdev->id,
		 mvdev->virtio_id);
	mvdev->virtio_db = mic_next_db(mdev);
	mvdev->virtio_cookie = mic_request_threaded_irq(mdev,
					       mic_virtio_intr_handler,
					       NULL, irqname, mvdev,
					       mvdev->virtio_db, MIC_INTR_DB);
	if (IS_ERR(mvdev->virtio_cookie)) {
		ret = PTR_ERR(mvdev->virtio_cookie);
		dev_dbg(&mdev->pdev->dev, "request irq failed\n");
		goto err;
	}

	mvdev->dc->c2h_vdev_db = mvdev->virtio_db;

	list_add_tail(&mvdev->list, &mdev->vdev_list);
	/*
	 * Order the type update with previous stores. This write barrier
	 * is paired with the corresponding read barrier before the uncached
	 * system memory read of the type, on the card while scanning the
	 * device page.
	 */
	smp_wmb();
	dd->type = type;

	dev_dbg(&mdev->pdev->dev, "Added virtio device id %d\n", dd->type);

	db = bootparam->h2c_config_db;
	if (db != -1)
		mdev->ops->send_intr(mdev, db);
	mutex_unlock(&mdev->mic_mutex);
	return 0;
err:
	vqconfig = mic_vq_config(dd);
	for (j = 0; j < i; j++) {
		struct mic_vringh *mvr = &mvdev->mvr[j];
		mic_unmap_single(mdev, le64_to_cpu(vqconfig[j].address),
				 mvr->vring.len);
		free_pages((unsigned long)mvr->vring.va,
			   get_order(mvr->vring.len));
	}
	mutex_unlock(&mdev->mic_mutex);
	return ret;
}

void mic_virtio_del_device(struct mic_vdev *mvdev)
{
	struct list_head *pos, *tmp;
	struct mic_vdev *tmp_mvdev;
	struct mic_device *mdev = mvdev->mdev;
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wake);
	int i, ret, retry;
	struct mic_vqconfig *vqconfig;
	struct mic_bootparam *bootparam = mdev->dp;
	s8 db;

	mutex_lock(&mdev->mic_mutex);
	db = bootparam->h2c_config_db;
	if (db == -1)
		goto skip_hot_remove;
	dev_dbg(&mdev->pdev->dev,
		"Requesting hot remove id %d\n", mvdev->virtio_id);
	mvdev->dc->config_change = MIC_VIRTIO_PARAM_DEV_REMOVE;
	mdev->ops->send_intr(mdev, db);
	for (retry = 100; retry--;) {
		ret = wait_event_timeout(wake,
			mvdev->dc->guest_ack, msecs_to_jiffies(100));
		if (ret)
			break;
	}
	dev_dbg(&mdev->pdev->dev,
		"Device id %d config_change %d guest_ack %d retry %d\n",
		mvdev->virtio_id, mvdev->dc->config_change,
		mvdev->dc->guest_ack, retry);
	mvdev->dc->config_change = 0;
	mvdev->dc->guest_ack = 0;
skip_hot_remove:
	mic_free_irq(mdev, mvdev->virtio_cookie, mvdev);
	flush_work(&mvdev->virtio_bh_work);
	vqconfig = mic_vq_config(mvdev->dd);
	for (i = 0; i < mvdev->dd->num_vq; i++) {
		struct mic_vringh *mvr = &mvdev->mvr[i];

		mic_unmap_single(mvdev->mdev, mvr->buf_da,
				 MIC_INT_DMA_BUF_SIZE);
		free_pages((unsigned long)mvr->buf,
			   get_order(MIC_INT_DMA_BUF_SIZE));
		vringh_kiov_cleanup(&mvr->riov);
		vringh_kiov_cleanup(&mvr->wiov);
		mic_unmap_single(mdev, le64_to_cpu(vqconfig[i].address),
				 mvr->vring.len);
		free_pages((unsigned long)mvr->vring.va,
			   get_order(mvr->vring.len));
	}

	list_for_each_safe(pos, tmp, &mdev->vdev_list) {
		tmp_mvdev = list_entry(pos, struct mic_vdev, list);
		if (tmp_mvdev == mvdev) {
			list_del(pos);
			dev_dbg(&mdev->pdev->dev,
				"Removing virtio device id %d\n",
				mvdev->virtio_id);
			break;
		}
	}
	/*
	 * Order the type update with previous stores. This write barrier
	 * is paired with the corresponding read barrier before the uncached
	 * system memory read of the type, on the card while scanning the
	 * device page.
	 */
	smp_wmb();
	mvdev->dd->type = -1;
	mutex_unlock(&mdev->mic_mutex);
}