Signed-off-by: George Zhang
---
drivers/misc/vmw_vmci/vmci_driver.c | 2293 +++++++++++++++++++++++++++++++++++
drivers/misc/vmw_vmci/vmci_driver.h | 48 +
2 files changed, 2341 insertions(+), 0 deletions(-)
create mode 100644 drivers/misc/vmw_vmci/vmci_driver.c
create mode 100644 drivers/misc/vmw_vmci/vmci_driver.h
diff --git a/drivers/misc/vmw_vmci/vmci_driver.c b/drivers/misc/vmw_vmci/vmci_driver.c
new file mode 100644
index 0000000..ab19651
--- /dev/null
+++ b/drivers/misc/vmw_vmci/vmci_driver.c
@@ -0,0 +1,2293 @@
+/*
+ * VMware VMCI Driver
+ *
+ * Copyright (C) 2012 VMware, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "vmci_handle_array.h"
+#include "vmci_common_int.h"
+#include "vmci_hash_table.h"
+#include "vmci_queue_pair.h"
+#include "vmci_datagram.h"
+#include "vmci_doorbell.h"
+#include "vmci_resource.h"
+#include "vmci_context.h"
+#include "vmci_driver.h"
+#include "vmci_event.h"
+
+#define VMCI_UTIL_NUM_RESOURCES 1
+
+/*
+ * Resource kinds carried in the "resource" field of an
+ * IOCTL_VMCI_NOTIFY_RESOURCE request. For the NOTIFY action only the
+ * doorbell kind is acted upon in this file; any other kind is answered
+ * with VMCI_ERROR_UNAVAILABLE.
+ */
+enum {
+ VMCI_NOTIFY_RESOURCE_QUEUE_PAIR = 0,
+ VMCI_NOTIFY_RESOURCE_DOOR_BELL = 1,
+};
+
+/*
+ * Actions selectable through the "action" field of an
+ * IOCTL_VMCI_NOTIFY_RESOURCE request. They are dispatched to
+ * vmci_ctx_notify_dbell(), vmci_ctx_dbell_create() and
+ * vmci_ctx_dbell_destroy() respectively; any other value is answered
+ * with VMCI_ERROR_INVALID_ARGS.
+ */
+enum {
+ VMCI_NOTIFY_RESOURCE_ACTION_NOTIFY = 0,
+ VMCI_NOTIFY_RESOURCE_ACTION_CREATE = 1,
+ VMCI_NOTIFY_RESOURCE_ACTION_DESTROY = 2,
+};
+
+/*
+ * Subscription id for VMCI_EVENT_CTX_ID_UPDATE; passed by address to
+ * vmci_event_subscribe() and checked by drv_util_cid_update().
+ */
+static u32 ctxUpdateSubID = VMCI_INVALID_ID;
+
+/*
+ * Context created for VMCI_HOST_CONTEXT_ID by drv_host_init() and
+ * released by drv_host_cleanup().
+ */
+static struct vmci_ctx *hostContext;
+
+/*
+ * Current context id, rewritten by drv_util_cid_update(). ATOMIC_INIT()
+ * is the documented static initializer for atomic_t and keeps this
+ * definition independent of the type's internal layout.
+ */
+static atomic_t vmContextID = ATOMIC_INIT(VMCI_INVALID_ID);
+
+/*
+ * Heap-allocated carrier for one deferred callback. It is queued by
+ * vmci_drv_schedule_delayed_work() and freed by drv_delayed_work_cb()
+ * once the callback has run.
+ */
+struct vmci_delayed_work_info {
+ struct work_struct work; /* Embedded work item, container_of() anchor */
+ VMCIWorkFn *workFn; /* Callback to run from the work item */
+ void *data; /* Opaque argument handed to workFn */
+};
+
+/*
+ * VMCI driver initialization. This block can also be used to
+ * pass initial group membership etc.
+ *
+ * IOCTL_VMCI_INIT_CONTEXT copies this block in from user space and
+ * copies it back out after the context has been set up.
+ */
+struct vmci_init_blk {
+ u32 cid; /* Context id handed to vmci_ctx_init_ctx() */
+ u32 flags; /* Only VMCI_PRIVILEGE_FLAG_RESTRICTED is accepted */
+};
+
+/*
+ * VMCIQueuePairAllocInfo_VMToVM: queue pair allocation request layout.
+ *
+ * NOTE(review): the code consuming this structure is not visible in this
+ * excerpt; presumably it is the IOCTL_VMCI_QUEUEPAIR_ALLOC path taken
+ * for older user versions - confirm before relying on it.
+ */
+struct vmci_qp_alloc_info_vmvm {
+ struct vmci_handle handle;
+ u32 peer;
+ u32 flags;
+ uint64_t produceSize;
+ uint64_t consumeSize;
+ uint64_t producePageFile; /* User VA. */
+ uint64_t consumePageFile; /* User VA. */
+ uint64_t producePageFileSize; /* Size of the file name array. */
+ uint64_t consumePageFileSize; /* Size of the file name array. */
+ int32_t result;
+ u32 _pad;
+};
+
+/*
+ * VMCISetNotifyInfo: Used to pass notify flag's address to the host driver.
+ * Exchanged with user space through IOCTL_VMCI_SET_NOTIFY.
+ */
+struct vmci_set_notify_info {
+ uint64_t notifyUVA; /* User VA of the flag; 0 selects vmci_ctx_unset_notify() */
+ int32_t result; /* VMCI status copied back to the caller */
+ u32 _pad;
+};
+
+/*
+ * Book-keeping for the VMCI device: I/O range, interrupt configuration
+ * and the locks guarding device and datagram access.
+ *
+ * NOTE(review): the probe/remove code that fills these fields in is
+ * outside this excerpt, so their exact meaning is inferred from the
+ * names only - confirm against that code.
+ */
+struct vmci_device {
+ struct mutex lock; /* Device access mutex */
+
+ unsigned int ioaddr;
+ unsigned int ioaddr_size;
+ unsigned int irq;
+ unsigned int intr_type;
+ bool exclusive_vectors;
+ struct msix_entry msix_entries[VMCI_MAX_INTRS];
+
+ bool enabled;
+ spinlock_t dev_spinlock; /* Lock for datagram access synchronization */
+ atomic_t datagrams_allowed;
+};
+
+/*
+ * PCI ids matched by this driver: the VMware VMCI device. The all-zero
+ * entry terminates the table.
+ */
+static DEFINE_PCI_DEVICE_TABLE(vmci_ids) = {
+ {PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_VMCI),},
+ {0},
+};
+
+/* The single, file-scope instance of the device state. */
+static struct vmci_device vmci_dev;
+
+/*
+ * These options are false (0) by default
+ *
+ * NOTE(review): presumably exposed as module parameters elsewhere in
+ * the file - confirm.
+ */
+static bool vmci_disable_host;
+static bool vmci_disable_guest;
+static bool vmci_disable_msi;
+static bool vmci_disable_msix;
+
+/*
+ * Allocate a buffer for incoming datagrams globally to avoid repeated
+ * allocation in the interrupt handler's atomic context.
+ */
+static uint8_t *data_buffer;
+static u32 data_buffer_size = VMCI_MAX_DG_SIZE;
+
+/*
+ * If the VMCI hardware supports the notification bitmap, we allocate
+ * and register a page with the device.
+ */
+static uint8_t *notification_bitmap;
+
+/*
+ * Per-instance host state. One of these hangs off filp->private_data of
+ * every open /dev/vmci file and is freed by drv_driver_close().
+ */
+struct vmci_linux {
+ struct vmci_ctx *context; /* Filled in by IOCTL_VMCI_INIT_CONTEXT */
+ int userVersion; /* Version stored by IOCTL_VMCI_VERSION2 */
+ enum vmci_obj_type ctType; /* VMCIOBJ_CONTEXT once a context exists */
+ struct mutex lock; /* Mutex lock for vmci context access */
+};
+
+/*
+ * Static driver state.
+ */
+struct vmci_linux_state {
+ struct miscdevice misc;
+ char buf[1024];
+ /*
+ * Incremented when IOCTL_VMCI_INIT_CONTEXT creates a context and
+ * decremented when the owning file is closed.
+ */
+ atomic_t activeContexts;
+};
+
+/*
+ * Types and variables shared by both host and guest personality
+ *
+ * NOTE(review): the code that sets and tests these flags is outside this
+ * excerpt; presumably they record which personality has been initialised
+ * and whether the guest device is in use - confirm against the
+ * init/exit paths.
+ */
+static bool guestDeviceInit;
+static atomic_t guestDeviceActive;
+static bool hostDeviceInit;
+
+/*
+ * Work item trampoline: recovers the vmci_delayed_work_info that embeds
+ * @work, runs the stored callback with its argument and then frees the
+ * carrier that vmci_drv_schedule_delayed_work() allocated.
+ */
+static void drv_delayed_work_cb(struct work_struct *work)
+{
+	struct vmci_delayed_work_info *info =
+		container_of(work, struct vmci_delayed_work_info, work);
+
+	ASSERT(info);
+	ASSERT(info->workFn);
+
+	info->workFn(info->data);
+	kfree(info);
+}
+
+/*
+ * Queue @workFn(@data) for deferred execution through schedule_work().
+ *
+ * The carrier is allocated with GFP_ATOMIC and is released by
+ * drv_delayed_work_cb() after the callback has run.
+ *
+ * Returns VMCI_SUCCESS once the work has been queued, or
+ * VMCI_ERROR_NO_MEM if the carrier could not be allocated.
+ */
+int vmci_drv_schedule_delayed_work(VMCIWorkFn *workFn,
+				   void *data)
+{
+	struct vmci_delayed_work_info *info;
+
+	ASSERT(workFn);
+
+	info = kmalloc(sizeof(*info), GFP_ATOMIC);
+	if (info == NULL)
+		return VMCI_ERROR_NO_MEM;
+
+	info->data = data;
+	info->workFn = workFn;
+	INIT_WORK(&info->work, drv_delayed_work_cb);
+
+	schedule_work(&info->work);
+
+	return VMCI_SUCCESS;
+}
+
+/*
+ * Sleep interruptibly on @event.
+ *
+ * @event:      wait queue to sleep on.
+ * @releaseCB:  called with @clientData after the task has been queued on
+ *              @event and marked TASK_INTERRUPTIBLE, and before it
+ *              actually sleeps.
+ * @clientData: opaque argument for @releaseCB.
+ *
+ * Returns true if the wait was interrupted by a signal, false otherwise.
+ * If @event or @releaseCB is NULL no wait takes place and false is
+ * returned.
+ */
+bool vmci_drv_wait_on_event_intr(wait_queue_head_t *event,
+				 VMCIEventReleaseCB releaseCB,
+				 void *clientData)
+{
+	DECLARE_WAITQUEUE(wait, current);
+
+	if (event == NULL || releaseCB == NULL)
+		return false;
+
+	add_wait_queue(event, &wait);
+
+	/*
+	 * Use set_current_state() rather than a plain store to
+	 * current->state: it orders the state change against the
+	 * following code, so a wake-up issued right after releaseCB()
+	 * drops the caller's lock cannot be lost.
+	 */
+	set_current_state(TASK_INTERRUPTIBLE);
+
+	/*
+	 * Release the lock or other primitive that makes it possible for us to
+	 * put the current thread on the wait queue without missing the signal.
+	 * Ie. on Linux we need to put ourselves on the wait queue and set our
+	 * state to TASK_INTERRUPTIBLE without another thread signalling us.
+	 * The releaseCB is used to synchronize this.
+	 */
+	releaseCB(clientData);
+
+	schedule();
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(event, &wait);
+
+	return signal_pending(current);
+}
+
+/*
+ * Cleans up the host specific components of the VMCI module: releases
+ * hostContext through vmci_ctx_release_ctx() and then calls
+ * vmci_qp_broker_exit().
+ */
+static void drv_host_cleanup(void)
+{
+ vmci_ctx_release_ctx(hostContext);
+ vmci_qp_broker_exit();
+}
+
+/*
+ * Reports whether VMCI is usable in either personality.
+ *
+ * Returns true when the guest code is active or, failing that, when the
+ * host code is active.
+ */
+static bool drv_device_enabled(void)
+{
+	if (vmci_guest_code_active())
+		return true;
+
+	return vmci_host_code_active();
+}
+
+/*
+ * Event callback registered for VMCI_EVENT_CTX_ID_UPDATE: stores the
+ * context id carried in the event payload into vmContextID.
+ *
+ * @subID:      subscription the event is delivered for; anything other
+ *              than ctxUpdateSubID is rejected.
+ * @eventData:  event whose payload, obtained through
+ *              vmci_event_data_payload(), holds the new context id.
+ * @clientData: unused.
+ *
+ * Events with a NULL @eventData or a payload context id of
+ * VMCI_INVALID_ID are ignored.
+ */
+static void drv_util_cid_update(u32 subID,
+				struct vmci_event_data *eventData,
+				void *clientData)
+{
+	struct vmci_event_payld_ctx *evPayload;
+
+	if (subID != ctxUpdateSubID) {
+		pr_devel("Invalid subscriber (ID=0x%x).", subID);
+		return;
+	}
+
+	/* Validate eventData before deriving the payload pointer from it. */
+	if (eventData == NULL) {
+		pr_devel("Invalid event data.");
+		return;
+	}
+
+	evPayload = vmci_event_data_payload(eventData);
+	if (evPayload->contextID == VMCI_INVALID_ID) {
+		pr_devel("Invalid event data.");
+		return;
+	}
+
+	pr_devel("Updating context from (ID=0x%x) to (ID=0x%x) on event "
+		 "(type=%d).", atomic_read(&vmContextID), evPayload->contextID,
+		 eventData->event);
+
+	atomic_set(&vmContextID, evPayload->contextID);
+}
+
+/*
+ * Subscribe to context id update event.
+ */
+static void __devinit drv_util_init(void)
+{
+ /*
+ * We subscribe to the VMCI_EVENT_CTX_ID_UPDATE here so we can
+ * update the internal context id when needed.
+ */
+ if (vmci_event_subscribe
+ (VMCI_EVENT_CTX_ID_UPDATE, VMCI_FLAG_EVENT_NONE,
+ drv_util_cid_update, NULL, &ctxUpdateSubID)
dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,
+ VMCI_RESOURCES_QUERY);
+ checkMsg->src = VMCI_ANON_SRC_HANDLE;
+ checkMsg->payloadSize = msgSize - VMCI_DG_HEADERSIZE;
+ msg = (struct vmci_resource_query_msg *)VMCI_DG_PAYLOAD(checkMsg);
+
+ msg->numResources = VMCI_UTIL_NUM_RESOURCES;
+ msg->resources[0] = VMCI_GET_CONTEXT_ID;
+
+ /* Checks that hyper calls are supported */
+ result = (0x1 == vmci_send_datagram(checkMsg));
+ kfree(checkMsg);
+
+ pr_info("Host capability check: %s.",
+ result ? "PASSED" : "FAILED");
+
+ /* We need the vector. There are no fallbacks. */
+ return result;
+}
+
+/*
+ * Reads datagrams from the data in port and dispatches them. We
+ * always start reading datagrams into only the first page of the
+ * datagram buffer. If the datagrams don't fit into one page, we
+ * use the maximum datagram buffer size for the remainder of the
+ * invocation. This is a simple heuristic for not penalizing
+ * small datagrams.
+ *
+ * This function assumes that it has exclusive access to the data
+ * in port for the duration of the call.
+ */
+static void drv_read_dgs_from_port(int ioHandle,
+ unsigned short int dgInPort,
+ uint8_t *dgInBuffer,
+ size_t dgInBufferSize)
+{
+ struct vmci_datagram *dg;
+ size_t currentDgInBufferSize = PAGE_SIZE;
+ size_t remainingBytes;
+
+ ASSERT(dgInBufferSize >= PAGE_SIZE);
+
+ insb(dgInPort, dgInBuffer, currentDgInBufferSize);
+ dg = (struct vmci_datagram *)dgInBuffer;
+ remainingBytes = currentDgInBufferSize;
+
+ while (dg->dst.resource != VMCI_INVALID_ID ||
+ remainingBytes > PAGE_SIZE) {
+ unsigned dgInSize;
+
+ /*
+ * When the input buffer spans multiple pages, a datagram can
+ * start on any page boundary in the buffer.
+ */
+ if (dg->dst.resource == VMCI_INVALID_ID) {
+ ASSERT(remainingBytes > PAGE_SIZE);
+ dg = (struct vmci_datagram *)roundup((uintptr_t)
+ dg + 1, PAGE_SIZE);
+ ASSERT((uint8_t *)dg
remainingBytes) {
+ if (remainingBytes != currentDgInBufferSize) {
+
+ /*
+ * We move the partial
+ * datagram to the front and
+ * read the remainder of the
+ * datagram and possibly
+ * following calls into the
+ * following bytes.
+ */
+ memmove(dgInBuffer, dgInBuffer +
+ currentDgInBufferSize -
+ remainingBytes, remainingBytes);
+ dg = (struct vmci_datagram *)
+ dgInBuffer;
+ }
+
+ if (currentDgInBufferSize != dgInBufferSize)
+ currentDgInBufferSize = dgInBufferSize;
+
+ insb(dgInPort, dgInBuffer + remainingBytes,
+ currentDgInBufferSize - remainingBytes);
+ }
+
+ /*
+ * We special case event datagrams from the
+ * hypervisor.
+ */
+ if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID &&
+ dg->dst.resource == VMCI_EVENT_HANDLER) {
+ result = vmci_event_dispatch(dg);
+ } else {
+ result = vmci_datagram_invoke_guest_handler(dg);
+ }
+ if (result
dst.resource, result);
+ }
+
+ /* On to the next datagram. */
+ dg = (struct vmci_datagram *)((uint8_t *)dg +
+ dgInSize);
+ } else {
+ size_t bytesToSkip;
+
+ /*
+ * Datagram doesn't fit in datagram buffer of maximal
+ * size. We drop it.
+ */
+ pr_devel("Failed to receive datagram (size=%u bytes).",
+ dgInSize);
+
+ bytesToSkip = dgInSize - remainingBytes;
+ if (currentDgInBufferSize != dgInBufferSize)
+ currentDgInBufferSize = dgInBufferSize;
+
+ for (;;) {
+ insb(dgInPort, dgInBuffer,
+ currentDgInBufferSize);
+ if (bytesToSkip
ctType = VMCIOBJ_NOT_SET;
+ mutex_init(&vmciLinux->lock);
+ filp->private_data = vmciLinux;
+
+ return 0;
+}
+
+/*
+ * Release handler for /dev/vmci, most often run when the owning process
+ * exits. Releases the context attached to the file (if one was created),
+ * frees the per-open state and always returns 0.
+ */
+static int drv_driver_close(struct inode *inode,
+			    struct file *filp)
+{
+	struct vmci_linux *inst = filp->private_data;
+
+	ASSERT(inst);
+
+	if (inst->ctType == VMCIOBJ_CONTEXT) {
+		ASSERT(inst->context);
+
+		vmci_ctx_release_ctx(inst->context);
+		inst->context = NULL;
+
+		/*
+		 * activeContexts tracks whether any VMX is using the host
+		 * personality. It was incremented when this context was
+		 * created through the IOCTL_VMCI_INIT_CONTEXT ioctl.
+		 */
+		atomic_dec(&linuxState.activeContexts);
+	}
+	inst->ctType = VMCIOBJ_NOT_SET;
+
+	kfree(inst);
+	filp->private_data = NULL;
+	return 0;
+}
+
+/*
+ * Poll handler used to wake up the VMX when a VMCI call arrives.
+ *
+ * For a file that owns a context, registers on the context's host wait
+ * queue (when @wait is given) and returns POLLIN if datagrams or
+ * doorbell notifications are pending. Returns 0 otherwise, including
+ * for files without a context.
+ */
+static unsigned int drv_driver_poll(struct file *filp, poll_table *wait)
+{
+	struct vmci_linux *inst = filp->private_data;
+	struct vmci_ctx *ctx;
+	unsigned int events = 0;
+
+	if (inst->ctType != VMCIOBJ_CONTEXT)
+		return 0;
+
+	ASSERT(inst->context != NULL);
+	ctx = inst->context;
+
+	/* Check for VMCI calls to this VM context. */
+	if (wait != NULL)
+		poll_wait(filp, &ctx->hostContext.waitQueue, wait);
+
+	spin_lock(&ctx->lock);
+	if (ctx->pendingDatagrams > 0 ||
+	    vmci_handle_arr_get_size(ctx->pendingDoorbellArray) > 0)
+		events = POLLIN;
+	spin_unlock(&ctx->lock);
+
+	return events;
+}
+
+static int __init drv_host_init(void)
+{
+ int error;
+ int result;
+
+ result = vmci_ctx_init_ctx(VMCI_HOST_CONTEXT_ID,
+ VMCI_DEFAULT_PROC_PRIVILEGE_FLAGS,
+ -1, VMCI_VERSION, NULL, &hostContext);
+ if (result
*userBufSize)
+ return VMCI_ERROR_MORE_DATA;
+
+ *userBufSize = arraySize * sizeof(*handles);
+ if (*userBufSize)
+ *retval = copy_to_user(userBufUVA,
+ vmci_handle_arr_get_handles
+ (handleArray), *userBufSize);
+
+ return VMCI_SUCCESS;
+}
+
+/*
+ * Allocates a queue pair through the broker and reports the VMCI status
+ * to user space.
+ *
+ * The status of vmci_qp_broker_alloc() is written to @resultUVA; for
+ * @vmToVm callers a plain VMCI_SUCCESS is reported as
+ * VMCI_SUCCESS_QUEUEPAIR_CREATE. If the status cannot be copied out, a
+ * successfully allocated queue pair is detached again.
+ *
+ * Returns 0 when the status was copied out, -EFAULT otherwise.
+ */
+static int drv_qp_broker_alloc(struct vmci_handle handle,
+			       u32 peer,
+			       u32 flags,
+			       uint64_t produceSize,
+			       uint64_t consumeSize,
+			       struct vmci_qp_page_store *pageStore,
+			       struct vmci_ctx *context,
+			       bool vmToVm,
+			       void __user *resultUVA)
+{
+	u32 cid;
+	int result;
+
+	/* NOTE(review): cid is not used after this point. */
+	cid = vmci_ctx_get_id(context);
+
+	result = vmci_qp_broker_alloc(handle, peer, flags,
+				      VMCI_NO_PRIVILEGE_FLAGS, produceSize,
+				      consumeSize, pageStore, context);
+	if (vmToVm && result == VMCI_SUCCESS)
+		result = VMCI_SUCCESS_QUEUEPAIR_CREATE;
+
+	if (!copy_to_user(resultUVA, &result, sizeof(result)))
+		return 0;
+
+	/* The caller never learns about the queue pair, so undo it. */
+	if (result >= VMCI_SUCCESS) {
+		result = vmci_qp_broker_detach(handle, context);
+		ASSERT(result >= VMCI_SUCCESS);
+	}
+
+	return -EFAULT;
+}
+
+/*
+ * Lock physical page backing a given user VA.
+ *
+ * Asks get_user_pages() for exactly one page of the current process at
+ * @addr (write=1, force=0) while holding the mm's mmap_sem for reading.
+ *
+ * Returns the page, or NULL if exactly one page could not be obtained.
+ */
+static struct page *drv_user_va_lock_page(uintptr_t addr)
+{
+	struct page *page = NULL;
+	int retval;
+
+	down_read(&current->mm->mmap_sem);
+	retval = get_user_pages(current, current->mm, addr,
+				1, 1, 0, &page, NULL);
+	up_read(&current->mm->mmap_sem);
+
+	if (retval != 1)
+		return NULL;
+
+	return page;
+}
+
+/*
+ * Locks the physical page backing the user VA @notifyUVA and maps it
+ * into kernel address space.
+ *
+ * On success *@p holds the locked page and *@notifyPtr points at the
+ * flag inside the kernel mapping.
+ *
+ * Returns 0 on success, -EINVAL if the range is not writable user
+ * memory or crosses a page boundary, and -EAGAIN if the page could not
+ * be locked.
+ */
+static int drv_map_bool_ptr(uintptr_t notifyUVA,
+			    struct page **p,
+			    bool **notifyPtr)
+{
+	const uintptr_t lastByte = notifyUVA + sizeof(**notifyPtr) - 1;
+	uint8_t *kva;
+
+	if (!access_ok(VERIFY_WRITE, (void __user *)notifyUVA,
+		       sizeof(**notifyPtr)))
+		return -EINVAL;
+
+	/* First and last byte of the flag must share one page. */
+	if ((lastByte & ~(PAGE_SIZE - 1)) != (notifyUVA & ~(PAGE_SIZE - 1)))
+		return -EINVAL;
+
+	*p = drv_user_va_lock_page(notifyUVA);
+	if (*p == NULL)
+		return -EAGAIN;
+
+	kva = kmap(*p);
+	*notifyPtr = (bool *)(kva + (notifyUVA & (PAGE_SIZE - 1)));
+	return 0;
+}
+
+/*
+ * Sets up @context for notification through a boolean living at the
+ * user VA @notifyUVA, using drv_map_bool_ptr() to map that boolean into
+ * kernel space.
+ *
+ * Returns VMCI_SUCCESS on success, VMCI_ERROR_DUPLICATE_ENTRY if the
+ * context already has a notify pointer, and VMCI_ERROR_GENERIC if the
+ * mapping failed.
+ */
+static int drv_setup_notify(struct vmci_ctx *context,
+			    uintptr_t notifyUVA)
+{
+	if (context->notify) {
+		pr_warn("Notify mechanism is already set up.");
+		return VMCI_ERROR_DUPLICATE_ENTRY;
+	}
+
+	if (drv_map_bool_ptr(notifyUVA, &context->notifyPage,
+			     &context->notify) != 0)
+		return VMCI_ERROR_GENERIC;
+
+	vmci_ctx_check_signal_notify(context);
+	return VMCI_SUCCESS;
+}
+
+static long drv_driver_unlocked_ioctl(struct file *filp,
+ u_int iocmd,
+ unsigned long ioarg)
+{
+ struct vmci_linux *vmciLinux = (struct vmci_linux *)filp->private_data;
+ int retval = 0;
+
+ switch (iocmd) {
+ case IOCTL_VMCI_VERSION2:{
+ int verFromUser;
+
+ if (copy_from_user
+ (&verFromUser, (void *)ioarg, sizeof(verFromUser))) {
+ retval = -EFAULT;
+ break;
+ }
+
+ vmciLinux->userVersion = verFromUser;
+ }
+ /* Fall through. */
+ case IOCTL_VMCI_VERSION:
+ /*
+ * The basic logic here is:
+ *
+ * If the user sends in a version of 0 tell it our version.
+ * If the user didn't send in a version, tell it our version.
+ * If the user sent in an old version, tell it -its- version.
+ * If the user sent in a newer version, tell it our version.
+ *
+ * The rationale behind telling the caller its version is that
+ * Workstation 6.5 required that VMX and VMCI kernel module were
+ * version sync'd. All new VMX users will be programmed to
+ * handle the VMCI kernel module version.
+ */
+
+ if (vmciLinux->userVersion > 0 &&
+ vmciLinux->userVersion
userVersion;
+ } else {
+ retval = VMCI_VERSION;
+ }
+ break;
+
+ case IOCTL_VMCI_INIT_CONTEXT:{
+ struct vmci_init_blk initBlock;
+ const struct cred *cred;
+
+ retval = copy_from_user(&initBlock, (void *)ioarg,
+ sizeof(initBlock));
+ if (retval != 0) {
+ pr_info("Error reading init block.");
+ retval = -EFAULT;
+ break;
+ }
+
+ mutex_lock(&vmciLinux->lock);
+ if (vmciLinux->ctType != VMCIOBJ_NOT_SET) {
+ pr_info("Received VMCI init on initialized handle.");
+ retval = -EINVAL;
+ goto init_release;
+ }
+
+ if (initBlock.flags & ~VMCI_PRIVILEGE_FLAG_RESTRICTED) {
+ pr_info("Unsupported VMCI restriction flag.");
+ retval = -EINVAL;
+ goto init_release;
+ }
+
+ cred = get_current_cred();
+ retval = vmci_ctx_init_ctx(initBlock.cid,
+ initBlock.flags,
+ 0, vmciLinux->userVersion,
+ cred, &vmciLinux->context);
+ put_cred(cred);
+ if (retval
context);
+ retval = copy_to_user((void *)ioarg, &initBlock,
+ sizeof(initBlock));
+ if (retval != 0) {
+ vmci_ctx_release_ctx(vmciLinux->context);
+ vmciLinux->context = NULL;
+ pr_info("Error writing init block.");
+ retval = -EFAULT;
+ goto init_release;
+ }
+
+ ASSERT(initBlock.cid != VMCI_INVALID_ID);
+ vmciLinux->ctType = VMCIOBJ_CONTEXT;
+ atomic_inc(&linuxState.activeContexts);
+
+init_release:
+ mutex_unlock(&vmciLinux->lock);
+ break;
+ }
+
+ case IOCTL_VMCI_DATAGRAM_SEND:{
+ struct vmci_datagram_snd_rcv_info sendInfo;
+ struct vmci_datagram *dg = NULL;
+ u32 cid;
+
+ if (vmciLinux->ctType != VMCIOBJ_CONTEXT) {
+ pr_warn("Ioctl only valid for context handle (iocmd=%d).",
+ iocmd);
+ retval = -EINVAL;
+ break;
+ }
+
+ retval = copy_from_user(&sendInfo, (void *)ioarg,
+ sizeof(sendInfo));
+ if (retval) {
+ pr_warn("copy_from_user failed.");
+ retval = -EFAULT;
+ break;
+ }
+
+ if (sendInfo.len > VMCI_MAX_DG_SIZE) {
+ pr_warn("Datagram too big (size=%d).",
+ sendInfo.len);
+ retval = -EINVAL;
+ break;
+ }
+
+ if (sendInfo.len
dst.context, dg->dst.resource,
+ dg->src.context, dg->src.resource,
+ (unsigned long long) dg->payloadSize);
+
+ /* Get source context id. */
+ ASSERT(vmciLinux->context);
+ cid = vmci_ctx_get_id(vmciLinux->context);
+ ASSERT(cid != VMCI_INVALID_ID);
+ sendInfo.result = vmci_datagram_dispatch(cid, dg, true);
+ kfree(dg);
+ retval =
+ copy_to_user((void *)ioarg, &sendInfo,
+ sizeof(sendInfo));
+ break;
+ }
+
+ case IOCTL_VMCI_DATAGRAM_RECEIVE:{
+ struct vmci_datagram_snd_rcv_info recvInfo;
+ struct vmci_datagram *dg = NULL;
+ size_t size;
+
+ if (vmciLinux->ctType != VMCIOBJ_CONTEXT) {
+ pr_warn("Ioctl only valid for context handle (iocmd=%d).",
+ iocmd);
+ retval = -EINVAL;
+ break;
+ }
+
+ retval = copy_from_user(&recvInfo, (void *)ioarg,
+ sizeof(recvInfo));
+ if (retval) {
+ pr_warn("copy_from_user failed.");
+ retval = -EFAULT;
+ break;
+ }
+
+ ASSERT(vmciLinux->ctType == VMCIOBJ_CONTEXT);
+ ASSERT(vmciLinux->context);
+ size = recvInfo.len;
+ recvInfo.result =
+ vmci_ctx_dequeue_datagram(vmciLinux->context,
+ &size, &dg);
+
+ if (recvInfo.result >= VMCI_SUCCESS) {
+ ASSERT(dg);
+ retval = copy_to_user((void *)((uintptr_t)
+ recvInfo.addr),
+ dg, VMCI_DG_SIZE(dg));
+ kfree(dg);
+ if (retval != 0)
+ break;
+ }
+ retval = copy_to_user((void *)ioarg, &recvInfo,
+ sizeof(recvInfo));
+ break;
+ }
+
+ case IOCTL_VMCI_QUEUEPAIR_ALLOC:{
+ if (vmciLinux->ctType != VMCIOBJ_CONTEXT) {
+ pr_info("IOCTL_VMCI_QUEUEPAIR_ALLOC only valid for contexts.");
+ retval = -EINVAL;
+ break;
+ }
+
+ if (vmciLinux->userVersion
context,
+ true, &info->result);
+ } else {
+ struct vmci_qp_alloc_info
+ queuePairAllocInfo;
+ struct vmci_qp_alloc_info *info =
+ (struct vmci_qp_alloc_info *)ioarg;
+ struct vmci_qp_page_store pageStore;
+
+ retval = copy_from_user(&queuePairAllocInfo,
+ (void *)ioarg,
+ sizeof(queuePairAllocInfo));
+ if (retval) {
+ retval = -EFAULT;
+ break;
+ }
+
+ pageStore.pages = queuePairAllocInfo.ppnVA;
+ pageStore.len = queuePairAllocInfo.numPPNs;
+
+ retval = drv_qp_broker_alloc(
+ queuePairAllocInfo.handle,
+ queuePairAllocInfo.peer,
+ queuePairAllocInfo.flags,
+ queuePairAllocInfo.produceSize,
+ queuePairAllocInfo.consumeSize,
+ &pageStore, vmciLinux->context,
+ false, &info->result);
+ }
+ break;
+ }
+
+ case IOCTL_VMCI_QUEUEPAIR_SETVA:{
+ struct vmci_qp_set_va_info setVAInfo;
+ struct vmci_qp_set_va_info *info =
+ (struct vmci_qp_set_va_info *)ioarg;
+ int32_t result;
+
+ if (vmciLinux->ctType != VMCIOBJ_CONTEXT) {
+ pr_info("IOCTL_VMCI_QUEUEPAIR_SETVA only valid for contexts.");
+ retval = -EINVAL;
+ break;
+ }
+
+ if (vmciLinux->userVersion
context,
+ setVAInfo.va);
+ } else {
+ /*
+ * The queue pair is about to be unmapped by
+ * the VMX.
+ */
+ result = vmci_qp_broker_unmap(setVAInfo.handle,
+ vmciLinux->context, 0);
+ }
+
+ retval = copy_to_user(&info->result, &result, sizeof(result));
+ if (retval)
+ retval = -EFAULT;
+
+ break;
+ }
+
+ case IOCTL_VMCI_QUEUEPAIR_SETPAGEFILE:{
+ struct vmci_qp_page_file_info pageFileInfo;
+ struct vmci_qp_page_file_info *info =
+ (struct vmci_qp_page_file_info *)ioarg;
+ int32_t result;
+
+ if (vmciLinux->userVersion
userVersion >= VMCI_VERSION_NOVMVM) {
+ pr_info("IOCTL_VMCI_QUEUEPAIR_SETPAGEFILE not " \
+ "supported this VMX (version=%d).",
+ vmciLinux->userVersion);
+ retval = -EINVAL;
+ break;
+ }
+
+ if (vmciLinux->ctType != VMCIOBJ_CONTEXT) {
+ pr_info("IOCTL_VMCI_QUEUEPAIR_SETPAGEFILE only " \
+ "valid for contexts.");
+ retval = -EINVAL;
+ break;
+ }
+
+ retval = copy_from_user(&pageFileInfo, (void *)ioarg,
+ sizeof(*info));
+ if (retval) {
+ retval = -EFAULT;
+ break;
+ }
+
+ /*
+ * Communicate success pre-emptively to the caller.
+ * Note that the basic premise is that it is incumbent
+ * upon the caller not to look at the info.result
+ * field until after the ioctl() returns. And then,
+ * only if the ioctl() result indicates no error. We
+ * send up the SUCCESS status before calling
+ * SetPageStore() store because failing to copy up the
+ * result code means unwinding the SetPageStore().
+ *
+ * It turns out the logic to unwind a SetPageStore()
+ * opens a can of worms. For example, if a host had
+ * created the QueuePair and a guest attaches and
+ * SetPageStore() is successful but writing success
+ * fails, then ... the host has to be stopped from
+ * writing (anymore) data into the QueuePair. That
+ * means an additional test in the VMCI_Enqueue() code
+ * path. Ugh.
+ */
+
+ result = VMCI_SUCCESS;
+ retval = copy_to_user(&info->result, &result, sizeof(result));
+ if (retval == 0) {
+ result = vmci_qp_broker_set_page_store(
+ pageFileInfo.handle,
+ pageFileInfo.produceVA,
+ pageFileInfo.consumeVA,
+ vmciLinux->context);
+ if (result
result,
+ &result,
+ sizeof(result));
+ if (retval != 0) {
+ /*
+ * Note that in this case the
+ * SetPageStore() call failed
+ * but we were unable to
+ * communicate that to the
+ * caller (because the
+ * copy_to_user() call
+ * failed). So, if we simply
+ * return an error (in this
+ * case -EFAULT) then the
+ * caller will know that the
+ * SetPageStore failed even
+ * though we couldn't put the
+ * result code in the result
+ * field and indicate exactly
+ * why it failed.
+ *
+ * That says nothing about the
+ * issue where we were once
+ * able to write to the
+ * caller's info memory and
+ * now can't. Something more
+ * serious is probably going
+ * on than the fact that
+ * SetPageStore() didn't work.
+ */
+ retval = -EFAULT;
+ }
+ }
+
+ } else {
+ /*
+ * In this case, we can't write a result field of the
+ * caller's info block. So, we don't even try to
+ * SetPageStore().
+ */
+ retval = -EFAULT;
+ }
+
+ break;
+ }
+
+ case IOCTL_VMCI_QUEUEPAIR_DETACH:{
+ struct vmci_qp_dtch_info detachInfo;
+ struct vmci_qp_dtch_info *info =
+ (struct vmci_qp_dtch_info *)ioarg;
+ int32_t result;
+
+ if (vmciLinux->ctType != VMCIOBJ_CONTEXT) {
+ pr_info("IOCTL_VMCI_QUEUEPAIR_DETACH only valid for contexts.");
+ retval = -EINVAL;
+ break;
+ }
+
+ retval = copy_from_user(&detachInfo, (void *)ioarg,
+ sizeof(detachInfo));
+ if (retval) {
+ retval = -EFAULT;
+ break;
+ }
+
+ result = vmci_qp_broker_detach(detachInfo.handle,
+ vmciLinux->context);
+ if (result == VMCI_SUCCESS &&
+ vmciLinux->userVersion
result, &result, sizeof(result));
+ if (retval)
+ retval = -EFAULT;
+
+ break;
+ }
+
+ case IOCTL_VMCI_CTX_ADD_NOTIFICATION:{
+ struct vmci_ctx_info arInfo;
+ struct vmci_ctx_info *info =
+ (struct vmci_ctx_info *)ioarg;
+ int32_t result;
+ u32 cid;
+
+ if (vmciLinux->ctType != VMCIOBJ_CONTEXT) {
+ pr_info("IOCTL_VMCI_CTX_ADD_NOTIFICATION only " \
+ "valid for contexts.");
+ retval = -EINVAL;
+ break;
+ }
+
+ retval = copy_from_user(&arInfo, (void *)ioarg,
+ sizeof(arInfo));
+ if (retval) {
+ retval = -EFAULT;
+ break;
+ }
+
+ cid = vmci_ctx_get_id(vmciLinux->context);
+ result = vmci_ctx_add_notification(cid, arInfo.remoteCID);
+ retval = copy_to_user(&info->result, &result, sizeof(result));
+ if (retval) {
+ retval = -EFAULT;
+ break;
+ }
+ break;
+ }
+
+ case IOCTL_VMCI_CTX_REMOVE_NOTIFICATION:{
+ struct vmci_ctx_info arInfo;
+ struct vmci_ctx_info *info =
+ (struct vmci_ctx_info *)ioarg;
+ int32_t result;
+ u32 cid;
+
+ if (vmciLinux->ctType != VMCIOBJ_CONTEXT) {
+ pr_info("IOCTL_VMCI_CTX_REMOVE_NOTIFICATION only " \
+ "valid for contexts.");
+ retval = -EINVAL;
+ break;
+ }
+
+ retval = copy_from_user(&arInfo, (void *)ioarg,
+ sizeof(arInfo));
+ if (retval) {
+ retval = -EFAULT;
+ break;
+ }
+
+ cid = vmci_ctx_get_id(vmciLinux->context);
+ result = vmci_ctx_remove_notification(cid,
+ arInfo.remoteCID);
+ retval = copy_to_user(&info->result, &result, sizeof(result));
+ if (retval) {
+ retval = -EFAULT;
+ break;
+ }
+
+ break;
+ }
+
+ case IOCTL_VMCI_CTX_GET_CPT_STATE:{
+ struct vmci_ctx_chkpt_buf_info getInfo;
+ u32 cid;
+ char *cptBuf;
+
+ if (vmciLinux->ctType != VMCIOBJ_CONTEXT) {
+ pr_info("IOCTL_VMCI_CTX_GET_CPT_STATE only valid for contexts.");
+ retval = -EINVAL;
+ break;
+ }
+
+ retval = copy_from_user(&getInfo, (void *)ioarg,
+ sizeof(getInfo));
+ if (retval) {
+ retval = -EFAULT;
+ break;
+ }
+
+ cid = vmci_ctx_get_id(vmciLinux->context);
+ getInfo.result =
+ vmci_ctx_get_chkpt_state(cid,
+ getInfo.cptType,
+ &getInfo.bufSize,
+ &cptBuf);
+ if (getInfo.result == VMCI_SUCCESS && getInfo.bufSize) {
+ retval = copy_to_user((void *)(uintptr_t)
+ getInfo.cptBuf, cptBuf,
+ getInfo.bufSize);
+ kfree(cptBuf);
+ if (retval) {
+ retval = -EFAULT;
+ break;
+ }
+ }
+ retval = copy_to_user((void *)ioarg, &getInfo,
+ sizeof(getInfo));
+ if (retval)
+ retval = -EFAULT;
+
+ break;
+ }
+
+ case IOCTL_VMCI_CTX_SET_CPT_STATE:{
+ struct vmci_ctx_chkpt_buf_info setInfo;
+ u32 cid;
+ char *cptBuf;
+
+ if (vmciLinux->ctType != VMCIOBJ_CONTEXT) {
+ pr_info("IOCTL_VMCI_CTX_SET_CPT_STATE only valid for contexts.");
+ retval = -EINVAL;
+ break;
+ }
+
+ retval = copy_from_user(&setInfo, (void *)ioarg,
+ sizeof(setInfo));
+ if (retval) {
+ retval = -EFAULT;
+ break;
+ }
+
+ cptBuf = kmalloc(setInfo.bufSize, GFP_KERNEL);
+ if (cptBuf == NULL) {
+ pr_info("Cannot allocate memory to set cpt state (type=%d).",
+ setInfo.cptType);
+ retval = -ENOMEM;
+ break;
+ }
+ retval = copy_from_user(cptBuf,
+ (void *)(uintptr_t) setInfo.cptBuf,
+ setInfo.bufSize);
+ if (retval) {
+ kfree(cptBuf);
+ retval = -EFAULT;
+ break;
+ }
+
+ cid = vmci_ctx_get_id(vmciLinux->context);
+ setInfo.result =
+ vmci_ctx_set_chkpt_state(cid,
+ setInfo.cptType,
+ setInfo.bufSize,
+ cptBuf);
+ kfree(cptBuf);
+ retval = copy_to_user((void *)ioarg, &setInfo,
+ sizeof(setInfo));
+ if (retval)
+ retval = -EFAULT;
+
+ break;
+ }
+
+ case IOCTL_VMCI_GET_CONTEXT_ID:{
+ u32 cid = VMCI_HOST_CONTEXT_ID;
+
+ retval = copy_to_user((void *)ioarg, &cid, sizeof(cid));
+ break;
+ }
+
+ case IOCTL_VMCI_SET_NOTIFY:{
+ struct vmci_set_notify_info notifyInfo;
+
+ if (vmciLinux->ctType != VMCIOBJ_CONTEXT) {
+ pr_info("IOCTL_VMCI_SET_NOTIFY only valid for contexts.");
+ retval = -EINVAL;
+ break;
+ }
+
+ retval = copy_from_user(¬ifyInfo, (void *)ioarg,
+ sizeof(notifyInfo));
+ if (retval) {
+ retval = -EFAULT;
+ break;
+ }
+
+ if ((uintptr_t) notifyInfo.notifyUVA !=
+ (uintptr_t) NULL) {
+ notifyInfo.result =
+ drv_setup_notify(vmciLinux->context,
+ (uintptr_t)
+ notifyInfo.notifyUVA);
+ } else {
+ spin_lock(&vmciLinux->context->lock);
+ vmci_ctx_unset_notify(vmciLinux->context);
+ spin_unlock(&vmciLinux->context->lock);
+ notifyInfo.result = VMCI_SUCCESS;
+ }
+
+ retval = copy_to_user((void *)ioarg, ¬ifyInfo,
+ sizeof(notifyInfo));
+ if (retval)
+ retval = -EFAULT;
+
+ break;
+ }
+
+ case IOCTL_VMCI_NOTIFY_RESOURCE:{
+ struct vmci_dbell_notify_resource_info info;
+ u32 cid;
+
+ if (vmciLinux->userVersion
ctType != VMCIOBJ_CONTEXT) {
+ pr_info("IOCTL_VMCI_NOTIFY_RESOURCE is only valid " \
+ "for contexts.");
+ retval = -EINVAL;
+ break;
+ }
+
+ retval = copy_from_user(&info, (void *)ioarg, sizeof(info));
+ if (retval) {
+ retval = -EFAULT;
+ break;
+ }
+
+ cid = vmci_ctx_get_id(vmciLinux->context);
+ switch (info.action) {
+ case VMCI_NOTIFY_RESOURCE_ACTION_NOTIFY:
+ if (info.resource ==
+ VMCI_NOTIFY_RESOURCE_DOOR_BELL) {
+ u32 flags = VMCI_NO_PRIVILEGE_FLAGS;
+ info.result =
+ vmci_ctx_notify_dbell(cid,
+ info.handle,
+ flags);
+ } else {
+ info.result = VMCI_ERROR_UNAVAILABLE;
+ }
+ break;
+ case VMCI_NOTIFY_RESOURCE_ACTION_CREATE:
+ info.result =
+ vmci_ctx_dbell_create(cid,
+ info.handle);
+ break;
+ case VMCI_NOTIFY_RESOURCE_ACTION_DESTROY:
+ info.result =
+ vmci_ctx_dbell_destroy(cid,
+ info.handle);
+ break;
+ default:
+ pr_info("IOCTL_VMCI_NOTIFY_RESOURCE got unknown " \
+ "action (action=%d).", info.action);
+ info.result = VMCI_ERROR_INVALID_ARGS;
+ }
+ retval = copy_to_user((void *)ioarg, &info,
+ sizeof(info));
+ if (retval)
+ retval = -EFAULT;
+
+ break;
+ }
+
+ case IOCTL_VMCI_NOTIFICATIONS_RECEIVE:{
+ struct vmci_ctx_notify_recv_info info;
+ struct vmci_handle_arr *dbHandleArray;
+ struct vmci_handle_arr *qpHandleArray;
+ u32 cid;
+
+ if (vmciLinux->ctType != VMCIOBJ_CONTEXT) {
+ pr_info("IOCTL_VMCI_NOTIFICATIONS_RECEIVE is only " \
+ "valid for contexts.");
+ retval = -EINVAL;
+ break;
+ }
+
+ if (vmciLinux->userVersion
context);
+ info.result =
+ vmci_ctx_rcv_notifications_get(cid,
+ &dbHandleArray,
+ &qpHandleArr