Serverphorums.com

[PATCH 04/11] vmci_driver.patch: VMCI device driver. (no replies)

2012-08-30

Signed-off-by: George Zhang

---

drivers/misc/vmw_vmci/vmci_driver.c | 2293 +++++++++++++++++++++++++++++++++++

drivers/misc/vmw_vmci/vmci_driver.h | 48 +

2 files changed, 2341 insertions(+), 0 deletions(-)

create mode 100644 drivers/misc/vmw_vmci/vmci_driver.c

create mode 100644 drivers/misc/vmw_vmci/vmci_driver.h

diff --git a/drivers/misc/vmw_vmci/vmci_driver.c b/drivers/misc/vmw_vmci/vmci_driver.c

new file mode 100644

index 0000000..ab19651

--- /dev/null

+++ b/drivers/misc/vmw_vmci/vmci_driver.c

@@ -0,0 +1,2293 @@

+/*

+ * VMware VMCI Driver

+ *

+ * Copyright (C) 2012 VMware, Inc. All rights reserved.

+ *

+ * This program is free software; you can redistribute it and/or modify it

+ * under the terms of the GNU General Public License as published by the

+ * Free Software Foundation version 2 and no later version.

+ *

+ * This program is distributed in the hope that it will be useful, but

+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY

+ * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License

+ * for more details.

+ */

+

+#include

+#include

+#include

+#include

+#include

+#include

+#include

+#include

+#include

+#include

+#include

+#include

+#include

+#include

+#include

+#include

+#include

+#include

+

+#include "vmci_handle_array.h"

+#include "vmci_common_int.h"

+#include "vmci_hash_table.h"

+#include "vmci_queue_pair.h"

+#include "vmci_datagram.h"

+#include "vmci_doorbell.h"

+#include "vmci_resource.h"

+#include "vmci_context.h"

+#include "vmci_driver.h"

+#include "vmci_event.h"

+

+#define VMCI_UTIL_NUM_RESOURCES 1

+

+enum {

+ VMCI_NOTIFY_RESOURCE_QUEUE_PAIR = 0,

+ VMCI_NOTIFY_RESOURCE_DOOR_BELL = 1,

+};

+

+enum {

+ VMCI_NOTIFY_RESOURCE_ACTION_NOTIFY = 0,

+ VMCI_NOTIFY_RESOURCE_ACTION_CREATE = 1,

+ VMCI_NOTIFY_RESOURCE_ACTION_DESTROY = 2,

+};

+

+static u32 ctxUpdateSubID = VMCI_INVALID_ID;

+static struct vmci_ctx *hostContext;

+static atomic_t vmContextID = { VMCI_INVALID_ID };

+

+struct vmci_delayed_work_info { /* Heap-allocated wrapper for one deferred callback; freed by drv_delayed_work_cb(). */

+ struct work_struct work; /* Work item handed to schedule_work(). */

+ VMCIWorkFn *workFn; /* Callback invoked from the work handler. */

+ void *data; /* Opaque argument passed to workFn. */

+};

+

+/*

+ * VMCI driver initialization. This block can also be used to

+ * pass initial group membership etc.

+ */

+struct vmci_init_blk {

+ u32 cid;

+ u32 flags;

+};

+

+/* VMCIQueuePairAllocInfo_VMToVM */

+struct vmci_qp_alloc_info_vmvm {

+ struct vmci_handle handle;

+ u32 peer;

+ u32 flags;

+ uint64_t produceSize;

+ uint64_t consumeSize;

+ uint64_t producePageFile; /* User VA. */

+ uint64_t consumePageFile; /* User VA. */

+ uint64_t producePageFileSize; /* Size of the file name array. */

+ uint64_t consumePageFileSize; /* Size of the file name array. */

+ int32_t result;

+ u32 _pad;

+};

+

+/* VMCISetNotifyInfo: Used to pass notify flag's address to the host driver. */

+struct vmci_set_notify_info {

+ uint64_t notifyUVA;

+ int32_t result;

+ u32 _pad;

+};

+

+struct vmci_device {

+ struct mutex lock; /* Device access mutex */

+

+ unsigned int ioaddr;

+ unsigned int ioaddr_size;

+ unsigned int irq;

+ unsigned int intr_type;

+ bool exclusive_vectors;

+ struct msix_entry msix_entries[VMCI_MAX_INTRS];

+

+ bool enabled;

+ spinlock_t dev_spinlock; /* Lock for datagram access synchronization */

+ atomic_t datagrams_allowed;

+};

+

+static DEFINE_PCI_DEVICE_TABLE(vmci_ids) = {

+ {PCI_DEVICE(PCI_VENDOR_ID_VMWARE, PCI_DEVICE_ID_VMWARE_VMCI),},

+ {0},

+};

+

+static struct vmci_device vmci_dev;

+

+/* These options are false (0) by default */

+static bool vmci_disable_host;

+static bool vmci_disable_guest;

+static bool vmci_disable_msi;

+static bool vmci_disable_msix;

+

+/*

+ * Allocate a buffer for incoming datagrams globally to avoid repeated

+ * allocation in the interrupt handler's atomic context.

+ */

+static uint8_t *data_buffer;

+static u32 data_buffer_size = VMCI_MAX_DG_SIZE;

+

+/*

+ * If the VMCI hardware supports the notification bitmap, we allocate

+ * and register a page with the device.

+ */

+static uint8_t *notification_bitmap;

+

+/*

+ * Per-instance host state

+ *

+ * One of these is stored in filp->private_data for each open file and

+ * is freed again by drv_driver_close().

+ *

+ * context: VMCI context created by the IOCTL_VMCI_INIT_CONTEXT ioctl;

+ * released on close when ctType is VMCIOBJ_CONTEXT.

+ * userVersion: version reported by user space via IOCTL_VMCI_VERSION2.

+ * ctType: VMCIOBJ_NOT_SET until a context has been initialized, then

+ * VMCIOBJ_CONTEXT.

+ */

+struct vmci_linux {

+ struct vmci_ctx *context;

+ int userVersion;

+ enum vmci_obj_type ctType;

+ struct mutex lock; /* Mutex lock for vmci context access */

+};

+

+/*

+ * Static driver state.

+ */

+struct vmci_linux_state {

+ struct miscdevice misc;

+ char buf[1024];

+ atomic_t activeContexts;

+};

+

+/*

+ * Types and variables shared by both host and guest personality

+ */

+static bool guestDeviceInit;

+static atomic_t guestDeviceActive;

+static bool hostDeviceInit;

+

+/*
+ * Work handler for items queued by vmci_drv_schedule_delayed_work().
+ * Recovers the enclosing vmci_delayed_work_info from the embedded
+ * work_struct, runs the stored callback and frees the wrapper.
+ */
+static void drv_delayed_work_cb(struct work_struct *work)
+{
+	struct vmci_delayed_work_info *info =
+		container_of(work, struct vmci_delayed_work_info, work);
+
+	ASSERT(info);
+	ASSERT(info->workFn);
+
+	info->workFn(info->data);
+
+	/* The wrapper was allocated per request; it is ours to free. */
+	kfree(info);
+}

+

+/*
+ * Queue workFn(data) for deferred execution through schedule_work().
+ *
+ * A small wrapper is allocated with GFP_ATOMIC to carry the callback
+ * and its argument; drv_delayed_work_cb() frees it after the callback
+ * has run.
+ *
+ * Returns VMCI_SUCCESS once the work has been queued, or
+ * VMCI_ERROR_NO_MEM if the wrapper could not be allocated.
+ */
+int vmci_drv_schedule_delayed_work(VMCIWorkFn *workFn,
+				   void *data)
+{
+	struct vmci_delayed_work_info *info;
+
+	ASSERT(workFn);
+
+	info = kmalloc(sizeof(*info), GFP_ATOMIC);
+	if (info == NULL)
+		return VMCI_ERROR_NO_MEM;
+
+	info->data = data;
+	info->workFn = workFn;
+	INIT_WORK(&info->work, drv_delayed_work_cb);
+
+	schedule_work(&info->work);
+
+	return VMCI_SUCCESS;
+}

+

+/*
+ * Sleep interruptibly on @event after invoking @releaseCB.
+ *
+ * The caller is added to the wait queue and marked TASK_INTERRUPTIBLE
+ * before releaseCB(clientData) is called. releaseCB releases the lock
+ * or other primitive the caller holds, so a wake-up issued after that
+ * release finds the task already queued and is not missed.
+ *
+ * Returns true if a signal is pending when the wait ends, false
+ * otherwise. If event or releaseCB is NULL no wait takes place and
+ * false is returned.
+ */
+bool vmci_drv_wait_on_event_intr(wait_queue_head_t *event,
+				 VMCIEventReleaseCB releaseCB,
+				 void *clientData)
+{
+	DECLARE_WAITQUEUE(wait, current);
+
+	if (event == NULL || releaseCB == NULL)
+		return false;
+
+	add_wait_queue(event, &wait);
+
+	/*
+	 * set_current_state() includes the memory barrier that a plain
+	 * store to current->state lacks. Without it the state change
+	 * may be reordered against the release below and a concurrent
+	 * wake-up could be lost.
+	 */
+	set_current_state(TASK_INTERRUPTIBLE);
+
+	/*
+	 * Release the lock or other primitive that makes it possible for
+	 * us to put the current thread on the wait queue without missing
+	 * the signal. I.e. on Linux we need to put ourselves on the wait
+	 * queue and set our state to TASK_INTERRUPTIBLE without another
+	 * thread signalling us. The releaseCB is used to synchronize this.
+	 */
+	releaseCB(clientData);
+
+	schedule();
+
+	/* We are running again; no barrier is needed for this store. */
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(event, &wait);
+
+	return signal_pending(current);
+}

+

+/*

+ * Cleans up the host specific components of the VMCI module:

+ * releases the module-wide hostContext via vmci_ctx_release_ctx()

+ * and then calls vmci_qp_broker_exit().

+ */

+static void drv_host_cleanup(void)

+{

+ vmci_ctx_release_ctx(hostContext);

+ vmci_qp_broker_exit();

+}

+

+/*
+ * Reports whether the VMCI device is usable: true when either the
+ * guest personality or the host personality is active. The host
+ * check is only made when the guest check fails.
+ */
+static bool drv_device_enabled(void)
+{
+	if (vmci_guest_code_active())
+		return true;
+
+	return vmci_host_code_active();
+}

+

+/*
+ * Event callback registered for VMCI_EVENT_CTX_ID_UPDATE. It is called
+ * with the new context id when the id is updated or the VM is resumed,
+ * and stores that id in vmContextID.
+ *
+ * subID: subscription id of the caller; must match ctxUpdateSubID.
+ * eventData: event whose payload carries the new context id.
+ * clientData: unused.
+ *
+ * Events from an unexpected subscription, with no event data, or with
+ * an invalid context id are logged and ignored.
+ */
+static void drv_util_cid_update(u32 subID,
+				struct vmci_event_data *eventData,
+				void *clientData)
+{
+	struct vmci_event_payld_ctx *evPayload;
+
+	if (subID != ctxUpdateSubID) {
+		pr_devel("Invalid subscriber (ID=0x%x).", subID);
+		return;
+	}
+
+	if (eventData == NULL) {
+		pr_devel("Invalid event data.");
+		return;
+	}
+
+	/* Only derive the payload pointer once eventData is known valid. */
+	evPayload = vmci_event_data_payload(eventData);
+	if (evPayload->contextID == VMCI_INVALID_ID) {
+		pr_devel("Invalid event data.");
+		return;
+	}
+
+	pr_devel("Updating context from (ID=0x%x) to (ID=0x%x) on event " \
+		 "(type=%d).", atomic_read(&vmContextID), evPayload->contextID,
+		 eventData->event);
+
+	atomic_set(&vmContextID, evPayload->contextID);
+}

+

+/*

+ * Subscribe to context id update event.

+ */

+static void __devinit drv_util_init(void)

+{

+ /*

+ * We subscribe to the VMCI_EVENT_CTX_ID_UPDATE here so we can

+ * update the internal context id when needed.

+ */

+ if (vmci_event_subscribe

+ (VMCI_EVENT_CTX_ID_UPDATE, VMCI_FLAG_EVENT_NONE,

+ drv_util_cid_update, NULL, &ctxUpdateSubID)
dst = vmci_make_handle(VMCI_HYPERVISOR_CONTEXT_ID,

+ VMCI_RESOURCES_QUERY);

+ checkMsg->src = VMCI_ANON_SRC_HANDLE;

+ checkMsg->payloadSize = msgSize - VMCI_DG_HEADERSIZE;

+ msg = (struct vmci_resource_query_msg *)VMCI_DG_PAYLOAD(checkMsg);

+

+ msg->numResources = VMCI_UTIL_NUM_RESOURCES;

+ msg->resources[0] = VMCI_GET_CONTEXT_ID;

+

+ /* Checks that hyper calls are supported */

+ result = (0x1 == vmci_send_datagram(checkMsg));

+ kfree(checkMsg);

+

+ pr_info("Host capability check: %s.",

+ result ? "PASSED" : "FAILED");

+

+ /* We need the vector. There are no fallbacks. */

+ return result;

+}

+

+/*

+ * Reads datagrams from the data in port and dispatches them. We

+ * always start reading datagrams into only the first page of the

+ * datagram buffer. If the datagrams don't fit into one page, we

+ * use the maximum datagram buffer size for the remainder of the

+ * invocation. This is a simple heuristic for not penalizing

+ * small datagrams.

+ *

+ * This function assumes that it has exclusive access to the data

+ * in port for the duration of the call.

+ */

+static void drv_read_dgs_from_port(int ioHandle,

+ unsigned short int dgInPort,

+ uint8_t *dgInBuffer,

+ size_t dgInBufferSize)

+{

+ struct vmci_datagram *dg;

+ size_t currentDgInBufferSize = PAGE_SIZE;

+ size_t remainingBytes;

+

+ ASSERT(dgInBufferSize >= PAGE_SIZE);

+

+ insb(dgInPort, dgInBuffer, currentDgInBufferSize);

+ dg = (struct vmci_datagram *)dgInBuffer;

+ remainingBytes = currentDgInBufferSize;

+

+ while (dg->dst.resource != VMCI_INVALID_ID ||

+ remainingBytes > PAGE_SIZE) {

+ unsigned dgInSize;

+

+ /*

+ * When the input buffer spans multiple pages, a datagram can

+ * start on any page boundary in the buffer.

+ */

+ if (dg->dst.resource == VMCI_INVALID_ID) {

+ ASSERT(remainingBytes > PAGE_SIZE);

+ dg = (struct vmci_datagram *)roundup((uintptr_t)

+ dg + 1, PAGE_SIZE);

+ ASSERT((uint8_t *)dg
remainingBytes) {

+ if (remainingBytes != currentDgInBufferSize) {

+

+ /*

+ * We move the partial

+ * datagram to the front and

+ * read the remainder of the

+ * datagram and possibly

+ * following calls into the

+ * following bytes.

+ */

+ memmove(dgInBuffer, dgInBuffer +

+ currentDgInBufferSize -

+ remainingBytes, remainingBytes);

+ dg = (struct vmci_datagram *)

+ dgInBuffer;

+ }

+

+ if (currentDgInBufferSize != dgInBufferSize)

+ currentDgInBufferSize = dgInBufferSize;

+

+ insb(dgInPort, dgInBuffer + remainingBytes,

+ currentDgInBufferSize - remainingBytes);

+ }

+

+ /*

+ * We special case event datagrams from the

+ * hypervisor.

+ */

+ if (dg->src.context == VMCI_HYPERVISOR_CONTEXT_ID &&

+ dg->dst.resource == VMCI_EVENT_HANDLER) {

+ result = vmci_event_dispatch(dg);

+ } else {

+ result = vmci_datagram_invoke_guest_handler(dg);

+ }

+ if (result
dst.resource, result);

+ }

+

+ /* On to the next datagram. */

+ dg = (struct vmci_datagram *)((uint8_t *)dg +

+ dgInSize);

+ } else {

+ size_t bytesToSkip;

+

+ /*

+ * Datagram doesn't fit in datagram buffer of maximal

+ * size. We drop it.

+ */

+ pr_devel("Failed to receive datagram (size=%u bytes).",

+ dgInSize);

+

+ bytesToSkip = dgInSize - remainingBytes;

+ if (currentDgInBufferSize != dgInBufferSize)

+ currentDgInBufferSize = dgInBufferSize;

+

+ for (;;) {

+ insb(dgInPort, dgInBuffer,

+ currentDgInBufferSize);

+ if (bytesToSkip
ctType = VMCIOBJ_NOT_SET;

+ mutex_init(&vmciLinux->lock);

+ filp->private_data = vmciLinux;

+

+ return 0;

+}

+

+/*
+ * Release handler for /dev/vmci; typically runs when the owning
+ * process exits. Drops the VMCI context attached to this open file,
+ * if there is one, and frees the per-file state.
+ *
+ * Always returns 0.
+ */
+static int drv_driver_close(struct inode *inode,
+			    struct file *filp)
+{
+	struct vmci_linux *dev = filp->private_data;
+
+	ASSERT(dev);
+
+	if (dev->ctType == VMCIOBJ_CONTEXT) {
+		ASSERT(dev->context);
+
+		vmci_ctx_release_ctx(dev->context);
+		dev->context = NULL;
+
+		/*
+		 * activeContexts counts the contexts created through
+		 * the IOCTL_VMCI_INIT_CONTEXT ioctl and so tracks
+		 * whether any VMX is using the host personality. This
+		 * context is gone, so undo that increment.
+		 */
+		atomic_dec(&linuxState.activeContexts);
+	}
+
+	dev->ctType = VMCIOBJ_NOT_SET;
+	kfree(dev);
+	filp->private_data = NULL;
+
+	return 0;
+}

+

+/*
+ * Poll handler for /dev/vmci. It is used to wake up the VMX when a
+ * VMCI call arrives.
+ *
+ * For a file with an initialized context, registers on the context's
+ * host wait queue (when a poll table is supplied) and returns POLLIN
+ * if the context has pending datagrams or pending doorbells.
+ * Returns 0 otherwise, including for files without a context.
+ */
+static unsigned int drv_driver_poll(struct file *filp, poll_table *wait)
+{
+	struct vmci_linux *dev = filp->private_data;
+	struct vmci_ctx *ctx;
+	unsigned int mask = 0;
+
+	/* Nothing to report until a context has been attached. */
+	if (dev->ctType != VMCIOBJ_CONTEXT)
+		return mask;
+
+	ASSERT(dev->context != NULL);
+	ctx = dev->context;
+
+	/* Check for VMCI calls to this VM context. */
+	if (wait != NULL)
+		poll_wait(filp, &ctx->hostContext.waitQueue, wait);
+
+	spin_lock(&ctx->lock);
+	if (ctx->pendingDatagrams > 0 ||
+	    vmci_handle_arr_get_size(ctx->pendingDoorbellArray) > 0)
+		mask = POLLIN;
+	spin_unlock(&ctx->lock);
+
+	return mask;
+}

+

+static int __init drv_host_init(void)

+{

+ int error;

+ int result;

+

+ result = vmci_ctx_init_ctx(VMCI_HOST_CONTEXT_ID,

+ VMCI_DEFAULT_PROC_PRIVILEGE_FLAGS,

+ -1, VMCI_VERSION, NULL, &hostContext);

+ if (result
*userBufSize)

+ return VMCI_ERROR_MORE_DATA;

+

+ *userBufSize = arraySize * sizeof(*handles);

+ if (*userBufSize)

+ *retval = copy_to_user(userBufUVA,

+ vmci_handle_arr_get_handles

+ (handleArray), *userBufSize);

+

+ return VMCI_SUCCESS;

+}

+

+/*
+ * Helper function for creating a queue pair and copying the result
+ * to user memory.
+ *
+ * Calls vmci_qp_broker_alloc() with VMCI_NO_PRIVILEGE_FLAGS. For
+ * VM-to-VM requests (vmToVm) a plain VMCI_SUCCESS is reported to the
+ * caller as VMCI_SUCCESS_QUEUEPAIR_CREATE. The VMCI result code is
+ * written to resultUVA; it is not the return value.
+ *
+ * If the result cannot be copied to user space and the allocation had
+ * succeeded, the queue pair is detached again so that nothing is left
+ * behind that the caller does not know about.
+ *
+ * Returns 0 if the result was delivered to resultUVA, -EFAULT if not.
+ */
+static int drv_qp_broker_alloc(struct vmci_handle handle,
+			       u32 peer,
+			       u32 flags,
+			       uint64_t produceSize,
+			       uint64_t consumeSize,
+			       struct vmci_qp_page_store *pageStore,
+			       struct vmci_ctx *context,
+			       bool vmToVm,
+			       void __user *resultUVA)
+{
+	int result;
+	int retval;
+
+	result = vmci_qp_broker_alloc(handle, peer, flags,
+				      VMCI_NO_PRIVILEGE_FLAGS, produceSize,
+				      consumeSize, pageStore, context);
+	if (result == VMCI_SUCCESS && vmToVm)
+		result = VMCI_SUCCESS_QUEUEPAIR_CREATE;
+
+	retval = copy_to_user(resultUVA, &result, sizeof(result));
+	if (retval) {
+		retval = -EFAULT;
+
+		/* Undo a successful allocation the caller never saw. */
+		if (result >= VMCI_SUCCESS) {
+			result = vmci_qp_broker_detach(handle, context);
+			ASSERT(result >= VMCI_SUCCESS);
+		}
+	}
+
+	return retval;
+}

+

+/*
+ * Pin the single physical page backing the user virtual address
+ * addr in the current process, requesting write access.
+ *
+ * Returns the pinned page, or NULL if get_user_pages() did not pin
+ * exactly one page.
+ */
+static struct page *drv_user_va_lock_page(uintptr_t addr)
+{
+	struct page *page = NULL;
+	int pinned;
+
+	/* get_user_pages() is called with mmap_sem held for reading. */
+	down_read(&current->mm->mmap_sem);
+	pinned = get_user_pages(current, current->mm, addr,
+				1, 1, 0, &page, NULL);
+	up_read(&current->mm->mmap_sem);
+
+	return (pinned == 1) ? page : NULL;
+}

+

+/*
+ * Pin the page behind a user-space boolean and map it into kernel
+ * address space.
+ *
+ * notifyUVA: user virtual address of the boolean.
+ * p: receives the pinned page.
+ * notifyPtr: receives the kernel address of the boolean.
+ *
+ * The boolean must lie within a single page and be writable by the
+ * caller. Returns 0 on success, -EINVAL if the address fails those
+ * checks, or -EAGAIN if the page could not be pinned.
+ */
+static int drv_map_bool_ptr(uintptr_t notifyUVA,
+			    struct page **p,
+			    bool **notifyPtr)
+{
+	uintptr_t firstPage;
+	uintptr_t lastPage;
+	uint8_t *kernelBase;
+
+	if (!access_ok(VERIFY_WRITE, (void __user *)notifyUVA,
+		       sizeof(**notifyPtr)))
+		return -EINVAL;
+
+	/* Reject an object that straddles a page boundary. */
+	firstPage = notifyUVA & ~(PAGE_SIZE - 1);
+	lastPage = (notifyUVA + sizeof(**notifyPtr) - 1) & ~(PAGE_SIZE - 1);
+	if (lastPage != firstPage)
+		return -EINVAL;
+
+	*p = drv_user_va_lock_page(notifyUVA);
+	if (*p == NULL)
+		return -EAGAIN;
+
+	/* Kernel mapping of the page plus the offset within that page. */
+	kernelBase = (uint8_t *)kmap(*p);
+	*notifyPtr = (bool *)(kernelBase + (notifyUVA & (PAGE_SIZE - 1)));
+
+	return 0;
+}

+

+/*
+ * Prepare a context for notification through a user-space boolean.
+ * drv_map_bool_ptr() pins and maps the boolean at notifyUVA; the page
+ * and the kernel pointer are stored in the context.
+ *
+ * Returns VMCI_SUCCESS on success, VMCI_ERROR_DUPLICATE_ENTRY if the
+ * context already has a notify pointer, or VMCI_ERROR_GENERIC if the
+ * mapping failed.
+ */
+static int drv_setup_notify(struct vmci_ctx *context,
+			    uintptr_t notifyUVA)
+{
+	if (context->notify) {
+		pr_warn("Notify mechanism is already set up.");
+		return VMCI_ERROR_DUPLICATE_ENTRY;
+	}
+
+	if (drv_map_bool_ptr(notifyUVA, &context->notifyPage,
+			     &context->notify) != 0)
+		return VMCI_ERROR_GENERIC;
+
+	vmci_ctx_check_signal_notify(context);
+
+	return VMCI_SUCCESS;
+}

+

+static long drv_driver_unlocked_ioctl(struct file *filp,

+ u_int iocmd,

+ unsigned long ioarg)

+{

+ struct vmci_linux *vmciLinux = (struct vmci_linux *)filp->private_data;

+ int retval = 0;

+

+ switch (iocmd) {

+ case IOCTL_VMCI_VERSION2:{

+ int verFromUser;

+

+ if (copy_from_user

+ (&verFromUser, (void *)ioarg, sizeof(verFromUser))) {

+ retval = -EFAULT;

+ break;

+ }

+

+ vmciLinux->userVersion = verFromUser;

+ }

+ /* Fall through. */

+ case IOCTL_VMCI_VERSION:

+ /*

+ * The basic logic here is:

+ *

+ * If the user sends in a version of 0 tell it our version.

+ * If the user didn't send in a version, tell it our version.

+ * If the user sent in an old version, tell it -its- version.

+ * If the user sent in a newer version, tell it our version.

+ *

+ * The rationale behind telling the caller its version is that

+ * Workstation 6.5 required that VMX and VMCI kernel module were

+ * version sync'd. All new VMX users will be programmed to

+ * handle the VMCI kernel module version.

+ */

+

+ if (vmciLinux->userVersion > 0 &&

+ vmciLinux->userVersion
userVersion;

+ } else {

+ retval = VMCI_VERSION;

+ }

+ break;

+

+ case IOCTL_VMCI_INIT_CONTEXT:{

+ struct vmci_init_blk initBlock;

+ const struct cred *cred;

+

+ retval = copy_from_user(&initBlock, (void *)ioarg,

+ sizeof(initBlock));

+ if (retval != 0) {

+ pr_info("Error reading init block.");

+ retval = -EFAULT;

+ break;

+ }

+

+ mutex_lock(&vmciLinux->lock);

+ if (vmciLinux->ctType != VMCIOBJ_NOT_SET) {

+ pr_info("Received VMCI init on initialized handle.");

+ retval = -EINVAL;

+ goto init_release;

+ }

+

+ if (initBlock.flags & ~VMCI_PRIVILEGE_FLAG_RESTRICTED) {

+ pr_info("Unsupported VMCI restriction flag.");

+ retval = -EINVAL;

+ goto init_release;

+ }

+

+ cred = get_current_cred();

+ retval = vmci_ctx_init_ctx(initBlock.cid,

+ initBlock.flags,

+ 0, vmciLinux->userVersion,

+ cred, &vmciLinux->context);

+ put_cred(cred);

+ if (retval
context);

+ retval = copy_to_user((void *)ioarg, &initBlock,

+ sizeof(initBlock));

+ if (retval != 0) {

+ vmci_ctx_release_ctx(vmciLinux->context);

+ vmciLinux->context = NULL;

+ pr_info("Error writing init block.");

+ retval = -EFAULT;

+ goto init_release;

+ }

+

+ ASSERT(initBlock.cid != VMCI_INVALID_ID);

+ vmciLinux->ctType = VMCIOBJ_CONTEXT;

+ atomic_inc(&linuxState.activeContexts);

+

+init_release:

+ mutex_unlock(&vmciLinux->lock);

+ break;

+ }

+

+ case IOCTL_VMCI_DATAGRAM_SEND:{

+ struct vmci_datagram_snd_rcv_info sendInfo;

+ struct vmci_datagram *dg = NULL;

+ u32 cid;

+

+ if (vmciLinux->ctType != VMCIOBJ_CONTEXT) {

+ pr_warn("Ioctl only valid for context handle (iocmd=%d).",

+ iocmd);

+ retval = -EINVAL;

+ break;

+ }

+

+ retval = copy_from_user(&sendInfo, (void *)ioarg,

+ sizeof(sendInfo));

+ if (retval) {

+ pr_warn("copy_from_user failed.");

+ retval = -EFAULT;

+ break;

+ }

+

+ if (sendInfo.len > VMCI_MAX_DG_SIZE) {

+ pr_warn("Datagram too big (size=%d).",

+ sendInfo.len);

+ retval = -EINVAL;

+ break;

+ }

+

+ if (sendInfo.len
dst.context, dg->dst.resource,

+ dg->src.context, dg->src.resource,

+ (unsigned long long) dg->payloadSize);

+

+ /* Get source context id. */

+ ASSERT(vmciLinux->context);

+ cid = vmci_ctx_get_id(vmciLinux->context);

+ ASSERT(cid != VMCI_INVALID_ID);

+ sendInfo.result = vmci_datagram_dispatch(cid, dg, true);

+ kfree(dg);

+ retval =

+ copy_to_user((void *)ioarg, &sendInfo,

+ sizeof(sendInfo));

+ break;

+ }

+

+ case IOCTL_VMCI_DATAGRAM_RECEIVE:{

+ struct vmci_datagram_snd_rcv_info recvInfo;

+ struct vmci_datagram *dg = NULL;

+ size_t size;

+

+ if (vmciLinux->ctType != VMCIOBJ_CONTEXT) {

+ pr_warn("Ioctl only valid for context handle (iocmd=%d).",

+ iocmd);

+ retval = -EINVAL;

+ break;

+ }

+

+ retval = copy_from_user(&recvInfo, (void *)ioarg,

+ sizeof(recvInfo));

+ if (retval) {

+ pr_warn("copy_from_user failed.");

+ retval = -EFAULT;

+ break;

+ }

+

+ ASSERT(vmciLinux->ctType == VMCIOBJ_CONTEXT);

+ ASSERT(vmciLinux->context);

+ size = recvInfo.len;

+ recvInfo.result =

+ vmci_ctx_dequeue_datagram(vmciLinux->context,

+ &size, &dg);

+

+ if (recvInfo.result >= VMCI_SUCCESS) {

+ ASSERT(dg);

+ retval = copy_to_user((void *)((uintptr_t)

+ recvInfo.addr),

+ dg, VMCI_DG_SIZE(dg));

+ kfree(dg);

+ if (retval != 0)

+ break;

+ }

+ retval = copy_to_user((void *)ioarg, &recvInfo,

+ sizeof(recvInfo));

+ break;

+ }

+

+ case IOCTL_VMCI_QUEUEPAIR_ALLOC:{

+ if (vmciLinux->ctType != VMCIOBJ_CONTEXT) {

+ pr_info("IOCTL_VMCI_QUEUEPAIR_ALLOC only valid for contexts.");

+ retval = -EINVAL;

+ break;

+ }

+

+ if (vmciLinux->userVersion
context,

+ true, &info->result);

+ } else {

+ struct vmci_qp_alloc_info

+ queuePairAllocInfo;

+ struct vmci_qp_alloc_info *info =

+ (struct vmci_qp_alloc_info *)ioarg;

+ struct vmci_qp_page_store pageStore;

+

+ retval = copy_from_user(&queuePairAllocInfo,

+ (void *)ioarg,

+ sizeof(queuePairAllocInfo));

+ if (retval) {

+ retval = -EFAULT;

+ break;

+ }

+

+ pageStore.pages = queuePairAllocInfo.ppnVA;

+ pageStore.len = queuePairAllocInfo.numPPNs;

+

+ retval = drv_qp_broker_alloc(

+ queuePairAllocInfo.handle,

+ queuePairAllocInfo.peer,

+ queuePairAllocInfo.flags,

+ queuePairAllocInfo.produceSize,

+ queuePairAllocInfo.consumeSize,

+ &pageStore, vmciLinux->context,

+ false, &info->result);

+ }

+ break;

+ }

+

+ case IOCTL_VMCI_QUEUEPAIR_SETVA:{

+ struct vmci_qp_set_va_info setVAInfo;

+ struct vmci_qp_set_va_info *info =

+ (struct vmci_qp_set_va_info *)ioarg;

+ int32_t result;

+

+ if (vmciLinux->ctType != VMCIOBJ_CONTEXT) {

+ pr_info("IOCTL_VMCI_QUEUEPAIR_SETVA only valid for contexts.");

+ retval = -EINVAL;

+ break;

+ }

+

+ if (vmciLinux->userVersion
context,

+ setVAInfo.va);

+ } else {

+ /*

+ * The queue pair is about to be unmapped by

+ * the VMX.

+ */

+ result = vmci_qp_broker_unmap(setVAInfo.handle,

+ vmciLinux->context, 0);

+ }

+

+ retval = copy_to_user(&info->result, &result, sizeof(result));

+ if (retval)

+ retval = -EFAULT;

+

+ break;

+ }

+

+ case IOCTL_VMCI_QUEUEPAIR_SETPAGEFILE:{

+ struct vmci_qp_page_file_info pageFileInfo;

+ struct vmci_qp_page_file_info *info =

+ (struct vmci_qp_page_file_info *)ioarg;

+ int32_t result;

+

+ if (vmciLinux->userVersion
userVersion >= VMCI_VERSION_NOVMVM) {

+ pr_info("IOCTL_VMCI_QUEUEPAIR_SETPAGEFILE not " \

+ "supported this VMX (version=%d).",

+ vmciLinux->userVersion);

+ retval = -EINVAL;

+ break;

+ }

+

+ if (vmciLinux->ctType != VMCIOBJ_CONTEXT) {

+ pr_info("IOCTL_VMCI_QUEUEPAIR_SETPAGEFILE only " \

+ "valid for contexts.");

+ retval = -EINVAL;

+ break;

+ }

+

+ retval = copy_from_user(&pageFileInfo, (void *)ioarg,

+ sizeof(*info));

+ if (retval) {

+ retval = -EFAULT;

+ break;

+ }

+

+ /*

+ * Communicate success pre-emptively to the caller.

+ * Note that the basic premise is that it is incumbent

+ * upon the caller not to look at the info.result

+ * field until after the ioctl() returns. And then,

+ * only if the ioctl() result indicates no error. We

+ * send up the SUCCESS status before calling

+ * SetPageStore() store because failing to copy up the

+ * result code means unwinding the SetPageStore().

+ *

+ * It turns out the logic to unwind a SetPageStore()

+ * opens a can of worms. For example, if a host had

+ * created the QueuePair and a guest attaches and

+ * SetPageStore() is successful but writing success

+ * fails, then ... the host has to be stopped from

+ * writing (anymore) data into the QueuePair. That

+ * means an additional test in the VMCI_Enqueue() code

+ * path. Ugh.

+ */

+

+ result = VMCI_SUCCESS;

+ retval = copy_to_user(&info->result, &result, sizeof(result));

+ if (retval == 0) {

+ result = vmci_qp_broker_set_page_store(

+ pageFileInfo.handle,

+ pageFileInfo.produceVA,

+ pageFileInfo.consumeVA,

+ vmciLinux->context);

+ if (result
result,

+ &result,

+ sizeof(result));

+ if (retval != 0) {

+ /*

+ * Note that in this case the

+ * SetPageStore() call failed

+ * but we were unable to

+ * communicate that to the

+ * caller (because the

+ * copy_to_user() call

+ * failed). So, if we simply

+ * return an error (in this

+ * case -EFAULT) then the

+ * caller will know that the

+ * SetPageStore failed even

+ * though we couldn't put the

+ * result code in the result

+ * field and indicate exactly

+ * why it failed.

+ *

+ * That says nothing about the

+ * issue where we were once

+ * able to write to the

+ * caller's info memory and

+ * now can't. Something more

+ * serious is probably going

+ * on than the fact that

+ * SetPageStore() didn't work.

+ */

+ retval = -EFAULT;

+ }

+ }

+

+ } else {

+ /*

+ * In this case, we can't write a result field of the

+ * caller's info block. So, we don't even try to

+ * SetPageStore().

+ */

+ retval = -EFAULT;

+ }

+

+ break;

+ }

+

+ case IOCTL_VMCI_QUEUEPAIR_DETACH:{

+ struct vmci_qp_dtch_info detachInfo;

+ struct vmci_qp_dtch_info *info =

+ (struct vmci_qp_dtch_info *)ioarg;

+ int32_t result;

+

+ if (vmciLinux->ctType != VMCIOBJ_CONTEXT) {

+ pr_info("IOCTL_VMCI_QUEUEPAIR_DETACH only valid for contexts.");

+ retval = -EINVAL;

+ break;

+ }

+

+ retval = copy_from_user(&detachInfo, (void *)ioarg,

+ sizeof(detachInfo));

+ if (retval) {

+ retval = -EFAULT;

+ break;

+ }

+

+ result = vmci_qp_broker_detach(detachInfo.handle,

+ vmciLinux->context);

+ if (result == VMCI_SUCCESS &&

+ vmciLinux->userVersion
result, &result, sizeof(result));

+ if (retval)

+ retval = -EFAULT;

+

+ break;

+ }

+

+ case IOCTL_VMCI_CTX_ADD_NOTIFICATION:{

+ struct vmci_ctx_info arInfo;

+ struct vmci_ctx_info *info =

+ (struct vmci_ctx_info *)ioarg;

+ int32_t result;

+ u32 cid;

+

+ if (vmciLinux->ctType != VMCIOBJ_CONTEXT) {

+ pr_info("IOCTL_VMCI_CTX_ADD_NOTIFICATION only " \

+ "valid for contexts.");

+ retval = -EINVAL;

+ break;

+ }

+

+ retval = copy_from_user(&arInfo, (void *)ioarg,

+ sizeof(arInfo));

+ if (retval) {

+ retval = -EFAULT;

+ break;

+ }

+

+ cid = vmci_ctx_get_id(vmciLinux->context);

+ result = vmci_ctx_add_notification(cid, arInfo.remoteCID);

+ retval = copy_to_user(&info->result, &result, sizeof(result));

+ if (retval) {

+ retval = -EFAULT;

+ break;

+ }

+ break;

+ }

+

+ case IOCTL_VMCI_CTX_REMOVE_NOTIFICATION:{

+ struct vmci_ctx_info arInfo;

+ struct vmci_ctx_info *info =

+ (struct vmci_ctx_info *)ioarg;

+ int32_t result;

+ u32 cid;

+

+ if (vmciLinux->ctType != VMCIOBJ_CONTEXT) {

+ pr_info("IOCTL_VMCI_CTX_REMOVE_NOTIFICATION only " \

+ "valid for contexts.");

+ retval = -EINVAL;

+ break;

+ }

+

+ retval = copy_from_user(&arInfo, (void *)ioarg,

+ sizeof(arInfo));

+ if (retval) {

+ retval = -EFAULT;

+ break;

+ }

+

+ cid = vmci_ctx_get_id(vmciLinux->context);

+ result = vmci_ctx_remove_notification(cid,

+ arInfo.remoteCID);

+ retval = copy_to_user(&info->result, &result, sizeof(result));

+ if (retval) {

+ retval = -EFAULT;

+ break;

+ }

+

+ break;

+ }

+

+ case IOCTL_VMCI_CTX_GET_CPT_STATE:{

+ struct vmci_ctx_chkpt_buf_info getInfo;

+ u32 cid;

+ char *cptBuf;

+

+ if (vmciLinux->ctType != VMCIOBJ_CONTEXT) {

+ pr_info("IOCTL_VMCI_CTX_GET_CPT_STATE only valid for contexts.");

+ retval = -EINVAL;

+ break;

+ }

+

+ retval = copy_from_user(&getInfo, (void *)ioarg,

+ sizeof(getInfo));

+ if (retval) {

+ retval = -EFAULT;

+ break;

+ }

+

+ cid = vmci_ctx_get_id(vmciLinux->context);

+ getInfo.result =

+ vmci_ctx_get_chkpt_state(cid,

+ getInfo.cptType,

+ &getInfo.bufSize,

+ &cptBuf);

+ if (getInfo.result == VMCI_SUCCESS && getInfo.bufSize) {

+ retval = copy_to_user((void *)(uintptr_t)

+ getInfo.cptBuf, cptBuf,

+ getInfo.bufSize);

+ kfree(cptBuf);

+ if (retval) {

+ retval = -EFAULT;

+ break;

+ }

+ }

+ retval = copy_to_user((void *)ioarg, &getInfo,

+ sizeof(getInfo));

+ if (retval)

+ retval = -EFAULT;

+

+ break;

+ }

+

+ case IOCTL_VMCI_CTX_SET_CPT_STATE:{

+ struct vmci_ctx_chkpt_buf_info setInfo;

+ u32 cid;

+ char *cptBuf;

+

+ if (vmciLinux->ctType != VMCIOBJ_CONTEXT) {

+ pr_info("IOCTL_VMCI_CTX_SET_CPT_STATE only valid for contexts.");

+ retval = -EINVAL;

+ break;

+ }

+

+ retval = copy_from_user(&setInfo, (void *)ioarg,

+ sizeof(setInfo));

+ if (retval) {

+ retval = -EFAULT;

+ break;

+ }

+

+ cptBuf = kmalloc(setInfo.bufSize, GFP_KERNEL);

+ if (cptBuf == NULL) {

+ pr_info("Cannot allocate memory to set cpt state (type=%d).",

+ setInfo.cptType);

+ retval = -ENOMEM;

+ break;

+ }

+ retval = copy_from_user(cptBuf,

+ (void *)(uintptr_t) setInfo.cptBuf,

+ setInfo.bufSize);

+ if (retval) {

+ kfree(cptBuf);

+ retval = -EFAULT;

+ break;

+ }

+

+ cid = vmci_ctx_get_id(vmciLinux->context);

+ setInfo.result =

+ vmci_ctx_set_chkpt_state(cid,

+ setInfo.cptType,

+ setInfo.bufSize,

+ cptBuf);

+ kfree(cptBuf);

+ retval = copy_to_user((void *)ioarg, &setInfo,

+ sizeof(setInfo));

+ if (retval)

+ retval = -EFAULT;

+

+ break;

+ }

+

+ case IOCTL_VMCI_GET_CONTEXT_ID:{

+ u32 cid = VMCI_HOST_CONTEXT_ID;

+

+ retval = copy_to_user((void *)ioarg, &cid, sizeof(cid));

+ break;

+ }

+

+ case IOCTL_VMCI_SET_NOTIFY:{

+ struct vmci_set_notify_info notifyInfo;

+

+ if (vmciLinux->ctType != VMCIOBJ_CONTEXT) {

+ pr_info("IOCTL_VMCI_SET_NOTIFY only valid for contexts.");

+ retval = -EINVAL;

+ break;

+ }

+

+ retval = copy_from_user(&notifyInfo, (void *)ioarg,

+ sizeof(notifyInfo));

+ if (retval) {

+ retval = -EFAULT;

+ break;

+ }

+

+ if ((uintptr_t) notifyInfo.notifyUVA !=

+ (uintptr_t) NULL) {

+ notifyInfo.result =

+ drv_setup_notify(vmciLinux->context,

+ (uintptr_t)

+ notifyInfo.notifyUVA);

+ } else {

+ spin_lock(&vmciLinux->context->lock);

+ vmci_ctx_unset_notify(vmciLinux->context);

+ spin_unlock(&vmciLinux->context->lock);

+ notifyInfo.result = VMCI_SUCCESS;

+ }

+

+ retval = copy_to_user((void *)ioarg, &notifyInfo,

+ sizeof(notifyInfo));

+ if (retval)

+ retval = -EFAULT;

+

+ break;

+ }

+

+ case IOCTL_VMCI_NOTIFY_RESOURCE:{

+ struct vmci_dbell_notify_resource_info info;

+ u32 cid;

+

+ if (vmciLinux->userVersion
ctType != VMCIOBJ_CONTEXT) {

+ pr_info("IOCTL_VMCI_NOTIFY_RESOURCE is only valid " \

+ "for contexts.");

+ retval = -EINVAL;

+ break;

+ }

+

+ retval = copy_from_user(&info, (void *)ioarg, sizeof(info));

+ if (retval) {

+ retval = -EFAULT;

+ break;

+ }

+

+ cid = vmci_ctx_get_id(vmciLinux->context);

+ switch (info.action) {

+ case VMCI_NOTIFY_RESOURCE_ACTION_NOTIFY:

+ if (info.resource ==

+ VMCI_NOTIFY_RESOURCE_DOOR_BELL) {

+ u32 flags = VMCI_NO_PRIVILEGE_FLAGS;

+ info.result =

+ vmci_ctx_notify_dbell(cid,

+ info.handle,

+ flags);

+ } else {

+ info.result = VMCI_ERROR_UNAVAILABLE;

+ }

+ break;

+ case VMCI_NOTIFY_RESOURCE_ACTION_CREATE:

+ info.result =

+ vmci_ctx_dbell_create(cid,

+ info.handle);

+ break;

+ case VMCI_NOTIFY_RESOURCE_ACTION_DESTROY:

+ info.result =

+ vmci_ctx_dbell_destroy(cid,

+ info.handle);

+ break;

+ default:

+ pr_info("IOCTL_VMCI_NOTIFY_RESOURCE got unknown " \

+ "action (action=%d).", info.action);

+ info.result = VMCI_ERROR_INVALID_ARGS;

+ }

+ retval = copy_to_user((void *)ioarg, &info,

+ sizeof(info));

+ if (retval)

+ retval = -EFAULT;

+

+ break;

+ }

+

+ case IOCTL_VMCI_NOTIFICATIONS_RECEIVE:{

+ struct vmci_ctx_notify_recv_info info;

+ struct vmci_handle_arr *dbHandleArray;

+ struct vmci_handle_arr *qpHandleArray;

+ u32 cid;

+

+ if (vmciLinux->ctType != VMCIOBJ_CONTEXT) {

+ pr_info("IOCTL_VMCI_NOTIFICATIONS_RECEIVE is only " \

+ "valid for contexts.");

+ retval = -EINVAL;

+ break;

+ }

+

+ if (vmciLinux->userVersion
context);

+ info.result =

+ vmci_ctx_rcv_notifications_get(cid,

+ &dbHandleArray,

+ &qpHandleArr