summaryrefslogtreecommitdiff
path: root/drivers/hv
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/hv')
-rw-r--r--drivers/hv/Kconfig7
-rw-r--r--drivers/hv/connection.c23
-rw-r--r--drivers/hv/hv_common.c76
-rw-r--r--drivers/hv/vmbus_drv.c97
4 files changed, 141 insertions, 62 deletions
diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
index 6c1416167bd2..1cd188b73b74 100644
--- a/drivers/hv/Kconfig
+++ b/drivers/hv/Kconfig
@@ -5,17 +5,18 @@ menu "Microsoft Hyper-V guest support"
config HYPERV
tristate "Microsoft Hyper-V client drivers"
depends on (X86 && X86_LOCAL_APIC && HYPERVISOR_GUEST) \
- || (ACPI && ARM64 && !CPU_BIG_ENDIAN)
+ || (ARM64 && !CPU_BIG_ENDIAN)
select PARAVIRT
select X86_HV_CALLBACK_VECTOR if X86
select OF_EARLY_FLATTREE if OF
+ select SYSFB if !HYPERV_VTL_MODE
help
Select this option to run Linux as a Hyper-V client operating
system.
config HYPERV_VTL_MODE
bool "Enable Linux to boot in VTL context"
- depends on X86_64 && HYPERV
+ depends on (X86_64 || ARM64) && HYPERV
depends on SMP
default n
help
@@ -31,7 +32,7 @@ config HYPERV_VTL_MODE
Select this option to build a Linux kernel to run at a VTL other than
the normal VTL0, which currently is only VTL2. This option
- initializes the x86 platform for VTL2, and adds the ability to boot
+ initializes the kernel to run in VTL2, and adds the ability to boot
secondary CPUs directly into 64-bit context as required for VTLs other
than 0. A kernel built with this option must run at VTL2, and will
not run as a normal guest.
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 8351360bba16..be490c598785 100644
--- a/drivers/hv/connection.c
+++ b/drivers/hv/connection.c
@@ -207,10 +207,19 @@ int vmbus_connect(void)
mutex_init(&vmbus_connection.channel_mutex);
/*
+ * The following Hyper-V interrupt and monitor pages can be used by
+ * UIO for mapping to user-space, so they should always be allocated on
+ * system page boundaries. The system page size must be >= the Hyper-V
+ * page size.
+ */
+ BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE);
+
+ /*
* Setup the vmbus event connection for channel interrupt
* abstraction stuff
*/
- vmbus_connection.int_page = hv_alloc_hyperv_zeroed_page();
+ vmbus_connection.int_page =
+ (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
if (vmbus_connection.int_page == NULL) {
ret = -ENOMEM;
goto cleanup;
@@ -225,8 +234,8 @@ int vmbus_connect(void)
* Setup the monitor notification facility. The 1st page for
* parent->child and the 2nd page for child->parent
*/
- vmbus_connection.monitor_pages[0] = hv_alloc_hyperv_page();
- vmbus_connection.monitor_pages[1] = hv_alloc_hyperv_page();
+ vmbus_connection.monitor_pages[0] = (void *)__get_free_page(GFP_KERNEL);
+ vmbus_connection.monitor_pages[1] = (void *)__get_free_page(GFP_KERNEL);
if ((vmbus_connection.monitor_pages[0] == NULL) ||
(vmbus_connection.monitor_pages[1] == NULL)) {
ret = -ENOMEM;
@@ -342,21 +351,23 @@ void vmbus_disconnect(void)
destroy_workqueue(vmbus_connection.work_queue);
if (vmbus_connection.int_page) {
- hv_free_hyperv_page(vmbus_connection.int_page);
+ free_page((unsigned long)vmbus_connection.int_page);
vmbus_connection.int_page = NULL;
}
if (vmbus_connection.monitor_pages[0]) {
if (!set_memory_encrypted(
(unsigned long)vmbus_connection.monitor_pages[0], 1))
- hv_free_hyperv_page(vmbus_connection.monitor_pages[0]);
+ free_page((unsigned long)
+ vmbus_connection.monitor_pages[0]);
vmbus_connection.monitor_pages[0] = NULL;
}
if (vmbus_connection.monitor_pages[1]) {
if (!set_memory_encrypted(
(unsigned long)vmbus_connection.monitor_pages[1], 1))
- hv_free_hyperv_page(vmbus_connection.monitor_pages[1]);
+ free_page((unsigned long)
+ vmbus_connection.monitor_pages[1]);
vmbus_connection.monitor_pages[1] = NULL;
}
}
diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
index 59792e00cecf..49898d10faff 100644
--- a/drivers/hv/hv_common.c
+++ b/drivers/hv/hv_common.c
@@ -105,45 +105,6 @@ void __init hv_common_free(void)
hv_synic_eventring_tail = NULL;
}
-/*
- * Functions for allocating and freeing memory with size and
- * alignment HV_HYP_PAGE_SIZE. These functions are needed because
- * the guest page size may not be the same as the Hyper-V page
- * size. We depend upon kmalloc() aligning power-of-two size
- * allocations to the allocation size boundary, so that the
- * allocated memory appears to Hyper-V as a page of the size
- * it expects.
- */
-
-void *hv_alloc_hyperv_page(void)
-{
- BUILD_BUG_ON(PAGE_SIZE < HV_HYP_PAGE_SIZE);
-
- if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
- return (void *)__get_free_page(GFP_KERNEL);
- else
- return kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
-}
-EXPORT_SYMBOL_GPL(hv_alloc_hyperv_page);
-
-void *hv_alloc_hyperv_zeroed_page(void)
-{
- if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
- return (void *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
- else
- return kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
-}
-EXPORT_SYMBOL_GPL(hv_alloc_hyperv_zeroed_page);
-
-void hv_free_hyperv_page(void *addr)
-{
- if (PAGE_SIZE == HV_HYP_PAGE_SIZE)
- free_page((unsigned long)addr);
- else
- kfree(addr);
-}
-EXPORT_SYMBOL_GPL(hv_free_hyperv_page);
-
static void *hv_panic_page;
/*
@@ -272,7 +233,7 @@ static void hv_kmsg_dump_unregister(void)
atomic_notifier_chain_unregister(&panic_notifier_list,
&hyperv_panic_report_block);
- hv_free_hyperv_page(hv_panic_page);
+ kfree(hv_panic_page);
hv_panic_page = NULL;
}
@@ -280,7 +241,7 @@ static void hv_kmsg_dump_register(void)
{
int ret;
- hv_panic_page = hv_alloc_hyperv_zeroed_page();
+ hv_panic_page = kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
if (!hv_panic_page) {
pr_err("Hyper-V: panic message page memory allocation failed\n");
return;
@@ -289,7 +250,7 @@ static void hv_kmsg_dump_register(void)
ret = kmsg_dump_register(&hv_kmsg_dumper);
if (ret) {
pr_err("Hyper-V: kmsg dump register error 0x%x\n", ret);
- hv_free_hyperv_page(hv_panic_page);
+ kfree(hv_panic_page);
hv_panic_page = NULL;
}
}
@@ -317,6 +278,37 @@ void __init hv_get_partition_id(void)
pr_err("Hyper-V: failed to get partition ID: %#x\n",
hv_result(status));
}
+#if IS_ENABLED(CONFIG_HYPERV_VTL_MODE)
+u8 __init get_vtl(void)
+{
+ u64 control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_REGISTERS;
+ struct hv_input_get_vp_registers *input;
+ struct hv_output_get_vp_registers *output;
+ unsigned long flags;
+ u64 ret;
+
+ local_irq_save(flags);
+ input = *this_cpu_ptr(hyperv_pcpu_input_arg);
+ output = *this_cpu_ptr(hyperv_pcpu_output_arg);
+
+ memset(input, 0, struct_size(input, names, 1));
+ input->partition_id = HV_PARTITION_ID_SELF;
+ input->vp_index = HV_VP_INDEX_SELF;
+ input->input_vtl.as_uint8 = 0;
+ input->names[0] = HV_REGISTER_VSM_VP_STATUS;
+
+ ret = hv_do_hypercall(control, input, output);
+ if (hv_result_success(ret)) {
+ ret = output->values[0].reg8 & HV_VTL_MASK;
+ } else {
+ pr_err("Failed to get VTL(error: %lld) exiting...\n", ret);
+ BUG();
+ }
+
+ local_irq_restore(flags);
+ return ret;
+}
+#endif
int __init hv_common_init(void)
{
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index e3d51a316316..33b524b4eb5e 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -45,7 +45,8 @@ struct vmbus_dynid {
struct hv_vmbus_device_id id;
};
-static struct device *hv_dev;
+/* VMBus Root Device */
+static struct device *vmbus_root_device;
static int hyperv_cpuhp_online;
@@ -80,9 +81,15 @@ static struct resource *fb_mmio;
static struct resource *hyperv_mmio;
static DEFINE_MUTEX(hyperv_mmio_lock);
+struct device *hv_get_vmbus_root_device(void)
+{
+ return vmbus_root_device;
+}
+EXPORT_SYMBOL_GPL(hv_get_vmbus_root_device);
+
static int vmbus_exists(void)
{
- if (hv_dev == NULL)
+ if (vmbus_root_device == NULL)
return -ENODEV;
return 0;
@@ -707,7 +714,30 @@ static const struct hv_vmbus_device_id *hv_vmbus_get_id(const struct hv_driver *
return id;
}
-/* vmbus_add_dynid - add a new device ID to this driver and re-probe devices */
+/* vmbus_add_dynid - add a new device ID to this driver and re-probe devices
+ *
+ * This function can race with vmbus_device_register(). This function is
+ * typically running on a user thread in response to writing to the "new_id"
+ * sysfs entry for a driver. vmbus_device_register() is running on a
+ * workqueue thread in response to the Hyper-V host offering a device to the
+ * guest. This function calls driver_attach(), which looks for an existing
+ * device matching the new id, and attaches the driver to which the new id
+ * has been assigned. vmbus_device_register() calls device_register(), which
+ * looks for a driver that matches the device being registered. If both
+ * operations are running simultaneously, the device driver probe function runs
+ * on whichever thread establishes the linkage between the driver and device.
+ *
+ * In most cases, it doesn't matter which thread runs the driver probe
+ * function. But if vmbus_device_register() does not find a matching driver,
+ * it proceeds to create the "channels" subdirectory and numbered per-channel
+ * subdirectory in sysfs. While that multi-step creation is in progress, this
+ * function could run the driver probe function. If the probe function checks
+ * for, or operates on, entries in the "channels" subdirectory, including by
+ * calling hv_create_ring_sysfs(), the operation may or may not succeed
+ * depending on the race. The race can't create a kernel failure in VMBus
+ * or device subsystem code, but probe functions in VMBus drivers doing such
+ * operations must be prepared for the failure case.
+ */
static int vmbus_add_dynid(struct hv_driver *drv, guid_t *guid)
{
struct vmbus_dynid *dynid;
@@ -861,7 +891,7 @@ static int vmbus_dma_configure(struct device *child_device)
* On x86/x64 coherence is assumed and these calls have no effect.
*/
hv_setup_dma_ops(child_device,
- device_get_dma_attr(hv_dev) == DEV_DMA_COHERENT);
+ device_get_dma_attr(vmbus_root_device) == DEV_DMA_COHERENT);
return 0;
}
@@ -1841,7 +1871,7 @@ static struct attribute *vmbus_chan_attrs[] = {
NULL
};
-static struct bin_attribute *vmbus_chan_bin_attrs[] = {
+static const struct bin_attribute *vmbus_chan_bin_attrs[] = {
&chan_attr_ring_buffer,
NULL
};
@@ -1921,7 +1951,8 @@ static const struct kobj_type vmbus_chan_ktype = {
* ring for userspace to use.
* Note: Race conditions can happen with userspace and it is not encouraged to create new
* use-cases for this. This was added to maintain backward compatibility, while solving
- * one of the race conditions in uio_hv_generic while creating sysfs.
+ * one of the race conditions in uio_hv_generic while creating sysfs. See comments with
+ * vmbus_add_dynid() and vmbus_device_register().
*
* Returns 0 on success or error code on failure.
*/
@@ -2037,7 +2068,7 @@ int vmbus_device_register(struct hv_device *child_device_obj)
&child_device_obj->channel->offermsg.offer.if_instance);
child_device_obj->device.bus = &hv_bus;
- child_device_obj->device.parent = hv_dev;
+ child_device_obj->device.parent = vmbus_root_device;
child_device_obj->device.release = vmbus_device_release;
child_device_obj->device.dma_parms = &child_device_obj->dma_parms;
@@ -2055,6 +2086,20 @@ int vmbus_device_register(struct hv_device *child_device_obj)
return ret;
}
+ /*
+ * If device_register() found a driver to assign to the device, the
+ * driver's probe function has already run at this point. If that
+ * probe function accesses or operates on the "channels" subdirectory
+ * in sysfs, those operations will have failed because the "channels"
+ * subdirectory doesn't exist until the code below runs. Or if the
+ * probe function creates a /dev entry, a user space program could
+ * find and open the /dev entry, and then create a race by accessing
+ * the "channels" subdirectory while the creation steps are in progress
+ * here. The race can't result in a kernel failure, but the user space
+ * program may get an error in accessing "channels" or its
+ * subdirectories. See also comments with vmbus_add_dynid() about a
+ * related race condition.
+ */
child_device_obj->channels_kset = kset_create_and_add("channels",
NULL, kobj);
if (!child_device_obj->channels_kset) {
@@ -2412,7 +2457,7 @@ static int vmbus_acpi_add(struct platform_device *pdev)
struct acpi_device *ancestor;
struct acpi_device *device = ACPI_COMPANION(&pdev->dev);
- hv_dev = &device->dev;
+ vmbus_root_device = &device->dev;
/*
* Older versions of Hyper-V for ARM64 fail to include the _CCA
@@ -2465,6 +2510,31 @@ static int vmbus_acpi_add(struct platform_device *pdev)
}
#endif
+static int vmbus_set_irq(struct platform_device *pdev)
+{
+ struct irq_data *data;
+ int irq;
+ irq_hw_number_t hwirq;
+
+ irq = platform_get_irq(pdev, 0);
+ /* platform_get_irq() may not return 0. */
+ if (irq < 0)
+ return irq;
+
+ data = irq_get_irq_data(irq);
+ if (!data) {
+ pr_err("No interrupt data for VMBus virq %d\n", irq);
+ return -ENODEV;
+ }
+ hwirq = irqd_to_hwirq(data);
+
+ vmbus_irq = irq;
+ vmbus_interrupt = hwirq;
+ pr_debug("VMBus virq %d, hwirq %d\n", vmbus_irq, vmbus_interrupt);
+
+ return 0;
+}
+
static int vmbus_device_add(struct platform_device *pdev)
{
struct resource **cur_res = &hyperv_mmio;
@@ -2473,12 +2543,17 @@ static int vmbus_device_add(struct platform_device *pdev)
struct device_node *np = pdev->dev.of_node;
int ret;
- hv_dev = &pdev->dev;
+ vmbus_root_device = &pdev->dev;
ret = of_range_parser_init(&parser, np);
if (ret)
return ret;
+ if (!__is_defined(HYPERVISOR_CALLBACK_VECTOR))
+ ret = vmbus_set_irq(pdev);
+ if (ret)
+ return ret;
+
for_each_of_range(&parser, &range) {
struct resource *res;
@@ -2786,7 +2861,7 @@ static int __init hv_acpi_init(void)
if (ret)
return ret;
- if (!hv_dev) {
+ if (!vmbus_root_device) {
ret = -ENODEV;
goto cleanup;
}
@@ -2817,7 +2892,7 @@ static int __init hv_acpi_init(void)
cleanup:
platform_driver_unregister(&vmbus_platform_driver);
- hv_dev = NULL;
+ vmbus_root_device = NULL;
return ret;
}