summaryrefslogtreecommitdiff
path: root/arch/x86/coco/tdx
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/coco/tdx')
-rw-r--r--arch/x86/coco/tdx/Makefile2
-rw-r--r--arch/x86/coco/tdx/debug.c69
-rw-r--r--arch/x86/coco/tdx/tdx.c132
3 files changed, 185 insertions, 18 deletions
diff --git a/arch/x86/coco/tdx/Makefile b/arch/x86/coco/tdx/Makefile
index 2c7dcbf1458b..b3c47d3700e2 100644
--- a/arch/x86/coco/tdx/Makefile
+++ b/arch/x86/coco/tdx/Makefile
@@ -1,3 +1,3 @@
# SPDX-License-Identifier: GPL-2.0
-obj-y += tdx.o tdx-shared.o tdcall.o
+obj-y += debug.o tdcall.o tdx.o tdx-shared.o
diff --git a/arch/x86/coco/tdx/debug.c b/arch/x86/coco/tdx/debug.c
new file mode 100644
index 000000000000..cef847c8bb67
--- /dev/null
+++ b/arch/x86/coco/tdx/debug.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#undef pr_fmt
+#define pr_fmt(fmt) "tdx: " fmt
+
+#include <linux/array_size.h>
+#include <linux/printk.h>
+#include <asm/tdx.h>
+
+#define DEF_TDX_ATTR_NAME(_name) [TDX_ATTR_##_name##_BIT] = __stringify(_name)
+
+static __initdata const char *tdx_attributes[] = {
+ DEF_TDX_ATTR_NAME(DEBUG),
+ DEF_TDX_ATTR_NAME(HGS_PLUS_PROF),
+ DEF_TDX_ATTR_NAME(PERF_PROF),
+ DEF_TDX_ATTR_NAME(PMT_PROF),
+ DEF_TDX_ATTR_NAME(ICSSD),
+ DEF_TDX_ATTR_NAME(LASS),
+ DEF_TDX_ATTR_NAME(SEPT_VE_DISABLE),
+ DEF_TDX_ATTR_NAME(MIGRTABLE),
+ DEF_TDX_ATTR_NAME(PKS),
+ DEF_TDX_ATTR_NAME(KL),
+ DEF_TDX_ATTR_NAME(TPA),
+ DEF_TDX_ATTR_NAME(PERFMON),
+};
+
+#define DEF_TD_CTLS_NAME(_name) [TD_CTLS_##_name##_BIT] = __stringify(_name)
+
+static __initdata const char *tdcs_td_ctls[] = {
+ DEF_TD_CTLS_NAME(PENDING_VE_DISABLE),
+ DEF_TD_CTLS_NAME(ENUM_TOPOLOGY),
+ DEF_TD_CTLS_NAME(VIRT_CPUID2),
+ DEF_TD_CTLS_NAME(REDUCE_VE),
+ DEF_TD_CTLS_NAME(LOCK),
+};
+
+void __init tdx_dump_attributes(u64 td_attr)
+{
+ pr_info("Attributes:");
+
+ for (int i = 0; i < ARRAY_SIZE(tdx_attributes); i++) {
+ if (!tdx_attributes[i])
+ continue;
+ if (td_attr & BIT(i))
+ pr_cont(" %s", tdx_attributes[i]);
+ td_attr &= ~BIT(i);
+ }
+
+ if (td_attr)
+ pr_cont(" unknown:%#llx", td_attr);
+ pr_cont("\n");
+
+}
+
+void __init tdx_dump_td_ctls(u64 td_ctls)
+{
+ pr_info("TD_CTLS:");
+
+ for (int i = 0; i < ARRAY_SIZE(tdcs_td_ctls); i++) {
+ if (!tdcs_td_ctls[i])
+ continue;
+ if (td_ctls & BIT(i))
+ pr_cont(" %s", tdcs_td_ctls[i]);
+ td_ctls &= ~BIT(i);
+ }
+ if (td_ctls)
+ pr_cont(" unknown:%#llx", td_ctls);
+ pr_cont("\n");
+}
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index 0d9b090b4880..7b2833705d47 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -14,6 +14,7 @@
#include <asm/ia32.h>
#include <asm/insn.h>
#include <asm/insn-eval.h>
+#include <asm/paravirt_types.h>
#include <asm/pgtable.h>
#include <asm/set_memory.h>
#include <asm/traps.h>
@@ -32,12 +33,10 @@
#define VE_GET_PORT_NUM(e) ((e) >> 16)
#define VE_IS_IO_STRING(e) ((e) & BIT(4))
-#define ATTR_DEBUG BIT(0)
-#define ATTR_SEPT_VE_DISABLE BIT(28)
-
/* TDX Module call error codes */
#define TDCALL_RETURN_CODE(a) ((a) >> 32)
#define TDCALL_INVALID_OPERAND 0xc0000100
+#define TDCALL_OPERAND_BUSY 0x80000200
#define TDREPORT_SUBTYPE_0 0
@@ -111,12 +110,13 @@ static inline u64 tdg_vm_wr(u64 field, u64 value, u64 mask)
* REPORTDATA to be included into TDREPORT.
* @tdreport: Address of the output buffer to store TDREPORT.
*
- * Refer to section titled "TDG.MR.REPORT leaf" in the TDX Module
- * v1.0 specification for more information on TDG.MR.REPORT TDCALL.
+ * Refer to section titled "TDG.MR.REPORT leaf" in the TDX Module v1.0
+ * specification for more information on TDG.MR.REPORT TDCALL.
+ *
* It is used in the TDX guest driver module to get the TDREPORT0.
*
- * Return 0 on success, -EINVAL for invalid operands, or -EIO on
- * other TDCALL failures.
+ * Return 0 on success, -ENXIO for invalid operands, -EBUSY for busy operation,
+ * or -EIO on other TDCALL failures.
*/
int tdx_mcall_get_report0(u8 *reportdata, u8 *tdreport)
{
@@ -130,7 +130,9 @@ int tdx_mcall_get_report0(u8 *reportdata, u8 *tdreport)
ret = __tdcall(TDG_MR_REPORT, &args);
if (ret) {
if (TDCALL_RETURN_CODE(ret) == TDCALL_INVALID_OPERAND)
- return -EINVAL;
+ return -ENXIO;
+ else if (TDCALL_RETURN_CODE(ret) == TDCALL_OPERAND_BUSY)
+ return -EBUSY;
return -EIO;
}
@@ -139,6 +141,42 @@ int tdx_mcall_get_report0(u8 *reportdata, u8 *tdreport)
EXPORT_SYMBOL_GPL(tdx_mcall_get_report0);
/**
+ * tdx_mcall_extend_rtmr() - Wrapper to extend RTMR registers using
+ * TDG.MR.RTMR.EXTEND TDCALL.
+ * @index: Index of RTMR register to be extended.
+ * @data: Address of the input buffer with RTMR register extend data.
+ *
+ * Refer to section titled "TDG.MR.RTMR.EXTEND leaf" in the TDX Module v1.0
+ * specification for more information on TDG.MR.RTMR.EXTEND TDCALL.
+ *
+ * It is used in the TDX guest driver module to allow user to extend the RTMR
+ * registers.
+ *
+ * Return 0 on success, -ENXIO for invalid operands, -EBUSY for busy operation,
+ * or -EIO on other TDCALL failures.
+ */
+int tdx_mcall_extend_rtmr(u8 index, u8 *data)
+{
+ struct tdx_module_args args = {
+ .rcx = virt_to_phys(data),
+ .rdx = index,
+ };
+ u64 ret;
+
+ ret = __tdcall(TDG_MR_RTMR_EXTEND, &args);
+ if (ret) {
+ if (TDCALL_RETURN_CODE(ret) == TDCALL_INVALID_OPERAND)
+ return -ENXIO;
+ if (TDCALL_RETURN_CODE(ret) == TDCALL_OPERAND_BUSY)
+ return -EBUSY;
+ return -EIO;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(tdx_mcall_extend_rtmr);
+
+/**
* tdx_hcall_get_quote() - Wrapper to request TD Quote using GetQuote
* hypercall.
* @buf: Address of the directly mapped shared kernel buffer which
@@ -170,11 +208,11 @@ static void __noreturn tdx_panic(const char *msg)
/* Define register order according to the GHCI */
struct { u64 r14, r15, rbx, rdi, rsi, r8, r9, rdx; };
- char str[64];
+ char bytes[64] __nonstring;
} message;
/* VMM assumes '\0' in byte 65, if the message took all 64 bytes */
- strtomem_pad(message.str, msg, '\0');
+ strtomem_pad(message.bytes, msg, '\0');
args.r8 = message.r8;
args.r9 = message.r9;
@@ -200,14 +238,14 @@ static void __noreturn tdx_panic(const char *msg)
*
* TDX 1.0 does not allow the guest to disable SEPT #VE on its own. The VMM
* controls if the guest will receive such #VE with TD attribute
- * ATTR_SEPT_VE_DISABLE.
+ * TDX_ATTR_SEPT_VE_DISABLE.
*
* Newer TDX modules allow the guest to control if it wants to receive SEPT
* violation #VEs.
*
* Check if the feature is available and disable SEPT #VE if possible.
*
- * If the TD is allowed to disable/enable SEPT #VEs, the ATTR_SEPT_VE_DISABLE
+ * If the TD is allowed to disable/enable SEPT #VEs, the TDX_ATTR_SEPT_VE_DISABLE
* attribute is no longer reliable. It reflects the initial state of the
* control for the TD, but it will not be updated if someone (e.g. bootloader)
* changes it before the kernel starts. Kernel must check TDCS_TD_CTLS bit to
@@ -216,14 +254,14 @@ static void __noreturn tdx_panic(const char *msg)
static void disable_sept_ve(u64 td_attr)
{
const char *msg = "TD misconfiguration: SEPT #VE has to be disabled";
- bool debug = td_attr & ATTR_DEBUG;
+ bool debug = td_attr & TDX_ATTR_DEBUG;
u64 config, controls;
/* Is this TD allowed to disable SEPT #VE */
tdg_vm_rd(TDCS_CONFIG_FLAGS, &config);
if (!(config & TDCS_CONFIG_FLEXIBLE_PENDING_VE)) {
/* No SEPT #VE controls for the guest: check the attribute */
- if (td_attr & ATTR_SEPT_VE_DISABLE)
+ if (td_attr & TDX_ATTR_SEPT_VE_DISABLE)
return;
/* Relax SEPT_VE_DISABLE check for debug TD for backtraces */
@@ -274,6 +312,20 @@ static void enable_cpu_topology_enumeration(void)
tdg_vm_wr(TDCS_TD_CTLS, TD_CTLS_ENUM_TOPOLOGY, TD_CTLS_ENUM_TOPOLOGY);
}
+static void reduce_unnecessary_ve(void)
+{
+ u64 err = tdg_vm_wr(TDCS_TD_CTLS, TD_CTLS_REDUCE_VE, TD_CTLS_REDUCE_VE);
+
+ if (err == TDX_SUCCESS)
+ return;
+
+ /*
+ * Enabling REDUCE_VE includes ENUM_TOPOLOGY. Only try to
+ * enable ENUM_TOPOLOGY if REDUCE_VE was not successful.
+ */
+ enable_cpu_topology_enumeration();
+}
+
static void tdx_setup(u64 *cc_mask)
{
struct tdx_module_args args = {};
@@ -305,7 +357,8 @@ static void tdx_setup(u64 *cc_mask)
tdg_vm_wr(TDCS_NOTIFY_ENABLES, 0, -1ULL);
disable_sept_ve(td_attr);
- enable_cpu_topology_enumeration();
+
+ reduce_unnecessary_ve();
}
/*
@@ -380,13 +433,21 @@ static int handle_halt(struct ve_info *ve)
{
const bool irq_disabled = irqs_disabled();
+ /*
+ * HLT with IRQs enabled is unsafe, as an IRQ that is intended to be a
+ * wake event may be consumed before requesting HLT emulation, leaving
+ * the vCPU blocking indefinitely.
+ */
+ if (WARN_ONCE(!irq_disabled, "HLT emulation with IRQs enabled"))
+ return -EIO;
+
if (__halt(irq_disabled))
return -EIO;
return ve_instr_len(ve);
}
-void __cpuidle tdx_safe_halt(void)
+void __cpuidle tdx_halt(void)
{
const bool irq_disabled = false;
@@ -397,6 +458,16 @@ void __cpuidle tdx_safe_halt(void)
WARN_ONCE(1, "HLT instruction emulation failed\n");
}
+static void __cpuidle tdx_safe_halt(void)
+{
+ tdx_halt();
+ /*
+ * "__cpuidle" section doesn't support instrumentation, so stick
+ * with raw_* variant that avoids tracing hooks.
+ */
+ raw_local_irq_enable();
+}
+
static int read_msr(struct pt_regs *regs, struct ve_info *ve)
{
struct tdx_module_args args = {
@@ -1025,6 +1096,20 @@ static void tdx_kexec_finish(void)
}
}
+static __init void tdx_announce(void)
+{
+ struct tdx_module_args args = {};
+ u64 controls;
+
+ pr_info("Guest detected\n");
+
+ tdcall(TDG_VP_INFO, &args);
+ tdx_dump_attributes(args.rdx);
+
+ tdg_vm_rd(TDCS_TD_CTLS, &controls);
+ tdx_dump_td_ctls(controls);
+}
+
void __init tdx_early_init(void)
{
u64 cc_mask;
@@ -1084,6 +1169,19 @@ void __init tdx_early_init(void)
x86_platform.guest.enc_kexec_finish = tdx_kexec_finish;
/*
+ * Avoid "sti;hlt" execution in TDX guests as HLT induces a #VE that
+ * will enable interrupts before HLT TDCALL invocation if executed
+ * in STI-shadow, possibly resulting in missed wakeup events.
+ *
+ * Modify all possible HLT execution paths to use TDX specific routines
+ * that directly execute TDCALL and toggle the interrupt state as
+ * needed after TDCALL completion. This also reduces HLT related #VEs
+ * in addition to having a reliable halt logic execution.
+ */
+ pv_ops.irq.safe_halt = tdx_safe_halt;
+ pv_ops.irq.halt = tdx_halt;
+
+ /*
* TDX intercepts the RDMSR to read the X2APIC ID in the parallel
* bringup low level code. That raises #VE which cannot be handled
* there.
@@ -1094,5 +1192,5 @@ void __init tdx_early_init(void)
*/
x86_cpuinit.parallel_bringup = false;
- pr_info("Guest detected\n");
+ tdx_announce();
}