summaryrefslogtreecommitdiff
path: root/arch/x86/kernel/relocate_kernel_64.S
blob: ea604f4d0b52bd879ba594adf96154182cecb191 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * relocate_kernel.S - put the kernel image in place to boot
 * Copyright (C) 2002-2005 Eric Biederman  <ebiederm@xmission.com>
 */

#include <linux/linkage.h>
#include <linux/stringify.h>
#include <asm/alternative.h>
#include <asm/page_types.h>
#include <asm/kexec.h>
#include <asm/processor-flags.h>
#include <asm/pgtable_types.h>
#include <asm/nospec-branch.h>
#include <asm/unwind_hints.h>
#include <asm/asm-offsets.h>

/*
 * Must be relocatable PIC code callable as a C function; in particular,
 * there must be a plain RET and not a jump to a return thunk.
 */

/*
 * PTR(x): x shifted left by 3, i.e. x * 8 (the byte offset of the x-th
 * 8-byte slot). The argument is parenthesized so that an expression such
 * as PTR(a | b) is shifted as a whole instead of only its last term.
 * Not referenced in the visible part of this file.
 */
#define PTR(x) ((x) << 3)
/*
 * PAGE_ATTR: the present, read/write, accessed and dirty page-table bits
 * OR-ed together. Not referenced in the visible part of this file.
 */
#define PAGE_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)

/*
 * The .text..relocate_kernel and .data..relocate_kernel sections are copied
 * into the control page, and the remainder of the page is used as the stack.
 */

	.section .data..relocate_kernel,"a";
/*
 * Minimal CPU state. relocate_kernel() stores %rsp, CR0, CR3 and CR4 here on
 * entry; virtual_mapped reloads them on the way back.
 */
SYM_DATA_LOCAL(saved_rsp, .quad 0)
SYM_DATA_LOCAL(saved_cr0, .quad 0)
SYM_DATA_LOCAL(saved_cr3, .quad 0)
SYM_DATA_LOCAL(saved_cr4, .quad 0)
	/*
	 * Other data. The non-local symbols below are only read by the code
	 * visible in this file, so they are presumably filled in by the
	 * caller before relocate_kernel() runs -- TODO confirm on the C side.
	 *
	 *  kexec_va_control_page   - base used to compute the address of
	 *                            virtual_mapped for the return jump
	 *  kexec_pa_table_page     - value loaded into CR3 (identity-mapped
	 *                            page tables)
	 *  kexec_pa_swap_page      - scratch page for swap_pages; its end is
	 *                            also used as the callee's stack
	 *  pa_backup_pages_map     - copy of the indirection page argument
	 *                            (%rdi), saved by relocate_kernel() for
	 *                            the jump back
	 *  kexec_debug_8250_mmio32 - MMIO address passed to
	 *                            pr_char_8250_mmio32 (0 = not used)
	 *  kexec_debug_8250_port   - I/O port passed to pr_char_8250
	 *                            (0 = not used)
	 */
SYM_DATA(kexec_va_control_page, .quad 0)
SYM_DATA(kexec_pa_table_page, .quad 0)
SYM_DATA(kexec_pa_swap_page, .quad 0)
SYM_DATA_LOCAL(pa_backup_pages_map, .quad 0)
SYM_DATA(kexec_debug_8250_mmio32, .quad 0)
SYM_DATA(kexec_debug_8250_port, .word 0)

	.balign 16
/*
 * Debug GDT. The first 8-byte slot holds a 16-bit limit (table size - 1)
 * followed by zero padding; identity_mapped reads that limit word with
 * "pushw (%rax)" when it builds the GDTR image on the stack. Three
 * descriptors follow.
 */
SYM_DATA_START_LOCAL(kexec_debug_gdt)
	.word   kexec_debug_gdt_end - kexec_debug_gdt - 1
	.long   0
	.word   0
	.quad   0x00cf9a000000ffff      /* __KERNEL32_CS */
	.quad   0x00af9a000000ffff      /* __KERNEL_CS */
	.quad   0x00cf92000000ffff      /* __KERNEL_DS */
SYM_DATA_END_LABEL(kexec_debug_gdt, SYM_L_LOCAL, kexec_debug_gdt_end)

	.balign 8
/*
 * Debug IDT: 0x100 zero-filled bytes. identity_mapped loads it with a limit
 * of 0xff. Nothing in the visible part of this file writes its entries.
 */
SYM_DATA_START(kexec_debug_idt)
	.skip 0x100, 0x00
SYM_DATA_END(kexec_debug_idt)

	.section .text..relocate_kernel,"ax";
	.code64
/*
 * relocate_kernel - entry point of the kexec trampoline.
 *
 * Saves the caller's callee-saved registers and flags on the current stack,
 * invalidates the GDT/IDT, switches CR3 to kexec_pa_table_page, records
 * %rsp/CR0/CR3/CR4 for a possible return, moves the stack to the end of the
 * control page and jumps to identity_mapped inside that page.
 *
 * Registers handed on to identity_mapped: %rdi, %rdx, %r8 unchanged,
 * %r9 = page table page, %r11 = preserve_context, %r13 = original CR4.
 */
SYM_CODE_START_NOALIGN(relocate_kernel)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR
	/*
	 * %rdi indirection_page
	 * %rsi pa_control_page
	 * %rdx start address
	 * %rcx preserve_context
	 * %r8  host_mem_enc_active
	 */

	/*
	 * Save the CPU context, used for jumping back. virtual_mapped pops
	 * these in the reverse order.
	 */
	pushq %rbx
	pushq %rbp
	pushq %r12
	pushq %r13
	pushq %r14
	pushq %r15
	pushf

	/*
	 * Invalidate GDT/IDT, zero out flags.
	 *
	 * The two zero quadwords form an all-zero descriptor-table image
	 * (limit 0, base 0) that both LIDT and LGDT read. Dropping one
	 * quadword and popping the other into RFLAGS then clears the flags.
	 */
	pushq	$0
	pushq	$0

	lidt	(%rsp)
	lgdt	(%rsp)
	addq	$8, %rsp
	popfq

	/*
	 * Switch to the identity mapped page tables. The old CR3 stays in
	 * %rax until it is stored below; the new one stays in %r9.
	 */
	movq	%cr3, %rax
	movq	kexec_pa_table_page(%rip), %r9
	movq	%r9, %cr3

	/* Leave CR4 in %r13 to enable the right paging mode later. */
	movq	%cr4, %r13

	/* Disable global pages immediately to ensure this mapping is RWX */
	movq	%r13, %r12
	andq	$~(X86_CR4_PGE), %r12
	movq	%r12, %cr4

	/*
	 * Save %rsp and CRs. These stores are done only after the CR3 and
	 * CR4 writes above. saved_cr4 gets the unmodified CR4 from %r13.
	 */
	movq	%r13, saved_cr4(%rip)
	movq    %rsp, saved_rsp(%rip)
	movq	%rax, saved_cr3(%rip)
	movq	%cr0, %rax
	movq	%rax, saved_cr0(%rip)

	/* save indirection list for jumping back */
	movq	%rdi, pa_backup_pages_map(%rip)

	/* Save the preserve_context to %r11 as swap_pages clobbers %rcx. */
	movq	%rcx, %r11

	/* setup a new stack at the end of the physical control page */
	lea	PAGE_SIZE(%rsi), %rsp

	/*
	 * jump to identity mapped page
	 *
	 * The "0b" terms cancel, so the net effect is
	 *   %rsi = pa_control_page + (identity_mapped - __relocate_kernel_start)
	 * i.e. the address of identity_mapped within the control page.
	 */
0:	addq	$identity_mapped - 0b, %rsi
	subq	$__relocate_kernel_start - 0b, %rsi
	ANNOTATE_RETPOLINE_SAFE
	jmp	*%rsi
SYM_CODE_END(relocate_kernel)

/*
 * identity_mapped - second stage, reached by the indirect jump at the end
 * of relocate_kernel() with %rsp at the end of the control page.
 *
 * Loads the debug GDT/IDT, puts CR0/CR4 into a known state, runs swap_pages
 * and then either
 *  - preserve_context == 0: zeroes the general purpose registers and RETs
 *    to the start address pushed at the top of this routine, or
 *  - preserve_context != 0: CALLs the start address on a stack in the swap
 *    page and, if that returns, swaps the pages back and continues in
 *    virtual_mapped.
 */
SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
	UNWIND_HINT_END_OF_STACK
	/*
	 * %rdi	indirection page
	 * %rdx start address
	 * %r8 host_mem_enc_active
	 * %r9 page table page
	 * %r11 preserve_context
	 * %r13 original CR4 when relocate_kernel() was invoked
	 */

	/*
	 * store the start address on the stack; it is consumed either by the
	 * final RET of the non-preserve_context path or by the POP at
	 * .Lrelocate
	 */
	pushq   %rdx

	/*
	 * Create a GDTR (16 bits limit, 64 bits addr) on stack: the base is
	 * the address of kexec_debug_gdt, the limit is the word stored at
	 * the start of that table.
	 */
	leaq	kexec_debug_gdt(%rip), %rax
	pushq	%rax
	pushw	(%rax)

	/* Load the GDT, put the stack back (8-byte base + 2-byte limit) */
	lgdt	(%rsp)
	addq	$10, %rsp

	/* Test that we can load segments */
	movq	%ds, %rax
	movq	%rax, %ds

	/*
	 * Now an IDTR on the stack to load the IDT the kernel created.
	 * Limit 0xff matches the 0x100-byte kexec_debug_idt.
	 */
	leaq	kexec_debug_idt(%rip), %rsi
	pushq	%rsi
	pushw	$0xff
	lidt	(%rsp)
	addq	$10, %rsp

	/* Disabled breakpoint, kept as a debugging aid */
	//int3

	/*
	 * Clear X86_CR4_CET (if it was set) such that we can clear CR0_WP
	 * below.
	 */
	movq	%cr4, %rax
	andq	$~(X86_CR4_CET), %rax
	movq	%rax, %cr4

	/*
	 * Set cr0 to a known state:
	 *  - Paging enabled
	 *  - Alignment check disabled
	 *  - Write protect disabled
	 *  - No task switch
	 *  - Don't do FP software emulation.
	 *  - Protected mode enabled
	 */
	movq	%cr0, %rax
	andq	$~(X86_CR0_AM | X86_CR0_WP | X86_CR0_TS | X86_CR0_EM), %rax
	orl	$(X86_CR0_PG | X86_CR0_PE), %eax
	movq	%rax, %cr0

	/*
	 * Set cr4 to a known state:
	 *  - physical address extension enabled
	 *  - 5-level paging, if it was enabled before
	 *  - Machine check exception on TDX guest, if it was enabled before.
	 *    Clearing MCE might not be allowed in TDX guests, depending on setup.
	 *
	 * Use R13 that contains the original CR4 value, read in relocate_kernel().
	 * PAE is always set in the original CR4.
	 */
	andl	$(X86_CR4_PAE | X86_CR4_LA57), %r13d
	ALTERNATIVE "", __stringify(orl $X86_CR4_MCE, %r13d), X86_FEATURE_TDX_GUEST
	movq	%r13, %cr4

	/* Flush the TLB (needed?) by reloading the page table page from %r9 */
	movq	%r9, %cr3

	/*
	 * If SME is active, there could be old encrypted cache line
	 * entries that will conflict with the now unencrypted memory
	 * used by kexec. Flush the caches before copying the kernel.
	 */
	testq	%r8, %r8
	jz .Lsme_off
	wbinvd
.Lsme_off:

	/* %rdi = indirection page, %r11 = preserve_context */
	call	swap_pages

	/*
	 * To be certain of avoiding problems with self-modifying code
	 * I need to execute a serializing instruction here.
	 * So I flush the TLB by reloading %cr3 here, it's handy,
	 * and not processor dependent.
	 */
	movq	%cr3, %rax
	movq	%rax, %cr3

	testq	%r11, %r11	/* preserve_context */
	jnz .Lrelocate

	/*
	 * set all of the registers to known values
	 * leave %rsp alone
	 */

	xorl	%eax, %eax
	xorl	%ebx, %ebx
	xorl    %ecx, %ecx
	xorl    %edx, %edx
	xorl    %esi, %esi
	xorl    %edi, %edi
	xorl    %ebp, %ebp
	xorl	%r8d, %r8d
	xorl	%r9d, %r9d
	xorl	%r10d, %r10d
	xorl	%r11d, %r11d
	xorl	%r12d, %r12d
	xorl	%r13d, %r13d
	xorl	%r14d, %r14d
	xorl	%r15d, %r15d

	/*
	 * The only thing left on the stack is the start address pushed at
	 * the top of this routine, so this RET transfers control to it.
	 */
	ANNOTATE_UNRET_SAFE
	ret
	int3

.Lrelocate:
	/* preserve_context path: retrieve the start address pushed at entry */
	popq	%rdx

	/* Use the swap page for the callee's stack */
	movq	kexec_pa_swap_page(%rip), %r10
	leaq	PAGE_SIZE(%r10), %rsp

	/* push the existing entry point onto the callee's stack */
	pushq	%rdx

	ANNOTATE_RETPOLINE_SAFE
	call	*%rdx

	/*
	 * get the re-entry point of the peer system; it is kept in %rbp
	 * until virtual_mapped moves it into %rax as the return value
	 */
	popq	%rbp
	/*
	 * NOTE(review): %r10 is loaded here but not read by any code visible
	 * in this file afterwards (swap_pages reloads kexec_pa_swap_page
	 * itself).
	 */
	movq	kexec_pa_swap_page(%rip), %r10
	movq	pa_backup_pages_map(%rip), %rdi
	movq	kexec_pa_table_page(%rip), %rax
	movq	%rax, %cr3

	/*
	 * Find start (and end) of this physical mapping of control page:
	 * round the current instruction pointer down to a page boundary and
	 * put the stack at the end of that page.
	 */
	leaq	(%rip), %r8
	ANNOTATE_NOENDBR
	andq	$PAGE_MASK, %r8
	lea	PAGE_SIZE(%r8), %rsp
	movl	$1, %r11d	/* Ensure preserve_context flag is set */
	call	swap_pages
	/*
	 * Compute kexec_va_control_page +
	 * (virtual_mapped - __relocate_kernel_start) (the "0b" terms cancel),
	 * push it and RET to it.
	 */
	movq	kexec_va_control_page(%rip), %rax
0:	addq	$virtual_mapped - 0b, %rax
	subq	$__relocate_kernel_start - 0b, %rax
	pushq	%rax
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(identity_mapped)

/*
 * virtual_mapped - final stage of the preserve_context return path, reached
 * by the RET at the end of identity_mapped.
 *
 * Restores the stack pointer and control registers saved by
 * relocate_kernel(), pops the registers and flags that relocate_kernel()
 * pushed, and returns to relocate_kernel()'s caller with the peer's
 * re-entry point (held in %rbp on entry) in %rax.
 */
SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
	UNWIND_HINT_END_OF_STACK
	ANNOTATE_NOENDBR // RET target, above
	/* Back onto the stack relocate_kernel() was called on */
	movq	saved_rsp(%rip), %rsp
	/* Restore CR4 first, then CR3, then CR0 */
	movq	saved_cr4(%rip), %rax
	movq	%rax, %cr4
	movq	saved_cr3(%rip), %rax
	movq	saved_cr0(%rip), %r8
	movq	%rax, %cr3
	movq	%r8, %cr0

#ifdef CONFIG_KEXEC_JUMP
	/* Saved in save_processor_state. */
	movq    $saved_context, %rax
	lgdt    saved_context_gdt_desc(%rax)
#endif

	/* relocate_kernel() returns the re-entry point for next time */
	movq	%rbp, %rax

	/* Undo the pushes done at the top of relocate_kernel() */
	popf
	popq	%r15
	popq	%r14
	popq	%r13
	popq	%r12
	popq	%rbp
	popq	%rbx
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(virtual_mapped)

	/*
	 * Do the copies.
	 *
	 * swap_pages walks a list of 8-byte entries. The low bits of each
	 * entry select its meaning; the remaining bits (entry with the low 12
	 * bits masked off) are a page address:
	 *   0x1  destination page - becomes the current copy destination
	 *   0x2  indirection page - the following entries are read from it
	 *   0x4  done             - return
	 *   0x8  source page      - copy (or swap) it with the destination
	 * Entries with none of these bits set are skipped.
	 *
	 * Each copy moves 512 quadwords (4096 bytes). With preserve_context
	 * set, source and destination are exchanged through
	 * kexec_pa_swap_page; otherwise the source is simply copied to the
	 * destination. REP MOVSQ leaves %rdi just past the page it wrote, so
	 * consecutive source entries land on consecutive destination pages.
	 *
	 * Clobbers %rax, %rbx, %rcx, %rdx, %rsi, %rdi and flags.
	 */
SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
	UNWIND_HINT_END_OF_STACK
	/*
	 * %rdi indirection page
	 * %r11 preserve_context
	 */
	movq	%rdi, %rcx	/* Put the indirection_page in %rcx */
	xorl	%edi, %edi
	xorl	%esi, %esi
	jmp	.Lstart		/* Should start with an indirection record */

.Lloop:	/* top, read another word for the indirection page */

	movq	(%rbx), %rcx
	addq	$8,	%rbx
.Lstart:
	testb	$0x1,	%cl   /* is it a destination page? */
	jz	.Lnotdest
	movq	%rcx,	%rdi
	andq	$0xfffffffffffff000, %rdi
	jmp	.Lloop
.Lnotdest:
	testb	$0x2,	%cl   /* is it an indirection page? */
	jz	.Lnotind
	movq	%rcx,   %rbx
	andq	$0xfffffffffffff000, %rbx
	jmp	.Lloop
.Lnotind:
	testb	$0x4,	%cl   /* is it the done indicator? */
	jz	.Lnotdone
	jmp	.Ldone
.Lnotdone:
	testb	$0x8,	%cl   /* is it the source indicator? */
	jz	.Lloop	      /* Ignore it otherwise */
	movq	%rcx,   %rsi  /* For every source page do a copy */
	andq	$0xfffffffffffff000, %rsi

	movq	%rdi, %rdx    /* Save destination page to %rdx */
	movq	%rsi, %rax    /* Save source page to %rax */

	testq	%r11, %r11    /* Only actually swap for ::preserve_context */
	jz	.Lnoswap

	/* copy source page to swap page */
	movq	kexec_pa_swap_page(%rip), %rdi
	movl	$512, %ecx
	rep movsq

	/* copy destination page to source page */
	movq	%rax, %rdi
	movq	%rdx, %rsi
	movl	$512, %ecx
	rep movsq

	/* copy swap page to destination page */
	movq	%rdx, %rdi
	movq	kexec_pa_swap_page(%rip), %rsi
.Lnoswap:
	/*
	 * Final copy into the destination page: from the swap page when
	 * swapping, straight from the source page otherwise.
	 */
	movl	$512, %ecx
	rep movsq

	/* Leave %rsi one page past the source page that was just handled */
	lea	PAGE_SIZE(%rax), %rsi
	jmp	.Lloop
.Ldone:
	ANNOTATE_UNRET_SAFE
	ret
	int3
SYM_CODE_END(swap_pages)

/*
 * Generic 'print character' routine
 *  - %al: Character to be printed (may clobber %rax)
 *  - %rdx: MMIO address or port.
 *
 * pr_char_8250 is the port I/O variant: it takes the base port in %dx,
 * busy-waits until the XMTRDY bit is set in the register at base + LSR and
 * then writes the character to the base port. %dx is restored before
 * returning; %ah ends up holding the last status byte read.
 */
#define XMTRDY          0x20

/* TXR is not referenced by name below; the character goes to offset 0 */
#define TXR             0       /*  Transmit register (WRITE) */
#define LSR             5       /*  Line Status               */

SYM_CODE_START_LOCAL_NOALIGN(pr_char_8250)
	UNWIND_HINT_FUNC
	ANNOTATE_NOENDBR
	/* Point %dx at the line status register, park the character in %ah */
	addw	$LSR, %dx
	xchg	%al, %ah
.Lxmtrdy_loop:
	/* Poll the status register until XMTRDY is set */
	inb	%dx, %al
	testb	$XMTRDY, %al
	jnz	.Lready
	pause
	jmp .Lxmtrdy_loop

.Lready:
	/* Back to the base port, character back into %al, and send it */
	subw	$LSR, %dx
	xchg	%al, %ah
	outb	%al, %dx
	/*
	 * pr_char_null: a bare RET, selected by pr_setup when neither a port
	 * nor an MMIO address is configured, so that printing does nothing.
	 */
pr_char_null:
	ANNOTATE_NOENDBR

	ANNOTATE_UNRET_SAFE
	ret
SYM_CODE_END(pr_char_8250)

/*
 * pr_char_8250_mmio32 - memory-mapped variant of the print routine.
 *  - %al:  character to be printed
 *  - %rdx: MMIO base address
 *
 * Busy-waits until XMTRDY is set in the byte at offset LSR * 4 from the base
 * (registers are addressed at a 4-byte stride, presumably the "mmio32" of
 * the name), then stores the character at the base address. Clobbers %ah
 * and flags; %al and %rdx are left unchanged.
 */
SYM_CODE_START_LOCAL_NOALIGN(pr_char_8250_mmio32)
	UNWIND_HINT_FUNC
	ANNOTATE_NOENDBR
.Lxmtrdy_loop_mmio:
	/* Status goes to %ah so the character in %al stays intact */
	movb	(LSR*4)(%rdx), %ah
	testb	$XMTRDY, %ah
	jnz	.Lready_mmio
	pause
	jmp .Lxmtrdy_loop_mmio

.Lready_mmio:
	movb	%al, (%rdx)
	ANNOTATE_UNRET_SAFE
	ret
SYM_CODE_END(pr_char_8250_mmio32)

/*
 * Load pr_char function pointer into %rsi and load %rdx with whatever
 * that function wants to see there (typically port/MMIO address).
 *
 * Selection order:
 *  1. kexec_debug_8250_port non-zero   -> pr_char_8250, port in %dx
 *  2. kexec_debug_8250_mmio32 non-zero -> pr_char_8250_mmio32, address in %rdx
 *  3. otherwise                        -> pr_char_null (prints nothing)
 *
 * Clobbers %rsi, %rdx and flags. Uses the numeric local label 1.
 */
.macro pr_setup
	leaq	pr_char_8250(%rip), %rsi
	/* Only the low 16 bits of %rdx are written here */
	movw	kexec_debug_8250_port(%rip), %dx
	testw	%dx, %dx
	jnz	1f

	leaq	pr_char_8250_mmio32(%rip), %rsi
	movq	kexec_debug_8250_mmio32(%rip), %rdx
	testq	%rdx, %rdx
	jnz	1f

	leaq	pr_char_null(%rip), %rsi
1:
.endm

/*
 * Print the nybble in %bl, clobber %rax
 *
 * Converts the low four bits of %bl to a lowercase hexadecimal ASCII digit
 * in %al and tail-jumps to the print routine in %rsi, so that routine's RET
 * returns directly to pr_nybble's caller. %rsi/%rdx must have been set up
 * as by pr_setup.
 */
SYM_CODE_START_LOCAL_NOALIGN(pr_nybble)
	UNWIND_HINT_FUNC
	movb	%bl, %al
	/* NOTE(review): the purpose of this NOP is not evident from here */
	nop
	andb	$0x0f, %al
	addb	$0x30, %al	/* 0..15 -> '0'.. */
	cmpb	$0x3a, %al	/* below '9' + 1: already a decimal digit */
	jb	1f
	addb	$('a' - '0' - 10), %al	/* 10..15 -> 'a'..'f' */
	ANNOTATE_RETPOLINE_SAFE
1:	jmp	*%rsi
SYM_CODE_END(pr_nybble)

/*
 * pr_qword - print %rbx as 16 hexadecimal digits followed by a newline.
 *
 * Each iteration rotates %rbx left by four bits, which brings the next most
 * significant nybble into the low bits of %bl for pr_nybble. After 16
 * rotations %rbx holds its original value again. The newline is emitted by
 * tail-jumping to the print routine in %rsi.
 *
 * Clobbers %rax and %rcx. Relies on the print routine preserving %rbx,
 * %rcx, %rdx and %rsi.
 */
SYM_CODE_START_LOCAL_NOALIGN(pr_qword)
	UNWIND_HINT_FUNC
	movq	$16, %rcx	/* digit count */
1:	rolq	$4, %rbx
	call	pr_nybble
	loop	1b
	movb	$'\n', %al
	ANNOTATE_RETPOLINE_SAFE
	jmp	*%rsi
SYM_CODE_END(pr_qword)

/*
 * print_reg a, b, c, d, r - print a four-character label and a value.
 *
 *  \a..\d: the four label characters, sent one by one through the print
 *          routine in %rsi
 *  \r:     operand moved into %rbx and printed by pr_qword as 16 hex
 *          digits plus newline (a register, control register or memory
 *          operand)
 *
 * Clobbers %rax, %rbx, %rcx and flags. %rsi/%rdx must have been set up as
 * by pr_setup.
 */
.macro print_reg a, b, c, d, r
	movb	$\a, %al
	ANNOTATE_RETPOLINE_SAFE
	call	*%rsi
	movb	$\b, %al
	ANNOTATE_RETPOLINE_SAFE
	call	*%rsi
	movb	$\c, %al
	ANNOTATE_RETPOLINE_SAFE
	call	*%rsi
	movb	$\d, %al
	ANNOTATE_RETPOLINE_SAFE
	call	*%rsi
	/* \r is read here, outside any CALL, so %rsp-relative operands work */
	movq	\r, %rbx
	call	pr_qword
.endm

/*
 * kexec_debug_exc_vectors - table of entry stubs for exception vectors 0-15.
 *
 * Stub number N is placed at
 *   kexec_debug_exc_vectors + N * KEXEC_DEBUG_EXC_HANDLER_SIZE
 * by setting the location counter explicitly. Every stub leaves an error
 * code slot and the vector number on the stack and jumps to exc_handler.
 */
SYM_CODE_START_NOALIGN(kexec_debug_exc_vectors)
	/* Each of these is 6 bytes. */
	/*
	 * vec_err: for vectors used below where an error code is already on
	 * the stack (#DF, #TS, #NP, #SS, #GP, #PF). The two NOPs stand where
	 * vec_noerr has its "pushq $0", presumably to keep both stub kinds
	 * the same size.
	 */
.macro vec_err exc
	UNWIND_HINT_ENTRY
	. = kexec_debug_exc_vectors + (\exc * KEXEC_DEBUG_EXC_HANDLER_SIZE)
	nop
	nop
	pushq	$\exc
	jmp	exc_handler
.endm

	/*
	 * vec_noerr: for the remaining vectors; pushes a zero in the error
	 * code position so exc_handler sees the same stack layout.
	 */
.macro vec_noerr exc
	UNWIND_HINT_ENTRY
	. = kexec_debug_exc_vectors + (\exc * KEXEC_DEBUG_EXC_HANDLER_SIZE)
	pushq	$0
	pushq	$\exc
	jmp	exc_handler
.endm

	ANNOTATE_NOENDBR
	vec_noerr  0 // #DE
	vec_noerr  1 // #DB
	vec_noerr  2 // #NMI
	vec_noerr  3 // #BP
	vec_noerr  4 // #OF
	vec_noerr  5 // #BR
	vec_noerr  6 // #UD
	vec_noerr  7 // #NM
	vec_err    8 // #DF
	vec_noerr  9
	vec_err   10 // #TS
	vec_err   11 // #NP
	vec_err   12 // #SS
	vec_err   13 // #GP
	vec_err   14 // #PF
	vec_noerr 15
SYM_CODE_END(kexec_debug_exc_vectors)

/*
 * exc_handler - common tail of the kexec_debug_exc_vectors stubs.
 *
 * On entry the stack holds, from the top: the vector number and the error
 * code slot left by the stub, then the exception frame (RIP, CS, EFLAGS,
 * RSP, SS). The handler saves the registers it is about to clobber, prints
 * the exception information and register contents through the routine
 * chosen by pr_setup, and then
 *  - for vector 3 (INT3): restores the registers and returns with IRETQ;
 *  - for anything else: halts forever.
 */
SYM_CODE_START_LOCAL_NOALIGN(exc_handler)
	/* No need for RET mitigations during kexec */
	VALIDATE_UNRET_END

	/* Registers used by pr_setup and the print helpers */
	pushq	%rax
	pushq	%rbx
	pushq	%rcx
	pushq	%rdx
	pushq	%rsi

	/* Stack frame: offsets from %rsp after the five pushes above */
#define EXC_SS		0x58 /* Architectural... */
#define EXC_RSP		0x50
#define EXC_EFLAGS	0x48
#define EXC_CS		0x40
#define EXC_RIP		0x38
#define EXC_ERRORCODE	0x30 /* Either architectural or zero pushed by handler */
#define EXC_EXCEPTION	0x28 /* Pushed by handler entry point */
#define EXC_RAX		0x20 /* Pushed just above in exc_handler */
#define EXC_RBX		0x18
#define EXC_RCX		0x10
#define EXC_RDX		0x08
#define EXC_RSI		0x00

	/* Set up %rdx/%rsi for debug output */
	pr_setup

	/* rip and exception info */
	print_reg 'E', 'x', 'c', ':', EXC_EXCEPTION(%rsp)
	print_reg 'E', 'r', 'r', ':', EXC_ERRORCODE(%rsp)
	print_reg 'r', 'i', 'p', ':', EXC_RIP(%rsp)
	print_reg 'r', 's', 'p', ':', EXC_RSP(%rsp)

	/* We spilled these to the stack */
	print_reg 'r', 'a', 'x', ':', EXC_RAX(%rsp)
	print_reg 'r', 'b', 'x', ':', EXC_RBX(%rsp)
	print_reg 'r', 'c', 'x', ':', EXC_RCX(%rsp)
	print_reg 'r', 'd', 'x', ':', EXC_RDX(%rsp)
	print_reg 'r', 's', 'i', ':', EXC_RSI(%rsp)

	/* Other registers untouched, so they are printed directly */
	print_reg 'r', 'd', 'i', ':', %rdi
	print_reg 'r', '8', ' ', ':', %r8
	print_reg 'r', '9', ' ', ':', %r9
	print_reg 'r', '1', '0', ':', %r10
	print_reg 'r', '1', '1', ':', %r11
	print_reg 'r', '1', '2', ':', %r12
	print_reg 'r', '1', '3', ':', %r13
	print_reg 'r', '1', '4', ':', %r14
	print_reg 'r', '1', '5', ':', %r15
	print_reg 'c', 'r', '2', ':', %cr2

	/* Only return from INT3 */
	cmpq	$3, EXC_EXCEPTION(%rsp)
	jne	.Ldie

	/* Restore the spilled registers in reverse push order */
	popq	%rsi
	popq	%rdx
	popq	%rcx
	popq	%rbx
	popq	%rax

	/* Drop the vector number and the error code slot, then return */
	addq	$16, %rsp
	iretq

.Ldie:
	/* Any other exception is fatal: halt, and halt again if woken up */
	hlt
	jmp	.Ldie

SYM_CODE_END(exc_handler)