/* SPDX-License-Identifier: GPL-2.0-only */
/*
* Copyright (c) 2024-2025, NVIDIA CORPORATION & AFFILIATES
*/
#ifndef __GENERIC_PT_COMMON_H
#define __GENERIC_PT_COMMON_H

#include <linux/types.h>
#include <linux/build_bug.h>
#include <linux/bits.h>

/**
* DOC: Generic Radix Page Table
*
* Generic Radix Page Table is a set of functions and helpers to efficiently
* parse radix-style page tables typically seen in HW implementations. The
* interface is built to deliver code generation similar to the mm's
* pte/pmd/etc system by fully inlining the exact code required to handle each
* table level.
*
* Like the mm subsystem, each format contributes its parsing implementation
* under common names, and the common code implements the required algorithms.
*
* The system is divided into three logical levels:
*
* - The page table format and its manipulation functions
* - Generic helpers to give a consistent API regardless of underlying format
* - An algorithm implementation (e.g. IOMMU/DRM/KVM/MM)
*
* Multiple implementations are supported. The intention is for the generic
* format code to be reusable by whatever specialized implementation is
* required. The generic code is solely about the format of the radix tree; it
* does not include memory allocation or higher-level decisions, which are left
* to the implementation. An illustrative sketch of this layering follows
* struct pt_amdv1 below.
*
* The generic framework supports a superset of functions across many HW
* implementations:
*
* - Entries made up of contiguous blocks of IO PTEs for larger page sizes
* - Multi-level tables, up to 6 levels, with a runtime-selected top level
* - Runtime-variable table level size (ARM's concatenated tables)
* - Expandable top level allowing dynamic sizing of table levels
* - Optional leaf entries at any level
* - 32-bit/64-bit virtual and output addresses, using every address bit
* - Dirty tracking
* - Sign-extended addressing
*/

/**
* struct pt_common - struct for all page table implementations
*/
struct pt_common {
/**
* @top_of_table: Encodes the table top pointer and the top level in a
* single value. Must use READ_ONCE/WRITE_ONCE to access it. The lower
* bits of the aligned table pointer are used for the level.
*/
uintptr_t top_of_table;
/**
* @max_oasz_lg2: Maximum number of bits the OA can contain. Upper bits
* must be zero. This may be less than what the page table format
* supports, but must not be more.
*/
u8 max_oasz_lg2;
/**
* @max_vasz_lg2: Maximum number of bits the VA can contain. Upper bits
* are 0 or 1 depending on pt_full_va_prefix(). This may be less than
* what the page table format supports, but must not be more. When
* PT_FEAT_DYNAMIC_TOP is set this reflects the maximum VA capability.
*/
u8 max_vasz_lg2;
/**
* @features: Bitmap of `enum pt_features`
*/
unsigned int features;
};
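
/*
 * Illustrative sketch only, not part of this header's API: @features is a
 * bitmap indexed by enum pt_features, so a flag is tested by bit position
 * rather than by using the enum value as a mask. The helper name is
 * hypothetical; BIT() comes from linux/bits.h, included above.
 */
static inline bool example_pt_has_feature(const struct pt_common *common,
                                          unsigned int feature)
{
        /* Each enum pt_features symbol is a bit position in @features */
        return common->features & BIT(feature);
}
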
/* Encoding parameters for top_of_table */
enum {
PT_TOP_LEVEL_BITS = 3,
PT_TOP_LEVEL_MASK = GENMASK(PT_TOP_LEVEL_BITS - 1, 0),
};
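
/*
 * Illustrative sketch only, using hypothetical helper names: decoding the
 * packed top_of_table value described in struct pt_common. The level lives in
 * the low PT_TOP_LEVEL_BITS of the aligned table pointer; the remaining bits
 * are the pointer itself. Assumes READ_ONCE() is available (linux/compiler.h);
 * the real accessors live in the generic code, not in this header.
 */
static inline unsigned int example_pt_top_level(struct pt_common *common)
{
        return READ_ONCE(common->top_of_table) & PT_TOP_LEVEL_MASK;
}

static inline void *example_pt_top_table(struct pt_common *common)
{
        return (void *)(READ_ONCE(common->top_of_table) &
                        ~(uintptr_t)PT_TOP_LEVEL_MASK);
}
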
/**
* enum pt_features - Features turned on in the table. Each symbol is a bit
* position.
*/
enum pt_features {
/**
* @PT_FEAT_DMA_INCOHERENT: Cache flush page table memory before
* assuming the HW can read it. Otherwise a SMP release is sufficient
* for HW to read it.
*/
PT_FEAT_DMA_INCOHERENT,
/**
* @PT_FEAT_FULL_VA: The table can span the full VA range from 0 to
* PT_VADDR_MAX.
*/
PT_FEAT_FULL_VA,
/**
* @PT_FEAT_DYNAMIC_TOP: The table's top level can be increased
* dynamically during map. This requires HW support for atomically
* setting both the table top pointer and the starting table level.
*/
PT_FEAT_DYNAMIC_TOP,
/**
* @PT_FEAT_SIGN_EXTEND: The top most bit of the valid VA range sign
* extends up to the full pt_vaddr_t. This divides the page table into
* three VA ranges::
*
*   0         -> 2^N - 1           Lower
*   2^N       -> (MAX - 2^N - 1)   Non-Canonical
*   MAX - 2^N -> MAX               Upper
*
* In this mode pt_common::max_vasz_lg2 includes the sign bit, and the
* upper bits that don't fall within the translation are only validated.
* See the illustrative sketch after this enum.
*
* If not set there is no sign extension and the valid VA range runs from
* 0 to 2^N - 1.
*/
PT_FEAT_SIGN_EXTEND,
/**
* @PT_FEAT_FLUSH_RANGE: IOTLB maintenance is done by flushing IOVA
* ranges, which cleans out any walk cache or any IOPTE fully
* contained by the range. The optimization objective is to minimize the
* number of flushes even if ranges include IOVA gaps that do not need
* to be flushed.
*/
PT_FEAT_FLUSH_RANGE,
/**
* @PT_FEAT_FLUSH_RANGE_NO_GAPS: Like PT_FEAT_FLUSH_RANGE except that
* the optimization objective is to only flush IOVA that has been
* changed. This mode is suitable for cases like hypervisor shadowing
* where flushing unchanged ranges may cause the hypervisor to reparse
* a significant amount of the page table.
*/
PT_FEAT_FLUSH_RANGE_NO_GAPS,
/* private: */
PT_FEAT_FMT_START,
};
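
/*
 * Illustrative sketch only: a worked example of PT_FEAT_SIGN_EXTEND as
 * described above, assuming a hypothetical max_vasz_lg2 of 48 (so the sign
 * bit is bit 47):
 *
 *   0x0000000000000000 - 0x00007fffffffffff   Lower
 *   0x0000800000000000 - 0xffff7fffffffffff   Non-Canonical
 *   0xffff800000000000 - 0xffffffffffffffff   Upper
 *
 * A canonical VA has bits [63:48] all equal to bit 47. The helper below sign
 * extends from bit (max_vasz_lg2 - 1) and compares the result against the
 * original value; the name is hypothetical and not part of this header.
 */
static inline bool example_pt_va_is_canonical(u64 va, unsigned int max_vasz_lg2)
{
        u64 sext = (u64)((s64)(va << (64 - max_vasz_lg2)) >>
                         (64 - max_vasz_lg2));

        return sext == va;
}
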
struct pt_amdv1 {
struct pt_common common;
};
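
/*
 * Illustrative sketch only, with hypothetical struct and member names: the
 * layering described in the DOC comment above. An implementation, such as an
 * IOMMU driver's domain, wraps a format struct like pt_amdv1, which in turn
 * embeds the generic struct pt_common.
 */
struct example_amdv1_domain {
        struct pt_amdv1 amdpt;  /* format level, embeds struct pt_common */
        /* implementation-level members (allocation state, locks, ...) */
};
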
enum {
/*
* The memory backing the tables is encrypted. Use __sme_set() to adjust
* the page table pointers in the tree. This only works with
* CONFIG_AMD_MEM_ENCRYPT.
*/
PT_FEAT_AMDV1_ENCRYPT_TABLES = PT_FEAT_FMT_START,
/*
* The PTEs are set to prevent cache incoherent traffic, such as PCI no
* snoop. This is set either at creation time or before the first map
* operation.
*/
PT_FEAT_AMDV1_FORCE_COHERENCE,
};

struct pt_vtdss {
struct pt_common common;
};

enum {
/*
* The PTEs are set to prevent cache incoherent traffic, such as PCI no
* snoop. This is set either at creation time or before the first map
* operation.
*/
PT_FEAT_VTDSS_FORCE_COHERENCE = PT_FEAT_FMT_START,
/*
* Prevent creating read-only PTEs. Used to work around HW errata
* ERRATA_772415_SPR17.
*/
PT_FEAT_VTDSS_FORCE_WRITEABLE,
};

struct pt_x86_64 {
struct pt_common common;
};

enum {
/*
* The memory backing the tables is encrypted. Use __sme_set() to adjust
* the page table pointers in the tree. This only works with
* CONFIG_AMD_MEM_ENCRYPT.
*/
PT_FEAT_X86_64_AMD_ENCRYPT_TABLES = PT_FEAT_FMT_START,
};

#endif