Diffstat (limited to 'arch/s390/kernel/uv.c')
-rw-r--r--  arch/s390/kernel/uv.c  466
1 file changed, 196 insertions(+), 270 deletions(-)
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index 6f9654a191ad..b99478e84da4 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -15,23 +15,11 @@
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/pagewalk.h>
+#include <linux/backing-dev.h>
#include <asm/facility.h>
#include <asm/sections.h>
#include <asm/uv.h>
-#if !IS_ENABLED(CONFIG_KVM)
-unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
-{
- return 0;
-}
-
-int gmap_fault(struct gmap *gmap, unsigned long gaddr,
- unsigned int fault_flags)
-{
- return 0;
-}
-#endif
-
/* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */
int __bootdata_preserved(prot_virt_guest);
EXPORT_SYMBOL(prot_virt_guest);
@@ -148,7 +136,7 @@ int uv_destroy_folio(struct folio *folio)
{
int rc;
- /* See gmap_make_secure(): large folios cannot be secure */
+ /* Large folios cannot be secure */
if (unlikely(folio_test_large(folio)))
return 0;
@@ -159,6 +147,7 @@ int uv_destroy_folio(struct folio *folio)
folio_put(folio);
return rc;
}
+EXPORT_SYMBOL(uv_destroy_folio);
/*
* The present PTE still indirectly holds a folio reference through the mapping.
@@ -175,7 +164,7 @@ int uv_destroy_pte(pte_t pte)
*
* @paddr: Absolute host address of page to be exported
*/
-static int uv_convert_from_secure(unsigned long paddr)
+int uv_convert_from_secure(unsigned long paddr)
{
struct uv_cb_cfs uvcb = {
.header.cmd = UVC_CMD_CONV_FROM_SEC_STOR,
@@ -187,15 +176,16 @@ static int uv_convert_from_secure(unsigned long paddr)
return -EINVAL;
return 0;
}
+EXPORT_SYMBOL_GPL(uv_convert_from_secure);
/*
* The caller must already hold a reference to the folio.
*/
-static int uv_convert_from_secure_folio(struct folio *folio)
+int uv_convert_from_secure_folio(struct folio *folio)
{
int rc;
- /* See gmap_make_secure(): large folios cannot be secure */
+ /* Large folios cannot be secure */
if (unlikely(folio_test_large(folio)))
return 0;
@@ -206,6 +196,7 @@ static int uv_convert_from_secure_folio(struct folio *folio)
folio_put(folio);
return rc;
}
+EXPORT_SYMBOL_GPL(uv_convert_from_secure_folio);
/*
* The present PTE still indirectly holds a folio reference through the mapping.
@@ -216,6 +207,39 @@ int uv_convert_from_secure_pte(pte_t pte)
return uv_convert_from_secure_folio(pfn_folio(pte_pfn(pte)));
}
+/**
+ * should_export_before_import - Determine whether an export is needed
+ * before an import-like operation
+ * @uvcb: the Ultravisor control block of the UVC to be performed
+ * @mm: the mm of the process
+ *
+ * Returns whether an export is needed before every import-like operation.
+ * This is needed for shared pages, which don't trigger a secure storage
+ * exception when accessed from a different guest.
+ *
+ * Although the Unpin Page UVC is considered an import-like operation,
+ * it is not an actual import, so it is not affected.
+ *
+ * An export is also not needed when there is only one protected VM,
+ * because the page cannot belong to the wrong VM in that case (there
+ * is no "other VM" it could belong to).
+ *
+ * Return: true if an export is needed before every import, otherwise false.
+ */
+static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
+{
+ /*
+ * The misc feature indicates, among other things, that importing a
+ * shared page from a different protected VM will automatically also
+ * transfer its ownership.
+ */
+ if (uv_has_feature(BIT_UV_FEAT_MISC))
+ return false;
+ if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
+ return false;
+ return atomic_read(&mm->context.protected_count) > 1;
+}
+
/*
* Calculate the expected ref_count for a folio that would otherwise have no
* further pins. This was cribbed from similar functions in other places in
@@ -237,13 +261,31 @@ static int expected_folio_refs(struct folio *folio)
return res;
}
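The body of expected_folio_refs() is untouched by this patch and mostly
elided by the diff context above. For orientation only, a hypothetical
sketch of the kind of accounting such a helper performs (the name and the
exact set of checks are illustrative, not the verbatim upstream body):

	/* Hypothetical sketch; illustrates the idea, not the exact upstream code. */
	static int expected_folio_refs_sketch(struct folio *folio)
	{
		int res = 0;

		/* One reference per page table mapping of the folio. */
		res += folio_mapcount(folio);
		/* One reference held by the page cache, if any. */
		if (folio->mapping)
			res++;
		/* One reference for attached private data (e.g. buffer heads). */
		if (folio_test_private(folio))
			res++;
		return res;
	}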
-static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
+/**
+ * __make_folio_secure() - make a folio secure
+ * @folio: the folio to make secure
+ * @uvcb: the uvcb that describes the UVC to be used
+ *
+ * The folio @folio will be made secure if possible, @uvcb will be passed
+ * as-is to the UVC.
+ *
+ * Return: 0 on success;
+ * -EBUSY if the folio is in writeback or has too many references;
+ * -EAGAIN if the UVC needs to be attempted again;
+ * -ENXIO if the address is not mapped;
+ * -EINVAL if the UVC failed for other reasons.
+ *
+ * Context: The caller must hold exactly one extra reference on the folio
+ * (it's the same logic as split_folio()), and the folio must be
+ * locked.
+ */
+static int __make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
{
int expected, cc = 0;
if (folio_test_writeback(folio))
- return -EAGAIN;
- expected = expected_folio_refs(folio);
+ return -EBUSY;
+ expected = expected_folio_refs(folio) + 1;
if (!folio_ref_freeze(folio, expected))
return -EBUSY;
set_bit(PG_arch_1, &folio->flags);
@@ -268,262 +310,167 @@ static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
return uvcb->rc == 0x10a ? -ENXIO : -EINVAL;
}
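The heart of __make_folio_secure() is the refcount-freeze pattern shown
in the hunk above: the folio's reference count is frozen at the expected
value (which now includes the caller's mandatory extra reference, hence
the "+ 1") so that no new references can appear while the Ultravisor
call is in flight. A minimal sketch of the pattern, assuming the
uv_call() wrapper from asm/uv.h, with error handling trimmed:

	expected = expected_folio_refs(folio) + 1; /* +1: the caller's reference */
	if (!folio_ref_freeze(folio, expected))
		return -EBUSY;                     /* unexpected extra references */
	set_bit(PG_arch_1, &folio->flags);         /* folio may now become secure */
	cc = uv_call(0, (u64)uvcb);                /* perform the import UVC */
	folio_ref_unfreeze(folio, expected);       /* folio is usable again */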
-/**
- * should_export_before_import - Determine whether an export is needed
- * before an import-like operation
- * @uvcb: the Ultravisor control block of the UVC to be performed
- * @mm: the mm of the process
- *
- * Returns whether an export is needed before every import-like operation.
- * This is needed for shared pages, which don't trigger a secure storage
- * exception when accessed from a different guest.
- *
- * Although considered as one, the Unpin Page UVC is not an actual import,
- * so it is not affected.
- *
- * No export is needed also when there is only one protected VM, because the
- * page cannot belong to the wrong VM in that case (there is no "other VM"
- * it can belong to).
- *
- * Return: true if an export is needed before every import, otherwise false.
- */
-static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
+static int make_folio_secure(struct mm_struct *mm, struct folio *folio, struct uv_cb_header *uvcb)
{
- /*
- * The misc feature indicates, among other things, that importing a
- * shared page from a different protected VM will automatically also
- * transfer its ownership.
- */
- if (uv_has_feature(BIT_UV_FEAT_MISC))
- return false;
- if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
- return false;
- return atomic_read(&mm->context.protected_count) > 1;
-}
+ int rc;
-/*
- * Drain LRU caches: the local one on first invocation and the ones of all
- * CPUs on successive invocations. Returns "true" on the first invocation.
- */
-static bool drain_lru(bool *drain_lru_called)
-{
- /*
- * If we have tried a local drain and the folio refcount
- * still does not match our expected safe value, try with a
- * system wide drain. This is needed if the pagevecs holding
- * the page are on a different CPU.
- */
- if (*drain_lru_called) {
- lru_add_drain_all();
- /* We give up here, don't retry immediately. */
- return false;
- }
- /*
- * We are here if the folio refcount does not match the
- * expected safe value. The main culprits are usually
- * pagevecs. With lru_add_drain() we drain the pagevecs
- * on the local CPU so that hopefully the refcount will
- * reach the expected safe value.
- */
- lru_add_drain();
- *drain_lru_called = true;
- /* The caller should try again immediately */
- return true;
+ if (!folio_trylock(folio))
+ return -EAGAIN;
+ if (should_export_before_import(uvcb, mm))
+ uv_convert_from_secure(folio_to_phys(folio));
+ rc = __make_folio_secure(folio, uvcb);
+ folio_unlock(folio);
+
+ return rc;
}
-/*
- * Requests the Ultravisor to make a page accessible to a guest.
- * If it's brought in the first time, it will be cleared. If
- * it has been exported before, it will be decrypted and integrity
- * checked.
+/**
+ * s390_wiggle_split_folio() - try to drain extra references to a folio and
+ * split the folio if it is large.
+ * @mm: the mm containing the folio to work on
+ * @folio: the folio
+ *
+ * Context: Must be called while holding an extra reference to the folio;
+ * the mm lock should not be held.
+ * Return: 0 if the operation was successful;
+ * -EAGAIN if splitting the large folio was not successful,
+ * but another attempt can be made;
+ * -EINVAL in case of other folio splitting errors. See split_folio().
*/
-int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
+static int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio)
{
- struct vm_area_struct *vma;
- bool drain_lru_called = false;
- spinlock_t *ptelock;
- unsigned long uaddr;
- struct folio *folio;
- pte_t *ptep;
- int rc;
+ int rc, tried_splits;
-again:
- rc = -EFAULT;
- mmap_read_lock(gmap->mm);
+ lockdep_assert_not_held(&mm->mmap_lock);
+ folio_wait_writeback(folio);
+ lru_add_drain_all();
- uaddr = __gmap_translate(gmap, gaddr);
- if (IS_ERR_VALUE(uaddr))
- goto out;
- vma = vma_lookup(gmap->mm, uaddr);
- if (!vma)
- goto out;
- /*
- * Secure pages cannot be huge and userspace should not combine both.
- * In case userspace does it anyway this will result in an -EFAULT for
- * the unpack. The guest is thus never reaching secure mode. If
- * userspace is playing dirty tricky with mapping huge pages later
- * on this will result in a segmentation fault.
- */
- if (is_vm_hugetlb_page(vma))
- goto out;
-
- rc = -ENXIO;
- ptep = get_locked_pte(gmap->mm, uaddr, &ptelock);
- if (!ptep)
- goto out;
- if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) {
- folio = page_folio(pte_page(*ptep));
- rc = -EAGAIN;
- if (folio_test_large(folio)) {
- rc = -E2BIG;
- } else if (folio_trylock(folio)) {
- if (should_export_before_import(uvcb, gmap->mm))
- uv_convert_from_secure(PFN_PHYS(folio_pfn(folio)));
- rc = make_folio_secure(folio, uvcb);
+ if (!folio_test_large(folio))
+ return 0;
+
+ for (tried_splits = 0; tried_splits < 2; tried_splits++) {
+ struct address_space *mapping;
+ loff_t lstart, lend;
+ struct inode *inode;
+
+ folio_lock(folio);
+ rc = split_folio(folio);
+ if (rc != -EBUSY) {
folio_unlock(folio);
+ return rc;
}
/*
- * Once we drop the PTL, the folio may get unmapped and
- * freed immediately. We need a temporary reference.
+ * Splitting with -EBUSY can fail for various reasons, but we
+ * have to handle one case explicitly for now: some mappings
+ * don't allow for splitting dirty folios; writeback will
+ * mark them clean again, including marking all page table
+ * entries mapping the folio read-only, to catch future write
+ * attempts.
+ *
+ * While the system should be writing back dirty folios in the
+ * background, we obtained this folio by looking up a writable
+ * page table entry. On these problematic mappings, writable
+ * page table entries imply dirty folios, preventing the
+ * split in the first place.
+ *
+ * To prevent a livelock, trigger writeback manually here and
+ * immediately try to split again, instead of returning and letting
+ * the caller look up the folio again through a writable page table
+ * entry (which would turn it dirty again).
+ *
+ * This is only a problem for some mappings (e.g., XFS); mappings
+ * that do not support writeback (e.g., shmem) are not affected.
*/
- if (rc == -EAGAIN || rc == -E2BIG)
- folio_get(folio);
- }
- pte_unmap_unlock(ptep, ptelock);
-out:
- mmap_read_unlock(gmap->mm);
-
- switch (rc) {
- case -E2BIG:
- folio_lock(folio);
- rc = split_folio(folio);
- folio_unlock(folio);
- folio_put(folio);
-
- switch (rc) {
- case 0:
- /* Splitting succeeded, try again immediately. */
- goto again;
- case -EAGAIN:
- /* Additional folio references. */
- if (drain_lru(&drain_lru_called))
- goto again;
- return -EAGAIN;
- case -EBUSY:
- /* Unexpected race. */
- return -EAGAIN;
+ if (!folio_test_dirty(folio) || folio_test_anon(folio) ||
+ !folio->mapping || !mapping_can_writeback(folio->mapping)) {
+ folio_unlock(folio);
+ break;
}
- WARN_ON_ONCE(1);
- return -ENXIO;
- case -EAGAIN:
+
/*
- * If we are here because the UVC returned busy or partial
- * completion, this is just a useless check, but it is safe.
+ * Ideally, we would trigger writeback on this exact folio only, but
+ * there is no easy way to do that. Instead, stabilize the mapping
+ * while still holding the folio lock; the folio lock can then be
+ * dropped and writeback triggered on the range currently covered by
+ * the folio.
*/
- folio_wait_writeback(folio);
- folio_put(folio);
- return -EAGAIN;
- case -EBUSY:
- /* Additional folio references. */
- if (drain_lru(&drain_lru_called))
- goto again;
- return -EAGAIN;
- case -ENXIO:
- if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE))
- return -EFAULT;
- return -EAGAIN;
- }
- return rc;
-}
-EXPORT_SYMBOL_GPL(gmap_make_secure);
+ mapping = folio->mapping;
+ lstart = folio_pos(folio);
+ lend = lstart + folio_size(folio) - 1;
+ inode = igrab(mapping->host);
+ folio_unlock(folio);
-int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)
-{
- struct uv_cb_cts uvcb = {
- .header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
- .header.len = sizeof(uvcb),
- .guest_handle = gmap->guest_handle,
- .gaddr = gaddr,
- };
+ if (unlikely(!inode))
+ break;
- return gmap_make_secure(gmap, gaddr, &uvcb);
+ filemap_write_and_wait_range(mapping, lstart, lend);
+ iput(mapping->host);
+ }
+ return -EAGAIN;
}
-EXPORT_SYMBOL_GPL(gmap_convert_to_secure);
-/**
- * gmap_destroy_page - Destroy a guest page.
- * @gmap: the gmap of the guest
- * @gaddr: the guest address to destroy
- *
- * An attempt will be made to destroy the given guest page. If the attempt
- * fails, an attempt is made to export the page. If both attempts fail, an
- * appropriate error is returned.
- */
-int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
+int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb)
{
struct vm_area_struct *vma;
struct folio_walk fw;
- unsigned long uaddr;
struct folio *folio;
int rc;
- rc = -EFAULT;
- mmap_read_lock(gmap->mm);
-
- uaddr = __gmap_translate(gmap, gaddr);
- if (IS_ERR_VALUE(uaddr))
- goto out;
- vma = vma_lookup(gmap->mm, uaddr);
- if (!vma)
- goto out;
- /*
- * Huge pages should not be able to become secure
- */
- if (is_vm_hugetlb_page(vma))
- goto out;
+ mmap_read_lock(mm);
+ vma = vma_lookup(mm, hva);
+ if (!vma) {
+ mmap_read_unlock(mm);
+ return -EFAULT;
+ }
+ folio = folio_walk_start(&fw, vma, hva, 0);
+ if (!folio) {
+ mmap_read_unlock(mm);
+ return -ENXIO;
+ }
- rc = 0;
- folio = folio_walk_start(&fw, vma, uaddr, 0);
- if (!folio)
- goto out;
- /*
- * See gmap_make_secure(): large folios cannot be secure. Small
- * folio implies FW_LEVEL_PTE.
- */
- if (folio_test_large(folio) || !pte_write(fw.pte))
- goto out_walk_end;
- rc = uv_destroy_folio(folio);
+ folio_get(folio);
/*
- * Fault handlers can race; it is possible that two CPUs will fault
- * on the same secure page. One CPU can destroy the page, reboot,
- * re-enter secure mode and import it, while the second CPU was
- * stuck at the beginning of the handler. At some point the second
- * CPU will be able to progress, and it will not be able to destroy
- * the page. In that case we do not want to terminate the process,
- * we instead try to export the page.
+ * Secure pages cannot be huge and userspace should not combine both.
+ * In case userspace does it anyway, this will result in an -EFAULT
+ * for the unpack; the guest thus never reaches secure mode.
+ * If userspace plays dirty tricks and decides to map huge pages at a
+ * later point in time, it will receive a segmentation fault or
+ * KVM_RUN will return -EFAULT.
*/
- if (rc)
- rc = uv_convert_from_secure_folio(folio);
-out_walk_end:
+ if (folio_test_hugetlb(folio))
+ rc = -EFAULT;
+ else if (folio_test_large(folio))
+ rc = -E2BIG;
+ else if (!pte_write(fw.pte) || (pte_val(fw.pte) & _PAGE_INVALID))
+ rc = -ENXIO;
+ else
+ rc = make_folio_secure(mm, folio, uvcb);
folio_walk_end(&fw, vma);
-out:
- mmap_read_unlock(gmap->mm);
+ mmap_read_unlock(mm);
+
+ if (rc == -E2BIG || rc == -EBUSY) {
+ rc = s390_wiggle_split_folio(mm, folio);
+ if (!rc)
+ rc = -EAGAIN;
+ }
+ folio_put(folio);
+
return rc;
}
-EXPORT_SYMBOL_GPL(gmap_destroy_page);
+EXPORT_SYMBOL_GPL(make_hva_secure);
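make_hva_secure() reports transient conditions as -EAGAIN and leaves the
retry policy to its callers (in practice, KVM). A hypothetical caller
loop is sketched below; make_gpa_secure_sketch() is an illustrative name
and the gfn_to_hva() translation comes from <linux/kvm_host.h>, neither
is part of this patch:

	/* Illustrative only; a real caller would also fault the page in on -ENXIO. */
	static int make_gpa_secure_sketch(struct kvm *kvm, gpa_t gpa,
					  struct uv_cb_header *uvcb)
	{
		unsigned long hva;
		int rc;

		do {
			hva = gfn_to_hva(kvm, gpa_to_gfn(gpa));
			if (kvm_is_error_hva(hva))
				return -EFAULT;
			rc = make_hva_secure(kvm->mm, hva, uvcb);
		} while (rc == -EAGAIN);
		return rc;
	}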
/*
* To be called with the folio locked or with an extra reference! This will
- * prevent gmap_make_secure from touching the folio concurrently. Having 2
- * parallel arch_make_folio_accessible is fine, as the UV calls will become a
- * no-op if the folio is already exported.
+ * prevent kvm_s390_pv_make_secure() from touching the folio concurrently.
+ * Having two parallel arch_make_folio_accessible() calls is fine, as the
+ * UV calls will become a no-op if the folio is already exported.
*/
int arch_make_folio_accessible(struct folio *folio)
{
int rc = 0;
- /* See gmap_make_secure(): large folios cannot be secure */
+ /* Large folios cannot be secure */
if (unlikely(folio_test_large(folio)))
return 0;
@@ -894,7 +841,12 @@ out_kobj:
device_initcall(uv_sysfs_init);
/*
- * Find the secret with the secret_id in the provided list.
+ * Locate a secret in the provided list by its id.
+ * @secret_id: search pattern.
+ * @list: ephemeral buffer space.
+ * @secret: output data, containing the secret's metadata.
+ *
+ * Search the given list for a secret with a matching secret_id.
*
* Context: might sleep.
*/
@@ -915,12 +867,15 @@ static int find_secret_in_page(const u8 secret_id[UV_SECRET_ID_LEN],
/*
- * Do the actual search for `uv_get_secret_metadata`.
+ * uv_find_secret() - search secret metadata for a given secret id.
+ * @secret_id: search pattern.
+ * @list: ephemeral buffer space.
+ * @secret: output data, containing the secret's metadata.
*
* Context: might sleep.
*/
-static int find_secret(const u8 secret_id[UV_SECRET_ID_LEN],
- struct uv_secret_list *list,
- struct uv_secret_list_item_hdr *secret)
+int uv_find_secret(const u8 secret_id[UV_SECRET_ID_LEN],
+ struct uv_secret_list *list,
+ struct uv_secret_list_item_hdr *secret)
{
u16 start_idx = 0;
u16 list_rc;
@@ -942,36 +897,7 @@ static int find_secret(const u8 secret_id[UV_SECRET_ID_LEN],
return -ENOENT;
}
-
-/**
- * uv_get_secret_metadata() - get secret metadata for a given secret id.
- * @secret_id: search pattern.
- * @secret: output data, containing the secret's metadata.
- *
- * Search for a secret with the given secret_id in the Ultravisor secret store.
- *
- * Context: might sleep.
- *
- * Return:
- * * %0: - Found entry; secret->idx and secret->type are valid.
- * * %ENOENT - No entry found.
- * * %ENODEV: - Not supported: UV not available or command not available.
- * * %EIO: - Other unexpected UV error.
- */
-int uv_get_secret_metadata(const u8 secret_id[UV_SECRET_ID_LEN],
- struct uv_secret_list_item_hdr *secret)
-{
- struct uv_secret_list *buf;
- int rc;
-
- buf = kzalloc(sizeof(*buf), GFP_KERNEL);
- if (!buf)
- return -ENOMEM;
- rc = find_secret(secret_id, buf, secret);
- kfree(buf);
- return rc;
-}
-EXPORT_SYMBOL_GPL(uv_get_secret_metadata);
+EXPORT_SYMBOL_GPL(uv_find_secret);
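With the uv_get_secret_metadata() wrapper removed above, callers of
uv_find_secret() now allocate the ephemeral list buffer themselves; the
pattern of the removed wrapper still applies, roughly:

	/* Usage sketch, mirroring the removed uv_get_secret_metadata(). */
	struct uv_secret_list *buf;
	struct uv_secret_list_item_hdr hdr;
	int rc;

	buf = kzalloc(sizeof(*buf), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;
	rc = uv_find_secret(secret_id, buf, &hdr);
	kfree(buf);
	/* On rc == 0, hdr.idx and hdr.type identify the secret. */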
/**
* uv_retrieve_secret() - get the secret value for the secret index.