diff options
Diffstat (limited to 'arch/s390/kernel/uv.c')
-rw-r--r-- | arch/s390/kernel/uv.c | 466 |
1 files changed, 196 insertions, 270 deletions
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 6f9654a191ad..b99478e84da4 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -15,23 +15,11 @@ #include <linux/pagemap.h> #include <linux/swap.h> #include <linux/pagewalk.h> +#include <linux/backing-dev.h> #include <asm/facility.h> #include <asm/sections.h> #include <asm/uv.h> -#if !IS_ENABLED(CONFIG_KVM) -unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) -{ - return 0; -} - -int gmap_fault(struct gmap *gmap, unsigned long gaddr, - unsigned int fault_flags) -{ - return 0; -} -#endif - /* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */ int __bootdata_preserved(prot_virt_guest); EXPORT_SYMBOL(prot_virt_guest); @@ -148,7 +136,7 @@ int uv_destroy_folio(struct folio *folio) { int rc; - /* See gmap_make_secure(): large folios cannot be secure */ + /* Large folios cannot be secure */ if (unlikely(folio_test_large(folio))) return 0; @@ -159,6 +147,7 @@ int uv_destroy_folio(struct folio *folio) folio_put(folio); return rc; } +EXPORT_SYMBOL(uv_destroy_folio); /* * The present PTE still indirectly holds a folio reference through the mapping. @@ -175,7 +164,7 @@ int uv_destroy_pte(pte_t pte) * * @paddr: Absolute host address of page to be exported */ -static int uv_convert_from_secure(unsigned long paddr) +int uv_convert_from_secure(unsigned long paddr) { struct uv_cb_cfs uvcb = { .header.cmd = UVC_CMD_CONV_FROM_SEC_STOR, @@ -187,15 +176,16 @@ static int uv_convert_from_secure(unsigned long paddr) return -EINVAL; return 0; } +EXPORT_SYMBOL_GPL(uv_convert_from_secure); /* * The caller must already hold a reference to the folio. */ -static int uv_convert_from_secure_folio(struct folio *folio) +int uv_convert_from_secure_folio(struct folio *folio) { int rc; - /* See gmap_make_secure(): large folios cannot be secure */ + /* Large folios cannot be secure */ if (unlikely(folio_test_large(folio))) return 0; @@ -206,6 +196,7 @@ static int uv_convert_from_secure_folio(struct folio *folio) folio_put(folio); return rc; } +EXPORT_SYMBOL_GPL(uv_convert_from_secure_folio); /* * The present PTE still indirectly holds a folio reference through the mapping. @@ -216,6 +207,39 @@ int uv_convert_from_secure_pte(pte_t pte) return uv_convert_from_secure_folio(pfn_folio(pte_pfn(pte))); } +/** + * should_export_before_import - Determine whether an export is needed + * before an import-like operation + * @uvcb: the Ultravisor control block of the UVC to be performed + * @mm: the mm of the process + * + * Returns whether an export is needed before every import-like operation. + * This is needed for shared pages, which don't trigger a secure storage + * exception when accessed from a different guest. + * + * Although considered as one, the Unpin Page UVC is not an actual import, + * so it is not affected. + * + * No export is needed also when there is only one protected VM, because the + * page cannot belong to the wrong VM in that case (there is no "other VM" + * it can belong to). + * + * Return: true if an export is needed before every import, otherwise false. + */ +static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm) +{ + /* + * The misc feature indicates, among other things, that importing a + * shared page from a different protected VM will automatically also + * transfer its ownership. + */ + if (uv_has_feature(BIT_UV_FEAT_MISC)) + return false; + if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED) + return false; + return atomic_read(&mm->context.protected_count) > 1; +} + /* * Calculate the expected ref_count for a folio that would otherwise have no * further pins. This was cribbed from similar functions in other places in @@ -237,13 +261,31 @@ static int expected_folio_refs(struct folio *folio) return res; } -static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) +/** + * __make_folio_secure() - make a folio secure + * @folio: the folio to make secure + * @uvcb: the uvcb that describes the UVC to be used + * + * The folio @folio will be made secure if possible, @uvcb will be passed + * as-is to the UVC. + * + * Return: 0 on success; + * -EBUSY if the folio is in writeback or has too many references; + * -EAGAIN if the UVC needs to be attempted again; + * -ENXIO if the address is not mapped; + * -EINVAL if the UVC failed for other reasons. + * + * Context: The caller must hold exactly one extra reference on the folio + * (it's the same logic as split_folio()), and the folio must be + * locked. + */ +static int __make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) { int expected, cc = 0; if (folio_test_writeback(folio)) - return -EAGAIN; - expected = expected_folio_refs(folio); + return -EBUSY; + expected = expected_folio_refs(folio) + 1; if (!folio_ref_freeze(folio, expected)) return -EBUSY; set_bit(PG_arch_1, &folio->flags); @@ -268,262 +310,167 @@ static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb) return uvcb->rc == 0x10a ? -ENXIO : -EINVAL; } -/** - * should_export_before_import - Determine whether an export is needed - * before an import-like operation - * @uvcb: the Ultravisor control block of the UVC to be performed - * @mm: the mm of the process - * - * Returns whether an export is needed before every import-like operation. - * This is needed for shared pages, which don't trigger a secure storage - * exception when accessed from a different guest. - * - * Although considered as one, the Unpin Page UVC is not an actual import, - * so it is not affected. - * - * No export is needed also when there is only one protected VM, because the - * page cannot belong to the wrong VM in that case (there is no "other VM" - * it can belong to). - * - * Return: true if an export is needed before every import, otherwise false. - */ -static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm) +static int make_folio_secure(struct mm_struct *mm, struct folio *folio, struct uv_cb_header *uvcb) { - /* - * The misc feature indicates, among other things, that importing a - * shared page from a different protected VM will automatically also - * transfer its ownership. - */ - if (uv_has_feature(BIT_UV_FEAT_MISC)) - return false; - if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED) - return false; - return atomic_read(&mm->context.protected_count) > 1; -} + int rc; -/* - * Drain LRU caches: the local one on first invocation and the ones of all - * CPUs on successive invocations. Returns "true" on the first invocation. - */ -static bool drain_lru(bool *drain_lru_called) -{ - /* - * If we have tried a local drain and the folio refcount - * still does not match our expected safe value, try with a - * system wide drain. This is needed if the pagevecs holding - * the page are on a different CPU. - */ - if (*drain_lru_called) { - lru_add_drain_all(); - /* We give up here, don't retry immediately. */ - return false; - } - /* - * We are here if the folio refcount does not match the - * expected safe value. The main culprits are usually - * pagevecs. With lru_add_drain() we drain the pagevecs - * on the local CPU so that hopefully the refcount will - * reach the expected safe value. - */ - lru_add_drain(); - *drain_lru_called = true; - /* The caller should try again immediately */ - return true; + if (!folio_trylock(folio)) + return -EAGAIN; + if (should_export_before_import(uvcb, mm)) + uv_convert_from_secure(folio_to_phys(folio)); + rc = __make_folio_secure(folio, uvcb); + folio_unlock(folio); + + return rc; } -/* - * Requests the Ultravisor to make a page accessible to a guest. - * If it's brought in the first time, it will be cleared. If - * it has been exported before, it will be decrypted and integrity - * checked. +/** + * s390_wiggle_split_folio() - try to drain extra references to a folio and + * split the folio if it is large. + * @mm: the mm containing the folio to work on + * @folio: the folio + * + * Context: Must be called while holding an extra reference to the folio; + * the mm lock should not be held. + * Return: 0 if the operation was successful; + * -EAGAIN if splitting the large folio was not successful, + * but another attempt can be made; + * -EINVAL in case of other folio splitting errors. See split_folio(). */ -int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb) +static int s390_wiggle_split_folio(struct mm_struct *mm, struct folio *folio) { - struct vm_area_struct *vma; - bool drain_lru_called = false; - spinlock_t *ptelock; - unsigned long uaddr; - struct folio *folio; - pte_t *ptep; - int rc; + int rc, tried_splits; -again: - rc = -EFAULT; - mmap_read_lock(gmap->mm); + lockdep_assert_not_held(&mm->mmap_lock); + folio_wait_writeback(folio); + lru_add_drain_all(); - uaddr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(uaddr)) - goto out; - vma = vma_lookup(gmap->mm, uaddr); - if (!vma) - goto out; - /* - * Secure pages cannot be huge and userspace should not combine both. - * In case userspace does it anyway this will result in an -EFAULT for - * the unpack. The guest is thus never reaching secure mode. If - * userspace is playing dirty tricky with mapping huge pages later - * on this will result in a segmentation fault. - */ - if (is_vm_hugetlb_page(vma)) - goto out; - - rc = -ENXIO; - ptep = get_locked_pte(gmap->mm, uaddr, &ptelock); - if (!ptep) - goto out; - if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) { - folio = page_folio(pte_page(*ptep)); - rc = -EAGAIN; - if (folio_test_large(folio)) { - rc = -E2BIG; - } else if (folio_trylock(folio)) { - if (should_export_before_import(uvcb, gmap->mm)) - uv_convert_from_secure(PFN_PHYS(folio_pfn(folio))); - rc = make_folio_secure(folio, uvcb); + if (!folio_test_large(folio)) + return 0; + + for (tried_splits = 0; tried_splits < 2; tried_splits++) { + struct address_space *mapping; + loff_t lstart, lend; + struct inode *inode; + + folio_lock(folio); + rc = split_folio(folio); + if (rc != -EBUSY) { folio_unlock(folio); + return rc; } /* - * Once we drop the PTL, the folio may get unmapped and - * freed immediately. We need a temporary reference. + * Splitting with -EBUSY can fail for various reasons, but we + * have to handle one case explicitly for now: some mappings + * don't allow for splitting dirty folios; writeback will + * mark them clean again, including marking all page table + * entries mapping the folio read-only, to catch future write + * attempts. + * + * While the system should be writing back dirty folios in the + * background, we obtained this folio by looking up a writable + * page table entry. On these problematic mappings, writable + * page table entries imply dirty folios, preventing the + * split in the first place. + * + * To prevent a livelock when trigger writeback manually and + * letting the caller look up the folio again in the page + * table (turning it dirty), immediately try to split again. + * + * This is only a problem for some mappings (e.g., XFS); + * mappings that do not support writeback (e.g., shmem) do not + * apply. */ - if (rc == -EAGAIN || rc == -E2BIG) - folio_get(folio); - } - pte_unmap_unlock(ptep, ptelock); -out: - mmap_read_unlock(gmap->mm); - - switch (rc) { - case -E2BIG: - folio_lock(folio); - rc = split_folio(folio); - folio_unlock(folio); - folio_put(folio); - - switch (rc) { - case 0: - /* Splitting succeeded, try again immediately. */ - goto again; - case -EAGAIN: - /* Additional folio references. */ - if (drain_lru(&drain_lru_called)) - goto again; - return -EAGAIN; - case -EBUSY: - /* Unexpected race. */ - return -EAGAIN; + if (!folio_test_dirty(folio) || folio_test_anon(folio) || + !folio->mapping || !mapping_can_writeback(folio->mapping)) { + folio_unlock(folio); + break; } - WARN_ON_ONCE(1); - return -ENXIO; - case -EAGAIN: + /* - * If we are here because the UVC returned busy or partial - * completion, this is just a useless check, but it is safe. + * Ideally, we'd only trigger writeback on this exact folio. But + * there is no easy way to do that, so we'll stabilize the + * mapping while we still hold the folio lock, so we can drop + * the folio lock to trigger writeback on the range currently + * covered by the folio instead. */ - folio_wait_writeback(folio); - folio_put(folio); - return -EAGAIN; - case -EBUSY: - /* Additional folio references. */ - if (drain_lru(&drain_lru_called)) - goto again; - return -EAGAIN; - case -ENXIO: - if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE)) - return -EFAULT; - return -EAGAIN; - } - return rc; -} -EXPORT_SYMBOL_GPL(gmap_make_secure); + mapping = folio->mapping; + lstart = folio_pos(folio); + lend = lstart + folio_size(folio) - 1; + inode = igrab(mapping->host); + folio_unlock(folio); -int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr) -{ - struct uv_cb_cts uvcb = { - .header.cmd = UVC_CMD_CONV_TO_SEC_STOR, - .header.len = sizeof(uvcb), - .guest_handle = gmap->guest_handle, - .gaddr = gaddr, - }; + if (unlikely(!inode)) + break; - return gmap_make_secure(gmap, gaddr, &uvcb); + filemap_write_and_wait_range(mapping, lstart, lend); + iput(mapping->host); + } + return -EAGAIN; } -EXPORT_SYMBOL_GPL(gmap_convert_to_secure); -/** - * gmap_destroy_page - Destroy a guest page. - * @gmap: the gmap of the guest - * @gaddr: the guest address to destroy - * - * An attempt will be made to destroy the given guest page. If the attempt - * fails, an attempt is made to export the page. If both attempts fail, an - * appropriate error is returned. - */ -int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr) +int make_hva_secure(struct mm_struct *mm, unsigned long hva, struct uv_cb_header *uvcb) { struct vm_area_struct *vma; struct folio_walk fw; - unsigned long uaddr; struct folio *folio; int rc; - rc = -EFAULT; - mmap_read_lock(gmap->mm); - - uaddr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(uaddr)) - goto out; - vma = vma_lookup(gmap->mm, uaddr); - if (!vma) - goto out; - /* - * Huge pages should not be able to become secure - */ - if (is_vm_hugetlb_page(vma)) - goto out; + mmap_read_lock(mm); + vma = vma_lookup(mm, hva); + if (!vma) { + mmap_read_unlock(mm); + return -EFAULT; + } + folio = folio_walk_start(&fw, vma, hva, 0); + if (!folio) { + mmap_read_unlock(mm); + return -ENXIO; + } - rc = 0; - folio = folio_walk_start(&fw, vma, uaddr, 0); - if (!folio) - goto out; - /* - * See gmap_make_secure(): large folios cannot be secure. Small - * folio implies FW_LEVEL_PTE. - */ - if (folio_test_large(folio) || !pte_write(fw.pte)) - goto out_walk_end; - rc = uv_destroy_folio(folio); + folio_get(folio); /* - * Fault handlers can race; it is possible that two CPUs will fault - * on the same secure page. One CPU can destroy the page, reboot, - * re-enter secure mode and import it, while the second CPU was - * stuck at the beginning of the handler. At some point the second - * CPU will be able to progress, and it will not be able to destroy - * the page. In that case we do not want to terminate the process, - * we instead try to export the page. + * Secure pages cannot be huge and userspace should not combine both. + * In case userspace does it anyway this will result in an -EFAULT for + * the unpack. The guest is thus never reaching secure mode. + * If userspace plays dirty tricks and decides to map huge pages at a + * later point in time, it will receive a segmentation fault or + * KVM_RUN will return -EFAULT. */ - if (rc) - rc = uv_convert_from_secure_folio(folio); -out_walk_end: + if (folio_test_hugetlb(folio)) + rc = -EFAULT; + else if (folio_test_large(folio)) + rc = -E2BIG; + else if (!pte_write(fw.pte) || (pte_val(fw.pte) & _PAGE_INVALID)) + rc = -ENXIO; + else + rc = make_folio_secure(mm, folio, uvcb); folio_walk_end(&fw, vma); -out: - mmap_read_unlock(gmap->mm); + mmap_read_unlock(mm); + + if (rc == -E2BIG || rc == -EBUSY) { + rc = s390_wiggle_split_folio(mm, folio); + if (!rc) + rc = -EAGAIN; + } + folio_put(folio); + return rc; } -EXPORT_SYMBOL_GPL(gmap_destroy_page); +EXPORT_SYMBOL_GPL(make_hva_secure); /* * To be called with the folio locked or with an extra reference! This will - * prevent gmap_make_secure from touching the folio concurrently. Having 2 - * parallel arch_make_folio_accessible is fine, as the UV calls will become a - * no-op if the folio is already exported. + * prevent kvm_s390_pv_make_secure() from touching the folio concurrently. + * Having 2 parallel arch_make_folio_accessible is fine, as the UV calls will + * become a no-op if the folio is already exported. */ int arch_make_folio_accessible(struct folio *folio) { int rc = 0; - /* See gmap_make_secure(): large folios cannot be secure */ + /* Large folios cannot be secure */ if (unlikely(folio_test_large(folio))) return 0; @@ -894,7 +841,12 @@ out_kobj: device_initcall(uv_sysfs_init); /* - * Find the secret with the secret_id in the provided list. + * Locate a secret in the list by its id. + * @secret_id: search pattern. + * @list: ephemeral buffer space + * @secret: output data, containing the secret's metadata. + * + * Search for a secret with the given secret_id in the Ultravisor secret store. * * Context: might sleep. */ @@ -915,12 +867,15 @@ static int find_secret_in_page(const u8 secret_id[UV_SECRET_ID_LEN], /* * Do the actual search for `uv_get_secret_metadata`. + * @secret_id: search pattern. + * @list: ephemeral buffer space + * @secret: output data, containing the secret's metadata. * * Context: might sleep. */ -static int find_secret(const u8 secret_id[UV_SECRET_ID_LEN], - struct uv_secret_list *list, - struct uv_secret_list_item_hdr *secret) +int uv_find_secret(const u8 secret_id[UV_SECRET_ID_LEN], + struct uv_secret_list *list, + struct uv_secret_list_item_hdr *secret) { u16 start_idx = 0; u16 list_rc; @@ -942,36 +897,7 @@ static int find_secret(const u8 secret_id[UV_SECRET_ID_LEN], return -ENOENT; } - -/** - * uv_get_secret_metadata() - get secret metadata for a given secret id. - * @secret_id: search pattern. - * @secret: output data, containing the secret's metadata. - * - * Search for a secret with the given secret_id in the Ultravisor secret store. - * - * Context: might sleep. - * - * Return: - * * %0: - Found entry; secret->idx and secret->type are valid. - * * %ENOENT - No entry found. - * * %ENODEV: - Not supported: UV not available or command not available. - * * %EIO: - Other unexpected UV error. - */ -int uv_get_secret_metadata(const u8 secret_id[UV_SECRET_ID_LEN], - struct uv_secret_list_item_hdr *secret) -{ - struct uv_secret_list *buf; - int rc; - - buf = kzalloc(sizeof(*buf), GFP_KERNEL); - if (!buf) - return -ENOMEM; - rc = find_secret(secret_id, buf, secret); - kfree(buf); - return rc; -} -EXPORT_SYMBOL_GPL(uv_get_secret_metadata); +EXPORT_SYMBOL_GPL(uv_find_secret); /** * uv_retrieve_secret() - get the secret value for the secret index. |