summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/hmm.c262
1 files changed, 243 insertions, 19 deletions
diff --git a/mm/hmm.c b/mm/hmm.c
index 082f7b7c0b9e..feac86196a65 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -10,6 +10,7 @@
*/
#include <linux/pagewalk.h>
#include <linux/hmm.h>
+#include <linux/hmm-dma.h>
#include <linux/init.h>
#include <linux/rmap.h>
#include <linux/swap.h>
@@ -23,6 +24,7 @@
#include <linux/sched/mm.h>
#include <linux/jump_label.h>
#include <linux/dma-mapping.h>
+#include <linux/pci-p2pdma.h>
#include <linux/mmu_notifier.h>
#include <linux/memory_hotplug.h>
@@ -39,13 +41,21 @@ enum {
HMM_NEED_ALL_BITS = HMM_NEED_FAULT | HMM_NEED_WRITE_FAULT,
};
+enum {
+ /* These flags are carried from input-to-output */
+ HMM_PFN_INOUT_FLAGS = HMM_PFN_DMA_MAPPED | HMM_PFN_P2PDMA |
+ HMM_PFN_P2PDMA_BUS,
+};
+
static int hmm_pfns_fill(unsigned long addr, unsigned long end,
struct hmm_range *range, unsigned long cpu_flags)
{
unsigned long i = (addr - range->start) >> PAGE_SHIFT;
- for (; addr < end; addr += PAGE_SIZE, i++)
- range->hmm_pfns[i] = cpu_flags;
+ for (; addr < end; addr += PAGE_SIZE, i++) {
+ range->hmm_pfns[i] &= HMM_PFN_INOUT_FLAGS;
+ range->hmm_pfns[i] |= cpu_flags;
+ }
return 0;
}
@@ -202,8 +212,10 @@ static int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
return hmm_vma_fault(addr, end, required_fault, walk);
pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
- for (i = 0; addr < end; addr += PAGE_SIZE, i++, pfn++)
- hmm_pfns[i] = pfn | cpu_flags;
+ for (i = 0; addr < end; addr += PAGE_SIZE, i++, pfn++) {
+ hmm_pfns[i] &= HMM_PFN_INOUT_FLAGS;
+ hmm_pfns[i] |= pfn | cpu_flags;
+ }
return 0;
}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -230,14 +242,14 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
unsigned long cpu_flags;
pte_t pte = ptep_get(ptep);
uint64_t pfn_req_flags = *hmm_pfn;
+ uint64_t new_pfn_flags = 0;
if (pte_none_mostly(pte)) {
required_fault =
hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0);
if (required_fault)
goto fault;
- *hmm_pfn = 0;
- return 0;
+ goto out;
}
if (!pte_present(pte)) {
@@ -253,16 +265,14 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
cpu_flags = HMM_PFN_VALID;
if (is_writable_device_private_entry(entry))
cpu_flags |= HMM_PFN_WRITE;
- *hmm_pfn = swp_offset_pfn(entry) | cpu_flags;
- return 0;
+ new_pfn_flags = swp_offset_pfn(entry) | cpu_flags;
+ goto out;
}
required_fault =
hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0);
- if (!required_fault) {
- *hmm_pfn = 0;
- return 0;
- }
+ if (!required_fault)
+ goto out;
if (!non_swap_entry(entry))
goto fault;
@@ -304,11 +314,13 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
pte_unmap(ptep);
return -EFAULT;
}
- *hmm_pfn = HMM_PFN_ERROR;
- return 0;
+ new_pfn_flags = HMM_PFN_ERROR;
+ goto out;
}
- *hmm_pfn = pte_pfn(pte) | cpu_flags;
+ new_pfn_flags = pte_pfn(pte) | cpu_flags;
+out:
+ *hmm_pfn = (*hmm_pfn & HMM_PFN_INOUT_FLAGS) | new_pfn_flags;
return 0;
fault:
@@ -448,8 +460,10 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
}
pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
- for (i = 0; i < npages; ++i, ++pfn)
- hmm_pfns[i] = pfn | cpu_flags;
+ for (i = 0; i < npages; ++i, ++pfn) {
+ hmm_pfns[i] &= HMM_PFN_INOUT_FLAGS;
+ hmm_pfns[i] |= pfn | cpu_flags;
+ }
goto out_unlock;
}
@@ -507,8 +521,10 @@ static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
}
pfn = pte_pfn(entry) + ((start & ~hmask) >> PAGE_SHIFT);
- for (; addr < end; addr += PAGE_SIZE, i++, pfn++)
- range->hmm_pfns[i] = pfn | cpu_flags;
+ for (; addr < end; addr += PAGE_SIZE, i++, pfn++) {
+ range->hmm_pfns[i] &= HMM_PFN_INOUT_FLAGS;
+ range->hmm_pfns[i] |= pfn | cpu_flags;
+ }
spin_unlock(ptl);
return 0;
@@ -607,3 +623,211 @@ int hmm_range_fault(struct hmm_range *range)
return ret;
}
EXPORT_SYMBOL(hmm_range_fault);
+
+/**
+ * hmm_dma_map_alloc - Allocate HMM map structure
+ * @dev: device to allocate structure for
+ * @map: HMM map to allocate
+ * @nr_entries: number of entries in the map
+ * @dma_entry_size: size of the DMA entry in the map
+ *
+ * Allocate the HMM map structure and all the lists it contains.
+ * Return 0 on success, -ENOMEM on failure.
+ */
+int hmm_dma_map_alloc(struct device *dev, struct hmm_dma_map *map,
+ size_t nr_entries, size_t dma_entry_size)
+{
+ bool dma_need_sync = false;
+ bool use_iova;
+
+ WARN_ON_ONCE(!(nr_entries * PAGE_SIZE / dma_entry_size));
+
+ /*
+ * The HMM API violates our normal DMA buffer ownership rules and can't
+ * transfer buffer ownership. The dma_addressing_limited() check is a
+ * best approximation to ensure no swiotlb buffering happens.
+ */
+#ifdef CONFIG_DMA_NEED_SYNC
+ dma_need_sync = !dev->dma_skip_sync;
+#endif /* CONFIG_DMA_NEED_SYNC */
+ if (dma_need_sync || dma_addressing_limited(dev))
+ return -EOPNOTSUPP;
+
+ map->dma_entry_size = dma_entry_size;
+ map->pfn_list = kvcalloc(nr_entries, sizeof(*map->pfn_list),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!map->pfn_list)
+ return -ENOMEM;
+
+ use_iova = dma_iova_try_alloc(dev, &map->state, 0,
+ nr_entries * PAGE_SIZE);
+ if (!use_iova && dma_need_unmap(dev)) {
+ map->dma_list = kvcalloc(nr_entries, sizeof(*map->dma_list),
+ GFP_KERNEL | __GFP_NOWARN);
+ if (!map->dma_list)
+ goto err_dma;
+ }
+ return 0;
+
+err_dma:
+ kvfree(map->pfn_list);
+ return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(hmm_dma_map_alloc);
+
+/**
+ * hmm_dma_map_free - iFree HMM map structure
+ * @dev: device to free structure from
+ * @map: HMM map containing the various lists and state
+ *
+ * Free the HMM map structure and all the lists it contains.
+ */
+void hmm_dma_map_free(struct device *dev, struct hmm_dma_map *map)
+{
+ if (dma_use_iova(&map->state))
+ dma_iova_free(dev, &map->state);
+ kvfree(map->pfn_list);
+ kvfree(map->dma_list);
+}
+EXPORT_SYMBOL_GPL(hmm_dma_map_free);
+
+/**
+ * hmm_dma_map_pfn - Map a physical HMM page to DMA address
+ * @dev: Device to map the page for
+ * @map: HMM map
+ * @idx: Index into the PFN and dma address arrays
+ * @p2pdma_state: PCI P2P state.
+ *
+ * dma_alloc_iova() allocates IOVA based on the size specified by their use in
+ * iova->size. Call this function after IOVA allocation to link whole @page
+ * to get the DMA address. Note that very first call to this function
+ * will have @offset set to 0 in the IOVA space allocated from
+ * dma_alloc_iova(). For subsequent calls to this function on same @iova,
+ * @offset needs to be advanced by the caller with the size of previous
+ * page that was linked + DMA address returned for the previous page that was
+ * linked by this function.
+ */
+dma_addr_t hmm_dma_map_pfn(struct device *dev, struct hmm_dma_map *map,
+ size_t idx,
+ struct pci_p2pdma_map_state *p2pdma_state)
+{
+ struct dma_iova_state *state = &map->state;
+ dma_addr_t *dma_addrs = map->dma_list;
+ unsigned long *pfns = map->pfn_list;
+ struct page *page = hmm_pfn_to_page(pfns[idx]);
+ phys_addr_t paddr = hmm_pfn_to_phys(pfns[idx]);
+ size_t offset = idx * map->dma_entry_size;
+ unsigned long attrs = 0;
+ dma_addr_t dma_addr;
+ int ret;
+
+ if ((pfns[idx] & HMM_PFN_DMA_MAPPED) &&
+ !(pfns[idx] & HMM_PFN_P2PDMA_BUS)) {
+ /*
+ * We are in this flow when there is a need to resync flags,
+ * for example when page was already linked in prefetch call
+ * with READ flag and now we need to add WRITE flag
+ *
+ * This page was already programmed to HW and we don't want/need
+ * to unlink and link it again just to resync flags.
+ */
+ if (dma_use_iova(state))
+ return state->addr + offset;
+
+ /*
+ * Without dma_need_unmap, the dma_addrs array is NULL, thus we
+ * need to regenerate the address below even if there already
+ * was a mapping. But !dma_need_unmap implies that the
+ * mapping stateless, so this is fine.
+ */
+ if (dma_need_unmap(dev))
+ return dma_addrs[idx];
+
+ /* Continue to remapping */
+ }
+
+ switch (pci_p2pdma_state(p2pdma_state, dev, page)) {
+ case PCI_P2PDMA_MAP_NONE:
+ break;
+ case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
+ attrs |= DMA_ATTR_SKIP_CPU_SYNC;
+ pfns[idx] |= HMM_PFN_P2PDMA;
+ break;
+ case PCI_P2PDMA_MAP_BUS_ADDR:
+ pfns[idx] |= HMM_PFN_P2PDMA_BUS | HMM_PFN_DMA_MAPPED;
+ return pci_p2pdma_bus_addr_map(p2pdma_state, paddr);
+ default:
+ return DMA_MAPPING_ERROR;
+ }
+
+ if (dma_use_iova(state)) {
+ ret = dma_iova_link(dev, state, paddr, offset,
+ map->dma_entry_size, DMA_BIDIRECTIONAL,
+ attrs);
+ if (ret)
+ goto error;
+
+ ret = dma_iova_sync(dev, state, offset, map->dma_entry_size);
+ if (ret) {
+ dma_iova_unlink(dev, state, offset, map->dma_entry_size,
+ DMA_BIDIRECTIONAL, attrs);
+ goto error;
+ }
+
+ dma_addr = state->addr + offset;
+ } else {
+ if (WARN_ON_ONCE(dma_need_unmap(dev) && !dma_addrs))
+ goto error;
+
+ dma_addr = dma_map_page(dev, page, 0, map->dma_entry_size,
+ DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(dev, dma_addr))
+ goto error;
+
+ if (dma_need_unmap(dev))
+ dma_addrs[idx] = dma_addr;
+ }
+ pfns[idx] |= HMM_PFN_DMA_MAPPED;
+ return dma_addr;
+error:
+ pfns[idx] &= ~HMM_PFN_P2PDMA;
+ return DMA_MAPPING_ERROR;
+
+}
+EXPORT_SYMBOL_GPL(hmm_dma_map_pfn);
+
+/**
+ * hmm_dma_unmap_pfn - Unmap a physical HMM page from DMA address
+ * @dev: Device to unmap the page from
+ * @map: HMM map
+ * @idx: Index of the PFN to unmap
+ *
+ * Returns true if the PFN was mapped and has been unmapped, false otherwise.
+ */
+bool hmm_dma_unmap_pfn(struct device *dev, struct hmm_dma_map *map, size_t idx)
+{
+ const unsigned long valid_dma = HMM_PFN_VALID | HMM_PFN_DMA_MAPPED;
+ struct dma_iova_state *state = &map->state;
+ dma_addr_t *dma_addrs = map->dma_list;
+ unsigned long *pfns = map->pfn_list;
+ unsigned long attrs = 0;
+
+ if ((pfns[idx] & valid_dma) != valid_dma)
+ return false;
+
+ if (pfns[idx] & HMM_PFN_P2PDMA_BUS)
+ ; /* no need to unmap bus address P2P mappings */
+ else if (dma_use_iova(state)) {
+ if (pfns[idx] & HMM_PFN_P2PDMA)
+ attrs |= DMA_ATTR_SKIP_CPU_SYNC;
+ dma_iova_unlink(dev, state, idx * map->dma_entry_size,
+ map->dma_entry_size, DMA_BIDIRECTIONAL, attrs);
+ } else if (dma_need_unmap(dev))
+ dma_unmap_page(dev, dma_addrs[idx], map->dma_entry_size,
+ DMA_BIDIRECTIONAL);
+
+ pfns[idx] &=
+ ~(HMM_PFN_DMA_MAPPED | HMM_PFN_P2PDMA | HMM_PFN_P2PDMA_BUS);
+ return true;
+}
+EXPORT_SYMBOL_GPL(hmm_dma_unmap_pfn);