summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/driver-api/cxl/allocation/page-allocator.rst31
-rw-r--r--drivers/acpi/numa/hmat.c11
-rw-r--r--drivers/cxl/core/cdat.c4
-rw-r--r--drivers/cxl/core/hdm.c3
-rw-r--r--drivers/cxl/core/pci.c87
-rw-r--r--drivers/cxl/core/region.c35
-rw-r--r--drivers/cxl/cxl.h8
-rw-r--r--drivers/cxl/cxlpci.h1
-rw-r--r--drivers/cxl/pci.c2
-rw-r--r--tools/testing/cxl/Kbuild1
-rw-r--r--tools/testing/cxl/test/cxl.c32
-rw-r--r--tools/testing/cxl/test/mock.c15
-rw-r--r--tools/testing/cxl/test/mock.h1
13 files changed, 55 insertions, 176 deletions
diff --git a/Documentation/driver-api/cxl/allocation/page-allocator.rst b/Documentation/driver-api/cxl/allocation/page-allocator.rst
index 7b8fe1b8d5bb..3fa584a248bd 100644
--- a/Documentation/driver-api/cxl/allocation/page-allocator.rst
+++ b/Documentation/driver-api/cxl/allocation/page-allocator.rst
@@ -41,37 +41,6 @@ To simplify this, the page allocator will prefer :code:`ZONE_MOVABLE` over
will fallback to allocate from :code:`ZONE_NORMAL`.
-Zone and Node Quirks
-====================
-Let's consider a configuration where the local DRAM capacity is largely onlined
-into :code:`ZONE_NORMAL`, with no :code:`ZONE_MOVABLE` capacity present. The
-CXL capacity has the opposite configuration - all onlined in
-:code:`ZONE_MOVABLE`.
-
-Under the default allocation policy, the page allocator will completely skip
-:code:`ZONE_MOVABLE` as a valid allocation target. This is because, as of
-Linux v6.15, the page allocator does (approximately) the following: ::
-
- for (each zone in local_node):
-
- for (each node in fallback_order):
-
- attempt_allocation(gfp_flags);
-
-Because the local node does not have :code:`ZONE_MOVABLE`, the CXL node is
-functionally unreachable for direct allocation. As a result, the only way
-for CXL capacity to be used is via `demotion` in the reclaim path.
-
-This configuration also means that if the DRAM ndoe has :code:`ZONE_MOVABLE`
-capacity - when that capacity is depleted, the page allocator will actually
-prefer CXL :code:`ZONE_MOVABLE` pages over DRAM :code:`ZONE_NORMAL` pages.
-
-We may wish to invert this priority in future Linux versions.
-
-If `demotion` and `swap` are disabled, Linux will begin to cause OOM crashes
-when the DRAM nodes are depleted. See the reclaim section for more details.
-
-
CGroups and CPUSets
===================
Finally, assuming CXL memory is reachable via the page allocation (i.e. onlined
diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c
index 5a36d57289b4..1dc73d20d989 100644
--- a/drivers/acpi/numa/hmat.c
+++ b/drivers/acpi/numa/hmat.c
@@ -888,12 +888,13 @@ static void hmat_register_target(struct memory_target *target)
* Register generic port perf numbers. The nid may not be
* initialized and is still NUMA_NO_NODE.
*/
- mutex_lock(&target_lock);
- if (*(u16 *)target->gen_port_device_handle) {
- hmat_update_generic_target(target);
- target->registered = true;
+ scoped_guard(mutex, &target_lock) {
+ if (*(u16 *)target->gen_port_device_handle) {
+ hmat_update_generic_target(target);
+ target->registered = true;
+ return;
+ }
}
- mutex_unlock(&target_lock);
/*
* Skip offline nodes. This can happen when memory
diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c
index c4bd6e8a0cf0..7120b5f2e31f 100644
--- a/drivers/cxl/core/cdat.c
+++ b/drivers/cxl/core/cdat.c
@@ -826,7 +826,7 @@ static struct xarray *cxl_switch_gather_bandwidth(struct cxl_region *cxlr,
cxl_coordinates_combine(coords, coords, ctx->coord);
/*
- * Take the min of the calculated bandwdith and the upstream
+ * Take the min of the calculated bandwidth and the upstream
* switch SSLBIS bandwidth if there's a parent switch
*/
if (!is_root)
@@ -949,7 +949,7 @@ static struct xarray *cxl_hb_gather_bandwidth(struct xarray *xa)
/**
* cxl_region_update_bandwidth - Update the bandwidth access coordinates of a region
* @cxlr: The region being operated on
- * @input_xa: xarray holds cxl_perf_ctx wht calculated bandwidth per ACPI0017 instance
+ * @input_xa: xarray holds cxl_perf_ctx with calculated bandwidth per ACPI0017 instance
*/
static void cxl_region_update_bandwidth(struct cxl_region *cxlr,
struct xarray *input_xa)
diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index d3a094ca01ad..1c5d2022c87a 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -905,6 +905,9 @@ static void cxl_decoder_reset(struct cxl_decoder *cxld)
if ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)
return;
+ if (test_bit(CXL_DECODER_F_LOCK, &cxld->flags))
+ return;
+
if (port->commit_end == id)
cxl_port_commit_reap(cxld);
else
diff --git a/drivers/cxl/core/pci.c b/drivers/cxl/core/pci.c
index 18825e1505d6..5b023a0178a4 100644
--- a/drivers/cxl/core/pci.c
+++ b/drivers/cxl/core/pci.c
@@ -71,85 +71,6 @@ struct cxl_dport *__devm_cxl_add_dport_by_dev(struct cxl_port *port,
}
EXPORT_SYMBOL_NS_GPL(__devm_cxl_add_dport_by_dev, "CXL");
-struct cxl_walk_context {
- struct pci_bus *bus;
- struct cxl_port *port;
- int type;
- int error;
- int count;
-};
-
-static int match_add_dports(struct pci_dev *pdev, void *data)
-{
- struct cxl_walk_context *ctx = data;
- struct cxl_port *port = ctx->port;
- int type = pci_pcie_type(pdev);
- struct cxl_register_map map;
- struct cxl_dport *dport;
- u32 lnkcap, port_num;
- int rc;
-
- if (pdev->bus != ctx->bus)
- return 0;
- if (!pci_is_pcie(pdev))
- return 0;
- if (type != ctx->type)
- return 0;
- if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP,
- &lnkcap))
- return 0;
-
- rc = cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
- if (rc)
- dev_dbg(&port->dev, "failed to find component registers\n");
-
- port_num = FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap);
- dport = devm_cxl_add_dport(port, &pdev->dev, port_num, map.resource);
- if (IS_ERR(dport)) {
- ctx->error = PTR_ERR(dport);
- return PTR_ERR(dport);
- }
- ctx->count++;
-
- return 0;
-}
-
-/**
- * devm_cxl_port_enumerate_dports - enumerate downstream ports of the upstream port
- * @port: cxl_port whose ->uport_dev is the upstream of dports to be enumerated
- *
- * Returns a positive number of dports enumerated or a negative error
- * code.
- */
-int devm_cxl_port_enumerate_dports(struct cxl_port *port)
-{
- struct pci_bus *bus = cxl_port_to_pci_bus(port);
- struct cxl_walk_context ctx;
- int type;
-
- if (!bus)
- return -ENXIO;
-
- if (pci_is_root_bus(bus))
- type = PCI_EXP_TYPE_ROOT_PORT;
- else
- type = PCI_EXP_TYPE_DOWNSTREAM;
-
- ctx = (struct cxl_walk_context) {
- .port = port,
- .bus = bus,
- .type = type,
- };
- pci_walk_bus(bus, match_add_dports, &ctx);
-
- if (ctx.count == 0)
- return -ENODEV;
- if (ctx.error)
- return ctx.error;
- return ctx.count;
-}
-EXPORT_SYMBOL_NS_GPL(devm_cxl_port_enumerate_dports, "CXL");
-
static int cxl_dvsec_mem_range_valid(struct cxl_dev_state *cxlds, int id)
{
struct pci_dev *pdev = to_pci_dev(cxlds->dev);
@@ -1217,6 +1138,14 @@ int cxl_gpf_port_setup(struct cxl_dport *dport)
return 0;
}
+struct cxl_walk_context {
+ struct pci_bus *bus;
+ struct cxl_port *port;
+ int type;
+ int error;
+ int count;
+};
+
static int count_dports(struct pci_dev *pdev, void *data)
{
struct cxl_walk_context *ctx = data;
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index b06fee1978ba..0482144200d8 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -245,6 +245,9 @@ static void cxl_region_decode_reset(struct cxl_region *cxlr, int count)
struct cxl_region_params *p = &cxlr->params;
int i;
+ if (test_bit(CXL_REGION_F_LOCK, &cxlr->flags))
+ return;
+
/*
* Before region teardown attempt to flush, evict any data cached for
* this region, or scream loudly about missing arch / platform support
@@ -419,6 +422,9 @@ static ssize_t commit_store(struct device *dev, struct device_attribute *attr,
return len;
}
+ if (test_bit(CXL_REGION_F_LOCK, &cxlr->flags))
+ return -EPERM;
+
rc = queue_reset(cxlr);
if (rc)
return rc;
@@ -838,16 +844,16 @@ static int match_free_decoder(struct device *dev, const void *data)
return 1;
}
-static bool region_res_match_cxl_range(const struct cxl_region_params *p,
- const struct range *range)
+static bool spa_maps_hpa(const struct cxl_region_params *p,
+ const struct range *range)
{
if (!p->res)
return false;
/*
- * If an extended linear cache region then the CXL range is assumed
- * to be fronted by the DRAM range in current known implementation.
- * This assumption will be made until a variant implementation exists.
+ * The extended linear cache region is constructed by a 1:1 ratio
+ * where the SPA maps equal amounts of DRAM and CXL HPA capacity with
+ * CXL decoders at the high end of the SPA range.
*/
return p->res->start + p->cache_size == range->start &&
p->res->end == range->end;
@@ -865,7 +871,7 @@ static int match_auto_decoder(struct device *dev, const void *data)
cxld = to_cxl_decoder(dev);
r = &cxld->hpa_range;
- if (region_res_match_cxl_range(p, r))
+ if (spa_maps_hpa(p, r))
return 1;
return 0;
@@ -1059,6 +1065,16 @@ static int cxl_rr_assign_decoder(struct cxl_port *port, struct cxl_region *cxlr,
return 0;
}
+static void cxl_region_set_lock(struct cxl_region *cxlr,
+ struct cxl_decoder *cxld)
+{
+ if (!test_bit(CXL_DECODER_F_LOCK, &cxld->flags))
+ return;
+
+ set_bit(CXL_REGION_F_LOCK, &cxlr->flags);
+ clear_bit(CXL_REGION_F_NEEDS_RESET, &cxlr->flags);
+}
+
/**
* cxl_port_attach_region() - track a region's interest in a port by endpoint
* @port: port to add a new region reference 'struct cxl_region_ref'
@@ -1170,6 +1186,8 @@ static int cxl_port_attach_region(struct cxl_port *port,
}
}
+ cxl_region_set_lock(cxlr, cxld);
+
rc = cxl_rr_ep_add(cxl_rr, cxled);
if (rc) {
dev_dbg(&cxlr->dev,
@@ -1465,7 +1483,7 @@ static int cxl_port_setup_targets(struct cxl_port *port,
if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) {
if (cxld->interleave_ways != iw ||
(iw > 1 && cxld->interleave_granularity != ig) ||
- !region_res_match_cxl_range(p, &cxld->hpa_range) ||
+ !spa_maps_hpa(p, &cxld->hpa_range) ||
((cxld->flags & CXL_DECODER_F_ENABLE) == 0)) {
dev_err(&cxlr->dev,
"%s:%s %s expected iw: %d ig: %d %pr\n",
@@ -2439,6 +2457,7 @@ static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int i
dev->bus = &cxl_bus_type;
dev->type = &cxl_region_type;
cxlr->id = id;
+ cxl_region_set_lock(cxlr, &cxlrd->cxlsd.cxld);
return cxlr;
}
@@ -3398,7 +3417,7 @@ static int match_region_by_range(struct device *dev, const void *data)
p = &cxlr->params;
guard(rwsem_read)(&cxl_rwsem.region);
- return region_res_match_cxl_range(p, r);
+ return spa_maps_hpa(p, r);
}
static int cxl_extended_linear_cache_resize(struct cxl_region *cxlr,
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 231ddccf8977..6382f1983865 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -517,6 +517,14 @@ enum cxl_partition_mode {
*/
#define CXL_REGION_F_NEEDS_RESET 1
+/*
+ * Indicate whether this region is locked due to 1 or more decoders that have
+ * been locked. The approach of all or nothing is taken with regard to the
+ * locked attribute. CXL_REGION_F_NEEDS_RESET should not be set if this flag is
+ * set.
+ */
+#define CXL_REGION_F_LOCK 2
+
/**
* struct cxl_region - CXL region
* @dev: This region's device
diff --git a/drivers/cxl/cxlpci.h b/drivers/cxl/cxlpci.h
index 7ae621e618e7..1d526bea8431 100644
--- a/drivers/cxl/cxlpci.h
+++ b/drivers/cxl/cxlpci.h
@@ -127,7 +127,6 @@ static inline bool cxl_pci_flit_256(struct pci_dev *pdev)
return lnksta2 & PCI_EXP_LNKSTA2_FLIT;
}
-int devm_cxl_port_enumerate_dports(struct cxl_port *port);
struct cxl_dev_state;
void read_cdat_data(struct cxl_port *port);
void cxl_cor_error_detected(struct pci_dev *pdev);
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index bd100ac31672..0be4e508affe 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -136,7 +136,7 @@ static irqreturn_t cxl_pci_mbox_irq(int irq, void *id)
if (opcode == CXL_MBOX_OP_SANITIZE) {
mutex_lock(&cxl_mbox->mbox_mutex);
if (mds->security.sanitize_node)
- mod_delayed_work(system_wq, &mds->security.poll_dwork, 0);
+ mod_delayed_work(system_percpu_wq, &mds->security.poll_dwork, 0);
mutex_unlock(&cxl_mbox->mbox_mutex);
} else {
/* short-circuit the wait in __cxl_pci_mbox_send_cmd() */
diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild
index 0d5ce4b74b9f..3dae06ac7fba 100644
--- a/tools/testing/cxl/Kbuild
+++ b/tools/testing/cxl/Kbuild
@@ -4,7 +4,6 @@ ldflags-y += --wrap=is_acpi_device_node
ldflags-y += --wrap=acpi_evaluate_integer
ldflags-y += --wrap=acpi_pci_find_root
ldflags-y += --wrap=nvdimm_bus_register
-ldflags-y += --wrap=devm_cxl_port_enumerate_dports
ldflags-y += --wrap=cxl_await_media_ready
ldflags-y += --wrap=devm_cxl_add_rch_dport
ldflags-y += --wrap=cxl_rcd_component_reg_phys
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index 2d135ca533d0..10f9b83a9443 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -995,37 +995,6 @@ static int get_port_array(struct cxl_port *port,
return 0;
}
-static int mock_cxl_port_enumerate_dports(struct cxl_port *port)
-{
- struct platform_device **array;
- int i, array_size;
- int rc;
-
- rc = get_port_array(port, &array, &array_size);
- if (rc)
- return rc;
-
- for (i = 0; i < array_size; i++) {
- struct platform_device *pdev = array[i];
- struct cxl_dport *dport;
-
- if (pdev->dev.parent != port->uport_dev) {
- dev_dbg(&port->dev, "%s: mismatch parent %s\n",
- dev_name(port->uport_dev),
- dev_name(pdev->dev.parent));
- continue;
- }
-
- dport = devm_cxl_add_dport(port, &pdev->dev, pdev->id,
- CXL_RESOURCE_NONE);
-
- if (IS_ERR(dport))
- return PTR_ERR(dport);
- }
-
- return 0;
-}
-
static struct cxl_dport *mock_cxl_add_dport_by_dev(struct cxl_port *port,
struct device *dport_dev)
{
@@ -1114,7 +1083,6 @@ static struct cxl_mock_ops cxl_mock_ops = {
.acpi_pci_find_root = mock_acpi_pci_find_root,
.devm_cxl_switch_port_decoders_setup = mock_cxl_switch_port_decoders_setup,
.devm_cxl_endpoint_decoders_setup = mock_cxl_endpoint_decoders_setup,
- .devm_cxl_port_enumerate_dports = mock_cxl_port_enumerate_dports,
.cxl_endpoint_parse_cdat = mock_cxl_endpoint_parse_cdat,
.devm_cxl_add_dport_by_dev = mock_cxl_add_dport_by_dev,
.list = LIST_HEAD_INIT(cxl_mock_ops.list),
diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c
index 995269a75cbd..6fd4edb9215c 100644
--- a/tools/testing/cxl/test/mock.c
+++ b/tools/testing/cxl/test/mock.c
@@ -172,21 +172,6 @@ int __wrap_devm_cxl_endpoint_decoders_setup(struct cxl_port *port)
}
EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_endpoint_decoders_setup, "CXL");
-int __wrap_devm_cxl_port_enumerate_dports(struct cxl_port *port)
-{
- int rc, index;
- struct cxl_mock_ops *ops = get_cxl_mock_ops(&index);
-
- if (ops && ops->is_mock_port(port->uport_dev))
- rc = ops->devm_cxl_port_enumerate_dports(port);
- else
- rc = devm_cxl_port_enumerate_dports(port);
- put_cxl_mock_ops(index);
-
- return rc;
-}
-EXPORT_SYMBOL_NS_GPL(__wrap_devm_cxl_port_enumerate_dports, "CXL");
-
int __wrap_cxl_await_media_ready(struct cxl_dev_state *cxlds)
{
int rc, index;
diff --git a/tools/testing/cxl/test/mock.h b/tools/testing/cxl/test/mock.h
index 4ed932e76aae..580f38386224 100644
--- a/tools/testing/cxl/test/mock.h
+++ b/tools/testing/cxl/test/mock.h
@@ -19,7 +19,6 @@ struct cxl_mock_ops {
bool (*is_mock_bus)(struct pci_bus *bus);
bool (*is_mock_port)(struct device *dev);
bool (*is_mock_dev)(struct device *dev);
- int (*devm_cxl_port_enumerate_dports)(struct cxl_port *port);
int (*devm_cxl_switch_port_decoders_setup)(struct cxl_port *port);
int (*devm_cxl_endpoint_decoders_setup)(struct cxl_port *port);
void (*cxl_endpoint_parse_cdat)(struct cxl_port *port);