diff options
| author | Jijie Shao <shaojijie@huawei.com> | 2025-08-06 18:27:56 +0800 | 
|---|---|---|
| committer | Jakub Kicinski <kuba@kernel.org> | 2025-08-08 11:48:49 -0700 | 
| commit | c875503a9b9082928d7d3fc60b5400d16fbfae4e (patch) | |
| tree | b01f5ffaa28f0d1989fb44c1b30c528d9c131091 | |
| parent | f6a2a31043f9e3446a322adf66af7a110e7670b9 (diff) | |
net: hibmcge: fix rtnl deadlock issue
Currently, the hibmcge netdev acquires the rtnl_lock in
pci_error_handlers.reset_prepare() and releases it in
pci_error_handlers.reset_done().
However, in the PCI framework:
pci_reset_bus - __pci_reset_slot - pci_slot_save_and_disable_locked -
 pci_dev_save_and_disable - err_handler->reset_prepare(dev);
In pci_slot_save_and_disable_locked():
	list_for_each_entry(dev, &slot->bus->devices, bus_list) {
		if (!dev->slot || dev->slot != slot)
			continue;
		pci_dev_save_and_disable(dev);
		if (dev->subordinate)
			pci_bus_save_and_disable_locked(dev->subordinate);
	}
This will iterate through all devices under the current bus and execute
err_handler->reset_prepare(), causing two devices of the hibmcge driver
to sequentially request the rtnl_lock, leading to a deadlock.
Since the driver now executes netif_device_detach()
before the reset process, it will not run concurrently with
other netdev APIs, so there is no need to hold the rtnl_lock now.
Therefore, this patch removes the rtnl_lock during the reset process and
adjusts the position of HBG_NIC_STATE_RESETTING to ensure
that multiple resets are not executed concurrently.
Fixes: 3f5a61f6d504f ("net: hibmcge: Add reset supported in this module")
Signed-off-by: Jijie Shao <shaojijie@huawei.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
| -rw-r--r-- | drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c | 14 | 
1 files changed, 5 insertions, 9 deletions
| diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c index 503cfbfb4a8a..83cf75bf7a17 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_err.c @@ -53,9 +53,11 @@ static int hbg_reset_prepare(struct hbg_priv *priv, enum hbg_reset_type type)  {  	int ret; -	ASSERT_RTNL(); +	if (test_and_set_bit(HBG_NIC_STATE_RESETTING, &priv->state)) +		return -EBUSY;  	if (netif_running(priv->netdev)) { +		clear_bit(HBG_NIC_STATE_RESETTING, &priv->state);  		dev_warn(&priv->pdev->dev,  			 "failed to reset because port is up\n");  		return -EBUSY; @@ -64,7 +66,6 @@ static int hbg_reset_prepare(struct hbg_priv *priv, enum hbg_reset_type type)  	netif_device_detach(priv->netdev);  	priv->reset_type = type; -	set_bit(HBG_NIC_STATE_RESETTING, &priv->state);  	clear_bit(HBG_NIC_STATE_RESET_FAIL, &priv->state);  	ret = hbg_hw_event_notify(priv, HBG_HW_EVENT_RESET);  	if (ret) { @@ -84,29 +85,26 @@ static int hbg_reset_done(struct hbg_priv *priv, enum hbg_reset_type type)  	    type != priv->reset_type)  		return 0; -	ASSERT_RTNL(); - -	clear_bit(HBG_NIC_STATE_RESETTING, &priv->state);  	ret = hbg_rebuild(priv);  	if (ret) {  		priv->stats.reset_fail_cnt++;  		set_bit(HBG_NIC_STATE_RESET_FAIL, &priv->state); +		clear_bit(HBG_NIC_STATE_RESETTING, &priv->state);  		dev_err(&priv->pdev->dev, "failed to rebuild after reset\n");  		return ret;  	}  	netif_device_attach(priv->netdev); +	clear_bit(HBG_NIC_STATE_RESETTING, &priv->state);  	dev_info(&priv->pdev->dev, "reset done\n");  	return ret;  } -/* must be protected by rtnl lock */  int hbg_reset(struct hbg_priv *priv)  {  	int ret; -	ASSERT_RTNL();  	ret = hbg_reset_prepare(priv, HBG_RESET_TYPE_FUNCTION);  	if (ret)  		return ret; @@ -171,7 +169,6 @@ static void hbg_pci_err_reset_prepare(struct pci_dev *pdev)  	struct net_device *netdev = pci_get_drvdata(pdev);  	struct hbg_priv *priv = netdev_priv(netdev); -	rtnl_lock(); 
 	hbg_reset_prepare(priv, HBG_RESET_TYPE_FLR);  } @@ -181,7 +178,6 @@ static void hbg_pci_err_reset_done(struct pci_dev *pdev)  	struct hbg_priv *priv = netdev_priv(netdev);  	hbg_reset_done(priv, HBG_RESET_TYPE_FLR); -	rtnl_unlock();  }  static const struct pci_error_handlers hbg_pci_err_handler = { | 
