|
dm-mpath: Don't grab work_mutex while probing paths

jira LE-3526
Rebuild_History Non-Buildable kernel-6.12.0-55.20.1.el10_0
commit-author Benjamin Marzinski <[email protected]>
commit 5c977f1023156938915c57d362fddde8fad2b052
Empty-Commit: Cherry-Pick Conflicts during history rebuild.
Will be included in final tarball splat. Ref for failed cherry-pick at:
ciq/ciq_backports/kernel-6.12.0-55.20.1.el10_0/5c977f10.failed

Grabbing the work_mutex keeps probe_active_paths() from running at the
same time as multipath_message(). The only messages that could interfere
with probing the paths are "disable_group", "enable_group", and
"switch_group". These messages could force multipath to pick a new
pathgroup while probe_active_paths() was probing the current pathgroup.
If the multipath device has a hardware handler, and it switches active
pathgroups while there is outstanding IO to a path device, it's possible
that IO to the path will fail, even if the path would be usable if it
was in the active pathgroup. To avoid this, do not clear the current
pathgroup for the *_group messages while probe_active_paths() is
running. Instead set a flag, and probe_active_paths() will clear the
current pathgroup when it finishes probing the paths. For this to work
correctly, multipath needs to check current_pg before next_pg in
choose_pgpath(), but before this patch next_pg was only ever set when
current_pg was cleared, so this doesn't change the current behavior when
paths aren't being probed. Even with this change, it is still possible
to switch pathgroups while the probe is running, but only if all the
paths have failed, and the probe function will skip them as well in this
case.
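
As a minimal sketch (illustrative only, not the actual upstream hunk,
which is not visible in the conflict below; it assumes the
MPATHF_DELAY_PG_SWITCH and MPATHF_NEED_PG_SWITCH flag names used by the
probe code in the diff), the message side of the deferred switch looks
roughly like:

	/*
	 * Sketch: how a *_group message can request a pathgroup switch
	 * without yanking the current pathgroup out from under an
	 * in-flight probe. Illustrative, not the exact upstream change.
	 */
	static void sketch_request_pg_switch(struct multipath *m)
	{
		unsigned long flags;

		spin_lock_irqsave(&m->lock, flags);
		if (!test_bit(MPATHF_DELAY_PG_SWITCH, &m->flags)) {
			/* No probe running: switch immediately, as before. */
			m->current_pgpath = NULL;
			m->current_pg = NULL;
		}
		/* Otherwise probe_active_paths() does the switch when done. */
		set_bit(MPATHF_NEED_PG_SWITCH, &m->flags);
		spin_unlock_irqrestore(&m->lock, flags);
	}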

If multiple DM_MPATH_PROBE_PATHS requests are received at once, there is
no point in repeatedly issuing test IOs. Instead, the later probes
should wait for the current probe to complete. If the current pathgroup
is still the same as the one that was just checked, the other probes
should skip probing and just check the number of valid paths. Finally,
probing the paths should quit early if the multipath device is trying to
suspend, instead of continuing to issue test IOs, delaying the suspend.
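
A minimal sketch of the suspend side (illustrative; only the
m->is_suspending reader is visible in the conflicting hunk below, and
the assumption here is that the setter lives in the mpath suspend path):

	/*
	 * Sketch: flag the device as suspending so that
	 * probe_active_paths() quits early instead of issuing more
	 * test IOs. Illustrative, not the exact upstream change.
	 */
	static void sketch_presuspend(struct dm_target *ti)
	{
		struct multipath *m = ti->private;
		unsigned long flags;

		spin_lock_irqsave(&m->lock, flags);
		m->is_suspending = true;
		spin_unlock_irqrestore(&m->lock, flags);
	}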

While this patch will not change the behavior of existing multipath
users which don't use the DM_MPATH_PROBE_PATHS ioctl, when that ioctl
is used, the behavior of the "disable_group", "enable_group", and
"switch_group" messages can change subtly. When these messages return,
the next IO to the multipath device will no longer be guaranteed to
choose a new pathgroup. Instead, choosing a new pathgroup could be
delayed by an in-progress DM_MPATH_PROBE_PATHS ioctl. The userspace
multipath tools make no assumptions about what will happen to IOs after
sending these messages, so this change will not affect already released
versions of them, even if the DM_MPATH_PROBE_PATHS ioctl is run
alongside them.
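
For context, a hypothetical userspace caller of the ioctl could look
like the sketch below. The DM_MPATH_PROBE_PATHS definition and the
convention of issuing it against the multipath block device node are
assumptions; verify both against the dm-ioctl headers of a kernel that
carries this feature before relying on them:

	/* Hypothetical probe trigger; the device name is made up. */
	#include <fcntl.h>
	#include <stdio.h>
	#include <sys/ioctl.h>
	#include <unistd.h>
	#include <linux/dm-ioctl.h>

	int main(void)
	{
		int fd = open("/dev/mapper/mpatha", O_RDONLY);

		if (fd < 0) {
			perror("open");
			return 1;
		}
		/* Expected to fail with ENOTCONN when no valid path is left. */
		if (ioctl(fd, DM_MPATH_PROBE_PATHS) < 0)
			perror("DM_MPATH_PROBE_PATHS");
		close(fd);
		return 0;
	}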

	Signed-off-by: Benjamin Marzinski <[email protected]>
	Signed-off-by: Mikulas Patocka <[email protected]>
(cherry picked from commit 5c977f1023156938915c57d362fddde8fad2b052)
	Signed-off-by: Jonathan Maple <[email protected]>

# Conflicts:
#	drivers/md/dm-mpath.c
diff --cc drivers/md/dm-mpath.c
index 368606afb6f0,12b7bcae490c..000000000000
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@@ -2021,6 -2039,114 +2039,117 @@@ out
  	return r;
  }
  
++<<<<<<< HEAD
++=======
+ /*
+  * Perform a minimal read from the given path to find out whether the
+  * path still works. If a path error occurs, fail it.
+  */
+ static int probe_path(struct pgpath *pgpath)
+ {
+ 	struct block_device *bdev = pgpath->path.dev->bdev;
+ 	unsigned int read_size = bdev_logical_block_size(bdev);
+ 	struct page *page;
+ 	struct bio *bio;
+ 	blk_status_t status;
+ 	int r = 0;
+ 
+ 	if (WARN_ON_ONCE(read_size > PAGE_SIZE))
+ 		return -EINVAL;
+ 
+ 	page = alloc_page(GFP_KERNEL);
+ 	if (!page)
+ 		return -ENOMEM;
+ 
+ 	/* Perform a minimal read: Sector 0, length read_size */
+ 	bio = bio_alloc(bdev, 1, REQ_OP_READ, GFP_KERNEL);
+ 	if (!bio) {
+ 		r = -ENOMEM;
+ 		goto out;
+ 	}
+ 
+ 	bio->bi_iter.bi_sector = 0;
+ 	__bio_add_page(bio, page, read_size, 0);
+ 	submit_bio_wait(bio);
+ 	status = bio->bi_status;
+ 	bio_put(bio);
+ 
+ 	if (status && blk_path_error(status))
+ 		fail_path(pgpath);
+ 
+ out:
+ 	__free_page(page);
+ 	return r;
+ }
+ 
+ /*
+  * Probe all active paths in current_pg to find out whether they still work.
+  * Fail all paths that do not work.
+  *
+  * Return -ENOTCONN if no valid path is left (even outside of current_pg). We
+  * cannot probe paths in other pgs without switching current_pg, so if valid
+  * paths are only in different pgs, they may or may not work. Additionally
+  * we should not probe paths in a pathgroup that is in the process of
+  * Initializing. Userspace can submit a request and we'll switch and wait
+  * for the pathgroup to be initialized. If the request fails, it may need to
+  * probe again.
+  */
+ static int probe_active_paths(struct multipath *m)
+ {
+ 	struct pgpath *pgpath;
+ 	struct priority_group *pg = NULL;
+ 	unsigned long flags;
+ 	int r = 0;
+ 
+ 	spin_lock_irqsave(&m->lock, flags);
+ 	if (test_bit(MPATHF_DELAY_PG_SWITCH, &m->flags)) {
+ 		wait_event_lock_irq(m->probe_wait,
+ 				    !test_bit(MPATHF_DELAY_PG_SWITCH, &m->flags),
+ 				    m->lock);
+ 		/*
+ 		 * if we waited because a probe was already in progress,
+ 		 * and it probed the current active pathgroup, don't
+ 		 * reprobe. Just return the number of valid paths
+ 		 */
+ 		if (m->current_pg == m->last_probed_pg)
+ 			goto skip_probe;
+ 	}
+ 	if (!m->current_pg || m->is_suspending ||
+ 	    test_bit(MPATHF_QUEUE_IO, &m->flags))
+ 		goto skip_probe;
+ 	set_bit(MPATHF_DELAY_PG_SWITCH, &m->flags);
+ 	pg = m->last_probed_pg = m->current_pg;
+ 	spin_unlock_irqrestore(&m->lock, flags);
+ 
+ 	list_for_each_entry(pgpath, &pg->pgpaths, list) {
+ 		if (pg != READ_ONCE(m->current_pg) ||
+ 		    READ_ONCE(m->is_suspending))
+ 			goto out;
+ 		if (!pgpath->is_active)
+ 			continue;
+ 
+ 		r = probe_path(pgpath);
+ 		if (r < 0)
+ 			goto out;
+ 	}
+ 
+ out:
+ 	spin_lock_irqsave(&m->lock, flags);
+ 	clear_bit(MPATHF_DELAY_PG_SWITCH, &m->flags);
+ 	if (test_and_clear_bit(MPATHF_NEED_PG_SWITCH, &m->flags)) {
+ 		m->current_pgpath = NULL;
+ 		m->current_pg = NULL;
+ 	}
+ skip_probe:
+ 	if (r == 0 && !atomic_read(&m->nr_valid_paths))
+ 		r = -ENOTCONN;
+ 	spin_unlock_irqrestore(&m->lock, flags);
+ 	if (pg)
+ 		wake_up(&m->probe_wait);
+ 	return r;
+ }
+ 
++>>>>>>> 5c977f102315 (dm-mpath: Don't grab work_mutex while probing paths)
  static int multipath_prepare_ioctl(struct dm_target *ti,
  				   struct block_device **bdev,
  				   unsigned int cmd, unsigned long arg,
* Unmerged path drivers/md/dm-mpath.c