1/*	$OpenBSD: mpath.c,v 1.58 2024/05/13 01:15:53 jsg Exp $ */
2
3/*
4 * Copyright (c) 2009 David Gwynne <dlg@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18
19#include <sys/param.h>
20#include <sys/systm.h>
21#include <sys/kernel.h>
22#include <sys/malloc.h>
23#include <sys/device.h>
24#include <sys/conf.h>
25#include <sys/queue.h>
26#include <sys/rwlock.h>
27#include <sys/ioctl.h>
28
29#include <scsi/scsi_all.h>
30#include <scsi/scsiconf.h>
31#include <scsi/mpathvar.h>
32
/* Width (number of targets) of the virtual mpath scsibus. */
#define MPATH_BUSWIDTH 256

int		mpath_match(struct device *, void *, void *);
void		mpath_attach(struct device *, struct device *, void *);

/* List of mpath_path structures; one list per path group. */
TAILQ_HEAD(mpath_paths, mpath_path);
39
/*
 * A group of paths to a device that share the same state (e.g. the same
 * target port group on the hardware).  Paths within a group are considered
 * equivalent; failover iterates over groups.
 */
struct mpath_group {
	TAILQ_ENTRY(mpath_group) g_entry;	/* entry on mpath_dev d_groups */
	struct mpath_paths	 g_paths;	/* paths in this group */
	struct mpath_dev	*g_dev;		/* backpointer to the device */
	u_int			 g_id;		/* group id from the path driver */
};
TAILQ_HEAD(mpath_groups, mpath_group);
47
/*
 * A single logical device reachable over one or more paths.  The lists and
 * the round-robin pointer below are protected by d_mtx.
 */
struct mpath_dev {
	struct mutex		 d_mtx;		/* protects the fields below */

	struct scsi_xfer_list	 d_xfers;	/* xfers waiting for a path */
	struct mpath_path	*d_next_path;	/* round-robin cursor */

	struct mpath_groups	 d_groups;	/* active group is at the head */

	struct mpath_group	*d_failover_iter; /* group being probed */
	struct timeout		 d_failover_tmo;  /* retry failover later */
	u_int			 d_failover;	  /* pending-failover counter */

	const struct mpath_ops	*d_ops;		/* hardware-specific callbacks */
	struct devid		*d_id;		/* identity shared by all paths */
};
63
/* Softc for the single mpath pseudo-bus; one slot per virtual target. */
struct mpath_softc {
	struct device		sc_dev;
	struct scsibus_softc	*sc_scsibus;
	struct mpath_dev	*sc_devs[MPATH_BUSWIDTH];
};
#define DEVNAME(_s) ((_s)->sc_dev.dv_xname)

/* There is at most one mpath instance; set by mpath_attach(). */
struct mpath_softc	*mpath;
72
/* Autoconf attachment glue (size, match, attach). */
const struct cfattach mpath_ca = {
	sizeof(struct mpath_softc),
	mpath_match,
	mpath_attach
};

/* Autoconf driver definition (devices, name, class). */
struct cfdriver mpath_cd = {
	NULL,
	"mpath",
	DV_DULL
};
84
void		mpath_cmd(struct scsi_xfer *);
void		mpath_minphys(struct buf *, struct scsi_link *);
int		mpath_probe(struct scsi_link *);

struct mpath_path *mpath_next_path(struct mpath_dev *);
void		mpath_done(struct scsi_xfer *);

void		mpath_failover(struct mpath_dev *);
void		mpath_failover_start(void *);
void		mpath_failover_check(struct mpath_dev *);

/* Adapter entry points for the virtual mpath scsibus. */
const struct scsi_adapter mpath_switch = {
	mpath_cmd, NULL, mpath_probe, NULL, NULL
};

void		mpath_xs_stuffup(struct scsi_xfer *);
101
int
mpath_match(struct device *parent, void *match, void *aux)
{
	/* Pseudo-device: always matches its root attachment. */
	return (1);
}
107
void
mpath_attach(struct device *parent, struct device *self, void *aux)
{
	struct mpath_softc		*sc = (struct mpath_softc *)self;
	struct scsibus_attach_args	saa;

	/* Publish the singleton before the bus scan can call back into us. */
	mpath = sc;

	printf("\n");

	saa.saa_adapter = &mpath_switch;
	saa.saa_adapter_softc = sc;
	saa.saa_adapter_target = SDEV_NO_ADAPTER_TARGET;
	saa.saa_adapter_buswidth = MPATH_BUSWIDTH;
	saa.saa_luns = 1;
	saa.saa_openings = 1024; /* XXX magical */
	saa.saa_pool = NULL;
	saa.saa_quirks = saa.saa_flags = 0;
	saa.saa_wwpn = saa.saa_wwnn = 0;

	/* Attach the virtual scsibus that mpath devices appear on. */
	sc->sc_scsibus = (struct scsibus_softc *)config_found(&sc->sc_dev,
	    &saa, scsiprint);
}
131
/* Fail an xfer with a generic driver error and complete it. */
void
mpath_xs_stuffup(struct scsi_xfer *xs)
{
	xs->error = XS_DRIVER_STUFFUP;
	scsi_done(xs);
}
138
139int
140mpath_probe(struct scsi_link *link)
141{
142	struct mpath_softc *sc = link->bus->sb_adapter_softc;
143	struct mpath_dev *d = sc->sc_devs[link->target];
144
145	if (link->lun != 0 || d == NULL)
146		return (ENXIO);
147
148	link->id = devid_copy(d->d_id);
149
150	return (0);
151}
152
153struct mpath_path *
154mpath_next_path(struct mpath_dev *d)
155{
156	struct mpath_group *g;
157	struct mpath_path *p;
158
159#ifdef DIAGNOSTIC
160	if (d == NULL)
161		panic("%s: d is NULL", __func__);
162#endif /* DIAGNOSTIC */
163
164	p = d->d_next_path;
165	if (p != NULL) {
166		d->d_next_path = TAILQ_NEXT(p, p_entry);
167		if (d->d_next_path == NULL &&
168		    (g = TAILQ_FIRST(&d->d_groups)) != NULL)
169			d->d_next_path = TAILQ_FIRST(&g->g_paths);
170	}
171
172	return (p);
173}
174
/*
 * Adapter cmd entry point.  Polled commands are cloned onto a real path and
 * executed synchronously; everything else is queued on the device and a
 * path's xfer handler is scheduled to drain the queue (see mpath_start()).
 */
void
mpath_cmd(struct scsi_xfer *xs)
{
	struct scsi_link *link = xs->sc_link;
	struct mpath_softc *sc = link->bus->sb_adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_path *p;
	struct scsi_xfer *mxs;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("mpath_cmd issued against nonexistent device");
#endif /* DIAGNOSTIC */

	if (ISSET(xs->flags, SCSI_POLL)) {
		mtx_enter(&d->d_mtx);
		p = mpath_next_path(d);
		mtx_leave(&d->d_mtx);
		if (p == NULL) {
			/* no usable path right now */
			mpath_xs_stuffup(xs);
			return;
		}

		mxs = scsi_xs_get(p->p_link, xs->flags);
		if (mxs == NULL) {
			mpath_xs_stuffup(xs);
			return;
		}

		/* Clone the command onto the real path's xfer. */
		memcpy(&mxs->cmd, &xs->cmd, xs->cmdlen);
		mxs->cmdlen = xs->cmdlen;
		mxs->data = xs->data;
		mxs->datalen = xs->datalen;
		mxs->retries = xs->retries;
		mxs->timeout = xs->timeout;
		mxs->bp = xs->bp;

		scsi_xs_sync(mxs);

		/* Propagate the completion status back to the caller's xfer. */
		xs->error = mxs->error;
		xs->status = mxs->status;
		xs->resid = mxs->resid;

		memcpy(&xs->sense, &mxs->sense, sizeof(xs->sense));

		scsi_xs_put(mxs);
		scsi_done(xs);
		return;
	}

	/* Async: queue the xfer and kick a path handler to service it. */
	mtx_enter(&d->d_mtx);
	SIMPLEQ_INSERT_TAIL(&d->d_xfers, xs, xfer_list);
	p = mpath_next_path(d);
	mtx_leave(&d->d_mtx);

	if (p != NULL)
		scsi_xsh_add(&p->p_xsh);
233
/*
 * Path xfer handler: called with an xfer (mxs) allocated on the real path.
 * Dequeue the next pending device xfer, clone it onto mxs and dispatch it.
 * Reschedules itself while more xfers remain queued.
 */
void
mpath_start(struct mpath_path *p, struct scsi_xfer *mxs)
{
	struct mpath_dev *d = p->p_group->g_dev;
	struct scsi_xfer *xs;
	int addxsh = 0;

	/* Path is going away or was never fully attached. */
	if (ISSET(p->p_link->state, SDEV_S_DYING) || d == NULL)
		goto fail;

	mtx_enter(&d->d_mtx);
	xs = SIMPLEQ_FIRST(&d->d_xfers);
	if (xs != NULL) {
		SIMPLEQ_REMOVE_HEAD(&d->d_xfers, xfer_list);
		/* more work queued: reschedule after dispatching this one */
		if (!SIMPLEQ_EMPTY(&d->d_xfers))
			addxsh = 1;
	}
	mtx_leave(&d->d_mtx);

	if (xs == NULL)
		goto fail;

	/* Clone the pending command onto the path's xfer. */
	memcpy(&mxs->cmd, &xs->cmd, xs->cmdlen);
	mxs->cmdlen = xs->cmdlen;
	mxs->data = xs->data;
	mxs->datalen = xs->datalen;
	mxs->retries = xs->retries;
	mxs->timeout = xs->timeout;
	mxs->bp = xs->bp;
	mxs->flags = xs->flags;

	/* Remember the original xfer so mpath_done() can complete it. */
	mxs->cookie = xs;
	mxs->done = mpath_done;

	scsi_xs_exec(mxs);

	if (addxsh)
		scsi_xsh_add(&p->p_xsh);

	return;
fail:
	scsi_xs_put(mxs);
}
277
/*
 * Completion handler for xfers dispatched by mpath_start().  Path errors
 * (selection timeout, reset) requeue the original xfer and retry it on the
 * next path; sense data may trigger a failover via the hardware ops.
 * Everything else completes the original xfer with the path's status.
 */
void
mpath_done(struct scsi_xfer *mxs)
{
	struct scsi_xfer *xs = mxs->cookie;
	struct scsi_link *link = xs->sc_link;
	struct mpath_softc *sc = link->bus->sb_adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_path *p;

	switch (mxs->error) {
	case XS_SELTIMEOUT: /* physical path is gone, try the next */
	case XS_RESET:
		mtx_enter(&d->d_mtx);
		SIMPLEQ_INSERT_HEAD(&d->d_xfers, xs, xfer_list);
		p = mpath_next_path(d);
		mtx_leave(&d->d_mtx);

		scsi_xs_put(mxs);

		if (p != NULL)
			scsi_xsh_add(&p->p_xsh);
		return;
	case XS_SENSE:
		switch (d->d_ops->op_checksense(mxs)) {
		case MPATH_SENSE_FAILOVER:
			mtx_enter(&d->d_mtx);
			SIMPLEQ_INSERT_HEAD(&d->d_xfers, xs, xfer_list);
			/* advance the round robin; failover will rekick I/O */
			p = mpath_next_path(d);
			mtx_leave(&d->d_mtx);

			scsi_xs_put(mxs);

			mpath_failover(d);
			return;
		case MPATH_SENSE_DECLINED:
			break;
#ifdef DIAGNOSTIC
		default:
			panic("unexpected return from checksense");
#endif /* DIAGNOSTIC */
		}
		break;
	}

	/* Normal completion: copy status back to the original xfer. */
	xs->error = mxs->error;
	xs->status = mxs->status;
	xs->resid = mxs->resid;

	memcpy(&xs->sense, &mxs->sense, sizeof(xs->sense));

	scsi_xs_put(mxs);

	scsi_done(xs);
}
332
333void
334mpath_failover(struct mpath_dev *d)
335{
336	if (!scsi_pending_start(&d->d_mtx, &d->d_failover))
337		return;
338
339	mpath_failover_start(d);
340}
341
/*
 * Start (or restart, via d_failover_tmo) a failover pass: begin iterating
 * the group list from the head and probe the first candidate.
 */
void
mpath_failover_start(void *xd)
{
	struct mpath_dev *d = xd;

	mtx_enter(&d->d_mtx);
	d->d_failover_iter = TAILQ_FIRST(&d->d_groups);
	mtx_leave(&d->d_mtx);

	mpath_failover_check(d);
}
353
354void
355mpath_failover_check(struct mpath_dev *d)
356{
357	struct mpath_group *g = d->d_failover_iter;
358	struct mpath_path *p;
359
360	if (g == NULL)
361		timeout_add_sec(&d->d_failover_tmo, 1);
362	else {
363		p = TAILQ_FIRST(&g->g_paths);
364		d->d_ops->op_status(p->p_link);
365	}
366}
367
/*
 * Callback from the hardware ops with the result of an op_status() probe.
 * An active group is moved to the head of the group list and I/O restarts
 * on the probed path; otherwise the failover iterator advances to the next
 * group and the probe continues.
 */
void
mpath_path_status(struct mpath_path *p, int status)
{
	struct mpath_group *g = p->p_group;
	struct mpath_dev *d = g->g_dev;

	mtx_enter(&d->d_mtx);
	if (status == MPATH_S_ACTIVE) {
		/* promote this group to be the active one */
		TAILQ_REMOVE(&d->d_groups, g, g_entry);
		TAILQ_INSERT_HEAD(&d->d_groups, g, g_entry);
		d->d_next_path = p;
	} else
		d->d_failover_iter = TAILQ_NEXT(d->d_failover_iter, g_entry);
	mtx_leave(&d->d_mtx);

	if (status == MPATH_S_ACTIVE) {
		scsi_xsh_add(&p->p_xsh);
		/* another failover was requested while this one ran */
		if (!scsi_pending_finish(&d->d_mtx, &d->d_failover))
			mpath_failover_start(d);
	} else
		mpath_failover_check(d);
}
390
/*
 * Clamp a transfer to the most restrictive minphys of every underlying
 * path, so any path can service any queued xfer.
 */
void
mpath_minphys(struct buf *bp, struct scsi_link *link)
{
	struct mpath_softc *sc = link->bus->sb_adapter_softc;
	struct mpath_dev *d = sc->sc_devs[link->target];
	struct mpath_group *g;
	struct mpath_path *p;

#ifdef DIAGNOSTIC
	if (d == NULL)
		panic("mpath_minphys against nonexistent device");
#endif /* DIAGNOSTIC */

	mtx_enter(&d->d_mtx);
	TAILQ_FOREACH(g, &d->d_groups, g_entry) {
		TAILQ_FOREACH(p, &g->g_paths, p_entry) {
			/* XXX crossing layers with mutex held */
			if (p->p_link->bus->sb_adapter->dev_minphys != NULL)
				p->p_link->bus->sb_adapter->dev_minphys(bp,
				    p->p_link);
		}
	}
	mtx_leave(&d->d_mtx);
}
415
416int
417mpath_path_probe(struct scsi_link *link)
418{
419	if (mpath == NULL)
420		return (ENXIO);
421
422	if (link->id == NULL)
423		return (EINVAL);
424
425	if (ISSET(link->flags, SDEV_UMASS))
426		return (EINVAL);
427
428	if (mpath == link->bus->sb_adapter_softc)
429		return (ENXIO);
430
431	return (0);
432}
433
434int
435mpath_path_attach(struct mpath_path *p, u_int g_id, const struct mpath_ops *ops)
436{
437	struct mpath_softc *sc = mpath;
438	struct scsi_link *link = p->p_link;
439	struct mpath_dev *d = NULL;
440	struct mpath_group *g;
441	int newdev = 0, addxsh = 0;
442	int target;
443
444#ifdef DIAGNOSTIC
445	if (p->p_link == NULL)
446		panic("mpath_path_attach: NULL link");
447	if (p->p_group != NULL)
448		panic("mpath_path_attach: group is not NULL");
449#endif /* DIAGNOSTIC */
450
451	for (target = 0; target < MPATH_BUSWIDTH; target++) {
452		if ((d = sc->sc_devs[target]) == NULL)
453			continue;
454
455		if (DEVID_CMP(d->d_id, link->id) && d->d_ops == ops)
456			break;
457
458		d = NULL;
459	}
460
461	if (d == NULL) {
462		for (target = 0; target < MPATH_BUSWIDTH; target++) {
463			if (sc->sc_devs[target] == NULL)
464				break;
465		}
466		if (target >= MPATH_BUSWIDTH)
467			return (ENXIO);
468
469		d = malloc(sizeof(*d), M_DEVBUF, M_WAITOK | M_CANFAIL | M_ZERO);
470		if (d == NULL)
471			return (ENOMEM);
472
473		mtx_init(&d->d_mtx, IPL_BIO);
474		TAILQ_INIT(&d->d_groups);
475		SIMPLEQ_INIT(&d->d_xfers);
476		d->d_id = devid_copy(link->id);
477		d->d_ops = ops;
478
479		timeout_set(&d->d_failover_tmo, mpath_failover_start, d);
480
481		sc->sc_devs[target] = d;
482		newdev = 1;
483	} else {
484		/*
485		 * instead of carrying identical values in different devid
486		 * instances, delete the new one and reference the old one in
487		 * the new scsi_link.
488		 */
489		devid_free(link->id);
490		link->id = devid_copy(d->d_id);
491	}
492
493	TAILQ_FOREACH(g, &d->d_groups, g_entry) {
494		if (g->g_id == g_id)
495			break;
496	}
497
498	if (g == NULL) {
499		g = malloc(sizeof(*g),  M_DEVBUF,
500		    M_WAITOK | M_CANFAIL | M_ZERO);
501		if (g == NULL) {
502			if (newdev) {
503				free(d, M_DEVBUF, sizeof(*d));
504				sc->sc_devs[target] = NULL;
505			}
506
507			return (ENOMEM);
508		}
509
510		TAILQ_INIT(&g->g_paths);
511		g->g_dev = d;
512		g->g_id = g_id;
513
514		mtx_enter(&d->d_mtx);
515		TAILQ_INSERT_TAIL(&d->d_groups, g, g_entry);
516		mtx_leave(&d->d_mtx);
517	}
518
519	p->p_group = g;
520
521	mtx_enter(&d->d_mtx);
522	TAILQ_INSERT_TAIL(&g->g_paths, p, p_entry);
523	if (!SIMPLEQ_EMPTY(&d->d_xfers))
524		addxsh = 1;
525
526	if (d->d_next_path == NULL)
527		d->d_next_path = p;
528	mtx_leave(&d->d_mtx);
529
530	if (newdev)
531		scsi_probe_target(mpath->sc_scsibus, target);
532	else if (addxsh)
533		scsi_xsh_add(&p->p_xsh);
534
535	return (0);
536}
537
/*
 * Detach a path from its device.  The round-robin cursor is moved off the
 * departing path, an empty group is unlinked and freed, and any remaining
 * queued I/O is restarted on another path (or a failover is begun when no
 * path is left).  Always returns 0.
 */
int
mpath_path_detach(struct mpath_path *p)
{
	struct mpath_group *g = p->p_group;
	struct mpath_dev *d;
	struct mpath_path *np = NULL;

#ifdef DIAGNOSTIC
	if (g == NULL)
		panic("mpath: detaching a path from a nonexistent bus");
#endif /* DIAGNOSTIC */
	d = g->g_dev;
	p->p_group = NULL;

	mtx_enter(&d->d_mtx);
	TAILQ_REMOVE(&g->g_paths, p, p_entry);
	if (d->d_next_path == p)
		d->d_next_path = TAILQ_FIRST(&g->g_paths);

	/* last path in the group: unlink the group; g != NULL marks it for
	 * freeing once the mutex is dropped */
	if (TAILQ_EMPTY(&g->g_paths))
		TAILQ_REMOVE(&d->d_groups, g, g_entry);
	else
		g = NULL;

	if (!SIMPLEQ_EMPTY(&d->d_xfers))
		np = d->d_next_path;
	mtx_leave(&d->d_mtx);

	if (g != NULL)
		free(g, M_DEVBUF, sizeof(*g));

	scsi_xsh_del(&p->p_xsh);

	/* pending I/O but no path left: try to fail over to another group */
	if (np == NULL)
		mpath_failover(d);
	else
		scsi_xsh_add(&np->p_xsh);

	return (0);
}
578
579struct device *
580mpath_bootdv(struct device *dev)
581{
582	struct mpath_softc *sc = mpath;
583	struct mpath_dev *d;
584	struct mpath_group *g;
585	struct mpath_path *p;
586	int target;
587
588	if (sc == NULL)
589		return (dev);
590
591	for (target = 0; target < MPATH_BUSWIDTH; target++) {
592		if ((d = sc->sc_devs[target]) == NULL)
593			continue;
594
595		TAILQ_FOREACH(g, &d->d_groups, g_entry) {
596			TAILQ_FOREACH(p, &g->g_paths, p_entry) {
597				if (p->p_link->device_softc == dev) {
598					return (scsi_get_link(mpath->sc_scsibus,
599					    target, 0)->device_softc);
600				}
601			}
602		}
603	}
604
605	return (dev);
606}
607