// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2022, Alibaba Cloud
 * Copyright (C) 2022, Bytedance Inc. All rights reserved.
 */
#include <linux/pseudo_fs.h>
#include <linux/fscache.h>
#include "internal.h"

static DEFINE_MUTEX(erofs_domain_list_lock);
static DEFINE_MUTEX(erofs_domain_cookies_lock);
static LIST_HEAD(erofs_domain_list);
static LIST_HEAD(erofs_domain_cookies_list);
static struct vfsmount *erofs_pseudo_mnt;

static int erofs_anon_init_fs_context(struct fs_context *fc)
{
	return init_pseudo(fc, EROFS_SUPER_MAGIC) ? 0 : -ENOMEM;
}

static struct file_system_type erofs_anon_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "pseudo_erofs",
	.init_fs_context = erofs_anon_init_fs_context,
	.kill_sb	= kill_anon_super,
};

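/*
 * An erofs_fscache_io tracks one read I/O against the cache: the cache
 * resources in use, the destination iterator, the completion callback, and
 * a refcount keeping the I/O alive until every in-flight read has finished.
 */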
struct erofs_fscache_io {
	struct netfs_cache_resources cres;
	struct iov_iter		iter;
	netfs_io_terminated_t	end_io;
	void			*private;
	refcount_t		ref;
};

struct erofs_fscache_rq {
	struct address_space	*mapping;	/* The mapping being accessed */
	loff_t			start;		/* Start position */
	size_t			len;		/* Length of the request */
	size_t			submitted;	/* Length submitted so far */
	short			error;		/* 0 or error that occurred */
	refcount_t		ref;
};

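/* Drop a reference on @io; on the last put, end the cache operation and free it. */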
static bool erofs_fscache_io_put(struct erofs_fscache_io *io)
{
	if (!refcount_dec_and_test(&io->ref))
		return false;
	if (io->cres.ops)
		io->cres.ops->end_operation(&io->cres);
	kfree(io);
	return true;
}

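/*
 * Called once all I/Os of the request have completed: walk the folios
 * covering [start, start + len), mark them uptodate unless an error was
 * recorded, and unlock them.
 */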
static void erofs_fscache_req_complete(struct erofs_fscache_rq *req)
{
	struct folio *folio;
	bool failed = req->error;
	pgoff_t start_page = req->start / PAGE_SIZE;
	pgoff_t last_page = ((req->start + req->len) / PAGE_SIZE) - 1;

	XA_STATE(xas, &req->mapping->i_pages, start_page);

	rcu_read_lock();
	xas_for_each(&xas, folio, last_page) {
		if (xas_retry(&xas, folio))
			continue;
		if (!failed)
			folio_mark_uptodate(folio);
		folio_unlock(folio);
	}
	rcu_read_unlock();
}

static void erofs_fscache_req_put(struct erofs_fscache_rq *req)
{
	if (!refcount_dec_and_test(&req->ref))
		return;
	erofs_fscache_req_complete(req);
	kfree(req);
}

static struct erofs_fscache_rq *erofs_fscache_req_alloc(struct address_space *mapping,
						loff_t start, size_t len)
{
	struct erofs_fscache_rq *req = kzalloc(sizeof(*req), GFP_KERNEL);

	if (!req)
		return NULL;
	req->mapping = mapping;
	req->start = start;
	req->len = len;
	refcount_set(&req->ref, 1);
	return req;
}

static void erofs_fscache_req_io_put(struct erofs_fscache_io *io)
{
	struct erofs_fscache_rq *req = io->private;

	if (erofs_fscache_io_put(io))
		erofs_fscache_req_put(req);
}

static void erofs_fscache_req_end_io(void *priv,
		ssize_t transferred_or_error, bool was_async)
{
	struct erofs_fscache_io *io = priv;
	struct erofs_fscache_rq *req = io->private;

	if (IS_ERR_VALUE(transferred_or_error))
		req->error = transferred_or_error;
	erofs_fscache_req_io_put(io);
}

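/*
 * Allocate an I/O for @req; each I/O holds an extra reference on the
 * request, so the request is only completed after all of its I/Os finish.
 */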
static struct erofs_fscache_io *erofs_fscache_req_io_alloc(struct erofs_fscache_rq *req)
{
	struct erofs_fscache_io *io = kzalloc(sizeof(*io), GFP_KERNEL);

	if (!io)
		return NULL;
	io->end_io = erofs_fscache_req_end_io;
	io->private = req;
	refcount_inc(&req->ref);
	refcount_set(&io->ref, 1);
	return io;
}

/*
 * Read data from fscache described by @cookie, starting at physical address
 * offset @pstart, and fill it into the buffer described by io->iter.
 */
static int erofs_fscache_read_io_async(struct fscache_cookie *cookie,
		loff_t pstart, struct erofs_fscache_io *io)
{
	enum netfs_io_source source;
	struct netfs_cache_resources *cres = &io->cres;
	struct iov_iter *iter = &io->iter;
	int ret;

	ret = fscache_begin_read_operation(cres, cookie);
	if (ret)
		return ret;

	while (iov_iter_count(iter)) {
		size_t orig_count = iov_iter_count(iter), len = orig_count;
		unsigned long flags = 1 << NETFS_SREQ_ONDEMAND;

		source = cres->ops->prepare_ondemand_read(cres,
				pstart, &len, LLONG_MAX, &flags, 0);
		if (WARN_ON(len == 0))
			source = NETFS_INVALID_READ;
		if (source != NETFS_READ_FROM_CACHE) {
			erofs_err(NULL, "prepare_ondemand_read failed (source %d)", source);
			return -EIO;
		}

		iov_iter_truncate(iter, len);
		refcount_inc(&io->ref);
		ret = fscache_read(cres, pstart, iter, NETFS_READ_HOLE_FAIL,
				   io->end_io, io);
		if (ret == -EIOCBQUEUED)
			ret = 0;
		if (ret) {
			erofs_err(NULL, "fscache_read failed (ret %d)", ret);
			return ret;
		}
		if (WARN_ON(iov_iter_count(iter)))
			return -EIO;

		iov_iter_reexpand(iter, orig_count - len);
		pstart += len;
	}
	return 0;
}

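/*
 * An erofs_fscache_bio embeds a bdev-less bio, so that callers can keep
 * using the bio_add_page()/endio() infrastructure while the read itself is
 * served from fscache.
 */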
struct erofs_fscache_bio {
	struct erofs_fscache_io io;
	struct bio bio;		/* w/o bdev to share bio_add_page/endio() */
	struct bio_vec bvecs[BIO_MAX_VECS];
};

static void erofs_fscache_bio_endio(void *priv,
		ssize_t transferred_or_error, bool was_async)
{
	struct erofs_fscache_bio *io = priv;

	if (IS_ERR_VALUE(transferred_or_error))
		io->bio.bi_status = errno_to_blk_status(transferred_or_error);
	io->bio.bi_end_io(&io->bio);
	BUILD_BUG_ON(offsetof(struct erofs_fscache_bio, io) != 0);
	erofs_fscache_io_put(&io->io);
}

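/*
 * Allocate a bdev-less bio whose completion goes through fscache; the
 * __GFP_NOFAIL allocation means this never returns NULL.
 */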
struct bio *erofs_fscache_bio_alloc(struct erofs_map_dev *mdev)
{
	struct erofs_fscache_bio *io;

	io = kmalloc(sizeof(*io), GFP_KERNEL | __GFP_NOFAIL);
	bio_init(&io->bio, NULL, io->bvecs, BIO_MAX_VECS, REQ_OP_READ);
	io->io.private = mdev->m_fscache->cookie;
	io->io.end_io = erofs_fscache_bio_endio;
	refcount_set(&io->io.ref, 1);
	return &io->bio;
}

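/*
 * Submit a prepared bio: wrap its bvecs in an iov_iter and kick off an
 * asynchronous cache read at the byte offset encoded in bi_sector; on
 * submission failure, complete the bio with the mapped error status.
 */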
void erofs_fscache_submit_bio(struct bio *bio)
{
	struct erofs_fscache_bio *io = container_of(bio,
			struct erofs_fscache_bio, bio);
	int ret;

	iov_iter_bvec(&io->io.iter, ITER_DEST, io->bvecs, bio->bi_vcnt,
		      bio->bi_iter.bi_size);
	ret = erofs_fscache_read_io_async(io->io.private,
				bio->bi_iter.bi_sector << 9, &io->io);
	erofs_fscache_io_put(&io->io);
	if (!ret)
		return;
	bio->bi_status = errno_to_blk_status(ret);
	bio->bi_end_io(bio);
}

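/* read_folio() for the anonymous inodes that buffer metadata of a cookie */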
static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
{
	struct erofs_fscache *ctx = folio->mapping->host->i_private;
	int ret = -ENOMEM;
	struct erofs_fscache_rq *req;
	struct erofs_fscache_io *io;

	req = erofs_fscache_req_alloc(folio->mapping,
				folio_pos(folio), folio_size(folio));
	if (!req) {
		folio_unlock(folio);
		return ret;
	}

	io = erofs_fscache_req_io_alloc(req);
	if (!io) {
		req->error = ret;
		goto out;
	}
	iov_iter_xarray(&io->iter, ITER_DEST, &folio->mapping->i_pages,
			folio_pos(folio), folio_size(folio));

	ret = erofs_fscache_read_io_async(ctx->cookie, folio_pos(folio), io);
	if (ret)
		req->error = ret;

	erofs_fscache_req_io_put(io);
out:
	erofs_fscache_req_put(req);
	return ret;
}

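/*
 * Read a single extent of the request: inline (tail-packed) data is copied
 * from the metadata buffer, unmapped ranges are zeroed, and mapped extents
 * are read asynchronously from the corresponding cookie.
 */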
static int erofs_fscache_data_read_slice(struct erofs_fscache_rq *req)
{
	struct address_space *mapping = req->mapping;
	struct inode *inode = mapping->host;
	struct super_block *sb = inode->i_sb;
	struct erofs_fscache_io *io;
	struct erofs_map_blocks map;
	struct erofs_map_dev mdev;
	loff_t pos = req->start + req->submitted;
	size_t count;
	int ret;

	map.m_la = pos;
	ret = erofs_map_blocks(inode, &map);
	if (ret)
		return ret;

	/* Inline (tail-packed) data: copy it out of the metadata buffer. */
	if (map.m_flags & EROFS_MAP_META) {
		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
		struct iov_iter iter;
		size_t size = map.m_llen;
		void *src;

		src = erofs_read_metabuf(&buf, sb, map.m_pa, EROFS_KMAP);
		if (IS_ERR(src))
			return PTR_ERR(src);

		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, PAGE_SIZE);
		if (copy_to_iter(src, size, &iter) != size) {
			erofs_put_metabuf(&buf);
			return -EFAULT;
		}
		iov_iter_zero(PAGE_SIZE - size, &iter);
		erofs_put_metabuf(&buf);
		req->submitted += PAGE_SIZE;
		return 0;
	}

	count = req->len - req->submitted;
	/* Unmapped range (hole): zero out the page cache directly. */
	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
		struct iov_iter iter;

		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count);
		iov_iter_zero(count, &iter);
		req->submitted += count;
		return 0;
	}

	/* Mapped extent: read it asynchronously from the target cookie. */
	count = min_t(size_t, map.m_llen - (pos - map.m_la), count);
	DBG_BUGON(!count || count % PAGE_SIZE);

	mdev = (struct erofs_map_dev) {
		.m_deviceid = map.m_deviceid,
		.m_pa = map.m_pa,
	};
	ret = erofs_map_dev(sb, &mdev);
	if (ret)
		return ret;

	io = erofs_fscache_req_io_alloc(req);
	if (!io)
		return -ENOMEM;
	iov_iter_xarray(&io->iter, ITER_DEST, &mapping->i_pages, pos, count);
	ret = erofs_fscache_read_io_async(mdev.m_fscache->cookie,
			mdev.m_pa + (pos - map.m_la), io);
	erofs_fscache_req_io_put(io);

	req->submitted += count;
	return ret;
}

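/* Issue slices until the whole request has been submitted or an error occurs. */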
static int erofs_fscache_data_read(struct erofs_fscache_rq *req)
{
	int ret;

	do {
		ret = erofs_fscache_data_read_slice(req);
		if (ret)
			req->error = ret;
	} while (!ret && req->submitted < req->len);
	return ret;
}

static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
{
	struct erofs_fscache_rq *req;
	int ret;

	req = erofs_fscache_req_alloc(folio->mapping,
			folio_pos(folio), folio_size(folio));
	if (!req) {
		folio_unlock(folio);
		return -ENOMEM;
	}

	ret = erofs_fscache_data_read(req);
	erofs_fscache_req_put(req);
	return ret;
}

static void erofs_fscache_readahead(struct readahead_control *rac)
{
	struct erofs_fscache_rq *req;

	if (!readahead_count(rac))
		return;

	req = erofs_fscache_req_alloc(rac->mapping,
			readahead_pos(rac), readahead_length(rac));
	if (!req)
		return;

	/* The request completion will drop refs on the folios. */
	while (readahead_folio(rac))
		;

	erofs_fscache_data_read(req);
	erofs_fscache_req_put(req);
}

static const struct address_space_operations erofs_fscache_meta_aops = {
	.read_folio = erofs_fscache_meta_read_folio,
};

const struct address_space_operations erofs_fscache_access_aops = {
	.read_folio = erofs_fscache_read_folio,
	.readahead = erofs_fscache_readahead,
};

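/*
 * Drop a reference on @domain; the last put removes it from the domain list,
 * relinquishes its volume and, once no domain is left, unmounts the shared
 * pseudo mount.
 */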
static void erofs_fscache_domain_put(struct erofs_domain *domain)
{
	mutex_lock(&erofs_domain_list_lock);
	if (refcount_dec_and_test(&domain->ref)) {
		list_del(&domain->list);
		if (list_empty(&erofs_domain_list)) {
			kern_unmount(erofs_pseudo_mnt);
			erofs_pseudo_mnt = NULL;
		}
		fscache_relinquish_volume(domain->volume, NULL, false);
		mutex_unlock(&erofs_domain_list_lock);
		kfree(domain->domain_id);
		kfree(domain);
		return;
	}
	mutex_unlock(&erofs_domain_list_lock);
}

static int erofs_fscache_register_volume(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	char *domain_id = sbi->domain_id;
	struct fscache_volume *volume;
	char *name;
	int ret = 0;

	name = kasprintf(GFP_KERNEL, "erofs,%s",
			 domain_id ? domain_id : sbi->fsid);
	if (!name)
		return -ENOMEM;

	volume = fscache_acquire_volume(name, NULL, NULL, 0);
	if (IS_ERR_OR_NULL(volume)) {
		erofs_err(sb, "failed to register volume for %s", name);
		ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP;
		volume = NULL;
	}

	sbi->volume = volume;
	kfree(name);
	return ret;
}

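/*
 * Set up a new domain (erofs_domain_list_lock held by the caller): register
 * the backing volume and, on first use, create the pseudo mount hosting the
 * anonymous inodes shared across erofs instances.
 */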
static int erofs_fscache_init_domain(struct super_block *sb)
{
	int err;
	struct erofs_domain *domain;
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	domain = kzalloc(sizeof(struct erofs_domain), GFP_KERNEL);
	if (!domain)
		return -ENOMEM;

	domain->domain_id = kstrdup(sbi->domain_id, GFP_KERNEL);
	if (!domain->domain_id) {
		kfree(domain);
		return -ENOMEM;
	}

	err = erofs_fscache_register_volume(sb);
	if (err)
		goto out;

	if (!erofs_pseudo_mnt) {
		struct vfsmount *mnt = kern_mount(&erofs_anon_fs_type);

		if (IS_ERR(mnt)) {
			err = PTR_ERR(mnt);
			goto out;
		}
		erofs_pseudo_mnt = mnt;
	}

	domain->volume = sbi->volume;
	refcount_set(&domain->ref, 1);
	list_add(&domain->list, &erofs_domain_list);
	sbi->domain = domain;
	return 0;
out:
	kfree(domain->domain_id);
	kfree(domain);
	return err;
}

static int erofs_fscache_register_domain(struct super_block *sb)
{
	int err;
	struct erofs_domain *domain;
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_lock(&erofs_domain_list_lock);
	list_for_each_entry(domain, &erofs_domain_list, list) {
		if (!strcmp(domain->domain_id, sbi->domain_id)) {
			sbi->domain = domain;
			sbi->volume = domain->volume;
			refcount_inc(&domain->ref);
			mutex_unlock(&erofs_domain_list_lock);
			return 0;
		}
	}
	err = erofs_fscache_init_domain(sb);
	mutex_unlock(&erofs_domain_list_lock);
	return err;
}

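/*
 * Acquire a cookie for @name and set up the anonymous inode whose page cache
 * buffers reads from it; with EROFS_REG_COOKIE_SHARE the inode is allocated
 * on the global pseudo mount so it can be shared across erofs instances.
 */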
static struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
						char *name, unsigned int flags)
{
	struct fscache_volume *volume = EROFS_SB(sb)->volume;
	struct erofs_fscache *ctx;
	struct fscache_cookie *cookie;
	struct super_block *isb;
	struct inode *inode;
	int ret;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&ctx->node);
	refcount_set(&ctx->ref, 1);

	cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE,
					name, strlen(name), NULL, 0, 0);
	if (!cookie) {
		erofs_err(sb, "failed to get cookie for %s", name);
		ret = -EINVAL;
		goto err;
	}
	fscache_use_cookie(cookie, false);

	/*
	 * Allocate an anonymous inode in the global pseudo mount for shareable
	 * blobs, so that they are accessible across erofs instances.
	 */
	isb = flags & EROFS_REG_COOKIE_SHARE ? erofs_pseudo_mnt->mnt_sb : sb;
	inode = new_inode(isb);
	if (!inode) {
		erofs_err(sb, "failed to get anon inode for %s", name);
		ret = -ENOMEM;
		goto err_cookie;
	}

	inode->i_size = OFFSET_MAX;
	inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
	mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
	inode->i_blkbits = EROFS_SB(sb)->blkszbits;
	inode->i_private = ctx;

	ctx->cookie = cookie;
	ctx->inode = inode;
	return ctx;

err_cookie:
	fscache_unuse_cookie(cookie, NULL, NULL);
	fscache_relinquish_cookie(cookie, false);
err:
	kfree(ctx);
	return ERR_PTR(ret);
}

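/* Release the cookie, its anonymous inode and the context itself. */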
static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx)
{
	fscache_unuse_cookie(ctx->cookie, NULL, NULL);
	fscache_relinquish_cookie(ctx->cookie, false);
	iput(ctx->inode);
	kfree(ctx->name);
	kfree(ctx);
}

static struct erofs_fscache *erofs_domain_init_cookie(struct super_block *sb,
						char *name, unsigned int flags)
{
	struct erofs_fscache *ctx;
	struct erofs_domain *domain = EROFS_SB(sb)->domain;

	ctx = erofs_fscache_acquire_cookie(sb, name, flags);
	if (IS_ERR(ctx))
		return ctx;

	ctx->name = kstrdup(name, GFP_KERNEL);
	if (!ctx->name) {
		erofs_fscache_relinquish_cookie(ctx);
		return ERR_PTR(-ENOMEM);
	}

	refcount_inc(&domain->ref);
	ctx->domain = domain;
	list_add(&ctx->node, &erofs_domain_cookies_list);
	return ctx;
}

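/*
 * Look up @name in the domain cookie list and take a reference if it already
 * exists (unless NEED_NOEXIST demands uniqueness); otherwise create a new
 * domain cookie. Domain cookies are always shareable.
 */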
static struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
						char *name, unsigned int flags)
{
	struct erofs_fscache *ctx;
	struct erofs_domain *domain = EROFS_SB(sb)->domain;

	flags |= EROFS_REG_COOKIE_SHARE;
	mutex_lock(&erofs_domain_cookies_lock);
	list_for_each_entry(ctx, &erofs_domain_cookies_list, node) {
		if (ctx->domain != domain || strcmp(ctx->name, name))
			continue;
		if (!(flags & EROFS_REG_COOKIE_NEED_NOEXIST)) {
			refcount_inc(&ctx->ref);
		} else {
			erofs_err(sb, "%s already exists in domain %s", name,
				  domain->domain_id);
			ctx = ERR_PTR(-EEXIST);
		}
		mutex_unlock(&erofs_domain_cookies_lock);
		return ctx;
	}
	ctx = erofs_domain_init_cookie(sb, name, flags);
	mutex_unlock(&erofs_domain_cookies_lock);
	return ctx;
}

struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
						    char *name,
						    unsigned int flags)
{
	if (EROFS_SB(sb)->domain_id)
		return erofs_domain_register_cookie(sb, name, flags);
	return erofs_fscache_acquire_cookie(sb, name, flags);
}

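/*
 * Drop a cookie reference; for domain cookies, the last put also removes the
 * cookie from the domain list and releases the domain reference it held.
 */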
void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx)
{
	struct erofs_domain *domain = NULL;

	if (!ctx)
		return;
	if (!ctx->domain)
		return erofs_fscache_relinquish_cookie(ctx);

	mutex_lock(&erofs_domain_cookies_lock);
	if (refcount_dec_and_test(&ctx->ref)) {
		domain = ctx->domain;
		list_del(&ctx->node);
		erofs_fscache_relinquish_cookie(ctx);
	}
	mutex_unlock(&erofs_domain_cookies_lock);
	if (domain)
		erofs_fscache_domain_put(domain);
}

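/*
 * Register the fscache context for a mount: set up the volume (or the shared
 * domain) and acquire the cookie of the primary data blob (fsid).
 */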
int erofs_fscache_register_fs(struct super_block *sb)
{
	int ret;
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_fscache *fscache;
	unsigned int flags = 0;

	if (sbi->domain_id)
		ret = erofs_fscache_register_domain(sb);
	else
		ret = erofs_fscache_register_volume(sb);
	if (ret)
		return ret;

	/*
	 * When a shared domain is in use, pass NEED_NOEXIST to guarantee
	 * that the primary data blob (aka fsid) is unique in the shared
	 * domain.
	 *
	 * For the non-shared-domain case, fscache_acquire_volume() invoked
	 * by erofs_fscache_register_volume() has already guaranteed the
	 * uniqueness of the primary data blob.
	 *
	 * An acquired domain/volume will be relinquished in kill_sb() on
	 * error.
	 */
	if (sbi->domain_id)
		flags |= EROFS_REG_COOKIE_NEED_NOEXIST;
	fscache = erofs_fscache_register_cookie(sb, sbi->fsid, flags);
	if (IS_ERR(fscache))
		return PTR_ERR(fscache);

	sbi->s_fscache = fscache;
	return 0;
}

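/* Tear down everything erofs_fscache_register_fs() set up. */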
void erofs_fscache_unregister_fs(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	erofs_fscache_unregister_cookie(sbi->s_fscache);

	if (sbi->domain)
		erofs_fscache_domain_put(sbi->domain);
	else
		fscache_relinquish_volume(sbi->volume, NULL, false);

	sbi->s_fscache = NULL;
	sbi->volume = NULL;
	sbi->domain = NULL;
}