1
2
3#include <linux/config.h>
4#include <linux/types.h>
5#include <linux/slab.h>
6#include <linux/swap.h>
7#include <linux/pagemap.h>
8#include <linux/file.h>
9
10#include <linux/sunrpc/clnt.h>
11#include <linux/nfs_fs.h>
12#include <linux/nfs_mount.h>
13#include <linux/nfs_flushd.h>
14#include <linux/nfs_page.h>
15#include <asm/uaccess.h>
16#include <linux/smp_lock.h>
17
18#define NFSDBG_FACILITY		NFSDBG_PAGECACHE
19
20/*
21 * Local structures
22 *
23 * This is the struct where the WRITE/COMMIT arguments go.
24 */
struct nfs_write_data {
	struct rpc_task		task;		/* controlling RPC task */
	struct inode		*inode;		/* inode being written */
	struct rpc_cred		*cred;		/* credentials for the RPC call */
	struct nfs_writeargs	args;		/* argument struct */
	struct nfs_writeres	res;		/* result struct */
	struct nfs_fattr	fattr;		/* attributes returned by the server */
	struct nfs_writeverf	verf;		/* write/commit verifier from the server */
	struct list_head	pages;		/* Coalesced requests we wish to flush */
	struct page		*pagevec[NFS_WRITE_MAXIOV];	/* backing store for args.pages */
};
36
37/*
38 * Local function declarations
39 */
40static struct nfs_page * nfs_update_request(struct file*, struct inode *,
41					    struct page *,
42					    unsigned int, unsigned int);
43static void	nfs_strategy(struct inode *inode);
44static void	nfs_writeback_done(struct rpc_task *);
45#ifdef CONFIG_NFS_V3
46static void	nfs_commit_done(struct rpc_task *);
47#endif
48
49/* Hack for future NFS swap support */
50#ifndef IS_SWAPFILE
51# define IS_SWAPFILE(inode)	(0)
52#endif
53
54static kmem_cache_t *nfs_wdata_cachep;
55
56static __inline__ struct nfs_write_data *nfs_writedata_alloc(void)
57{
58	struct nfs_write_data	*p;
59	p = kmem_cache_alloc(nfs_wdata_cachep, SLAB_NOFS);
60	if (p) {
61		memset(p, 0, sizeof(*p));
62		INIT_LIST_HEAD(&p->pages);
63		p->args.pages = p->pagevec;
64	}
65	return p;
66}
67
/* Return a write data descriptor to the slab cache. */
static __inline__ void nfs_writedata_free(struct nfs_write_data *p)
{
	kmem_cache_free(nfs_wdata_cachep, p);
}
72
73static void nfs_writedata_release(struct rpc_task *task)
74{
75	struct nfs_write_data	*wdata = (struct nfs_write_data *)task->tk_calldata;
76	nfs_writedata_free(wdata);
77}
78
79/*
80 * This function will be used to simulate weak cache consistency
81 * under NFSv2 when the NFSv3 attribute patch is included.
82 * For the moment, we just call nfs_refresh_inode().
83 */
static __inline__ int
nfs_write_attributes(struct inode *inode, struct nfs_fattr *fattr)
{
	/* If the server returned plain attributes without weak cache
	 * consistency (WCC) data, synthesize pre-op attributes from our
	 * cached values so nfs_refresh_inode() can tell whether anyone
	 * else changed the file. */
	if ((fattr->valid & NFS_ATTR_FATTR) && !(fattr->valid & NFS_ATTR_WCC)) {
		fattr->pre_size  = NFS_CACHE_ISIZE(inode);
		fattr->pre_mtime = NFS_CACHE_MTIME(inode);
		fattr->pre_ctime = NFS_CACHE_CTIME(inode);
		fattr->valid |= NFS_ATTR_WCC;
	}
	return nfs_refresh_inode(inode, fattr);
}
95
96/*
97 * Write a page synchronously.
98 * Offset is the data offset within the page.
99 */
100static int
101nfs_writepage_sync(struct file *file, struct inode *inode, struct page *page,
102		   unsigned int offset, unsigned int count)
103{
104	struct rpc_cred	*cred = NULL;
105	loff_t		base;
106	unsigned int	wsize = NFS_SERVER(inode)->wsize;
107	int		result, refresh = 0, written = 0, flags;
108	u8		*buffer;
109	struct nfs_fattr fattr;
110	struct nfs_writeverf verf;
111
112
113	if (file)
114		cred = get_rpccred(nfs_file_cred(file));
115	if (!cred)
116		cred = get_rpccred(NFS_I(inode)->mm_cred);
117
118	dprintk("NFS:      nfs_writepage_sync(%x/%Ld %d@%Ld)\n",
119		inode->i_dev, (long long)NFS_FILEID(inode),
120		count, (long long)(page_offset(page) + offset));
121
122	base = page_offset(page) + offset;
123
124	flags = ((IS_SWAPFILE(inode)) ? NFS_RW_SWAP : 0) | NFS_RW_SYNC;
125
126	do {
127		if (count < wsize && !IS_SWAPFILE(inode))
128			wsize = count;
129
130		result = NFS_PROTO(inode)->write(inode, cred, &fattr, flags,
131						 offset, wsize, page, &verf);
132		nfs_write_attributes(inode, &fattr);
133
134		if (result < 0) {
135			/* Must mark the page invalid after I/O error */
136			ClearPageUptodate(page);
137			goto io_error;
138		}
139		if (result != wsize)
140			printk("NFS: short write, wsize=%u, result=%d\n",
141			wsize, result);
142		refresh = 1;
143		buffer  += wsize;
144		base    += wsize;
145	        offset  += wsize;
146		written += wsize;
147		count   -= wsize;
148		/*
149		 * If we've extended the file, update the inode
150		 * now so we don't invalidate the cache.
151		 */
152		if (base > inode->i_size)
153			inode->i_size = base;
154	} while (count);
155
156	if (PageError(page))
157		ClearPageError(page);
158
159io_error:
160	if (cred)
161		put_rpccred(cred);
162
163	return written? written : result;
164}
165
/*
 * Queue a page range for asynchronous writeback.
 * Finds or creates a write request covering @offset/@count on @page,
 * then lets nfs_strategy() decide whether to flush now.
 */
static int
nfs_writepage_async(struct file *file, struct inode *inode, struct page *page,
		    unsigned int offset, unsigned int count)
{
	struct nfs_page	*req;
	loff_t		end;
	int		status;

	req = nfs_update_request(file, inode, page, offset, count);
	status = (IS_ERR(req)) ? PTR_ERR(req) : 0;
	if (status < 0)
		goto out;
	/* No file (mmap writer): use the inode's cached credentials. */
	if (!req->wb_cred)
		req->wb_cred = get_rpccred(NFS_I(inode)->mm_cred);
	nfs_unlock_request(req);
	/* Possibly kick off a flush if enough pages are dirty. */
	nfs_strategy(inode);
	/* Extend the cached file size if this write goes past EOF. */
	end = ((loff_t)page->index<<PAGE_CACHE_SHIFT) + (loff_t)(offset + count);
	if (inode->i_size < end)
		inode->i_size = end;

 out:
	return status;
}
189
190/*
191 * Write an mmapped page to the server.
192 */
int
nfs_writepage(struct page *page)
{
	struct inode *inode = page->mapping->host;
	unsigned long end_index;
	unsigned offset = PAGE_CACHE_SIZE;	/* bytes to write; whole page by default */
	int err;

	end_index = inode->i_size >> PAGE_CACHE_SHIFT;

	/* Ensure we've flushed out any previous writes */
	nfs_wb_page(inode,page);

	/* easy case: page lies entirely inside i_size */
	if (page->index < end_index)
		goto do_it;
	/* things got complicated... only write up to i_size */
	offset = inode->i_size & (PAGE_CACHE_SIZE-1);

	/* OK, are we completely out? Page entirely beyond EOF. */
	err = -EIO;
	if (page->index >= end_index+1 || !offset)
		goto out;
do_it:
	lock_kernel();
	/* Async path only when a whole page fits in one WRITE and the
	 * inode is not marked synchronous. */
	if (NFS_SERVER(inode)->wsize >= PAGE_CACHE_SIZE && !IS_SYNC(inode)) {
		err = nfs_writepage_async(NULL, inode, page, 0, offset);
		if (err >= 0)
			err = 0;
	} else {
		err = nfs_writepage_sync(NULL, inode, page, 0, offset);
		/* Sync path returns bytes written; a full write is success. */
		if (err == offset)
			err = 0;
	}
	unlock_kernel();
out:
	UnlockPage(page);
	return err;
}
232
233/*
234 * Check whether the file range we want to write to is locked by
235 * us.
236 */
237static int
238region_locked(struct inode *inode, struct nfs_page *req)
239{
240	struct file_lock	*fl;
241	loff_t			rqstart, rqend;
242
243	/* Don't optimize writes if we don't use NLM */
244	if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)
245		return 0;
246
247	rqstart = page_offset(req->wb_page) + req->wb_offset;
248	rqend = rqstart + req->wb_bytes;
249	for (fl = inode->i_flock; fl; fl = fl->fl_next) {
250		if (fl->fl_owner == current->files && (fl->fl_flags & FL_POSIX)
251		    && fl->fl_type == F_WRLCK
252		    && fl->fl_start <= rqstart && rqend <= fl->fl_end) {
253			return 1;
254		}
255	}
256
257	return 0;
258}
259
260/*
261 * Insert a write request into an inode
262 * Note: we sort the list in order to be able to optimize nfs_find_request()
263 *	 & co. for the 'write append' case. For 2.5 we may want to consider
264 *	 some form of hashing so as to perform well on random writes.
265 */
static inline void
nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
{
	struct list_head *pos, *head;
	unsigned long pg_idx = page_index(req->wb_page);

	/* Already hashed on an inode: nothing to do. */
	if (!list_empty(&req->wb_hash))
		return;
	if (!NFS_WBACK_BUSY(req))
		printk(KERN_ERR "NFS: unlocked request attempted hashed!\n");
	head = &inode->u.nfs_i.writeback;
	/* The first request pins the inode; the reference is dropped in
	 * nfs_inode_remove_request() once the list drains. */
	if (list_empty(head))
		igrab(inode);
	/* Keep the list sorted by page index; walking backwards makes
	 * the common append-at-EOF case fast. */
	list_for_each_prev(pos, head) {
		struct nfs_page *entry = nfs_inode_wb_entry(pos);
		if (page_index(entry->wb_page) < pg_idx)
			break;
	}
	inode->u.nfs_i.npages++;
	list_add(&req->wb_hash, pos);
	req->wb_count++;	/* reference held by the writeback list */
}
288
289/*
 * Remove a write request from an inode
291 */
static inline void
nfs_inode_remove_request(struct nfs_page *req)
{
	struct inode *inode;
	spin_lock(&nfs_wreq_lock);
	/* Not hashed (already removed): nothing to undo. */
	if (list_empty(&req->wb_hash)) {
		spin_unlock(&nfs_wreq_lock);
		return;
	}
	if (!NFS_WBACK_BUSY(req))
		printk(KERN_ERR "NFS: unlocked request attempted unhashed!\n");
	inode = req->wb_inode;
	list_del(&req->wb_hash);
	INIT_LIST_HEAD(&req->wb_hash);
	inode->u.nfs_i.npages--;
	if ((inode->u.nfs_i.npages == 0) != list_empty(&inode->u.nfs_i.writeback))
		printk(KERN_ERR "NFS: desynchronized value of nfs_i.npages.\n");
	/* Last request gone: release the inode reference taken by
	 * nfs_inode_add_request(). iput() may sleep, so drop the
	 * spinlock first. */
	if (list_empty(&inode->u.nfs_i.writeback)) {
		spin_unlock(&nfs_wreq_lock);
		iput(inode);
	} else
		spin_unlock(&nfs_wreq_lock);
	nfs_clear_request(req);
	nfs_release_request(req);
}
317
318/*
319 * Find a request
320 */
321static inline struct nfs_page *
322_nfs_find_request(struct inode *inode, struct page *page)
323{
324	struct list_head	*head, *pos;
325	unsigned long pg_idx = page_index(page);
326
327	head = &inode->u.nfs_i.writeback;
328	list_for_each_prev(pos, head) {
329		struct nfs_page *req = nfs_inode_wb_entry(pos);
330		unsigned long found_idx = page_index(req->wb_page);
331
332		if (pg_idx < found_idx)
333			continue;
334		if (pg_idx != found_idx)
335			break;
336		req->wb_count++;
337		return req;
338	}
339	return NULL;
340}
341
/* Locked wrapper around _nfs_find_request(). */
static struct nfs_page *
nfs_find_request(struct inode *inode, struct page *page)
{
	struct nfs_page		*req;

	spin_lock(&nfs_wreq_lock);
	req = _nfs_find_request(inode, page);
	spin_unlock(&nfs_wreq_lock);
	return req;
}
352
353/*
354 * Add a request to the inode's dirty list.
355 */
static inline void
nfs_mark_request_dirty(struct nfs_page *req)
{
	struct inode *inode = req->wb_inode;

	spin_lock(&nfs_wreq_lock);
	/* Put the request on the inode's dirty list and move it to the
	 * per-server dirty LRU (off whatever LRU it was on before). */
	nfs_list_add_request(req, &inode->u.nfs_i.dirty);
	inode->u.nfs_i.ndirty++;
	__nfs_del_lru(req);
	__nfs_add_lru(&NFS_SERVER(inode)->lru_dirty, req);
	spin_unlock(&nfs_wreq_lock);
	mark_inode_dirty(inode);
}
369
370/*
371 * Check if a request is dirty
372 */
373static inline int
374nfs_dirty_request(struct nfs_page *req)
375{
376	struct inode *inode = req->wb_inode;
377	return !list_empty(&req->wb_list) && req->wb_list_head == &inode->u.nfs_i.dirty;
378}
379
380#ifdef CONFIG_NFS_V3
381/*
382 * Add a request to the inode's commit list.
383 */
static inline void
nfs_mark_request_commit(struct nfs_page *req)
{
	struct inode *inode = req->wb_inode;

	spin_lock(&nfs_wreq_lock);
	/* Put the request on the inode's commit list and move it to the
	 * per-server commit LRU (off whatever LRU it was on before). */
	nfs_list_add_request(req, &inode->u.nfs_i.commit);
	inode->u.nfs_i.ncommit++;
	__nfs_del_lru(req);
	__nfs_add_lru(&NFS_SERVER(inode)->lru_commit, req);
	spin_unlock(&nfs_wreq_lock);
	mark_inode_dirty(inode);
}
397#endif
398
399/*
400 * Wait for a request to complete.
401 *
402 * Interruptible by signals only if mounted with intr flag.
403 */
/*
 * Wait for all in-flight (NFS_WBACK_BUSY) requests in the page range
 * [idx_start, idx_start + npages) to complete; npages == 0 means "to
 * the end of the file". If @file is set, only its requests are waited
 * on. Returns the number of requests waited for, or a negative error.
 */
static int
nfs_wait_on_requests(struct inode *inode, struct file *file, unsigned long idx_start, unsigned int npages)
{
	struct list_head	*p, *head;
	unsigned long		idx_end;
	unsigned int		res = 0;
	int			error;

	if (npages == 0)
		idx_end = ~0;
	else
		idx_end = idx_start + npages - 1;

	head = &inode->u.nfs_i.writeback;
 restart:
	spin_lock(&nfs_wreq_lock);
	list_for_each_prev(p, head) {
		unsigned long pg_idx;
		struct nfs_page *req = nfs_inode_wb_entry(p);

		if (file && req->wb_file != file)
			continue;

		/* List is sorted by index and scanned backwards, so once
		 * we drop below idx_start we are done. */
		pg_idx = page_index(req->wb_page);
		if (pg_idx < idx_start)
			break;
		if (pg_idx > idx_end)
			continue;

		if (!NFS_WBACK_BUSY(req))
			continue;
		/* Hold a reference across the sleep; the list may change
		 * while we block, so restart the scan afterwards. */
		req->wb_count++;
		spin_unlock(&nfs_wreq_lock);
		error = nfs_wait_on_request(req);
		nfs_release_request(req);
		if (error < 0)
			return error;
		res++;
		goto restart;
	}
	spin_unlock(&nfs_wreq_lock);
	return res;
}
447
448/**
449 * nfs_scan_lru_dirty_timeout - Scan LRU list for timed out dirty requests
450 * @server: NFS superblock data
451 * @dst: destination list
452 *
453 * Moves a maximum of 'wpages' requests from the NFS dirty page LRU list.
454 * The elements are checked to ensure that they form a contiguous set
455 * of pages, and that they originated from the same file.
456 */
457int
458nfs_scan_lru_dirty_timeout(struct nfs_server *server, struct list_head *dst)
459{
460	struct inode *inode;
461	int npages;
462
463	npages = nfs_scan_lru_timeout(&server->lru_dirty, dst, server->wpages);
464	if (npages) {
465		inode = nfs_list_entry(dst->next)->wb_inode;
466		inode->u.nfs_i.ndirty -= npages;
467	}
468	return npages;
469}
470
471/**
472 * nfs_scan_lru_dirty - Scan LRU list for dirty requests
473 * @server: NFS superblock data
474 * @dst: destination list
475 *
476 * Moves a maximum of 'wpages' requests from the NFS dirty page LRU list.
477 * The elements are checked to ensure that they form a contiguous set
478 * of pages, and that they originated from the same file.
479 */
480int
481nfs_scan_lru_dirty(struct nfs_server *server, struct list_head *dst)
482{
483	struct inode *inode;
484	int npages;
485
486	npages = nfs_scan_lru(&server->lru_dirty, dst, server->wpages);
487	if (npages) {
488		inode = nfs_list_entry(dst->next)->wb_inode;
489		inode->u.nfs_i.ndirty -= npages;
490	}
491	return npages;
492}
493
494/*
495 * nfs_scan_dirty - Scan an inode for dirty requests
496 * @inode: NFS inode to scan
497 * @dst: destination list
498 * @file: if set, ensure we match requests from this file
499 * @idx_start: lower bound of page->index to scan.
500 * @npages: idx_start + npages sets the upper bound to scan.
501 *
502 * Moves requests from the inode's dirty page list.
503 * The requests are *not* checked to ensure that they form a contiguous set.
504 */
505static int
506nfs_scan_dirty(struct inode *inode, struct list_head *dst, struct file *file, unsigned long idx_start, unsigned int npages)
507{
508	int	res;
509	res = nfs_scan_list(&inode->u.nfs_i.dirty, dst, file, idx_start, npages);
510	inode->u.nfs_i.ndirty -= res;
511	if ((inode->u.nfs_i.ndirty == 0) != list_empty(&inode->u.nfs_i.dirty))
512		printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n");
513	return res;
514}
515
516#ifdef CONFIG_NFS_V3
517/**
518 * nfs_scan_lru_commit_timeout - Scan LRU list for timed out commit requests
519 * @server: NFS superblock data
520 * @dst: destination list
521 *
 * Finds the first timed-out request in the NFS commit LRU list and moves it
523 * to the list dst. If such an element is found, we move all other commit
524 * requests that apply to the same inode.
525 * The assumption is that doing everything in a single commit-to-disk is
526 * the cheaper alternative.
527 */
528int
529nfs_scan_lru_commit_timeout(struct nfs_server *server, struct list_head *dst)
530{
531	struct inode *inode;
532	int npages;
533
534	npages = nfs_scan_lru_timeout(&server->lru_commit, dst, 1);
535	if (npages) {
536		inode = nfs_list_entry(dst->next)->wb_inode;
537		npages += nfs_scan_list(&inode->u.nfs_i.commit, dst, NULL, 0, 0);
538		inode->u.nfs_i.ncommit -= npages;
539	}
540	return npages;
541}
542
543
544/**
 * nfs_scan_lru_commit - Scan LRU list for commit requests
546 * @server: NFS superblock data
547 * @dst: destination list
548 *
549 * Finds the first request in the NFS commit LRU list and moves it
550 * to the list dst. If such an element is found, we move all other commit
551 * requests that apply to the same inode.
552 * The assumption is that doing everything in a single commit-to-disk is
553 * the cheaper alternative.
554 */
555int
556nfs_scan_lru_commit(struct nfs_server *server, struct list_head *dst)
557{
558	struct inode *inode;
559	int npages;
560
561	npages = nfs_scan_lru(&server->lru_commit, dst, 1);
562	if (npages) {
563		inode = nfs_list_entry(dst->next)->wb_inode;
564		npages += nfs_scan_list(&inode->u.nfs_i.commit, dst, NULL, 0, 0);
565		inode->u.nfs_i.ncommit -= npages;
566	}
567	return npages;
568}
569
570/*
571 * nfs_scan_commit - Scan an inode for commit requests
572 * @inode: NFS inode to scan
573 * @dst: destination list
574 * @file: if set, ensure we collect requests from this file only.
575 * @idx_start: lower bound of page->index to scan.
576 * @npages: idx_start + npages sets the upper bound to scan.
577 *
578 * Moves requests from the inode's 'commit' request list.
579 * The requests are *not* checked to ensure that they form a contiguous set.
580 */
581static int
582nfs_scan_commit(struct inode *inode, struct list_head *dst, struct file *file, unsigned long idx_start, unsigned int npages)
583{
584	int	res;
585	res = nfs_scan_list(&inode->u.nfs_i.commit, dst, file, idx_start, npages);
586	inode->u.nfs_i.ncommit -= res;
587	if ((inode->u.nfs_i.ncommit == 0) != list_empty(&inode->u.nfs_i.commit))
588		printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
589	return res;
590}
591#endif
592
593
594/*
595 * Try to update any existing write request, or create one if there is none.
596 * In order to match, the request's credentials must match those of
597 * the calling process.
598 *
599 * Note: Should always be called with the Page Lock held!
600 */
static struct nfs_page *
nfs_update_request(struct file* file, struct inode *inode, struct page *page,
		   unsigned int offset, unsigned int bytes)
{
	struct nfs_page		*req, *new = NULL;
	unsigned long		rqend, end;

	end = offset + bytes;

	for (;;) {
		/* Loop over all inode entries and see if we find
		 * A request for the page we wish to update
		 */
		spin_lock(&nfs_wreq_lock);
		req = _nfs_find_request(inode, page);
		if (req) {
			if (!nfs_lock_request_dontget(req)) {
				int error;
				/* Request is busy: drop the lock, wait for
				 * it to finish and retry the lookup. */
				spin_unlock(&nfs_wreq_lock);
				error = nfs_wait_on_request(req);
				nfs_release_request(req);
				if (error < 0)
					return ERR_PTR(error);
				continue;
			}
			spin_unlock(&nfs_wreq_lock);
			/* Found an existing request: the speculatively
			 * allocated one is no longer needed. */
			if (new)
				nfs_release_request(new);
			break;
		}

		if (new) {
			/* Second pass confirmed there is no existing
			 * request: install the one we allocated. */
			nfs_lock_request_dontget(new);
			nfs_inode_add_request(inode, new);
			spin_unlock(&nfs_wreq_lock);
			nfs_mark_request_dirty(new);
			return new;
		}
		spin_unlock(&nfs_wreq_lock);

		/* Allocate outside the spinlock, then loop to re-check
		 * that no request appeared in the meantime. */
		new = nfs_create_request(nfs_file_cred(file), inode, page, offset, bytes);
		if (IS_ERR(new))
			return new;
		if (file) {
			new->wb_file = file;
			get_file(file);
		}
		/* If the region is locked, adjust the timeout */
		if (region_locked(inode, new))
			new->wb_timeout = jiffies + NFS_WRITEBACK_LOCKDELAY;
		else
			new->wb_timeout = jiffies + NFS_WRITEBACK_DELAY;
	}

	/* We have a request for our page.
	 * If the creds don't match, or the
	 * page addresses don't match,
	 * tell the caller to wait on the conflicting
	 * request.
	 */
	rqend = req->wb_offset + req->wb_bytes;
	if (req->wb_file != file
	    || req->wb_page != page
	    || !nfs_dirty_request(req)
	    || offset > rqend || end < req->wb_offset) {
		nfs_unlock_request(req);
		return ERR_PTR(-EBUSY);
	}

	/* Okay, the request matches. Update the region */
	if (offset < req->wb_offset) {
		req->wb_offset = offset;
		req->wb_bytes = rqend - req->wb_offset;
	}

	if (end > rqend)
		req->wb_bytes = end - req->wb_offset;

	return req;
}
681
682#define NFS_STRATEGY_PAGES      8
/*
 * Decide whether to flush the inode's dirty pages now.
 * NFSv2: flush only after NFS_STRATEGY_PAGES * wpages pages are dirty.
 * NFSv3: flush as soon as a full RPC's worth (wpages) is dirty.
 */
static void
nfs_strategy(struct inode *inode)
{
	unsigned int	dirty, wpages;

	dirty  = inode->u.nfs_i.ndirty;
	wpages = NFS_SERVER(inode)->wpages;
#ifdef CONFIG_NFS_V3
	if (NFS_PROTO(inode)->version == 2) {
		if (dirty >= NFS_STRATEGY_PAGES * wpages)
			nfs_flush_file(inode, NULL, 0, 0, 0);
	} else if (dirty >= wpages)
		nfs_flush_file(inode, NULL, 0, 0, 0);
#else
	if (dirty >= NFS_STRATEGY_PAGES * wpages)
		nfs_flush_file(inode, NULL, 0, 0, 0);
#endif
}
701
/*
 * Flush any pending write request for @page that belongs to a
 * different file/credential before the caller modifies the page.
 * Returns 0 on success or a negative error from nfs_wb_page().
 */
int
nfs_flush_incompatible(struct file *file, struct page *page)
{
	struct rpc_cred	*cred = nfs_file_cred(file);
	struct inode	*inode = page->mapping->host;
	struct nfs_page	*req;
	int		status = 0;
	/*
	 * Look for a request corresponding to this page. If there
	 * is one, and it belongs to another file, we flush it out
	 * before we try to copy anything into the page. Do this
	 * due to the lack of an ACCESS-type call in NFSv2.
	 * Also do the same if we find a request from an existing
	 * dropped page.
	 */
	req = nfs_find_request(inode,page);
	if (req) {
		if (req->wb_file != file || req->wb_cred != cred || req->wb_page != page)
			status = nfs_wb_page(inode, page);
		/* Drop the reference taken by nfs_find_request(). */
		nfs_release_request(req);
	}
	return (status < 0) ? status : 0;
}
725
/*
 * Update (and possibly schedule writeback of) @count bytes at @offset
 * in @page after the caller has copied new data into the page.
 * Returns 0 on success or a negative error.
 */
int
nfs_updatepage(struct file *file, struct page *page, unsigned int offset, unsigned int count)
{
	struct dentry	*dentry = file->f_dentry;
	struct inode	*inode = page->mapping->host;
	struct nfs_page	*req;
	loff_t		end;
	int		status = 0;

	dprintk("NFS:      nfs_updatepage(%s/%s %d@%Ld)\n",
		dentry->d_parent->d_name.name, dentry->d_name.name,
		count, (long long)(page_offset(page) +offset));

	/*
	 * If wsize is smaller than page size, update and write
	 * page synchronously.
	 */
	if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE || IS_SYNC(inode))
		return nfs_writepage_sync(file, inode, page, offset, count);

	/*
	 * Try to find an NFS request corresponding to this page
	 * and update it.
	 * If the existing request cannot be updated, we must flush
	 * it out now.
	 */
	do {
		req = nfs_update_request(file, inode, page, offset, count);
		status = (IS_ERR(req)) ? PTR_ERR(req) : 0;
		if (status != -EBUSY)
			break;
		/* Request could not be updated. Flush it out and try again */
		status = nfs_wb_page(inode, page);
	} while (status >= 0);
	if (status < 0)
		goto done;

	status = 0;
	/* Extend the cached file size if this write goes past EOF. */
	end = ((loff_t)page->index<<PAGE_CACHE_SHIFT) + (loff_t)(offset + count);
	if (inode->i_size < end)
		inode->i_size = end;

	/* If we wrote past the end of the page.
	 * Call the strategy routine so it can send out a bunch
	 * of requests.
	 */
	if (req->wb_offset == 0 && req->wb_bytes == PAGE_CACHE_SIZE) {
		/* The request now covers the whole page, so its contents
		 * are fully known and the page can be marked uptodate. */
		SetPageUptodate(page);
		nfs_unlock_request(req);
		nfs_strategy(inode);
	} else
		nfs_unlock_request(req);
done:
        dprintk("NFS:      nfs_updatepage returns %d (isize %Ld)\n",
                                                status, (long long)inode->i_size);
	if (status < 0)
		ClearPageUptodate(page);
	return status;
}
785
786/*
787 * Set up the argument/result storage required for the RPC call.
788 */
789static void
790nfs_write_rpcsetup(struct list_head *head, struct nfs_write_data *data)
791{
792	struct nfs_page		*req;
793	struct page		**pages;
794	unsigned int		count;
795
796	/* Set up the RPC argument and reply structs
797	 * NB: take care not to mess about with data->commit et al. */
798
799	pages = data->args.pages;
800	count = 0;
801	while (!list_empty(head)) {
802		struct nfs_page *req = nfs_list_entry(head->next);
803		nfs_list_remove_request(req);
804		nfs_list_add_request(req, &data->pages);
805		*pages++ = req->wb_page;
806		count += req->wb_bytes;
807	}
808	req = nfs_list_entry(data->pages.next);
809	data->inode = req->wb_inode;
810	data->cred = req->wb_cred;
811	data->args.fh     = NFS_FH(req->wb_inode);
812	data->args.offset = page_offset(req->wb_page) + req->wb_offset;
813	data->args.pgbase = req->wb_offset;
814	data->args.count  = count;
815	data->res.fattr   = &data->fattr;
816	data->res.count   = count;
817	data->res.verf    = &data->verf;
818}
819
820
821/*
822 * Create an RPC task for the given write request and kick it.
823 * The page must have been locked by the caller.
824 *
825 * It may happen that the page we're passed is not marked dirty.
826 * This is the case if nfs_updatepage detects a conflicting request
827 * that has been written but not committed.
828 */
static int
nfs_flush_one(struct list_head *head, struct inode *inode, int how)
{
	struct rpc_clnt 	*clnt = NFS_CLIENT(inode);
	struct nfs_write_data	*data;
	struct rpc_task		*task;
	struct rpc_message	msg;
	int                     flags,
				nfsvers = NFS_PROTO(inode)->version,
				async = !(how & FLUSH_SYNC),
				stable = (how & FLUSH_STABLE);
	sigset_t		oldset;


	data = nfs_writedata_alloc();
	if (!data)
		goto out_bad;
	task = &data->task;

	/* Set the initial flags for the task.  */
	flags = (async) ? RPC_TASK_ASYNC : 0;

	/* Set up the argument struct */
	nfs_write_rpcsetup(head, data);
	/* Pick the stability level: v2 is always FILE_SYNC; for v3 a
	 * stable flush uses FILE_SYNC unless commits are outstanding
	 * (then DATA_SYNC); otherwise write UNSTABLE and commit later. */
	if (nfsvers < 3)
		data->args.stable = NFS_FILE_SYNC;
	else if (stable) {
		if (!inode->u.nfs_i.ncommit)
			data->args.stable = NFS_FILE_SYNC;
		else
			data->args.stable = NFS_DATA_SYNC;
	} else
		data->args.stable = NFS_UNSTABLE;

	/* Finalize the task. */
	rpc_init_task(task, clnt, nfs_writeback_done, flags);
	task->tk_calldata = data;
	/* Release requests */
	task->tk_release = nfs_writedata_release;

#ifdef CONFIG_NFS_V3
	msg.rpc_proc = (nfsvers == 3) ? NFS3PROC_WRITE : NFSPROC_WRITE;
#else
	msg.rpc_proc = NFSPROC_WRITE;
#endif
	msg.rpc_argp = &data->args;
	msg.rpc_resp = &data->res;
	msg.rpc_cred = data->cred;

	dprintk("NFS: %4d initiated write call (req %x/%Ld count %u)\n",
		task->tk_pid,
		inode->i_dev,
		(long long)NFS_FILEID(inode),
		data->args.count);

	rpc_clnt_sigmask(clnt, &oldset);
	rpc_call_setup(task, &msg, 0);
	lock_kernel();
	rpc_execute(task);
	unlock_kernel();
	rpc_clnt_sigunmask(clnt, &oldset);
	return 0;
 out_bad:
	/* Allocation failed: put every request back on the dirty list
	 * so a later flush can retry it. */
	while (!list_empty(head)) {
		struct nfs_page *req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_mark_request_dirty(req);
		nfs_unlock_request(req);
	}
	return -ENOMEM;
}
900
/*
 * Flush a list of write requests: coalesce them into chunks of at most
 * @wpages contiguous pages and send one WRITE per chunk.
 * Returns the number of pages flushed, or a negative error.
 */
int
nfs_flush_list(struct list_head *head, int wpages, int how)
{
	LIST_HEAD(one_request);
	struct nfs_page		*req;
	int			error = 0;
	unsigned int		pages = 0;

	while (!list_empty(head)) {
		pages += nfs_coalesce_requests(head, &one_request, wpages);
		req = nfs_list_entry(one_request.next);
		error = nfs_flush_one(&one_request, req->wb_inode, how);
		if (error < 0)
			break;
	}
	if (error >= 0)
		return pages;

	/* On error, requeue whatever was not sent as dirty so it can
	 * be retried later. */
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_mark_request_dirty(req);
		nfs_unlock_request(req);
	}
	return error;
}
927
928
929/*
930 * This function is called when the WRITE call is complete.
931 */
static void
nfs_writeback_done(struct rpc_task *task)
{
	struct nfs_write_data	*data = (struct nfs_write_data *) task->tk_calldata;
	struct nfs_writeargs	*argp = &data->args;
	struct nfs_writeres	*resp = &data->res;
	struct inode		*inode = data->inode;
	struct nfs_page		*req;
	struct page		*page;

	dprintk("NFS: %4d nfs_writeback_done (status %d)\n",
		task->tk_pid, task->tk_status);

	if (nfs_async_handle_jukebox(task))
		return;

	/* We can't handle that yet but we check for it nevertheless */
	if (resp->count < argp->count && task->tk_status >= 0) {
		static unsigned long    complain;
		/* Rate-limit the warning to once per 300 seconds. */
		if (time_before(complain, jiffies)) {
			printk(KERN_WARNING
			       "NFS: Server wrote less than requested.\n");
			complain = jiffies + 300 * HZ;
		}
		/* Can't do anything about it right now except throw
		 * an error. */
		task->tk_status = -EIO;
	}
#ifdef CONFIG_NFS_V3
	if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
		/* We tried a write call, but the server did not
		 * commit data to stable storage even though we
		 * requested it.
		 * Note: There is a known bug in Tru64 < 5.0 in which
		 *	 the server reports NFS_DATA_SYNC, but performs
		 *	 NFS_FILE_SYNC. We therefore implement this checking
		 *	 as a dprintk() in order to avoid filling syslog.
		 */
		static unsigned long    complain;

		if (time_before(complain, jiffies)) {
			dprintk("NFS: faulty NFSv3 server %s:"
				" (committed = %d) != (stable = %d)\n",
				NFS_SERVER(inode)->hostname,
				resp->verf->committed, argp->stable);
			complain = jiffies + 300 * HZ;
		}
	}
#endif

	nfs_write_attributes(inode, resp->fattr);
	/* Process every request covered by this WRITE call. */
	while (!list_empty(&data->pages)) {
		req = nfs_list_entry(data->pages.next);
		nfs_list_remove_request(req);
		page = req->wb_page;

		dprintk("NFS: write (%x/%Ld %d@%Ld)",
			req->wb_inode->i_dev,
			(long long)NFS_FILEID(req->wb_inode),
			req->wb_bytes,
			(long long)(page_offset(page) + req->wb_offset));

		if (task->tk_status < 0) {
			/* Write failed: invalidate the page and report the
			 * error back through the file. */
			ClearPageUptodate(page);
			SetPageError(page);
			if (req->wb_file)
				req->wb_file->f_error = task->tk_status;
			nfs_inode_remove_request(req);
			dprintk(", error = %d\n", task->tk_status);
			goto next;
		}

#ifdef CONFIG_NFS_V3
		/* Data already on stable storage: the request is done.
		 * Otherwise save the verifier and queue it for COMMIT. */
		if (argp->stable != NFS_UNSTABLE || resp->verf->committed == NFS_FILE_SYNC) {
			nfs_inode_remove_request(req);
			dprintk(" OK\n");
			goto next;
		}
		memcpy(&req->wb_verf, resp->verf, sizeof(req->wb_verf));
		req->wb_timeout = jiffies + NFS_COMMIT_DELAY;
		nfs_mark_request_commit(req);
		dprintk(" marked for commit\n");
#else
		nfs_inode_remove_request(req);
#endif
	next:
		nfs_unlock_request(req);
	}
}
1021
1022
1023#ifdef CONFIG_NFS_V3
1024/*
1025 * Set up the argument/result storage required for the RPC call.
1026 */
static void
nfs_commit_rpcsetup(struct list_head *head, struct nfs_write_data *data)
{
	struct nfs_page		*first, *last;
	struct inode		*inode;
	loff_t			start, end, len;

	/* Set up the RPC argument and reply structs
	 * NB: take care not to mess about with data->commit et al. */

	/* Take over all requests from 'head'. */
	list_splice(head, &data->pages);
	INIT_LIST_HEAD(head);
	first = nfs_list_entry(data->pages.next);
	last = nfs_list_entry(data->pages.prev);
	inode = first->wb_inode;

	/*
	 * Determine the offset range of requests in the COMMIT call.
	 * We rely on the fact that data->pages is an ordered list...
	 */
	start = page_offset(first->wb_page) + first->wb_offset;
	end = page_offset(last->wb_page) + (last->wb_offset + last->wb_bytes);
	len = end - start;
	/* If 'len' is not a 32-bit quantity, pass '0' in the COMMIT call */
	if (end >= inode->i_size || len < 0 || len > (~((u32)0) >> 1))
		len = 0;

	data->inode	  = inode;
	data->cred	  = first->wb_cred;
	data->args.fh     = NFS_FH(inode);
	data->args.offset = start;
	data->res.count   = data->args.count = (u32)len;
	data->res.fattr   = &data->fattr;
	data->res.verf    = &data->verf;
}
1062
1063/*
1064 * Commit dirty pages
1065 */
int
nfs_commit_list(struct list_head *head, int how)
{
	struct rpc_message	msg;
	struct rpc_clnt		*clnt;
	struct nfs_write_data	*data;
	struct rpc_task         *task;
	struct nfs_page         *req;
	int                     flags,
				async = !(how & FLUSH_SYNC);
	sigset_t		oldset;

	data = nfs_writedata_alloc();

	if (!data)
		goto out_bad;
	task = &data->task;

	flags = (async) ? RPC_TASK_ASYNC : 0;

	/* Set up the argument struct */
	nfs_commit_rpcsetup(head, data);
	req = nfs_list_entry(data->pages.next);
	clnt = NFS_CLIENT(req->wb_inode);

	rpc_init_task(task, clnt, nfs_commit_done, flags);
	task->tk_calldata = data;
	/* Release requests */
	task->tk_release = nfs_writedata_release;

	msg.rpc_proc = NFS3PROC_COMMIT;
	msg.rpc_argp = &data->args;
	msg.rpc_resp = &data->res;
	msg.rpc_cred = data->cred;

	dprintk("NFS: %4d initiated commit call\n", task->tk_pid);
	rpc_clnt_sigmask(clnt, &oldset);
	rpc_call_setup(task, &msg, 0);
	lock_kernel();
	rpc_execute(task);
	unlock_kernel();
	rpc_clnt_sigunmask(clnt, &oldset);
	return 0;
 out_bad:
	/* Allocation failed: requeue every request for a later commit. */
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_mark_request_commit(req);
		nfs_unlock_request(req);
	}
	return -ENOMEM;
}
1118
1119/*
1120 * COMMIT call returned
1121 */
1122static void
1123nfs_commit_done(struct rpc_task *task)
1124{
1125	struct nfs_write_data	*data = (struct nfs_write_data *)task->tk_calldata;
1126	struct nfs_writeres	*resp = &data->res;
1127	struct nfs_page		*req;
1128	struct inode		*inode = data->inode;
1129
1130        dprintk("NFS: %4d nfs_commit_done (status %d)\n",
1131                                task->tk_pid, task->tk_status);
1132
1133	if (nfs_async_handle_jukebox(task))
1134		return;
1135
1136	nfs_write_attributes(inode, resp->fattr);
1137	while (!list_empty(&data->pages)) {
1138		req = nfs_list_entry(data->pages.next);
1139		nfs_list_remove_request(req);
1140
1141		dprintk("NFS: commit (%x/%Ld %d@%Ld)",
1142			req->wb_inode->i_dev,
1143			(long long)NFS_FILEID(req->wb_inode),
1144			req->wb_bytes,
1145			(long long)(page_offset(req->wb_page) + req->wb_offset));
1146		if (task->tk_status < 0) {
1147			if (req->wb_file)
1148				req->wb_file->f_error = task->tk_status;
1149			nfs_inode_remove_request(req);
1150			dprintk(", error = %d\n", task->tk_status);
1151			goto next;
1152		}
1153
1154		/* Okay, COMMIT succeeded, apparently. Check the verifier
1155		 * returned by the server against all stored verfs. */
1156		if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
1157			/* We have a match */
1158			nfs_inode_remove_request(req);
1159			dprintk(" OK\n");
1160			goto next;
1161		}
1162		/* We have a mismatch. Write the page again */
1163		dprintk(" mismatch\n");
1164		nfs_mark_request_dirty(req);
1165	next:
1166		nfs_unlock_request(req);
1167	}
1168}
1169#endif
1170
1171int nfs_flush_file(struct inode *inode, struct file *file, unsigned long idx_start,
1172		   unsigned int npages, int how)
1173{
1174	LIST_HEAD(head);
1175	int			res,
1176				error = 0;
1177
1178	spin_lock(&nfs_wreq_lock);
1179	res = nfs_scan_dirty(inode, &head, file, idx_start, npages);
1180	spin_unlock(&nfs_wreq_lock);
1181	if (res)
1182		error = nfs_flush_list(&head, NFS_SERVER(inode)->wpages, how);
1183	if (error < 0)
1184		return error;
1185	return res;
1186}
1187
#ifdef CONFIG_NFS_V3
/*
 * Collect the requests awaiting COMMIT for a range of an inode and
 * send a single commit call for them. Returns the number of requests
 * gathered, or a negative error from the commit itself.
 */
int nfs_commit_file(struct inode *inode, struct file *file, unsigned long idx_start,
		    unsigned int npages, int how)
{
	LIST_HEAD(head);
	int		count;
	int		status = 0;

	/* nfs_wreq_lock guards the per-inode request lists */
	spin_lock(&nfs_wreq_lock);
	count = nfs_scan_commit(inode, &head, file, idx_start, npages);
	spin_unlock(&nfs_wreq_lock);

	if (count != 0)
		status = nfs_commit_list(&head, how);

	return (status < 0) ? status : count;
}
#endif
1206
1207int nfs_sync_file(struct inode *inode, struct file *file, unsigned long idx_start,
1208		  unsigned int npages, int how)
1209{
1210	int	error,
1211		wait;
1212
1213	wait = how & FLUSH_WAIT;
1214	how &= ~FLUSH_WAIT;
1215
1216	if (!inode && file)
1217		inode = file->f_dentry->d_inode;
1218
1219	do {
1220		error = 0;
1221		if (wait)
1222			error = nfs_wait_on_requests(inode, file, idx_start, npages);
1223		if (error == 0)
1224			error = nfs_flush_file(inode, file, idx_start, npages, how);
1225#ifdef CONFIG_NFS_V3
1226		if (error == 0)
1227			error = nfs_commit_file(inode, file, idx_start, npages, how);
1228#endif
1229	} while (error > 0);
1230	return error;
1231}
1232
1233int nfs_init_writepagecache(void)
1234{
1235	nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
1236					     sizeof(struct nfs_write_data),
1237					     0, SLAB_HWCACHE_ALIGN,
1238					     NULL, NULL);
1239	if (nfs_wdata_cachep == NULL)
1240		return -ENOMEM;
1241
1242	return 0;
1243}
1244
1245void nfs_destroy_writepagecache(void)
1246{
1247	if (kmem_cache_destroy(nfs_wdata_cachep))
1248		printk(KERN_INFO "nfs_write_data: not all structures were freed\n");
1249}
1250
1251