1/*-
2 * Copyright (c) 1991, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Keith Muller of the University of California, San Diego and Lance
7 * Visser of Convex Computer Corporation.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#if 0
35#ifndef lint
36static char const copyright[] =
37"@(#) Copyright (c) 1991, 1993, 1994\n\
38	The Regents of the University of California.  All rights reserved.\n";
39#endif /* not lint */
40
41#ifndef lint
42static char sccsid[] = "@(#)dd.c	8.5 (Berkeley) 4/2/94";
43#endif /* not lint */
44#endif
45#include <sys/cdefs.h>
46__FBSDID("$FreeBSD$");
47
48#include <sys/param.h>
49#include <sys/stat.h>
50#include <sys/conf.h>
51#include <sys/disklabel.h>
52#include <sys/filio.h>
53#include <sys/time.h>
54
55#include <ctype.h>
56#include <err.h>
57#include <errno.h>
58#include <fcntl.h>
59#include <inttypes.h>
60#include <locale.h>
61#include <stdio.h>
62#include <stdlib.h>
63#include <string.h>
64#include <unistd.h>
65
66#include "dd.h"
67#include "extern.h"
68
69static void dd_close(void);
70static void dd_in(void);
71static void getfdtype(IO *);
72static void setup(void);
73
IO	in, out;		/* input/output state */
STAT	st;			/* statistics */
void	(*cfunc)(void);		/* conversion function */
uintmax_t cpy_cnt;		/* # of blocks to copy */
static off_t	pending = 0;	/* pending seek if sparse */
u_int	ddflags = 0;		/* conversion options */
size_t	cbsz;			/* conversion block size */
uintmax_t files_cnt = 1;	/* # of files to copy */
const	u_char *ctab;		/* conversion table */
char	fill_char;		/* Character to fill with if defined */
/*
 * Polled by dd_in() after each converted block: when nonzero, summary()
 * is called.  Presumably set from a signal handler defined elsewhere --
 * confirm against siginfo_handler().
 */
volatile sig_atomic_t need_summary;
85
86int
87main(int argc __unused, char *argv[])
88{
89	(void)setlocale(LC_CTYPE, "");
90	jcl(argv);
91	setup();
92
93	(void)signal(SIGINFO, siginfo_handler);
94	(void)signal(SIGINT, terminate);
95
96	atexit(summary);
97
98	while (files_cnt--)
99		dd_in();
100
101	dd_close();
102	/*
103	 * Some devices such as cfi(4) may perform significant amounts
104	 * of work when a write descriptor is closed.  Close the out
105	 * descriptor explicitly so that the summary handler (called
106	 * from an atexit() hook) includes this work.
107	 */
108	close(out.fd);
109	exit(0);
110}
111
/*
 * Return 1 if an odd number of bits are set in c, 0 otherwise.
 */
static int
parity(u_char c)
{
	int odd = 0;
	int bit;

	/* Fold each of the eight bits into a running one-bit sum. */
	for (bit = 0; bit < 8; bit++)
		odd ^= (c >> bit) & 1;
	return (odd);
}
121
122static void
123setup(void)
124{
125	u_int cnt;
126	struct timeval tv;
127
128	if (in.name == NULL) {
129		in.name = "stdin";
130		in.fd = STDIN_FILENO;
131	} else {
132		in.fd = open(in.name, O_RDONLY, 0);
133		if (in.fd == -1)
134			err(1, "%s", in.name);
135	}
136
137	getfdtype(&in);
138
139	if (files_cnt > 1 && !(in.flags & ISTAPE))
140		errx(1, "files is not supported for non-tape devices");
141
142	if (out.name == NULL) {
143		/* No way to check for read access here. */
144		out.fd = STDOUT_FILENO;
145		out.name = "stdout";
146	} else {
147#define	OFLAGS \
148    (O_CREAT | (ddflags & (C_SEEK | C_NOTRUNC) ? 0 : O_TRUNC))
149		out.fd = open(out.name, O_RDWR | OFLAGS, DEFFILEMODE);
150		/*
151		 * May not have read access, so try again with write only.
152		 * Without read we may have a problem if output also does
153		 * not support seeks.
154		 */
155		if (out.fd == -1) {
156			out.fd = open(out.name, O_WRONLY | OFLAGS, DEFFILEMODE);
157			out.flags |= NOREAD;
158		}
159		if (out.fd == -1)
160			err(1, "%s", out.name);
161	}
162
163	getfdtype(&out);
164
165	/*
166	 * Allocate space for the input and output buffers.  If not doing
167	 * record oriented I/O, only need a single buffer.
168	 */
169	if (!(ddflags & (C_BLOCK | C_UNBLOCK))) {
170		if ((in.db = malloc(out.dbsz + in.dbsz - 1)) == NULL)
171			err(1, "input buffer");
172		out.db = in.db;
173	} else if ((in.db = malloc(MAX(in.dbsz, cbsz) + cbsz)) == NULL ||
174	    (out.db = malloc(out.dbsz + cbsz)) == NULL)
175		err(1, "output buffer");
176	in.dbp = in.db;
177	out.dbp = out.db;
178
179	/* Position the input/output streams. */
180	if (in.offset)
181		pos_in();
182	if (out.offset)
183		pos_out();
184
185	/*
186	 * Truncate the output file.  If it fails on a type of output file
187	 * that it should _not_ fail on, error out.
188	 */
189	if ((ddflags & (C_OF | C_SEEK | C_NOTRUNC)) == (C_OF | C_SEEK) &&
190	    out.flags & ISTRUNC)
191		if (ftruncate(out.fd, out.offset * out.dbsz) == -1)
192			err(1, "truncating %s", out.name);
193
194	if (ddflags & (C_LCASE  | C_UCASE | C_ASCII | C_EBCDIC | C_PARITY)) {
195		if (ctab != NULL) {
196			for (cnt = 0; cnt <= 0377; ++cnt)
197				casetab[cnt] = ctab[cnt];
198		} else {
199			for (cnt = 0; cnt <= 0377; ++cnt)
200				casetab[cnt] = cnt;
201		}
202		if ((ddflags & C_PARITY) && !(ddflags & C_ASCII)) {
203			/*
204			 * If the input is not EBCDIC, and we do parity
205			 * processing, strip input parity.
206			 */
207			for (cnt = 200; cnt <= 0377; ++cnt)
208				casetab[cnt] = casetab[cnt & 0x7f];
209		}
210		if (ddflags & C_LCASE) {
211			for (cnt = 0; cnt <= 0377; ++cnt)
212				casetab[cnt] = tolower(casetab[cnt]);
213		} else if (ddflags & C_UCASE) {
214			for (cnt = 0; cnt <= 0377; ++cnt)
215				casetab[cnt] = toupper(casetab[cnt]);
216		}
217		if ((ddflags & C_PARITY)) {
218			/*
219			 * This should strictly speaking be a no-op, but I
220			 * wonder what funny LANG settings could get us.
221			 */
222			for (cnt = 0; cnt <= 0377; ++cnt)
223				casetab[cnt] = casetab[cnt] & 0x7f;
224		}
225		if ((ddflags & C_PARSET)) {
226			for (cnt = 0; cnt <= 0377; ++cnt)
227				casetab[cnt] = casetab[cnt] | 0x80;
228		}
229		if ((ddflags & C_PAREVEN)) {
230			for (cnt = 0; cnt <= 0377; ++cnt)
231				if (parity(casetab[cnt]))
232					casetab[cnt] = casetab[cnt] | 0x80;
233		}
234		if ((ddflags & C_PARODD)) {
235			for (cnt = 0; cnt <= 0377; ++cnt)
236				if (!parity(casetab[cnt]))
237					casetab[cnt] = casetab[cnt] | 0x80;
238		}
239
240		ctab = casetab;
241	}
242
243	(void)gettimeofday(&tv, NULL);
244	st.start = tv.tv_sec + tv.tv_usec * 1e-6;
245}
246
247static void
248getfdtype(IO *io)
249{
250	struct stat sb;
251	int type;
252
253	if (fstat(io->fd, &sb) == -1)
254		err(1, "%s", io->name);
255	if (S_ISREG(sb.st_mode))
256		io->flags |= ISTRUNC;
257	if (S_ISCHR(sb.st_mode) || S_ISBLK(sb.st_mode)) {
258		if (ioctl(io->fd, FIODTYPE, &type) == -1) {
259			err(1, "%s", io->name);
260		} else {
261			if (type & D_TAPE)
262				io->flags |= ISTAPE;
263			else if (type & (D_DISK | D_MEM))
264				io->flags |= ISSEEK;
265			if (S_ISCHR(sb.st_mode) && (type & D_TAPE) == 0)
266				io->flags |= ISCHR;
267		}
268		return;
269	}
270	errno = 0;
271	if (lseek(io->fd, (off_t)0, SEEK_CUR) == -1 && errno == ESPIPE)
272		io->flags |= ISPIPE;
273	else
274		io->flags |= ISSEEK;
275}
276
277static void
278dd_in(void)
279{
280	ssize_t n;
281
282	for (;;) {
283		switch (cpy_cnt) {
284		case -1:			/* count=0 was specified */
285			return;
286		case 0:
287			break;
288		default:
289			if (st.in_full + st.in_part >= (uintmax_t)cpy_cnt)
290				return;
291			break;
292		}
293
294		/*
295		 * Zero the buffer first if sync; if doing block operations,
296		 * use spaces.
297		 */
298		if (ddflags & C_SYNC) {
299			if (ddflags & C_FILL)
300				memset(in.dbp, fill_char, in.dbsz);
301			else if (ddflags & (C_BLOCK | C_UNBLOCK))
302				memset(in.dbp, ' ', in.dbsz);
303			else
304				memset(in.dbp, 0, in.dbsz);
305		}
306
307		n = read(in.fd, in.dbp, in.dbsz);
308		if (n == 0) {
309			in.dbrcnt = 0;
310			return;
311		}
312
313		/* Read error. */
314		if (n == -1) {
315			/*
316			 * If noerror not specified, die.  POSIX requires that
317			 * the warning message be followed by an I/O display.
318			 */
319			if (!(ddflags & C_NOERROR))
320				err(1, "%s", in.name);
321			warn("%s", in.name);
322			summary();
323
324			/*
325			 * If it's a seekable file descriptor, seek past the
326			 * error.  If your OS doesn't do the right thing for
327			 * raw disks this section should be modified to re-read
328			 * in sector size chunks.
329			 */
330			if (in.flags & ISSEEK &&
331			    lseek(in.fd, (off_t)in.dbsz, SEEK_CUR))
332				warn("%s", in.name);
333
334			/* If sync not specified, omit block and continue. */
335			if (!(ddflags & C_SYNC))
336				continue;
337
338			/* Read errors count as full blocks. */
339			in.dbcnt += in.dbrcnt = in.dbsz;
340			++st.in_full;
341
342		/* Handle full input blocks. */
343		} else if ((size_t)n == in.dbsz) {
344			in.dbcnt += in.dbrcnt = n;
345			++st.in_full;
346
347		/* Handle partial input blocks. */
348		} else {
349			/* If sync, use the entire block. */
350			if (ddflags & C_SYNC)
351				in.dbcnt += in.dbrcnt = in.dbsz;
352			else
353				in.dbcnt += in.dbrcnt = n;
354			++st.in_part;
355		}
356
357		/*
358		 * POSIX states that if bs is set and no other conversions
359		 * than noerror, notrunc or sync are specified, the block
360		 * is output without buffering as it is read.
361		 */
362		if ((ddflags & ~(C_NOERROR | C_NOTRUNC | C_SYNC)) == C_BS) {
363			out.dbcnt = in.dbcnt;
364			dd_out(1);
365			in.dbcnt = 0;
366			continue;
367		}
368
369		if (ddflags & C_SWAB) {
370			if ((n = in.dbrcnt) & 1) {
371				++st.swab;
372				--n;
373			}
374			swab(in.dbp, in.dbp, (size_t)n);
375		}
376
377		in.dbp += in.dbrcnt;
378		(*cfunc)();
379		if (need_summary) {
380			summary();
381		}
382	}
383}
384
385/*
386 * Clean up any remaining I/O and flush output.  If necessary, the output file
387 * is truncated.
388 */
389static void
390dd_close(void)
391{
392	if (cfunc == def)
393		def_close();
394	else if (cfunc == block)
395		block_close();
396	else if (cfunc == unblock)
397		unblock_close();
398	if (ddflags & C_OSYNC && out.dbcnt && out.dbcnt < out.dbsz) {
399		if (ddflags & C_FILL)
400			memset(out.dbp, fill_char, out.dbsz - out.dbcnt);
401		else if (ddflags & (C_BLOCK | C_UNBLOCK))
402			memset(out.dbp, ' ', out.dbsz - out.dbcnt);
403		else
404			memset(out.dbp, 0, out.dbsz - out.dbcnt);
405		out.dbcnt = out.dbsz;
406	}
407	if (out.dbcnt || pending)
408		dd_out(1);
409}
410
411void
412dd_out(int force)
413{
414	u_char *outp;
415	size_t cnt, i, n;
416	ssize_t nw;
417	static int warned;
418	int sparse;
419
420	/*
421	 * Write one or more blocks out.  The common case is writing a full
422	 * output block in a single write; increment the full block stats.
423	 * Otherwise, we're into partial block writes.  If a partial write,
424	 * and it's a character device, just warn.  If a tape device, quit.
425	 *
426	 * The partial writes represent two cases.  1: Where the input block
427	 * was less than expected so the output block was less than expected.
428	 * 2: Where the input block was the right size but we were forced to
429	 * write the block in multiple chunks.  The original versions of dd(1)
430	 * never wrote a block in more than a single write, so the latter case
431	 * never happened.
432	 *
433	 * One special case is if we're forced to do the write -- in that case
434	 * we play games with the buffer size, and it's usually a partial write.
435	 */
436	outp = out.db;
437	for (n = force ? out.dbcnt : out.dbsz;; n = out.dbsz) {
438		for (cnt = n;; cnt -= nw) {
439			sparse = 0;
440			if (ddflags & C_SPARSE) {
441				sparse = 1;	/* Is buffer sparse? */
442				for (i = 0; i < cnt; i++)
443					if (outp[i] != 0) {
444						sparse = 0;
445						break;
446					}
447			}
448			if (sparse && !force) {
449				pending += cnt;
450				nw = cnt;
451			} else {
452				if (pending != 0) {
453					if (force)
454						pending--;
455					if (lseek(out.fd, pending, SEEK_CUR) ==
456					    -1)
457						err(2, "%s: seek error creating sparse file",
458						    out.name);
459					if (force)
460						write(out.fd, outp, 1);
461					pending = 0;
462				}
463				if (cnt)
464					nw = write(out.fd, outp, cnt);
465				else
466					return;
467			}
468
469			if (nw <= 0) {
470				if (nw == 0)
471					errx(1, "%s: end of device", out.name);
472				if (errno != EINTR)
473					err(1, "%s", out.name);
474				nw = 0;
475			}
476			outp += nw;
477			st.bytes += nw;
478			if ((size_t)nw == n) {
479				if (n != out.dbsz)
480					++st.out_part;
481				else
482					++st.out_full;
483				break;
484			}
485			++st.out_part;
486			if ((size_t)nw == cnt)
487				break;
488			if (out.flags & ISTAPE)
489				errx(1, "%s: short write on tape device",
490				    out.name);
491			if (out.flags & ISCHR && !warned) {
492				warned = 1;
493				warnx("%s: short write on character device",
494				    out.name);
495			}
496		}
497		if ((out.dbcnt -= n) < out.dbsz)
498			break;
499	}
500
501	/* Reassemble the output block. */
502	if (out.dbcnt)
503		(void)memmove(out.db, out.dbp - out.dbcnt, out.dbcnt);
504	out.dbp = out.db + out.dbcnt;
505}
506