1### the gnu tar specification:
2### http://www.gnu.org/software/tar/manual/tar.html
3###
4### and the pax format spec, which tar derives from:
5### http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html
6
7package Archive::Tar;
8require 5.005_03;
9
10use Cwd;
11use IO::Zlib;
12use IO::File;
13use Carp                qw(carp croak);
14use File::Spec          ();
15use File::Spec::Unix    ();
16use File::Path          ();
17
18use Archive::Tar::File;
19use Archive::Tar::Constant;
20
21require Exporter;
22
23use strict;
24use vars qw[$DEBUG $error $VERSION $WARN $FOLLOW_SYMLINK $CHOWN $CHMOD
25            $DO_NOT_USE_PREFIX $HAS_PERLIO $HAS_IO_STRING $SAME_PERMISSIONS
26            $INSECURE_EXTRACT_MODE $ZERO_PAD_NUMBERS @ISA @EXPORT $RESOLVE_SYMLINK
27         ];
28
29@ISA                    = qw[Exporter];
30@EXPORT                 = qw[ COMPRESS_GZIP COMPRESS_BZIP COMPRESS_XZ ];
31$DEBUG                  = 0;
32$WARN                   = 1;
33$FOLLOW_SYMLINK         = 0;
34$VERSION                = "2.40";
35$CHOWN                  = 1;
36$CHMOD                  = 1;
37$SAME_PERMISSIONS       = $> == 0 ? 1 : 0;
38$DO_NOT_USE_PREFIX      = 0;
39$INSECURE_EXTRACT_MODE  = 0;
40$ZERO_PAD_NUMBERS       = 0;
41$RESOLVE_SYMLINK        = $ENV{'PERL5_AT_RESOLVE_SYMLINK'} || 'speed';
42
43BEGIN {
44    use Config;
45    $HAS_PERLIO = $Config::Config{useperlio};
46
47    ### try and load IO::String anyway, so you can dynamically
48    ### switch between perlio and IO::String
49    $HAS_IO_STRING = eval {
50        require IO::String;
51        IO::String->import;
52        1;
53    } || 0;
54}
55
56=head1 NAME
57
58Archive::Tar - module for manipulations of tar archives
59
60=head1 SYNOPSIS
61
62    use Archive::Tar;
63    my $tar = Archive::Tar->new;
64
65    $tar->read('origin.tgz');
66    $tar->extract();
67
68    $tar->add_files('file/foo.pl', 'docs/README');
69    $tar->add_data('file/baz.txt', 'This is the contents now');
70
71    $tar->rename('oldname', 'new/file/name');
72    $tar->chown('/', 'root');
73    $tar->chown('/', 'root:root');
74    $tar->chmod('/tmp', '1777');
75
76    $tar->write('files.tar');                   # plain tar
77    $tar->write('files.tgz', COMPRESS_GZIP);    # gzip compressed
78    $tar->write('files.tbz', COMPRESS_BZIP);    # bzip2 compressed
79    $tar->write('files.txz', COMPRESS_XZ);      # xz compressed
80
81=head1 DESCRIPTION
82
83Archive::Tar provides an object oriented mechanism for handling tar
84files.  It provides class methods for quick and easy files handling
85while also allowing for the creation of tar file objects for custom
86manipulation.  If you have the IO::Zlib module installed,
87Archive::Tar will also support compressed or gzipped tar files.
88
89An object of class Archive::Tar represents a .tar(.gz) archive full
90of files and things.
91
92=head1 Object Methods
93
94=head2 Archive::Tar->new( [$file, $compressed] )
95
96Returns a new Tar object. If given any arguments, C<new()> calls the
97C<read()> method automatically, passing on the arguments provided to
98the C<read()> method.
99
100If C<new()> is invoked with arguments and the C<read()> method fails
101for any reason, C<new()> returns undef.
102
103=cut
104
105my $tmpl = {
106    _data   => [ ],
107    _file   => 'Unknown',
108};
109
110### install get/set accessors for this object.
111for my $key ( keys %$tmpl ) {
112    no strict 'refs';
113    *{__PACKAGE__."::$key"} = sub {
114        my $self = shift;
115        $self->{$key} = $_[0] if @_;
116        return $self->{$key};
117    }
118}
119
120sub new {
121    my $class = shift;
122    $class = ref $class if ref $class;
123
124    ### copying $tmpl here since a shallow copy makes it use the
125    ### same aref, causing for files to remain in memory always.
126    my $obj = bless { _data => [ ], _file => 'Unknown', _error => '' }, $class;
127
128    if (@_) {
129        unless ( $obj->read( @_ ) ) {
130            $obj->_error(qq[No data could be read from file]);
131            return;
132        }
133    }
134
135    return $obj;
136}
137
138=head2 $tar->read ( $filename|$handle, [$compressed, {opt => 'val'}] )
139
140Read the given tar file into memory.
141The first argument can either be the name of a file or a reference to
142an already open filehandle (or an IO::Zlib object if it's compressed)
143
144The C<read> will I<replace> any previous content in C<$tar>!
145
146The second argument may be considered optional, but remains for
147backwards compatibility. Archive::Tar now looks at the file
148magic to determine what class should be used to open the file
149and will transparently Do The Right Thing.
150
151Archive::Tar will warn if you try to pass a bzip2 / xz compressed file and the
152IO::Uncompress::Bunzip2 / IO::Uncompress::UnXz are not available and simply return.
153
154Note that you can currently B<not> pass a C<gzip> compressed
155filehandle, which is not opened with C<IO::Zlib>, a C<bzip2> compressed
156filehandle, which is not opened with C<IO::Uncompress::Bunzip2>, a C<xz> compressed
157filehandle, which is not opened with C<IO::Uncompress::UnXz>, nor a string
158containing the full archive information (either compressed or
159uncompressed). These are worth while features, but not currently
160implemented. See the C<TODO> section.
161
162The third argument can be a hash reference with options. Note that
163all options are case-sensitive.
164
165=over 4
166
167=item limit
168
169Do not read more than C<limit> files. This is useful if you have
170very big archives, and are only interested in the first few files.
171
172=item filter
173
174Can be set to a regular expression.  Only files with names that match
175the expression will be read.
176
177=item md5
178
179Set to 1 and the md5sum of files will be returned (instead of file data)
180    my $iter = Archive::Tar->iter( $file,  1, {md5 => 1} );
181    while( my $f = $iter->() ) {
182        print $f->data . "\t" . $f->full_path . $/;
183    }
184
185=item extract
186
187If set to true, immediately extract entries when reading them. This
188gives you the same memory break as the C<extract_archive> function.
189Note however that entries will not be read into memory, but written
190straight to disk. This means no C<Archive::Tar::File> objects are
191created for you to inspect.
192
193=back
194
195All files are stored internally as C<Archive::Tar::File> objects.
196Please consult the L<Archive::Tar::File> documentation for details.
197
198Returns the number of files read in scalar context, and a list of
199C<Archive::Tar::File> objects in list context.
200
201=cut
202
203sub read {
204    my $self = shift;
205    my $file = shift;
206    my $gzip = shift || 0;
207    my $opts = shift || {};
208
209    unless( defined $file ) {
210        $self->_error( qq[No file to read from!] );
211        return;
212    } else {
213        $self->_file( $file );
214    }
215
216    my $handle = $self->_get_handle($file, $gzip, READ_ONLY->( ZLIB ) )
217                    or return;
218
219    my $data = $self->_read_tar( $handle, $opts ) or return;
220
221    $self->_data( $data );
222
223    return wantarray ? @$data : scalar @$data;
224}
225
226sub _get_handle {
227    my $self     = shift;
228    my $file     = shift;   return unless defined $file;
229    my $compress = shift || 0;
230    my $mode     = shift || READ_ONLY->( ZLIB ); # default to read only
231
232    ### Check if file is a file handle or IO glob
233    if ( ref $file ) {
234	return $file if eval{ *$file{IO} };
235	return $file if eval{ $file->isa(q{IO::Handle}) };
236	$file = q{}.$file;
237    }
238
239    ### get a FH opened to the right class, so we can use it transparently
240    ### throughout the program
241    my $fh;
242    {   ### reading magic only makes sense if we're opening a file for
243        ### reading. otherwise, just use what the user requested.
244        my $magic = '';
245        if( MODE_READ->($mode) ) {
246            open my $tmp, $file or do {
247                $self->_error( qq[Could not open '$file' for reading: $!] );
248                return;
249            };
250
251            ### read the first 6 bytes of the file to figure out which class to
252            ### use to open the file.
253            sysread( $tmp, $magic, 6 );
254            close $tmp;
255        }
256
257        ### is it xz?
258        ### if you asked specifically for xz compression, or if we're in
259        ### read mode and the magic numbers add up, use xz
260        if( XZ and (
261               ($compress eq COMPRESS_XZ) or
262               ( MODE_READ->($mode) and $magic =~ XZ_MAGIC_NUM )
263            )
264        ) {
265            if( MODE_READ->($mode) ) {
266                $fh = IO::Uncompress::UnXz->new( $file ) or do {
267                    $self->_error( qq[Could not read '$file': ] .
268                        $IO::Uncompress::UnXz::UnXzError
269                    );
270                    return;
271                };
272            } else {
273                $fh = IO::Compress::Xz->new( $file ) or do {
274                    $self->_error( qq[Could not write to '$file': ] .
275                        $IO::Compress::Xz::XzError
276                    );
277                    return;
278                };
279            }
280
281        ### is it bzip?
282        ### if you asked specifically for bzip compression, or if we're in
283        ### read mode and the magic numbers add up, use bzip
284        } elsif( BZIP and (
285                ($compress eq COMPRESS_BZIP) or
286                ( MODE_READ->($mode) and $magic =~ BZIP_MAGIC_NUM )
287            )
288        ) {
289
290            ### different reader/writer modules, different error vars... sigh
291            if( MODE_READ->($mode) ) {
292                $fh = IO::Uncompress::Bunzip2->new( $file, MultiStream => 1 ) or do {
293                    $self->_error( qq[Could not read '$file': ] .
294                        $IO::Uncompress::Bunzip2::Bunzip2Error
295                    );
296                    return;
297                };
298
299            } else {
300                $fh = IO::Compress::Bzip2->new( $file ) or do {
301                    $self->_error( qq[Could not write to '$file': ] .
302                        $IO::Compress::Bzip2::Bzip2Error
303                    );
304                    return;
305                };
306            }
307
308        ### is it gzip?
309        ### if you asked for compression, if you wanted to read or the gzip
310        ### magic number is present (redundant with read)
311        } elsif( ZLIB and (
312                    $compress or MODE_READ->($mode) or $magic =~ GZIP_MAGIC_NUM
313                 )
314        ) {
315            $fh = IO::Zlib->new;
316
317            unless( $fh->open( $file, $mode ) ) {
318                $self->_error(qq[Could not create filehandle for '$file': $!]);
319                return;
320            }
321
322        ### is it plain tar?
323        } else {
324            $fh = IO::File->new;
325
326            unless( $fh->open( $file, $mode ) ) {
327                $self->_error(qq[Could not create filehandle for '$file': $!]);
328                return;
329            }
330
331            ### enable bin mode on tar archives
332            binmode $fh;
333        }
334    }
335
336    return $fh;
337}
338
339
340sub _read_tar {
341    my $self    = shift;
342    my $handle  = shift or return;
343    my $opts    = shift || {};
344
345    my $count   = $opts->{limit}    || 0;
346    my $filter  = $opts->{filter};
347    my $md5  = $opts->{md5} || 0;	# cdrake
348    my $filter_cb = $opts->{filter_cb};
349    my $extract = $opts->{extract}  || 0;
350
351    ### set a cap on the amount of files to extract ###
352    my $limit   = 0;
353    $limit = 1 if $count > 0;
354
355    my $tarfile = [ ];
356    my $chunk;
357    my $read = 0;
358    my $real_name;  # to set the name of a file when
359                    # we're encountering @longlink
360    my $data;
361
362    LOOP:
363    while( $handle->read( $chunk, HEAD ) ) {
364        ### IO::Zlib doesn't support this yet
365        my $offset;
366        if ( ref($handle) ne 'IO::Zlib' ) {
367            local $@;
368            $offset = eval { tell $handle } || 'unknown';
369            $@ = '';
370        }
371        else {
372            $offset = 'unknown';
373        }
374
375        unless( $read++ ) {
376            my $gzip = GZIP_MAGIC_NUM;
377            if( $chunk =~ /$gzip/ ) {
378                $self->_error( qq[Cannot read compressed format in tar-mode] );
379                return;
380            }
381
382            ### size is < HEAD, which means a corrupted file, as the minimum
383            ### length is _at least_ HEAD
384            if (length $chunk != HEAD) {
385                $self->_error( qq[Cannot read enough bytes from the tarfile] );
386                return;
387            }
388        }
389
390        ### if we can't read in all bytes... ###
391        last if length $chunk != HEAD;
392
393        ### Apparently this should really be two blocks of 512 zeroes,
394        ### but GNU tar sometimes gets it wrong. See comment in the
395        ### source code (tar.c) to GNU cpio.
396        next if $chunk eq TAR_END;
397
398        ### according to the posix spec, the last 12 bytes of the header are
399        ### null bytes, to pad it to a 512 byte block. That means if these
400        ### bytes are NOT null bytes, it's a corrupt header. See:
401        ### www.koders.com/c/fidCE473AD3D9F835D690259D60AD5654591D91D5BA.aspx
402        ### line 111
403        {   my $nulls = join '', "\0" x 12;
404            unless( $nulls eq substr( $chunk, 500, 12 ) ) {
405                $self->_error( qq[Invalid header block at offset $offset] );
406                next LOOP;
407            }
408        }
409
410        ### pass the realname, so we can set it 'proper' right away
411        ### some of the heuristics are done on the name, so important
412        ### to set it ASAP
413        my $entry;
414        {   my %extra_args = ();
415            $extra_args{'name'} = $$real_name if defined $real_name;
416
417            unless( $entry = Archive::Tar::File->new(   chunk => $chunk,
418                                                        %extra_args )
419            ) {
420                $self->_error( qq[Couldn't read chunk at offset $offset] );
421                next LOOP;
422            }
423        }
424
425        ### ignore labels:
426        ### http://www.gnu.org/software/tar/manual/html_chapter/Media.html#SEC159
427        next if $entry->is_label;
428
429        if( length $entry->type and ($entry->is_file || $entry->is_longlink) ) {
430
431            if ( $entry->is_file && !$entry->validate ) {
432                ### sometimes the chunk is rather fux0r3d and a whole 512
433                ### bytes ends up in the ->name area.
434                ### clean it up, if need be
435                my $name = $entry->name;
436                $name = substr($name, 0, 100) if length $name > 100;
437                $name =~ s/\n/ /g;
438
439                $self->_error( $name . qq[: checksum error] );
440                next LOOP;
441            }
442
443            my $block = BLOCK_SIZE->( $entry->size );
444
445            $data = $entry->get_content_by_ref;
446
447	    my $skip = 0;
448	    my $ctx;			# cdrake
449	    ### skip this entry if we're filtering
450
451	    if($md5) {			# cdrake
452	      $ctx = Digest::MD5->new;	# cdrake
453	        $skip=5;		# cdrake
454
455	    } elsif ($filter && $entry->name !~ $filter) {
456		$skip = 1;
457
458	    } elsif ($filter_cb && ! $filter_cb->($entry)) {
459		$skip = 2;
460
461	    ### skip this entry if it's a pax header. This is a special file added
462	    ### by, among others, git-generated tarballs. It holds comments and is
463	    ### not meant for extracting. See #38932: pax_global_header extracted
464	    } elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) {
465		$skip = 3;
466	    }
467
468	    if ($skip) {
469		#
470		# Since we're skipping, do not allocate memory for the
471		# whole file.  Read it 64 BLOCKS at a time.  Do not
472		# complete the skip yet because maybe what we read is a
473		# longlink and it won't get skipped after all
474		#
475		my $amt = $block;
476		my $fsz=$entry->size;	# cdrake
477		while ($amt > 0) {
478		    $$data = '';
479		    my $this = 64 * BLOCK;
480		    $this = $amt if $this > $amt;
481		    if( $handle->read( $$data, $this ) < $this ) {
482			$self->_error( qq[Read error on tarfile (missing data) '].
483					    $entry->full_path ."' at offset $offset" );
484			next LOOP;
485		    }
486		    $amt -= $this;
487		    $fsz -= $this;	# cdrake
488		substr ($$data, $fsz) = "" if ($fsz<0);	# remove external junk prior to md5	# cdrake
489		$ctx->add($$data) if($skip==5);	# cdrake
490		}
491		$$data = $ctx->hexdigest if($skip==5 && !$entry->is_longlink && !$entry->is_unknown && !$entry->is_label ) ;	# cdrake
492            } else {
493
494		### just read everything into memory
495		### can't do lazy loading since IO::Zlib doesn't support 'seek'
496		### this is because Compress::Zlib doesn't support it =/
497		### this reads in the whole data in one read() call.
498		if ( $handle->read( $$data, $block ) < $block ) {
499		    $self->_error( qq[Read error on tarfile (missing data) '].
500                                    $entry->full_path ."' at offset $offset" );
501		    next LOOP;
502		}
503		### throw away trailing garbage ###
504		substr ($$data, $entry->size) = "" if defined $$data;
505            }
506
507            ### part II of the @LongLink munging -- need to do /after/
508            ### the checksum check.
509            if( $entry->is_longlink ) {
510                ### weird thing in tarfiles -- if the file is actually a
511                ### @LongLink, the data part seems to have a trailing ^@
512                ### (unprintable) char. to display, pipe output through less.
513                ### but that doesn't *always* happen.. so check if the last
514                ### character is a control character, and if so remove it
515                ### at any rate, we better remove that character here, or tests
516                ### like 'eq' and hash lookups based on names will SO not work
517                ### remove it by calculating the proper size, and then
518                ### tossing out everything that's longer than that size.
519
520                ### count number of nulls
521                my $nulls = $$data =~ tr/\0/\0/;
522
523                ### cut data + size by that many bytes
524                $entry->size( $entry->size - $nulls );
525                substr ($$data, $entry->size) = "";
526            }
527        }
528
529        ### clean up of the entries.. posix tar /apparently/ has some
530        ### weird 'feature' that allows for filenames > 255 characters
531        ### they'll put a header in with as name '././@LongLink' and the
532        ### contents will be the name of the /next/ file in the archive
533        ### pretty crappy and kludgy if you ask me
534
535        ### set the name for the next entry if this is a @LongLink;
536        ### this is one ugly hack =/ but needed for direct extraction
537        if( $entry->is_longlink ) {
538            $real_name = $data;
539            next LOOP;
540        } elsif ( defined $real_name ) {
541            $entry->name( $$real_name );
542            $entry->prefix('');
543            undef $real_name;
544        }
545
546	if ($filter && $entry->name !~ $filter) {
547	    next LOOP;
548
549	} elsif ($filter_cb && ! $filter_cb->($entry)) {
550	    next LOOP;
551
552	### skip this entry if it's a pax header. This is a special file added
553	### by, among others, git-generated tarballs. It holds comments and is
554	### not meant for extracting. See #38932: pax_global_header extracted
555	} elsif ( $entry->name eq PAX_HEADER or $entry->type =~ /^(x|g)$/ ) {
556	    next LOOP;
557	}
558
559        if ( $extract && !$entry->is_longlink
560                      && !$entry->is_unknown
561                      && !$entry->is_label ) {
562            $self->_extract_file( $entry ) or return;
563        }
564
565        ### Guard against tarfiles with garbage at the end
566	    last LOOP if $entry->name eq '';
567
568        ### push only the name on the rv if we're extracting
569        ### -- for extract_archive
570        push @$tarfile, ($extract ? $entry->name : $entry);
571
572        if( $limit ) {
573            $count-- unless $entry->is_longlink || $entry->is_dir;
574            last LOOP unless $count;
575        }
576    } continue {
577        undef $data;
578    }
579
580    return $tarfile;
581}
582
583=head2 $tar->contains_file( $filename )
584
585Check if the archive contains a certain file.
586It will return true if the file is in the archive, false otherwise.
587
588Note however, that this function does an exact match using C<eq>
589on the full path. So it cannot compensate for case-insensitive file-
590systems or compare 2 paths to see if they would point to the same
591underlying file.
592
593=cut
594
595sub contains_file {
596    my $self = shift;
597    my $full = shift;
598
599    return unless defined $full;
600
601    ### don't warn if the entry isn't there.. that's what this function
602    ### is for after all.
603    local $WARN = 0;
604    return 1 if $self->_find_entry($full);
605    return;
606}
607
608=head2 $tar->extract( [@filenames] )
609
610Write files whose names are equivalent to any of the names in
611C<@filenames> to disk, creating subdirectories as necessary. This
612might not work too well under VMS.
613Under MacPerl, the file's modification time will be converted to the
614MacOS zero of time, and appropriate conversions will be done to the
615path.  However, the length of each element of the path is not
616inspected to see whether it's longer than MacOS currently allows (32
617characters).
618
619If C<extract> is called without a list of file names, the entire
620contents of the archive are extracted.
621
622Returns a list of filenames extracted.
623
624=cut
625
626sub extract {
627    my $self    = shift;
628    my @args    = @_;
629    my @files;
630    my $hashmap;
631
632    # use the speed optimization for all extracted files
633    local($self->{cwd}) = cwd() unless $self->{cwd};
634
635    ### you requested the extraction of only certain files
636    if( @args ) {
637        for my $file ( @args ) {
638
639            ### it's already an object?
640            if( UNIVERSAL::isa( $file, 'Archive::Tar::File' ) ) {
641                push @files, $file;
642                next;
643
644            ### go find it then
645            } else {
646
647                # create hash-map once to speed up lookup
648                $hashmap = $hashmap || {
649                    map { $_->full_path, $_ } @{$self->_data}
650                };
651
652                if (exists $hashmap->{$file}) {
653                    ### we found the file you're looking for
654                    push @files, $hashmap->{$file};
655                } else {
656                    return $self->_error(
657                        qq[Could not find '$file' in archive] );
658                }
659            }
660        }
661
662    ### just grab all the file items
663    } else {
664        @files = $self->get_files;
665    }
666
667    ### nothing found? that's an error
668    unless( scalar @files ) {
669        $self->_error( qq[No files found for ] . $self->_file );
670        return;
671    }
672
673    ### now extract them
674    for my $entry ( @files ) {
675        unless( $self->_extract_file( $entry ) ) {
676            $self->_error(q[Could not extract ']. $entry->full_path .q['] );
677            return;
678        }
679    }
680
681    return @files;
682}
683
684=head2 $tar->extract_file( $file, [$extract_path] )
685
686Write an entry, whose name is equivalent to the file name provided to
687disk. Optionally takes a second parameter, which is the full native
688path (including filename) the entry will be written to.
689
690For example:
691
692    $tar->extract_file( 'name/in/archive', 'name/i/want/to/give/it' );
693
694    $tar->extract_file( $at_file_object,   'name/i/want/to/give/it' );
695
696Returns true on success, false on failure.
697
698=cut
699
700sub extract_file {
701    my $self = shift;
702    my $file = shift;   return unless defined $file;
703    my $alt  = shift;
704
705    my $entry = $self->_find_entry( $file )
706        or $self->_error( qq[Could not find an entry for '$file'] ), return;
707
708    return $self->_extract_file( $entry, $alt );
709}
710
711sub _extract_file {
712    my $self    = shift;
713    my $entry   = shift or return;
714    my $alt     = shift;
715
716    ### you wanted an alternate extraction location ###
717    my $name = defined $alt ? $alt : $entry->full_path;
718
719                            ### splitpath takes a bool at the end to indicate
720                            ### that it's splitting a dir
721    my ($vol,$dirs,$file);
722    if ( defined $alt ) { # It's a local-OS path
723        ($vol,$dirs,$file) = File::Spec->splitpath(       $alt,
724                                                          $entry->is_dir );
725    } else {
726        ($vol,$dirs,$file) = File::Spec::Unix->splitpath( $name,
727                                                          $entry->is_dir );
728    }
729
730    my $dir;
731    ### is $name an absolute path? ###
732    if( $vol || File::Spec->file_name_is_absolute( $dirs ) ) {
733
734        ### absolute names are not allowed to be in tarballs under
735        ### strict mode, so only allow it if a user tells us to do it
736        if( not defined $alt and not $INSECURE_EXTRACT_MODE ) {
737            $self->_error(
738                q[Entry ']. $entry->full_path .q[' is an absolute path. ].
739                q[Not extracting absolute paths under SECURE EXTRACT MODE]
740            );
741            return;
742        }
743
744        ### user asked us to, it's fine.
745        $dir = File::Spec->catpath( $vol, $dirs, "" );
746
747    ### it's a relative path ###
748    } else {
749        my $cwd     = (ref $self and defined $self->{cwd})
750                        ? $self->{cwd}
751                        : cwd();
752
753        my @dirs = defined $alt
754            ? File::Spec->splitdir( $dirs )         # It's a local-OS path
755            : File::Spec::Unix->splitdir( $dirs );  # it's UNIX-style, likely
756                                                    # straight from the tarball
757
758        if( not defined $alt            and
759            not $INSECURE_EXTRACT_MODE
760        ) {
761
762            ### paths that leave the current directory are not allowed under
763            ### strict mode, so only allow it if a user tells us to do this.
764            if( grep { $_ eq '..' } @dirs ) {
765
766                $self->_error(
767                    q[Entry ']. $entry->full_path .q[' is attempting to leave ].
768                    q[the current working directory. Not extracting under ].
769                    q[SECURE EXTRACT MODE]
770                );
771                return;
772            }
773
774            ### the archive may be asking us to extract into a symlink. This
775            ### is not sane and a possible security issue, as outlined here:
776            ### https://rt.cpan.org/Ticket/Display.html?id=30380
777            ### https://bugzilla.redhat.com/show_bug.cgi?id=295021
778            ### https://issues.rpath.com/browse/RPL-1716
779            my $full_path = $cwd;
780            for my $d ( @dirs ) {
781                $full_path = File::Spec->catdir( $full_path, $d );
782
783                ### we've already checked this one, and it's safe. Move on.
784                next if ref $self and $self->{_link_cache}->{$full_path};
785
786                if( -l $full_path ) {
787                    my $to   = readlink $full_path;
788                    my $diag = "symlinked directory ($full_path => $to)";
789
790                    $self->_error(
791                        q[Entry ']. $entry->full_path .q[' is attempting to ].
792                        qq[extract to a $diag. This is considered a security ].
793                        q[vulnerability and not allowed under SECURE EXTRACT ].
794                        q[MODE]
795                    );
796                    return;
797                }
798
799                ### XXX keep a cache if possible, so the stats become cheaper:
800                $self->{_link_cache}->{$full_path} = 1 if ref $self;
801            }
802        }
803
804        ### '.' is the directory delimiter on VMS, which has to be escaped
805        ### or changed to '_' on vms.  vmsify is used, because older versions
806        ### of vmspath do not handle this properly.
807        ### Must not add a '/' to an empty directory though.
808        map { length() ? VMS::Filespec::vmsify($_.'/') : $_ } @dirs if ON_VMS;
809
810        my ($cwd_vol,$cwd_dir,$cwd_file)
811                    = File::Spec->splitpath( $cwd );
812        my @cwd     = File::Spec->splitdir( $cwd_dir );
813        push @cwd, $cwd_file if length $cwd_file;
814
815        ### We need to pass '' as the last element to catpath. Craig Berry
816        ### explains why (msgid <p0624083dc311ae541393@[172.16.52.1]>):
817        ### The root problem is that splitpath on UNIX always returns the
818        ### final path element as a file even if it is a directory, and of
819        ### course there is no way it can know the difference without checking
820        ### against the filesystem, which it is documented as not doing.  When
821        ### you turn around and call catpath, on VMS you have to know which bits
822        ### are directory bits and which bits are file bits.  In this case we
823        ### know the result should be a directory.  I had thought you could omit
824        ### the file argument to catpath in such a case, but apparently on UNIX
825        ### you can't.
826        $dir        = File::Spec->catpath(
827                            $cwd_vol, File::Spec->catdir( @cwd, @dirs ), ''
828                        );
829
830        ### catdir() returns undef if the path is longer than 255 chars on
831        ### older VMS systems.
832        unless ( defined $dir ) {
833            $^W && $self->_error( qq[Could not compose a path for '$dirs'\n] );
834            return;
835        }
836
837    }
838
839    if( -e $dir && !-d _ ) {
840        $^W && $self->_error( qq['$dir' exists, but it's not a directory!\n] );
841        return;
842    }
843
844    unless ( -d _ ) {
845        eval { File::Path::mkpath( $dir, 0, 0777 ) };
846        if( $@ ) {
847            my $fp = $entry->full_path;
848            $self->_error(qq[Could not create directory '$dir' for '$fp': $@]);
849            return;
850        }
851
852        ### XXX chown here? that might not be the same as in the archive
853        ### as we're only chown'ing to the owner of the file we're extracting
854        ### not to the owner of the directory itself, which may or may not
855        ### be another entry in the archive
856        ### Answer: no, gnu tar doesn't do it either, it'd be the wrong
857        ### way to go.
858        #if( $CHOWN && CAN_CHOWN ) {
859        #    chown $entry->uid, $entry->gid, $dir or
860        #        $self->_error( qq[Could not set uid/gid on '$dir'] );
861        #}
862    }
863
864    ### we're done if we just needed to create a dir ###
865    return 1 if $entry->is_dir;
866
867    my $full = File::Spec->catfile( $dir, $file );
868
869    if( $entry->is_unknown ) {
870        $self->_error( qq[Unknown file type for file '$full'] );
871        return;
872    }
873
874    ### If a file system already contains a block device with the same name as
875    ### the being extracted regular file, we would write the file's content
876    ### to the block device. So remove the existing file (block device) now.
877    ### If an archive contains multiple same-named entries, the last one
878    ### should replace the previous ones. So remove the old file now.
879    ### If the old entry is a symlink to a file outside of the CWD, the new
880    ### entry would create a file there. This is CVE-2018-12015
881    ### <https://rt.cpan.org/Ticket/Display.html?id=125523>.
882    if (-l $full || -e _) {
883	if (!unlink $full) {
884	    $self->_error( qq[Could not remove old file '$full': $!] );
885	    return;
886	}
887    }
888    if( length $entry->type && $entry->is_file ) {
889        my $fh = IO::File->new;
890        $fh->open( $full, '>' ) or (
891            $self->_error( qq[Could not open file '$full': $!] ),
892            return
893        );
894
895        if( $entry->size ) {
896            binmode $fh;
897            syswrite $fh, $entry->data or (
898                $self->_error( qq[Could not write data to '$full'] ),
899                return
900            );
901        }
902
903        close $fh or (
904            $self->_error( qq[Could not close file '$full'] ),
905            return
906        );
907
908    } else {
909        $self->_make_special_file( $entry, $full ) or return;
910    }
911
912    ### only update the timestamp if it's not a symlink; that will change the
913    ### timestamp of the original. This addresses bug #33669: Could not update
914    ### timestamp warning on symlinks
915    if( not -l $full ) {
916        utime time, $entry->mtime - TIME_OFFSET, $full or
917            $self->_error( qq[Could not update timestamp] );
918    }
919
920    if( $CHOWN && CAN_CHOWN->() and not -l $full ) {
921        CORE::chown( $entry->uid, $entry->gid, $full ) or
922            $self->_error( qq[Could not set uid/gid on '$full'] );
923    }
924
925    ### only chmod if we're allowed to, but never chmod symlinks, since they'll
926    ### change the perms on the file they're linking too...
927    if( $CHMOD and not -l $full ) {
928        my $mode = $entry->mode;
929        unless ($SAME_PERMISSIONS) {
930            $mode &= ~(oct(7000) | umask);
931        }
932        CORE::chmod( $mode, $full ) or
933            $self->_error( qq[Could not chown '$full' to ] . $entry->mode );
934    }
935
936    return 1;
937}
938
939sub _make_special_file {
940    my $self    = shift;
941    my $entry   = shift     or return;
942    my $file    = shift;    return unless defined $file;
943
944    my $err;
945
946    if( $entry->is_symlink ) {
947        my $fail;
948        if( ON_UNIX ) {
949            symlink( $entry->linkname, $file ) or $fail++;
950
951        } else {
952            $self->_extract_special_file_as_plain_file( $entry, $file )
953                or $fail++;
954        }
955
956        $err =  qq[Making symbolic link '$file' to '] .
957                $entry->linkname .q[' failed] if $fail;
958
959    } elsif ( $entry->is_hardlink ) {
960        my $fail;
961        if( ON_UNIX ) {
962            link( $entry->linkname, $file ) or $fail++;
963
964        } else {
965            $self->_extract_special_file_as_plain_file( $entry, $file )
966                or $fail++;
967        }
968
969        $err =  qq[Making hard link from '] . $entry->linkname .
970                qq[' to '$file' failed] if $fail;
971
972    } elsif ( $entry->is_fifo ) {
973        ON_UNIX && !system('mknod', $file, 'p') or
974            $err = qq[Making fifo ']. $entry->name .qq[' failed];
975
976    } elsif ( $entry->is_blockdev or $entry->is_chardev ) {
977        my $mode = $entry->is_blockdev ? 'b' : 'c';
978
979        ON_UNIX && !system('mknod', $file, $mode,
980                            $entry->devmajor, $entry->devminor) or
981            $err =  qq[Making block device ']. $entry->name .qq[' (maj=] .
982                    $entry->devmajor . qq[ min=] . $entry->devminor .
983                    qq[) failed.];
984
985    } elsif ( $entry->is_socket ) {
986        ### the original doesn't do anything special for sockets.... ###
987        1;
988    }
989
990    return $err ? $self->_error( $err ) : 1;
991}
992
993### don't know how to make symlinks, let's just extract the file as
994### a plain file
995sub _extract_special_file_as_plain_file {
996    my $self    = shift;
997    my $entry   = shift     or return;
998    my $file    = shift;    return unless defined $file;
999
1000    my $err;
1001    TRY: {
1002        my $orig = $self->_find_entry( $entry->linkname, $entry );
1003
1004        unless( $orig ) {
1005            $err =  qq[Could not find file '] . $entry->linkname .
1006                    qq[' in memory.];
1007            last TRY;
1008        }
1009
1010        ### clone the entry, make it appear as a normal file ###
1011        my $clone = $orig->clone;
1012        $clone->_downgrade_to_plainfile;
1013        $self->_extract_file( $clone, $file ) or last TRY;
1014
1015        return 1;
1016    }
1017
1018    return $self->_error($err);
1019}
1020
1021=head2 $tar->list_files( [\@properties] )
1022
1023Returns a list of the names of all the files in the archive.
1024
1025If C<list_files()> is passed an array reference as its first argument
1026it returns a list of hash references containing the requested
1027properties of each file.  The following list of properties is
1028supported: name, size, mtime (last modified date), mode, uid, gid,
1029linkname, uname, gname, devmajor, devminor, prefix.
1030
1031Passing an array reference containing only one element, 'name', is
1032special cased to return a list of names rather than a list of hash
1033references, making it equivalent to calling C<list_files> without
1034arguments.
1035
1036=cut
1037
1038sub list_files {
1039    my $self = shift;
1040    my $aref = shift || [ ];
1041
1042    unless( $self->_data ) {
1043        $self->read() or return;
1044    }
1045
1046    if( @$aref == 0 or ( @$aref == 1 and $aref->[0] eq 'name' ) ) {
1047        return map { $_->full_path } @{$self->_data};
1048    } else {
1049
1050        #my @rv;
1051        #for my $obj ( @{$self->_data} ) {
1052        #    push @rv, { map { $_ => $obj->$_() } @$aref };
1053        #}
1054        #return @rv;
1055
1056        ### this does the same as the above.. just needs a +{ }
1057        ### to make sure perl doesn't confuse it for a block
1058        return map {    my $o=$_;
1059                        +{ map { $_ => $o->$_() } @$aref }
1060                    } @{$self->_data};
1061    }
1062}
1063
1064sub _find_entry {
1065    my $self = shift;
1066    my $file = shift;
1067
1068    unless( defined $file ) {
1069        $self->_error( qq[No file specified] );
1070        return;
1071    }
1072
1073    ### it's an object already
1074    return $file if UNIVERSAL::isa( $file, 'Archive::Tar::File' );
1075
1076seach_entry:
1077		if($self->_data){
1078			for my $entry ( @{$self->_data} ) {
1079					my $path = $entry->full_path;
1080					return $entry if $path eq $file;
1081			}
1082		}
1083
1084		if($Archive::Tar::RESOLVE_SYMLINK!~/none/){
1085			if(my $link_entry = shift()){#fallback mode when symlinks are using relative notations ( ../a/./b/text.bin )
1086				$file = _symlinks_resolver( $link_entry->name, $file );
1087				goto seach_entry if $self->_data;
1088
1089				#this will be slower than never, but won't failed!
1090
1091				my $iterargs = $link_entry->{'_archive'};
1092				if($Archive::Tar::RESOLVE_SYMLINK=~/speed/ && @$iterargs==3){
1093				#faster	but whole archive will be read in memory
1094					#read whole archive and share data
1095					my $archive = Archive::Tar->new;
1096					$archive->read( @$iterargs );
1097					push @$iterargs, $archive; #take a trace for destruction
1098					if($archive->_data){
1099						$self->_data( $archive->_data );
1100						goto seach_entry;
1101					}
1102				}#faster
1103
1104				{#slower but lower memory usage
1105					# $iterargs = [$filename, $compressed, $opts];
1106					my $next = Archive::Tar->iter( @$iterargs );
1107					while(my $e = $next->()){
1108						if($e->full_path eq $file){
1109							undef $next;
1110							return $e;
1111						}
1112					}
1113				}#slower
1114			}
1115		}
1116
1117    $self->_error( qq[No such file in archive: '$file'] );
1118    return;
1119}
1120
1121=head2 $tar->get_files( [@filenames] )
1122
1123Returns the C<Archive::Tar::File> objects matching the filenames
1124provided. If no filename list was passed, all C<Archive::Tar::File>
1125objects in the current Tar object are returned.
1126
1127Please refer to the C<Archive::Tar::File> documentation on how to
1128handle these objects.
1129
1130=cut
1131
1132sub get_files {
1133    my $self = shift;
1134
1135    return @{ $self->_data } unless @_;
1136
1137    my @list;
1138    for my $file ( @_ ) {
1139        push @list, grep { defined } $self->_find_entry( $file );
1140    }
1141
1142    return @list;
1143}
1144
1145=head2 $tar->get_content( $file )
1146
1147Return the content of the named file.
1148
1149=cut
1150
1151sub get_content {
1152    my $self = shift;
1153    my $entry = $self->_find_entry( shift ) or return;
1154
1155    return $entry->data;
1156}
1157
1158=head2 $tar->replace_content( $file, $content )
1159
1160Make the string $content be the content for the file named $file.
1161
1162=cut
1163
1164sub replace_content {
1165    my $self = shift;
1166    my $entry = $self->_find_entry( shift ) or return;
1167
1168    return $entry->replace_content( shift );
1169}
1170
1171=head2 $tar->rename( $file, $new_name )
1172
1173Rename the file of the in-memory archive to $new_name.
1174
1175Note that you must specify a Unix path for $new_name, since per tar
1176standard, all files in the archive must be Unix paths.
1177
1178Returns true on success and false on failure.
1179
1180=cut
1181
1182sub rename {
1183    my $self = shift;
1184    my $file = shift; return unless defined $file;
1185    my $new  = shift; return unless defined $new;
1186
1187    my $entry = $self->_find_entry( $file ) or return;
1188
1189    return $entry->rename( $new );
1190}
1191
1192=head2 $tar->chmod( $file, $mode )
1193
1194Change mode of $file to $mode.
1195
1196Returns true on success and false on failure.
1197
1198=cut
1199
1200sub chmod {
1201    my $self = shift;
1202    my $file = shift; return unless defined $file;
1203    my $mode = shift; return unless defined $mode && $mode =~ /^[0-7]{1,4}$/;
1204    my @args = ("$mode");
1205
1206    my $entry = $self->_find_entry( $file ) or return;
1207    my $x = $entry->chmod( @args );
1208    return $x;
1209}
1210
1211=head2 $tar->chown( $file, $uname [, $gname] )
1212
1213Change owner $file to $uname and $gname.
1214
1215Returns true on success and false on failure.
1216
1217=cut
1218
1219sub chown {
1220    my $self = shift;
1221    my $file = shift; return unless defined $file;
1222    my $uname  = shift; return unless defined $uname;
1223    my @args   = ($uname);
1224    push(@args, shift);
1225
1226    my $entry = $self->_find_entry( $file ) or return;
1227    my $x = $entry->chown( @args );
1228    return $x;
1229}
1230
1231=head2 $tar->remove (@filenamelist)
1232
1233Removes any entries with names matching any of the given filenames
1234from the in-memory archive. Returns a list of C<Archive::Tar::File>
1235objects that remain.
1236
1237=cut
1238
1239sub remove {
1240    my $self = shift;
1241    my @list = @_;
1242
1243    my %seen = map { $_->full_path => $_ } @{$self->_data};
1244    delete $seen{ $_ } for @list;
1245
1246    $self->_data( [values %seen] );
1247
1248    return values %seen;
1249}
1250
1251=head2 $tar->clear
1252
1253C<clear> clears the current in-memory archive. This effectively gives
1254you a 'blank' object, ready to be filled again. Note that C<clear>
1255only has effect on the object, not the underlying tarfile.
1256
1257=cut
1258
1259sub clear {
1260    my $self = shift or return;
1261
1262    $self->_data( [] );
1263    $self->_file( '' );
1264
1265    return 1;
1266}
1267
1268
1269=head2 $tar->write ( [$file, $compressed, $prefix] )
1270
1271Write the in-memory archive to disk.  The first argument can either
1272be the name of a file or a reference to an already open filehandle (a
1273GLOB reference).
1274
1275The second argument is used to indicate compression. You can
1276compress using C<gzip>, C<bzip2> or C<xz>. If you pass a digit, it's assumed
1277to be the C<gzip> compression level (between 1 and 9), but the use of
1278constants is preferred:
1279
1280  # write a gzip compressed file
1281  $tar->write( 'out.tgz', COMPRESS_GZIP );
1282
1283  # write a bzip compressed file
1284  $tar->write( 'out.tbz', COMPRESS_BZIP );
1285
1286  # write a xz compressed file
1287  $tar->write( 'out.txz', COMPRESS_XZ );
1288
1289Note that when you pass in a filehandle, the compression argument
1290is ignored, as all files are printed verbatim to your filehandle.
1291If you wish to enable compression with filehandles, use an
1292C<IO::Zlib>, C<IO::Compress::Bzip2> or C<IO::Compress::Xz> filehandle instead.
1293
1294The third argument is an optional prefix. All files will be tucked
1295away in the directory you specify as prefix. So if you have files
1296'a' and 'b' in your archive, and you specify 'foo' as prefix, they
1297will be written to the archive as 'foo/a' and 'foo/b'.
1298
1299If no arguments are given, C<write> returns the entire formatted
1300archive as a string, which could be useful if you'd like to stuff the
1301archive into a socket or a pipe to gzip or something.
1302
1303
1304=cut
1305
1306sub write {
1307    my $self        = shift;
1308    my $file        = shift; $file = '' unless defined $file;
1309    my $gzip        = shift || 0;
1310    my $ext_prefix  = shift; $ext_prefix = '' unless defined $ext_prefix;
1311    my $dummy       = '';
1312
1313    ### only need a handle if we have a file to print to ###
1314    my $handle = length($file)
1315                    ? ( $self->_get_handle($file, $gzip, WRITE_ONLY->($gzip) )
1316                        or return )
1317                    : $HAS_PERLIO    ? do { open my $h, '>', \$dummy; $h }
1318                    : $HAS_IO_STRING ? IO::String->new
1319                    : __PACKAGE__->no_string_support();
1320
1321    ### Addresses: #41798: Nonempty $\ when writing a TAR file produces a
1322    ### corrupt TAR file. Must clear out $\ to make sure no garbage is
1323    ### printed to the archive
1324    local $\;
1325
1326    for my $entry ( @{$self->_data} ) {
1327        ### entries to be written to the tarfile ###
1328        my @write_me;
1329
1330        ### only now will we change the object to reflect the current state
1331        ### of the name and prefix fields -- this needs to be limited to
1332        ### write() only!
1333        my $clone = $entry->clone;
1334
1335
1336        ### so, if you don't want use to use the prefix, we'll stuff
1337        ### everything in the name field instead
1338        if( $DO_NOT_USE_PREFIX ) {
1339
1340            ### you might have an extended prefix, if so, set it in the clone
1341            ### XXX is ::Unix right?
1342            $clone->name( length $ext_prefix
1343                            ? File::Spec::Unix->catdir( $ext_prefix,
1344                                                        $clone->full_path)
1345                            : $clone->full_path );
1346            $clone->prefix( '' );
1347
1348        ### otherwise, we'll have to set it properly -- prefix part in the
1349        ### prefix and name part in the name field.
1350        } else {
1351
1352            ### split them here, not before!
1353            my ($prefix,$name) = $clone->_prefix_and_file( $clone->full_path );
1354
1355            ### you might have an extended prefix, if so, set it in the clone
1356            ### XXX is ::Unix right?
1357            $prefix = File::Spec::Unix->catdir( $ext_prefix, $prefix )
1358                if length $ext_prefix;
1359
1360            $clone->prefix( $prefix );
1361            $clone->name( $name );
1362        }
1363
1364        ### names are too long, and will get truncated if we don't add a
1365        ### '@LongLink' file...
1366        my $make_longlink = (   length($clone->name)    > NAME_LENGTH or
1367                                length($clone->prefix)  > PREFIX_LENGTH
1368                            ) || 0;
1369
1370        ### perhaps we need to make a longlink file?
1371        if( $make_longlink ) {
1372            my $longlink = Archive::Tar::File->new(
1373                            data => LONGLINK_NAME,
1374                            $clone->full_path,
1375                            { type => LONGLINK }
1376                        );
1377
1378            unless( $longlink ) {
1379                $self->_error(  qq[Could not create 'LongLink' entry for ] .
1380                                qq[oversize file '] . $clone->full_path ."'" );
1381                return;
1382            };
1383
1384            push @write_me, $longlink;
1385        }
1386
1387        push @write_me, $clone;
1388
1389        ### write the one, optionally 2 a::t::file objects to the handle
1390        for my $clone (@write_me) {
1391
1392            ### if the file is a symlink, there are 2 options:
1393            ### either we leave the symlink intact, but then we don't write any
1394            ### data OR we follow the symlink, which means we actually make a
1395            ### copy. if we do the latter, we have to change the TYPE of the
1396            ### clone to 'FILE'
1397            my $link_ok =  $clone->is_symlink && $Archive::Tar::FOLLOW_SYMLINK;
1398            my $data_ok = !$clone->is_symlink && $clone->has_content;
1399
1400            ### downgrade to a 'normal' file if it's a symlink we're going to
1401            ### treat as a regular file
1402            $clone->_downgrade_to_plainfile if $link_ok;
1403
1404            ### get the header for this block
1405            my $header = $self->_format_tar_entry( $clone );
1406            unless( $header ) {
1407                $self->_error(q[Could not format header for: ] .
1408                                    $clone->full_path );
1409                return;
1410            }
1411
1412            unless( print $handle $header ) {
1413                $self->_error(q[Could not write header for: ] .
1414                                    $clone->full_path);
1415                return;
1416            }
1417
1418            if( $link_ok or $data_ok ) {
1419                unless( print $handle $clone->data ) {
1420                    $self->_error(q[Could not write data for: ] .
1421                                    $clone->full_path);
1422                    return;
1423                }
1424
1425                ### pad the end of the clone if required ###
1426                print $handle TAR_PAD->( $clone->size ) if $clone->size % BLOCK
1427            }
1428
1429        } ### done writing these entries
1430    }
1431
1432    ### write the end markers ###
1433    print $handle TAR_END x 2 or
1434            return $self->_error( qq[Could not write tar end markers] );
1435
1436    ### did you want it written to a file, or returned as a string? ###
1437    my $rv =  length($file) ? 1
1438                        : $HAS_PERLIO ? $dummy
1439                        : do { seek $handle, 0, 0; local $/; <$handle> };
1440
1441    ### make sure to close the handle if we created it
1442    if ( $file ne $handle ) {
1443	unless( close $handle ) {
1444	    $self->_error( qq[Could not write tar] );
1445	    return;
1446	}
1447    }
1448
1449    return $rv;
1450}
1451
1452sub _format_tar_entry {
1453    my $self        = shift;
1454    my $entry       = shift or return;
1455    my $ext_prefix  = shift; $ext_prefix = '' unless defined $ext_prefix;
1456    my $no_prefix   = shift || 0;
1457
1458    my $file    = $entry->name;
1459    my $prefix  = $entry->prefix; $prefix = '' unless defined $prefix;
1460
1461    ### remove the prefix from the file name
1462    ### not sure if this is still needed --kane
1463    ### no it's not -- Archive::Tar::File->_new_from_file will take care of
1464    ### this for us. Even worse, this would break if we tried to add a file
1465    ### like x/x.
1466    #if( length $prefix ) {
1467    #    $file =~ s/^$match//;
1468    #}
1469
1470    $prefix = File::Spec::Unix->catdir($ext_prefix, $prefix)
1471                if length $ext_prefix;
1472
1473    ### not sure why this is... ###
1474    my $l = PREFIX_LENGTH; # is ambiguous otherwise...
1475    substr ($prefix, 0, -$l) = "" if length $prefix >= PREFIX_LENGTH;
1476
1477    my $f1 = "%06o"; my $f2  = $ZERO_PAD_NUMBERS ? "%011o" : "%11o";
1478
1479    ### this might be optimizable with a 'changed' flag in the file objects ###
1480    my $tar = pack (
1481                PACK,
1482                $file,
1483
1484                (map { sprintf( $f1, $entry->$_() ) } qw[mode uid gid]),
1485                (map { sprintf( $f2, $entry->$_() ) } qw[size mtime]),
1486
1487                "",  # checksum field - space padded a bit down
1488
1489                (map { $entry->$_() }                 qw[type linkname magic]),
1490
1491                $entry->version || TAR_VERSION,
1492
1493                (map { $entry->$_() }                 qw[uname gname]),
1494                (map { sprintf( $f1, $entry->$_() ) } qw[devmajor devminor]),
1495
1496                ($no_prefix ? '' : $prefix)
1497    );
1498
1499    ### add the checksum ###
1500    my $checksum_fmt = $ZERO_PAD_NUMBERS ? "%06o\0" : "%06o\0";
1501    substr($tar,148,7) = sprintf("%6o\0", unpack("%16C*",$tar));
1502
1503    return $tar;
1504}
1505
1506=head2 $tar->add_files( @filenamelist )
1507
1508Takes a list of filenames and adds them to the in-memory archive.
1509
1510The path to the file is automatically converted to a Unix like
1511equivalent for use in the archive, and, if on MacOS, the file's
1512modification time is converted from the MacOS epoch to the Unix epoch.
1513So tar archives created on MacOS with B<Archive::Tar> can be read
1514both with I<tar> on Unix and applications like I<suntar> or
1515I<Stuffit Expander> on MacOS.
1516
1517Be aware that the file's type/creator and resource fork will be lost,
1518which is usually what you want in cross-platform archives.
1519
1520Instead of a filename, you can also pass it an existing C<Archive::Tar::File>
1521object from, for example, another archive. The object will be clone, and
1522effectively be a copy of the original, not an alias.
1523
1524Returns a list of C<Archive::Tar::File> objects that were just added.
1525
1526=cut
1527
1528sub add_files {
1529    my $self    = shift;
1530    my @files   = @_ or return;
1531
1532    my @rv;
1533    for my $file ( @files ) {
1534
1535        ### you passed an Archive::Tar::File object
1536        ### clone it so we don't accidentally have a reference to
1537        ### an object from another archive
1538        if( UNIVERSAL::isa( $file,'Archive::Tar::File' ) ) {
1539            push @rv, $file->clone;
1540            next;
1541        }
1542
1543        eval {
1544            if( utf8::is_utf8( $file )) {
1545              utf8::encode( $file );
1546            }
1547        };
1548
1549        unless( -e $file || -l $file ) {
1550            $self->_error( qq[No such file: '$file'] );
1551            next;
1552        }
1553
1554        my $obj = Archive::Tar::File->new( file => $file );
1555        unless( $obj ) {
1556            $self->_error( qq[Unable to add file: '$file'] );
1557            next;
1558        }
1559
1560        push @rv, $obj;
1561    }
1562
1563    push @{$self->{_data}}, @rv;
1564
1565    return @rv;
1566}
1567
1568=head2 $tar->add_data ( $filename, $data, [$opthashref] )
1569
1570Takes a filename, a scalar full of data and optionally a reference to
1571a hash with specific options.
1572
1573Will add a file to the in-memory archive, with name C<$filename> and
1574content C<$data>. Specific properties can be set using C<$opthashref>.
1575The following list of properties is supported: name, size, mtime
1576(last modified date), mode, uid, gid, linkname, uname, gname,
1577devmajor, devminor, prefix, type.  (On MacOS, the file's path and
1578modification times are converted to Unix equivalents.)
1579
1580Valid values for the file type are the following constants defined by
1581Archive::Tar::Constant:
1582
1583=over 4
1584
1585=item FILE
1586
1587Regular file.
1588
1589=item HARDLINK
1590
1591=item SYMLINK
1592
1593Hard and symbolic ("soft") links; linkname should specify target.
1594
1595=item CHARDEV
1596
1597=item BLOCKDEV
1598
1599Character and block devices. devmajor and devminor should specify the major
1600and minor device numbers.
1601
1602=item DIR
1603
1604Directory.
1605
1606=item FIFO
1607
1608FIFO (named pipe).
1609
1610=item SOCKET
1611
1612Socket.
1613
1614=back
1615
1616Returns the C<Archive::Tar::File> object that was just added, or
1617C<undef> on failure.
1618
1619=cut
1620
1621sub add_data {
1622    my $self    = shift;
1623    my ($file, $data, $opt) = @_;
1624
1625    my $obj = Archive::Tar::File->new( data => $file, $data, $opt );
1626    unless( $obj ) {
1627        $self->_error( qq[Unable to add file: '$file'] );
1628        return;
1629    }
1630
1631    push @{$self->{_data}}, $obj;
1632
1633    return $obj;
1634}
1635
1636=head2 $tar->error( [$BOOL] )
1637
1638Returns the current error string (usually, the last error reported).
1639If a true value was specified, it will give the C<Carp::longmess>
1640equivalent of the error, in effect giving you a stacktrace.
1641
1642For backwards compatibility, this error is also available as
1643C<$Archive::Tar::error> although it is much recommended you use the
1644method call instead.
1645
1646=cut
1647
1648{
1649    $error = '';
1650    my $longmess;
1651
1652    sub _error {
1653        my $self    = shift;
1654        my $msg     = $error = shift;
1655        $longmess   = Carp::longmess($error);
1656        if (ref $self) {
1657            $self->{_error} = $error;
1658            $self->{_longmess} = $longmess;
1659        }
1660
1661        ### set Archive::Tar::WARN to 0 to disable printing
1662        ### of errors
1663        if( $WARN ) {
1664            carp $DEBUG ? $longmess : $msg;
1665        }
1666
1667        return;
1668    }
1669
1670    sub error {
1671        my $self = shift;
1672        if (ref $self) {
1673            return shift() ? $self->{_longmess} : $self->{_error};
1674        } else {
1675            return shift() ? $longmess : $error;
1676        }
1677    }
1678}
1679
1680=head2 $tar->setcwd( $cwd );
1681
1682C<Archive::Tar> needs to know the current directory, and it will run
1683C<Cwd::cwd()> I<every> time it extracts a I<relative> entry from the
1684tarfile and saves it in the file system. (As of version 1.30, however,
1685C<Archive::Tar> will use the speed optimization described below
1686automatically, so it's only relevant if you're using C<extract_file()>).
1687
1688Since C<Archive::Tar> doesn't change the current directory internally
1689while it is extracting the items in a tarball, all calls to C<Cwd::cwd()>
1690can be avoided if we can guarantee that the current directory doesn't
1691get changed externally.
1692
1693To use this performance boost, set the current directory via
1694
1695    use Cwd;
1696    $tar->setcwd( cwd() );
1697
1698once before calling a function like C<extract_file> and
1699C<Archive::Tar> will use the current directory setting from then on
1700and won't call C<Cwd::cwd()> internally.
1701
1702To switch back to the default behaviour, use
1703
1704    $tar->setcwd( undef );
1705
1706and C<Archive::Tar> will call C<Cwd::cwd()> internally again.
1707
1708If you're using C<Archive::Tar>'s C<extract()> method, C<setcwd()> will
1709be called for you.
1710
1711=cut
1712
1713sub setcwd {
1714    my $self     = shift;
1715    my $cwd      = shift;
1716
1717    $self->{cwd} = $cwd;
1718}
1719
1720=head1 Class Methods
1721
1722=head2 Archive::Tar->create_archive($file, $compressed, @filelist)
1723
1724Creates a tar file from the list of files provided.  The first
1725argument can either be the name of the tar file to create or a
1726reference to an open file handle (e.g. a GLOB reference).
1727
1728The second argument is used to indicate compression. You can
1729compress using C<gzip>, C<bzip2> or C<xz>. If you pass a digit, it's assumed
1730to be the C<gzip> compression level (between 1 and 9), but the use of
1731constants is preferred:
1732
1733  # write a gzip compressed file
1734  Archive::Tar->create_archive( 'out.tgz', COMPRESS_GZIP, @filelist );
1735
1736  # write a bzip compressed file
1737  Archive::Tar->create_archive( 'out.tbz', COMPRESS_BZIP, @filelist );
1738
1739  # write a xz compressed file
1740  Archive::Tar->create_archive( 'out.txz', COMPRESS_XZ, @filelist );
1741
1742Note that when you pass in a filehandle, the compression argument
1743is ignored, as all files are printed verbatim to your filehandle.
1744If you wish to enable compression with filehandles, use an
1745C<IO::Zlib>, C<IO::Compress::Bzip2> or C<IO::Compress::Xz> filehandle instead.
1746
1747The remaining arguments list the files to be included in the tar file.
1748These files must all exist. Any files which don't exist or can't be
1749read are silently ignored.
1750
1751If the archive creation fails for any reason, C<create_archive> will
1752return false. Please use the C<error> method to find the cause of the
1753failure.
1754
1755Note that this method does not write C<on the fly> as it were; it
1756still reads all the files into memory before writing out the archive.
1757Consult the FAQ below if this is a problem.
1758
1759=cut
1760
1761sub create_archive {
1762    my $class = shift;
1763
1764    my $file    = shift; return unless defined $file;
1765    my $gzip    = shift || 0;
1766    my @files   = @_;
1767
1768    unless( @files ) {
1769        return $class->_error( qq[Cowardly refusing to create empty archive!] );
1770    }
1771
1772    my $tar = $class->new;
1773    $tar->add_files( @files );
1774    return $tar->write( $file, $gzip );
1775}
1776
1777=head2 Archive::Tar->iter( $filename, [ $compressed, {opt => $val} ] )
1778
1779Returns an iterator function that reads the tar file without loading
1780it all in memory.  Each time the function is called it will return the
1781next file in the tarball. The files are returned as
1782C<Archive::Tar::File> objects. The iterator function returns the
1783empty list once it has exhausted the files contained.
1784
1785The second argument can be a hash reference with options, which are
1786identical to the arguments passed to C<read()>.
1787
1788Example usage:
1789
1790    my $next = Archive::Tar->iter( "example.tar.gz", 1, {filter => qr/\.pm$/} );
1791
1792    while( my $f = $next->() ) {
1793        print $f->name, "\n";
1794
1795        $f->extract or warn "Extraction failed";
1796
1797        # ....
1798    }
1799
1800=cut
1801
1802
1803sub iter {
1804    my $class       = shift;
1805    my $filename    = shift;
1806    return unless defined $filename;
1807    my $compressed  = shift || 0;
1808    my $opts        = shift || {};
1809
1810    ### get a handle to read from.
1811    my $handle = $class->_get_handle(
1812        $filename,
1813        $compressed,
1814        READ_ONLY->( ZLIB )
1815    ) or return;
1816
1817    my @data;
1818		my $CONSTRUCT_ARGS = [ $filename, $compressed, $opts ];
1819    return sub {
1820        return shift(@data)     if @data;       # more than one file returned?
1821        return                  unless $handle; # handle exhausted?
1822
1823        ### read data, should only return file
1824        my $tarfile = $class->_read_tar($handle, { %$opts, limit => 1 });
1825        @data = @$tarfile if ref $tarfile && ref $tarfile eq 'ARRAY';
1826				if($Archive::Tar::RESOLVE_SYMLINK!~/none/){
1827					foreach(@data){
1828						#may refine this heuristic for ON_UNIX?
1829						if($_->linkname){
1830							#is there a better slot to store/share it ?
1831							$_->{'_archive'} = $CONSTRUCT_ARGS;
1832						}
1833					}
1834				}
1835
1836        ### return one piece of data
1837        return shift(@data)     if @data;
1838
1839        ### data is exhausted, free the filehandle
1840        undef $handle;
1841				if(@$CONSTRUCT_ARGS == 4){
1842					#free archive in memory
1843					undef $CONSTRUCT_ARGS->[-1];
1844				}
1845        return;
1846    };
1847}
1848
1849=head2 Archive::Tar->list_archive($file, $compressed, [\@properties])
1850
1851Returns a list of the names of all the files in the archive.  The
1852first argument can either be the name of the tar file to list or a
1853reference to an open file handle (e.g. a GLOB reference).
1854
1855If C<list_archive()> is passed an array reference as its third
1856argument it returns a list of hash references containing the requested
1857properties of each file.  The following list of properties is
1858supported: full_path, name, size, mtime (last modified date), mode,
1859uid, gid, linkname, uname, gname, devmajor, devminor, prefix, type.
1860
1861See C<Archive::Tar::File> for details about supported properties.
1862
1863Passing an array reference containing only one element, 'name', is
1864special cased to return a list of names rather than a list of hash
1865references.
1866
1867=cut
1868
1869sub list_archive {
1870    my $class   = shift;
1871    my $file    = shift; return unless defined $file;
1872    my $gzip    = shift || 0;
1873
1874    my $tar = $class->new($file, $gzip);
1875    return unless $tar;
1876
1877    return $tar->list_files( @_ );
1878}
1879
1880=head2 Archive::Tar->extract_archive($file, $compressed)
1881
1882Extracts the contents of the tar file.  The first argument can either
1883be the name of the tar file to create or a reference to an open file
1884handle (e.g. a GLOB reference).  All relative paths in the tar file will
1885be created underneath the current working directory.
1886
1887C<extract_archive> will return a list of files it extracted.
1888If the archive extraction fails for any reason, C<extract_archive>
1889will return false.  Please use the C<error> method to find the cause
1890of the failure.
1891
1892=cut
1893
1894sub extract_archive {
1895    my $class   = shift;
1896    my $file    = shift; return unless defined $file;
1897    my $gzip    = shift || 0;
1898
1899    my $tar = $class->new( ) or return;
1900
1901    return $tar->read( $file, $gzip, { extract => 1 } );
1902}
1903
1904=head2 $bool = Archive::Tar->has_io_string
1905
1906Returns true if we currently have C<IO::String> support loaded.
1907
1908Either C<IO::String> or C<perlio> support is needed to support writing
1909stringified archives. Currently, C<perlio> is the preferred method, if
1910available.
1911
1912See the C<GLOBAL VARIABLES> section to see how to change this preference.
1913
1914=cut
1915
1916sub has_io_string { return $HAS_IO_STRING; }
1917
1918=head2 $bool = Archive::Tar->has_perlio
1919
1920Returns true if we currently have C<perlio> support loaded.
1921
1922This requires C<perl-5.8> or higher, compiled with C<perlio>
1923
1924Either C<IO::String> or C<perlio> support is needed to support writing
1925stringified archives. Currently, C<perlio> is the preferred method, if
1926available.
1927
1928See the C<GLOBAL VARIABLES> section to see how to change this preference.
1929
1930=cut
1931
1932sub has_perlio { return $HAS_PERLIO; }
1933
1934=head2 $bool = Archive::Tar->has_zlib_support
1935
1936Returns true if C<Archive::Tar> can extract C<zlib> compressed archives
1937
1938=cut
1939
1940sub has_zlib_support { return ZLIB }
1941
1942=head2 $bool = Archive::Tar->has_bzip2_support
1943
1944Returns true if C<Archive::Tar> can extract C<bzip2> compressed archives
1945
1946=cut
1947
1948sub has_bzip2_support { return BZIP }
1949
1950=head2 $bool = Archive::Tar->has_xz_support
1951
1952Returns true if C<Archive::Tar> can extract C<xz> compressed archives
1953
1954=cut
1955
1956sub has_xz_support { return XZ }
1957
1958=head2 Archive::Tar->can_handle_compressed_files
1959
1960A simple checking routine, which will return true if C<Archive::Tar>
1961is able to uncompress compressed archives on the fly with C<IO::Zlib>,
1962C<IO::Compress::Bzip2> and C<IO::Compress::Xz> or false if not both are installed.
1963
1964You can use this as a shortcut to determine whether C<Archive::Tar>
1965will do what you think before passing compressed archives to its
1966C<read> method.
1967
1968=cut
1969
1970sub can_handle_compressed_files { return ZLIB && BZIP ? 1 : 0 }
1971
1972sub no_string_support {
1973    croak("You have to install IO::String to support writing archives to strings");
1974}
1975
1976sub _symlinks_resolver{
1977  my ($src, $trg) = @_;
1978  my @src = split /[\/\\]/, $src;
1979  my @trg = split /[\/\\]/, $trg;
1980  pop @src; #strip out current object name
1981  if(@trg and $trg[0] eq ''){
1982    shift @trg;
1983    #restart path from scratch
1984    @src = ( );
1985  }
1986  foreach my $part ( @trg ){
1987    next if $part eq '.'; #ignore current
1988    if($part eq '..'){
1989      #got to parent
1990      pop @src;
1991    }
1992    else{
1993      #append it
1994      push @src, $part;
1995    }
1996  }
1997  my $path = join('/', @src);
1998  warn "_symlinks_resolver('$src','$trg') = $path" if $DEBUG;
1999  return $path;
2000}
2001
20021;
2003
2004__END__
2005
2006=head1 GLOBAL VARIABLES
2007
2008=head2 $Archive::Tar::FOLLOW_SYMLINK
2009
2010Set this variable to C<1> to make C<Archive::Tar> effectively make a
2011copy of the file when extracting. Default is C<0>, which
2012means the symlink stays intact. Of course, you will have to pack the
2013file linked to as well.
2014
2015This option is checked when you write out the tarfile using C<write>
2016or C<create_archive>.
2017
2018This works just like C</bin/tar>'s C<-h> option.
2019
2020=head2 $Archive::Tar::CHOWN
2021
2022By default, C<Archive::Tar> will try to C<chown> your files if it is
2023able to. In some cases, this may not be desired. In that case, set
2024this variable to C<0> to disable C<chown>-ing, even if it were
2025possible.
2026
2027The default is C<1>.
2028
2029=head2 $Archive::Tar::CHMOD
2030
2031By default, C<Archive::Tar> will try to C<chmod> your files to
2032whatever mode was specified for the particular file in the archive.
2033In some cases, this may not be desired. In that case, set this
2034variable to C<0> to disable C<chmod>-ing.
2035
2036The default is C<1>.
2037
2038=head2 $Archive::Tar::SAME_PERMISSIONS
2039
2040When, C<$Archive::Tar::CHMOD> is enabled, this setting controls whether
2041the permissions on files from the archive are used without modification
2042of if they are filtered by removing any setid bits and applying the
2043current umask.
2044
2045The default is C<1> for the root user and C<0> for normal users.
2046
2047=head2 $Archive::Tar::DO_NOT_USE_PREFIX
2048
2049By default, C<Archive::Tar> will try to put paths that are over
2050100 characters in the C<prefix> field of your tar header, as
2051defined per POSIX-standard. However, some (older) tar programs
2052do not implement this spec. To retain compatibility with these older
2053or non-POSIX compliant versions, you can set the C<$DO_NOT_USE_PREFIX>
2054variable to a true value, and C<Archive::Tar> will use an alternate
2055way of dealing with paths over 100 characters by using the
2056C<GNU Extended Header> feature.
2057
2058Note that clients who do not support the C<GNU Extended Header>
2059feature will not be able to read these archives. Such clients include
2060tars on C<Solaris>, C<Irix> and C<AIX>.
2061
2062The default is C<0>.
2063
2064=head2 $Archive::Tar::DEBUG
2065
2066Set this variable to C<1> to always get the C<Carp::longmess> output
2067of the warnings, instead of the regular C<carp>. This is the same
2068message you would get by doing:
2069
2070    $tar->error(1);
2071
2072Defaults to C<0>.
2073
2074=head2 $Archive::Tar::WARN
2075
2076Set this variable to C<0> if you do not want any warnings printed.
2077Personally I recommend against doing this, but people asked for the
2078option. Also, be advised that this is of course not threadsafe.
2079
2080Defaults to C<1>.
2081
2082=head2 $Archive::Tar::error
2083
2084Holds the last reported error. Kept for historical reasons, but its
2085use is very much discouraged. Use the C<error()> method instead:
2086
2087    warn $tar->error unless $tar->extract;
2088
2089Note that in older versions of this module, the C<error()> method
2090would return an effectively global value even when called an instance
2091method as above. This has since been fixed, and multiple instances of
2092C<Archive::Tar> now have separate error strings.
2093
2094=head2 $Archive::Tar::INSECURE_EXTRACT_MODE
2095
2096This variable indicates whether C<Archive::Tar> should allow
2097files to be extracted outside their current working directory.
2098
2099Allowing this could have security implications, as a malicious
2100tar archive could alter or replace any file the extracting user
2101has permissions to. Therefor, the default is to not allow
2102insecure extractions.
2103
2104If you trust the archive, or have other reasons to allow the
2105archive to write files outside your current working directory,
2106set this variable to C<true>.
2107
2108Note that this is a backwards incompatible change from version
2109C<1.36> and before.
2110
2111=head2 $Archive::Tar::HAS_PERLIO
2112
2113This variable holds a boolean indicating if we currently have
2114C<perlio> support loaded. This will be enabled for any perl
2115greater than C<5.8> compiled with C<perlio>.
2116
2117If you feel strongly about disabling it, set this variable to
2118C<false>. Note that you will then need C<IO::String> installed
2119to support writing stringified archives.
2120
2121Don't change this variable unless you B<really> know what you're
2122doing.
2123
2124=head2 $Archive::Tar::HAS_IO_STRING
2125
2126This variable holds a boolean indicating if we currently have
2127C<IO::String> support loaded. This will be enabled for any perl
2128that has a loadable C<IO::String> module.
2129
2130If you feel strongly about disabling it, set this variable to
2131C<false>. Note that you will then need C<perlio> support from
2132your perl to be able to  write stringified archives.
2133
2134Don't change this variable unless you B<really> know what you're
2135doing.
2136
2137=head2 $Archive::Tar::ZERO_PAD_NUMBERS
2138
2139This variable holds a boolean indicating if we will create
2140zero padded numbers for C<size>, C<mtime> and C<checksum>.
2141The default is C<0>, indicating that we will create space padded
2142numbers. Added for compatibility with C<busybox> implementations.
2143
2144=head2 Tuning the way RESOLVE_SYMLINK will works
2145
2146	You can tune the behaviour by setting the $Archive::Tar::RESOLVE_SYMLINK variable,
2147	or $ENV{PERL5_AT_RESOLVE_SYMLINK} before loading the module Archive::Tar.
2148
2149  Values can be one of the following:
2150
2151		none
2152           Disable this mechanism and failed as it was in previous version (<1.88)
2153
2154		speed (default)
2155           If you prefer speed
2156           this will read again the whole archive using read() so all entries
2157           will be available
2158
2159    memory
2160           If you prefer memory
2161
2162	Limitation
2163
2164		It won't work for terminal, pipe or sockets or every non seekable source.
2165
2166=cut
2167
2168=head1 FAQ
2169
2170=over 4
2171
2172=item What's the minimum perl version required to run Archive::Tar?
2173
2174You will need perl version 5.005_03 or newer.
2175
2176=item Isn't Archive::Tar slow?
2177
2178Yes it is. It's pure perl, so it's a lot slower then your C</bin/tar>
2179However, it's very portable. If speed is an issue, consider using
2180C</bin/tar> instead.
2181
2182=item Isn't Archive::Tar heavier on memory than /bin/tar?
2183
2184Yes it is, see previous answer. Since C<Compress::Zlib> and therefore
2185C<IO::Zlib> doesn't support C<seek> on their filehandles, there is little
2186choice but to read the archive into memory.
2187This is ok if you want to do in-memory manipulation of the archive.
2188
2189If you just want to extract, use the C<extract_archive> class method
2190instead. It will optimize and write to disk immediately.
2191
2192Another option is to use the C<iter> class method to iterate over
2193the files in the tarball without reading them all in memory at once.
2194
2195=item Can you lazy-load data instead?
2196
2197In some cases, yes. You can use the C<iter> class method to iterate
2198over the files in the tarball without reading them all in memory at once.
2199
2200=item How much memory will an X kb tar file need?
2201
2202Probably more than X kb, since it will all be read into memory. If
2203this is a problem, and you don't need to do in memory manipulation
2204of the archive, consider using the C<iter> class method, or C</bin/tar>
2205instead.
2206
2207=item What do you do with unsupported filetypes in an archive?
2208
2209C<Unix> has a few filetypes that aren't supported on other platforms,
2210like C<Win32>. If we encounter a C<hardlink> or C<symlink> we'll just
2211try to make a copy of the original file, rather than throwing an error.
2212
2213This does require you to read the entire archive in to memory first,
2214since otherwise we wouldn't know what data to fill the copy with.
2215(This means that you cannot use the class methods, including C<iter>
2216on archives that have incompatible filetypes and still expect things
2217to work).
2218
2219For other filetypes, like C<chardevs> and C<blockdevs> we'll warn that
2220the extraction of this particular item didn't work.
2221
2222=item I'm using WinZip, or some other non-POSIX client, and files are not being extracted properly!
2223
2224By default, C<Archive::Tar> is in a completely POSIX-compatible
2225mode, which uses the POSIX-specification of C<tar> to store files.
2226For paths greater than 100 characters, this is done using the
2227C<POSIX header prefix>. Non-POSIX-compatible clients may not support
2228this part of the specification, and may only support the C<GNU Extended
2229Header> functionality. To facilitate those clients, you can set the
2230C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>. See the
2231C<GLOBAL VARIABLES> section for details on this variable.
2232
2233Note that GNU tar earlier than version 1.14 does not cope well with
2234the C<POSIX header prefix>. If you use such a version, consider setting
2235the C<$Archive::Tar::DO_NOT_USE_PREFIX> variable to C<true>.
2236
2237=item How do I extract only files that have property X from an archive?
2238
2239Sometimes, you might not wish to extract a complete archive, just
2240the files that are relevant to you, based on some criteria.
2241
2242You can do this by filtering a list of C<Archive::Tar::File> objects
2243based on your criteria. For example, to extract only files that have
2244the string C<foo> in their title, you would use:
2245
2246    $tar->extract(
2247        grep { $_->full_path =~ /foo/ } $tar->get_files
2248    );
2249
2250This way, you can filter on any attribute of the files in the archive.
2251Consult the C<Archive::Tar::File> documentation on how to use these
2252objects.
2253
2254=item How do I access .tar.Z files?
2255
2256The C<Archive::Tar> module can optionally use C<Compress::Zlib> (via
2257the C<IO::Zlib> module) to access tar files that have been compressed
2258with C<gzip>. Unfortunately tar files compressed with the Unix C<compress>
2259utility cannot be read by C<Compress::Zlib> and so cannot be directly
2260accesses by C<Archive::Tar>.
2261
2262If the C<uncompress> or C<gunzip> programs are available, you can use
2263one of these workarounds to read C<.tar.Z> files from C<Archive::Tar>
2264
2265Firstly with C<uncompress>
2266
2267    use Archive::Tar;
2268
2269    open F, "uncompress -c $filename |";
2270    my $tar = Archive::Tar->new(*F);
2271    ...
2272
2273and this with C<gunzip>
2274
2275    use Archive::Tar;
2276
2277    open F, "gunzip -c $filename |";
2278    my $tar = Archive::Tar->new(*F);
2279    ...
2280
2281Similarly, if the C<compress> program is available, you can use this to
2282write a C<.tar.Z> file
2283
2284    use Archive::Tar;
2285    use IO::File;
2286
2287    my $fh = IO::File->new( "| compress -c >$filename" );
2288    my $tar = Archive::Tar->new();
2289    ...
2290    $tar->write($fh);
2291    $fh->close ;
2292
2293=item How do I handle Unicode strings?
2294
2295C<Archive::Tar> uses byte semantics for any files it reads from or writes
2296to disk. This is not a problem if you only deal with files and never
2297look at their content or work solely with byte strings. But if you use
2298Unicode strings with character semantics, some additional steps need
2299to be taken.
2300
2301For example, if you add a Unicode string like
2302
2303    # Problem
2304    $tar->add_data('file.txt', "Euro: \x{20AC}");
2305
2306then there will be a problem later when the tarfile gets written out
2307to disk via C<< $tar->write() >>:
2308
2309    Wide character in print at .../Archive/Tar.pm line 1014.
2310
2311The data was added as a Unicode string and when writing it out to disk,
2312the C<:utf8> line discipline wasn't set by C<Archive::Tar>, so Perl
2313tried to convert the string to ISO-8859 and failed. The written file
2314now contains garbage.
2315
2316For this reason, Unicode strings need to be converted to UTF-8-encoded
2317bytestrings before they are handed off to C<add_data()>:
2318
2319    use Encode;
2320    my $data = "Accented character: \x{20AC}";
2321    $data = encode('utf8', $data);
2322
2323    $tar->add_data('file.txt', $data);
2324
2325A opposite problem occurs if you extract a UTF8-encoded file from a
2326tarball. Using C<get_content()> on the C<Archive::Tar::File> object
2327will return its content as a bytestring, not as a Unicode string.
2328
2329If you want it to be a Unicode string (because you want character
2330semantics with operations like regular expression matching), you need
2331to decode the UTF8-encoded content and have Perl convert it into
2332a Unicode string:
2333
2334    use Encode;
2335    my $data = $tar->get_content();
2336
2337    # Make it a Unicode string
2338    $data = decode('utf8', $data);
2339
2340There is no easy way to provide this functionality in C<Archive::Tar>,
2341because a tarball can contain many files, and each of which could be
2342encoded in a different way.
2343
2344=back
2345
2346=head1 CAVEATS
2347
2348The AIX tar does not fill all unused space in the tar archive with 0x00.
2349This sometimes leads to warning messages from C<Archive::Tar>.
2350
2351  Invalid header block at offset nnn
2352
2353A fix for that problem is scheduled to be released in the following levels
2354of AIX, all of which should be coming out in the 4th quarter of 2009:
2355
2356 AIX 5.3 TL7 SP10
2357 AIX 5.3 TL8 SP8
2358 AIX 5.3 TL9 SP5
2359 AIX 5.3 TL10 SP2
2360
2361 AIX 6.1 TL0 SP11
2362 AIX 6.1 TL1 SP7
2363 AIX 6.1 TL2 SP6
2364 AIX 6.1 TL3 SP3
2365
2366The IBM APAR number for this problem is IZ50240 (Reported component ID:
23675765G0300 / AIX 5.3). It is possible to get an ifix for that problem.
2368If you need an ifix please contact your local IBM AIX support.
2369
2370=head1 TODO
2371
2372=over 4
2373
2374=item Check if passed in handles are open for read/write
2375
2376Currently I don't know of any portable pure perl way to do this.
2377Suggestions welcome.
2378
2379=item Allow archives to be passed in as string
2380
2381Currently, we only allow opened filehandles or filenames, but
2382not strings. The internals would need some reworking to facilitate
2383stringified archives.
2384
2385=item Facilitate processing an opened filehandle of a compressed archive
2386
2387Currently, we only support this if the filehandle is an IO::Zlib object.
2388Environments, like apache, will present you with an opened filehandle
2389to an uploaded file, which might be a compressed archive.
2390
2391=back
2392
2393=head1 SEE ALSO
2394
2395=over 4
2396
2397=item The GNU tar specification
2398
2399C<http://www.gnu.org/software/tar/manual/tar.html>
2400
2401=item The PAX format specification
2402
2403The specification which tar derives from; C< http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html>
2404
2405=item A comparison of GNU and POSIX tar standards; C<http://www.delorie.com/gnu/docs/tar/tar_114.html>
2406
2407=item GNU tar intends to switch to POSIX compatibility
2408
2409GNU Tar authors have expressed their intention to become completely
2410POSIX-compatible; C<http://www.gnu.org/software/tar/manual/html_node/Formats.html>
2411
2412=item A Comparison between various tar implementations
2413
2414Lists known issues and incompatibilities; C<http://gd.tuwien.ac.at/utils/archivers/star/README.otherbugs>
2415
2416=back
2417
2418=head1 AUTHOR
2419
2420This module by Jos Boumans E<lt>kane@cpan.orgE<gt>.
2421
2422Please reports bugs to E<lt>bug-archive-tar@rt.cpan.orgE<gt>.
2423
2424=head1 ACKNOWLEDGEMENTS
2425
2426Thanks to Sean Burke, Chris Nandor, Chip Salzenberg, Tim Heaney, Gisle Aas,
2427Rainer Tammer and especially Andrew Savige for their help and suggestions.
2428
2429=head1 COPYRIGHT
2430
2431This module is copyright (c) 2002 - 2009 Jos Boumans
2432E<lt>kane@cpan.orgE<gt>. All rights reserved.
2433
2434This library is free software; you may redistribute and/or modify
2435it under the same terms as Perl itself.
2436
2437=cut
2438