Spaces:
Running
Running
package Archive::Tar::File; | |
use strict; | |
use Carp (); | |
use IO::File; | |
use File::Spec::Unix (); | |
use File::Spec (); | |
use File::Basename (); | |
use Archive::Tar::Constant; | |
use vars qw[@ISA $VERSION]; | |
#@ISA = qw[Archive::Tar]; | |
$VERSION = '2.38'; | |
### set value to 1 to oct() it during the unpack ### | |
my $tmpl = [ | |
name => 0, # string A100 | |
mode => 1, # octal A8 | |
uid => 1, # octal A8 | |
gid => 1, # octal A8 | |
size => 0, # octal # cdrake - not *always* octal.. A12 | |
mtime => 1, # octal A12 | |
chksum => 1, # octal A8 | |
type => 0, # character A1 | |
linkname => 0, # string A100 | |
magic => 0, # string A6 | |
version => 0, # 2 bytes A2 | |
uname => 0, # string A32 | |
gname => 0, # string A32 | |
devmajor => 1, # octal A8 | |
devminor => 1, # octal A8 | |
prefix => 0, # A155 x 12 | |
### end UNPACK items ### | |
raw => 0, # the raw data chunk | |
data => 0, # the data associated with the file -- | |
# This might be very memory intensive | |
]; | |
### install get/set accessors for this object. | |
for ( my $i=0; $i<scalar @$tmpl ; $i+=2 ) { | |
my $key = $tmpl->[$i]; | |
no strict 'refs'; | |
*{__PACKAGE__."::$key"} = sub { | |
my $self = shift; | |
$self->{$key} = $_[0] if @_; | |
### just in case the key is not there or undef or something ### | |
{ local $^W = 0; | |
return $self->{$key}; | |
} | |
} | |
} | |
=head1 NAME | |
Archive::Tar::File - a subclass for in-memory extracted file from Archive::Tar | |
=head1 SYNOPSIS | |
my @items = $tar->get_files; | |
print $_->name, ' ', $_->size, "\n" for @items; | |
print $object->get_content; | |
$object->replace_content('new content'); | |
$object->rename( 'new/full/path/to/file.c' ); | |
=head1 DESCRIPTION | |
Archive::Tar::Files provides a neat little object layer for in-memory | |
extracted files. It's mostly used internally in Archive::Tar to tidy | |
up the code, but there's no reason users shouldn't use this API as | |
well. | |
=head2 Accessors | |
A lot of the methods in this package are accessors to the various | |
fields in the tar header: | |
=over 4 | |
=item name | |
The file's name | |
=item mode | |
The file's mode | |
=item uid | |
The user id owning the file | |
=item gid | |
The group id owning the file | |
=item size | |
File size in bytes | |
=item mtime | |
Modification time. Adjusted to mac-time on MacOS if required | |
=item chksum | |
Checksum field for the tar header | |
=item type | |
File type -- numeric, but comparable to exported constants -- see | |
Archive::Tar's documentation | |
=item linkname | |
If the file is a symlink, the file it's pointing to | |
=item magic | |
Tar magic string -- not useful for most users | |
=item version | |
Tar version string -- not useful for most users | |
=item uname | |
The user name that owns the file | |
=item gname | |
The group name that owns the file | |
=item devmajor | |
Device major number in case of a special file | |
=item devminor | |
Device minor number in case of a special file | |
=item prefix | |
Any directory to prefix to the extraction path, if any | |
=item raw | |
Raw tar header -- not useful for most users | |
=back | |
=head1 Methods | |
=head2 Archive::Tar::File->new( file => $path ) | |
Returns a new Archive::Tar::File object from an existing file. | |
Returns undef on failure. | |
=head2 Archive::Tar::File->new( data => $path, $data, $opt ) | |
Returns a new Archive::Tar::File object from data. | |
C<$path> defines the file name (which need not exist), C<$data> the | |
file contents, and C<$opt> is a reference to a hash of attributes | |
which may be used to override the default attributes (fields in the | |
tar header), which are described above in the Accessors section. | |
Returns undef on failure. | |
=head2 Archive::Tar::File->new( chunk => $chunk ) | |
Returns a new Archive::Tar::File object from a raw 512-byte tar | |
archive chunk. | |
Returns undef on failure. | |
=cut | |
sub new { | |
my $class = shift; | |
my $what = shift; | |
my $obj = ($what eq 'chunk') ? __PACKAGE__->_new_from_chunk( @_ ) : | |
($what eq 'file' ) ? __PACKAGE__->_new_from_file( @_ ) : | |
($what eq 'data' ) ? __PACKAGE__->_new_from_data( @_ ) : | |
undef; | |
return $obj; | |
} | |
### copies the data, creates a clone ### | |
sub clone { | |
my $self = shift; | |
return bless { %$self }, ref $self; | |
} | |
sub _new_from_chunk { | |
my $class = shift; | |
my $chunk = shift or return; # 512 bytes of tar header | |
my %hash = @_; | |
### filter any arguments on defined-ness of values. | |
### this allows overriding from what the tar-header is saying | |
### about this tar-entry. Particularly useful for @LongLink files | |
my %args = map { $_ => $hash{$_} } grep { defined $hash{$_} } keys %hash; | |
### makes it start at 0 actually... :) ### | |
my $i = -1; | |
my %entry = map { | |
my ($s,$v)=($tmpl->[++$i],$tmpl->[++$i]); # cdrake | |
($_)=($_=~/^([^\0]*)/) unless($s eq 'size'); # cdrake | |
$s=> $v ? oct $_ : $_ # cdrake | |
# $tmpl->[++$i] => $tmpl->[++$i] ? oct $_ : $_ # removed by cdrake - mucks up binary sizes >8gb | |
} unpack( UNPACK, $chunk ); # cdrake | |
# } map { /^([^\0]*)/ } unpack( UNPACK, $chunk ); # old - replaced now by cdrake | |
if(substr($entry{'size'}, 0, 1) eq "\x80") { # binary size extension for files >8gigs (> octal 77777777777777) # cdrake | |
my @sz=unpack("aCSNN",$entry{'size'}); $entry{'size'}=$sz[4]+(2**32)*$sz[3]+$sz[2]*(2**64); # Use the low 80 bits (should use the upper 15 as well, but as at year 2011, that seems unlikely to ever be needed - the numbers are just too big...) # cdrake | |
} else { # cdrake | |
($entry{'size'})=($entry{'size'}=~/^([^\0]*)/); $entry{'size'}=oct $entry{'size'}; # cdrake | |
} # cdrake | |
my $obj = bless { %entry, %args }, $class; | |
### magic is a filetype string.. it should have something like 'ustar' or | |
### something similar... if the chunk is garbage, skip it | |
return unless $obj->magic !~ /\W/; | |
### store the original chunk ### | |
$obj->raw( $chunk ); | |
$obj->type(FILE) if ( (!length $obj->type) or ($obj->type =~ /\W/) ); | |
$obj->type(DIR) if ( ($obj->is_file) && ($obj->name =~ m|/$|) ); | |
return $obj; | |
} | |
sub _new_from_file { | |
my $class = shift; | |
my $path = shift; | |
### path has to at least exist | |
return unless defined $path; | |
my $type = __PACKAGE__->_filetype($path); | |
my $data = ''; | |
READ: { | |
unless ($type == DIR ) { | |
my $fh = IO::File->new; | |
unless( $fh->open($path) ) { | |
### dangling symlinks are fine, stop reading but continue | |
### creating the object | |
last READ if $type == SYMLINK; | |
### otherwise, return from this function -- | |
### anything that's *not* a symlink should be | |
### resolvable | |
return; | |
} | |
### binmode needed to read files properly on win32 ### | |
binmode $fh; | |
$data = do { local $/; <$fh> }; | |
close $fh; | |
} | |
} | |
my @items = qw[mode uid gid size mtime]; | |
my %hash = map { shift(@items), $_ } (lstat $path)[2,4,5,7,9]; | |
if (ON_VMS) { | |
### VMS has two UID modes, traditional and POSIX. Normally POSIX is | |
### not used. We currently do not have an easy way to see if we are in | |
### POSIX mode. In traditional mode, the UID is actually the VMS UIC. | |
### The VMS UIC has the upper 16 bits is the GID, which in many cases | |
### the VMS UIC will be larger than 209715, the largest that TAR can | |
### handle. So for now, assume it is traditional if the UID is larger | |
### than 0x10000. | |
if ($hash{uid} > 0x10000) { | |
$hash{uid} = $hash{uid} & 0xFFFF; | |
} | |
### The file length from stat() is the physical length of the file | |
### However the amount of data read in may be more for some file types. | |
### Fixed length files are read past the logical EOF to end of the block | |
### containing. Other file types get expanded on read because record | |
### delimiters are added. | |
my $data_len = length $data; | |
$hash{size} = $data_len if $hash{size} < $data_len; | |
} | |
### you *must* set size == 0 on symlinks, or the next entry will be | |
### though of as the contents of the symlink, which is wrong. | |
### this fixes bug #7937 | |
$hash{size} = 0 if ($type == DIR or $type == SYMLINK); | |
$hash{mtime} -= TIME_OFFSET; | |
### strip the high bits off the mode, which we don't need to store | |
$hash{mode} = STRIP_MODE->( $hash{mode} ); | |
### probably requires some file path munging here ... ### | |
### name and prefix are set later | |
my $obj = { | |
%hash, | |
name => '', | |
chksum => CHECK_SUM, | |
type => $type, | |
linkname => ($type == SYMLINK and CAN_READLINK) | |
? readlink $path | |
: '', | |
magic => MAGIC, | |
version => TAR_VERSION, | |
uname => UNAME->( $hash{uid} ), | |
gname => GNAME->( $hash{gid} ), | |
devmajor => 0, # not handled | |
devminor => 0, # not handled | |
prefix => '', | |
data => $data, | |
}; | |
bless $obj, $class; | |
### fix up the prefix and file from the path | |
my($prefix,$file) = $obj->_prefix_and_file( $path ); | |
$obj->prefix( $prefix ); | |
$obj->name( $file ); | |
return $obj; | |
} | |
sub _new_from_data { | |
my $class = shift; | |
my $path = shift; return unless defined $path; | |
my $data = shift; return unless defined $data; | |
my $opt = shift; | |
my $obj = { | |
data => $data, | |
name => '', | |
mode => MODE, | |
uid => UID, | |
gid => GID, | |
size => length $data, | |
mtime => time - TIME_OFFSET, | |
chksum => CHECK_SUM, | |
type => FILE, | |
linkname => '', | |
magic => MAGIC, | |
version => TAR_VERSION, | |
uname => UNAME->( UID ), | |
gname => GNAME->( GID ), | |
devminor => 0, | |
devmajor => 0, | |
prefix => '', | |
}; | |
### overwrite with user options, if provided ### | |
if( $opt and ref $opt eq 'HASH' ) { | |
for my $key ( keys %$opt ) { | |
### don't write bogus options ### | |
next unless exists $obj->{$key}; | |
$obj->{$key} = $opt->{$key}; | |
} | |
} | |
bless $obj, $class; | |
### fix up the prefix and file from the path | |
my($prefix,$file) = $obj->_prefix_and_file( $path ); | |
$obj->prefix( $prefix ); | |
$obj->name( $file ); | |
return $obj; | |
} | |
sub _prefix_and_file { | |
my $self = shift; | |
my $path = shift; | |
my ($vol, $dirs, $file) = File::Spec->splitpath( $path, $self->is_dir ); | |
my @dirs = File::Spec->splitdir( File::Spec->canonpath($dirs) ); | |
### if it's a directory, then $file might be empty | |
$file = pop @dirs if $self->is_dir and not length $file; | |
### splitting ../ gives you the relative path in native syntax | |
### Remove the root (000000) directory | |
### The volume from splitpath will also be in native syntax | |
if (ON_VMS) { | |
map { $_ = '..' if $_ eq '-'; $_ = '' if $_ eq '000000' } @dirs; | |
if (length($vol)) { | |
$vol = VMS::Filespec::unixify($vol); | |
unshift @dirs, $vol; | |
} | |
} | |
my $prefix = File::Spec::Unix->catdir(@dirs); | |
return( $prefix, $file ); | |
} | |
sub _filetype { | |
my $self = shift; | |
my $file = shift; | |
return unless defined $file; | |
return SYMLINK if (-l $file); # Symlink | |
return FILE if (-f _); # Plain file | |
return DIR if (-d _); # Directory | |
return FIFO if (-p _); # Named pipe | |
return SOCKET if (-S _); # Socket | |
return BLOCKDEV if (-b _); # Block special | |
return CHARDEV if (-c _); # Character special | |
### shouldn't happen, this is when making archives, not reading ### | |
return LONGLINK if ( $file eq LONGLINK_NAME ); | |
return UNKNOWN; # Something else (like what?) | |
} | |
### this method 'downgrades' a file to plain file -- this is used for | |
### symlinks when FOLLOW_SYMLINKS is true. | |
sub _downgrade_to_plainfile { | |
my $entry = shift; | |
$entry->type( FILE ); | |
$entry->mode( MODE ); | |
$entry->linkname(''); | |
return 1; | |
} | |
=head2 $bool = $file->extract( [ $alternative_name ] ) | |
Extract this object, optionally to an alternative name. | |
See C<< Archive::Tar->extract_file >> for details. | |
Returns true on success and false on failure. | |
=cut | |
sub extract { | |
my $self = shift; | |
local $Carp::CarpLevel += 1; | |
### avoid circular use, so only require; | |
require Archive::Tar; | |
return Archive::Tar->_extract_file( $self, @_ ); | |
} | |
=head2 $path = $file->full_path | |
Returns the full path from the tar header; this is basically a | |
concatenation of the C<prefix> and C<name> fields. | |
=cut | |
sub full_path { | |
my $self = shift; | |
### if prefix field is empty | |
return $self->name unless defined $self->prefix and length $self->prefix; | |
### or otherwise, catfile'd | |
return File::Spec::Unix->catfile( $self->prefix, $self->name ); | |
} | |
=head2 $bool = $file->validate | |
Done by Archive::Tar internally when reading the tar file: | |
validate the header against the checksum to ensure integer tar file. | |
Returns true on success, false on failure | |
=cut | |
sub validate { | |
my $self = shift; | |
my $raw = $self->raw; | |
### don't know why this one is different from the one we /write/ ### | |
substr ($raw, 148, 8) = " "; | |
### bug #43513: [PATCH] Accept wrong checksums from SunOS and HP-UX tar | |
### like GNU tar does. See here for details: | |
### http://www.gnu.org/software/tar/manual/tar.html#SEC139 | |
### so we do both a signed AND unsigned validate. if one succeeds, that's | |
### good enough | |
return ( (unpack ("%16C*", $raw) == $self->chksum) | |
or (unpack ("%16c*", $raw) == $self->chksum)) ? 1 : 0; | |
} | |
=head2 $bool = $file->has_content | |
Returns a boolean to indicate whether the current object has content. | |
Some special files like directories and so on never will have any | |
content. This method is mainly to make sure you don't get warnings | |
for using uninitialized values when looking at an object's content. | |
=cut | |
sub has_content { | |
my $self = shift; | |
return defined $self->data() && length $self->data() ? 1 : 0; | |
} | |
=head2 $content = $file->get_content | |
Returns the current content for the in-memory file | |
=cut | |
sub get_content { | |
my $self = shift; | |
$self->data( ); | |
} | |
=head2 $cref = $file->get_content_by_ref | |
Returns the current content for the in-memory file as a scalar | |
reference. Normal users won't need this, but it will save memory if | |
you are dealing with very large data files in your tar archive, since | |
it will pass the contents by reference, rather than make a copy of it | |
first. | |
=cut | |
sub get_content_by_ref { | |
my $self = shift; | |
return \$self->{data}; | |
} | |
=head2 $bool = $file->replace_content( $content ) | |
Replace the current content of the file with the new content. This | |
only affects the in-memory archive, not the on-disk version until | |
you write it. | |
Returns true on success, false on failure. | |
=cut | |
sub replace_content { | |
my $self = shift; | |
my $data = shift || ''; | |
$self->data( $data ); | |
$self->size( length $data ); | |
return 1; | |
} | |
=head2 $bool = $file->rename( $new_name ) | |
Rename the current file to $new_name. | |
Note that you must specify a Unix path for $new_name, since per tar | |
standard, all files in the archive must be Unix paths. | |
Returns true on success and false on failure. | |
=cut | |
sub rename { | |
my $self = shift; | |
my $path = shift; | |
return unless defined $path; | |
my ($prefix,$file) = $self->_prefix_and_file( $path ); | |
$self->name( $file ); | |
$self->prefix( $prefix ); | |
return 1; | |
} | |
=head2 $bool = $file->chmod $mode) | |
Change mode of $file to $mode. The mode can be a string or a number | |
which is interpreted as octal whether or not a leading 0 is given. | |
Returns true on success and false on failure. | |
=cut | |
sub chmod { | |
my $self = shift; | |
my $mode = shift; return unless defined $mode && $mode =~ /^[0-7]{1,4}$/; | |
$self->{mode} = oct($mode); | |
return 1; | |
} | |
=head2 $bool = $file->chown( $user [, $group]) | |
Change owner of $file to $user. If a $group is given that is changed | |
as well. You can also pass a single parameter with a colon separating the | |
use and group as in 'root:wheel'. | |
Returns true on success and false on failure. | |
=cut | |
sub chown { | |
my $self = shift; | |
my $uname = shift; | |
return unless defined $uname; | |
my $gname; | |
if (-1 != index($uname, ':')) { | |
($uname, $gname) = split(/:/, $uname); | |
} else { | |
$gname = shift if @_ > 0; | |
} | |
$self->uname( $uname ); | |
$self->gname( $gname ) if $gname; | |
return 1; | |
} | |
=head1 Convenience methods | |
To quickly check the type of a C<Archive::Tar::File> object, you can | |
use the following methods: | |
=over 4 | |
=item $file->is_file | |
Returns true if the file is of type C<file> | |
=item $file->is_dir | |
Returns true if the file is of type C<dir> | |
=item $file->is_hardlink | |
Returns true if the file is of type C<hardlink> | |
=item $file->is_symlink | |
Returns true if the file is of type C<symlink> | |
=item $file->is_chardev | |
Returns true if the file is of type C<chardev> | |
=item $file->is_blockdev | |
Returns true if the file is of type C<blockdev> | |
=item $file->is_fifo | |
Returns true if the file is of type C<fifo> | |
=item $file->is_socket | |
Returns true if the file is of type C<socket> | |
=item $file->is_longlink | |
Returns true if the file is of type C<LongLink>. | |
Should not happen after a successful C<read>. | |
=item $file->is_label | |
Returns true if the file is of type C<Label>. | |
Should not happen after a successful C<read>. | |
=item $file->is_unknown | |
Returns true if the file type is C<unknown> | |
=back | |
=cut | |
#stupid perl5.5.3 needs to warn if it's not numeric | |
sub is_file { local $^W; FILE == $_[0]->type } | |
sub is_dir { local $^W; DIR == $_[0]->type } | |
sub is_hardlink { local $^W; HARDLINK == $_[0]->type } | |
sub is_symlink { local $^W; SYMLINK == $_[0]->type } | |
sub is_chardev { local $^W; CHARDEV == $_[0]->type } | |
sub is_blockdev { local $^W; BLOCKDEV == $_[0]->type } | |
sub is_fifo { local $^W; FIFO == $_[0]->type } | |
sub is_socket { local $^W; SOCKET == $_[0]->type } | |
sub is_unknown { local $^W; UNKNOWN == $_[0]->type } | |
sub is_longlink { local $^W; LONGLINK eq $_[0]->type } | |
sub is_label { local $^W; LABEL eq $_[0]->type } | |
1; | |