Spaces:

xsigus24
/

text-generation-webui

Running

App Files Files Community

text-generation-webui / installer_files /env /lib /5.34.0 /File /GlobMapper.pm

xsigus24

Upload folder using huggingface_hub

1d777c4 over 1 year ago

raw

history blame contribute delete

15.7 kB

	package File::GlobMapper;

	use strict;
	use warnings;
	use Carp;

	our ($CSH_GLOB);

	BEGIN
	{
	    # Pick the glob backend once, at compile time, and expose it to the
	    # rest of the package as globber().  Perls older than 5.006 load
	    # File::BSDGlob; everything else loads File::Glob.  Either way the
	    # ":glob" constants (GLOB_ERROR, GLOB_TILDE, ...) are imported and
	    # the csh-style flag set is remembered in $CSH_GLOB.
	    if ($] >= 5.006)
	    {
	        require File::Glob;
	        File::Glob->import(qw(:glob));
	        $CSH_GLOB = File::Glob::GLOB_CSH();
	        #*globber = \&File::Glob::bsd_glob;
	        *globber = \&File::Glob::csh_glob;
	    }
	    else
	    {
	        require File::BSDGlob;
	        File::BSDGlob->import(qw(:glob));
	        $CSH_GLOB = File::BSDGlob::GLOB_CSH();
	        *globber = \&File::BSDGlob::csh_glob;
	    }
	}

	# Text of the most recent failure.  The constructor and the parsing
	# helpers set it before returning undef; callers read it as
	# $File::GlobMapper::Error.
	our ($Error);

	our ($VERSION, @EXPORT_OK);
	$VERSION = '1.001';
	@EXPORT_OK = qw( globmap );

	# @EXPORT_OK is only honoured when the package has an import() method.
	# Inherit Exporter's so that "use File::GlobMapper qw( globmap )", as
	# shown in the SYNOPSIS, really does export globmap on request.
	require Exporter;
	our @ISA = qw( Exporter );


	# Tables shared by the input fileglob parser.
	our ($noPreBS, $metachars, $matchMetaRE, %mapping, %wildCount);

	# Regex fragment: zero-width check that the next character is not
	# escaped by a backslash.
	$noPreBS = '(?<!\\\)' ; # no preceding backslash

	# Every character that is special in an input fileglob, and a character
	# class matching any one of them.
	$metachars = '.*?[](){}';
	$matchMetaRE = '[' . quotemeta($metachars) . ']';

	# Regex text emitted for a glob metacharacter.  Wildcards are wrapped in
	# capturing parentheses so the output fileglob can refer to them as #1,
	# #2, ...; '[' only opens the group, the parser closes it after the
	# matching ']'.
	%mapping = (
	    '*' => '([^/]*)',
	    '?' => '([^/])',
	    '.' => '\.',
	    '[' => '([',
	    '(' => '(',
	    ')' => ')',
	);

	# Metacharacters that bump the parser's WildCount.
	# NOTE(review): '.' is counted here although its mapping adds no
	# capture group -- confirm whether that is intended.
	%wildCount = map { $_ => 1 } qw/ * ? . { ( [ /;

	# globmap INPUT_GLOB, OUTPUT_GLOB [, FLAGS]
	#
	# Functional interface.  Builds a File::GlobMapper object from the two
	# fileglobs and returns its list of [input, output] filename pairs as an
	# array reference.  Croaks with the text of $Error when the object
	# cannot be constructed.
	#
	# The prototype accepts an optional third argument: the body has always
	# forwarded the rest of @_ to new() as the glob flags, but the previous
	# prototype "($$;)" made a three-argument call a compile-time error.
	sub globmap ($$;$)
	{
	    my $inputGlob  = shift ;
	    my $outputGlob = shift ;

	    # Whatever is left in @_ (the optional flags) goes to new().
	    my $obj = File::GlobMapper->new($inputGlob, $outputGlob, @_)
	        or croak "globmap: $Error" ;
	    return $obj->getFileMap();
	}

	# File::GlobMapper->new(INPUT_GLOB, OUTPUT_GLOB [, FLAGS])
	#
	# Parses both fileglobs, globs for the input files and works out the
	# output filename for each of them.
	#
	# FLAGS are passed to the glob function; a false or missing value falls
	# back to $CSH_GLOB.
	#
	# Returns the new object, or undef on failure.  Failures detected in
	# this sub store their reason in $Error.
	sub new
	{
	    my $class      = shift ;
	    my $inputGlob  = shift ;
	    my $outputGlob = shift ;
	    # TODO -- flags needs to default to whatever File::Glob does
	    my $flags = shift || $CSH_GLOB ;

	    # The enclosing "< ... >" is optional sugar: strip the delimiters,
	    # and any whitespace around them, when present.
	    for my $glob ($inputGlob, $outputGlob)
	    {
	        $glob =~ s/^\s*<\s*//;
	        $glob =~ s/\s*>\s*$//;
	    }

	    my %object =
	        ( InputGlob  => $inputGlob,
	          OutputGlob => $outputGlob,
	          GlobFlags  => $flags,
	          Braces     => 0,
	          WildCount  => 0,
	          Pairs      => [],
	          Sigil      => '#',
	        );

	    my $self = bless \%object, ref($class) || $class ;

	    $self->_parseInputGlob()
	        or return undef ;

	    $self->_parseOutputGlob()
	        or return undef ;

	    # _parseInputGlob has rewritten $self->{InputGlob}, so glob on the
	    # stored copy rather than on the original argument.
	    my @inputFiles = globber($self->{InputGlob}, $flags) ;

	    if (GLOB_ERROR)
	    {
	        $Error = $!;
	        return undef ;
	    }

	    #if (whatever)
	    {
	        # Refuse to continue if any name returned by the glob does not
	        # exist on disk.
	        my $missing = grep { ! -e $_ } @inputFiles ;

	        if ($missing)
	        {
	            $Error = "$missing input files do not exist";
	            return undef ;
	        }
	    }

	    $self->{InputFiles} = \@inputFiles ;

	    $self->_getFiles()
	        or return undef ;

	    return $self;
	}

	# Store "<message> in input fileglob" in $Error and return undef, so a
	# parser can write "return _retError(...)" to fail.
	sub _retError
	{
	    my ($message) = @_;
	    $Error = $message . ' in input fileglob';
	    return undef ;
	}

	# Report an unbalanced delimiter found in the input fileglob: the error
	# is recorded through _retError and undef is returned.
	sub _unmatched
	{
	    my ($delimeter) = @_;

	    _retError('Unmatched ' . $delimeter);
	    return undef ;
	}

	# $self->_parseBit(STRING)
	#
	# Translates the text found between "{" and "}" in an input fileglob
	# into the body of a regex alternation: unescaped commas become "|",
	# metacharacters are translated through %mapping and everything else is
	# quoted literally.  WildCount is incremented for every metacharacter
	# listed in %wildCount.
	#
	# Returns the regex text, or undef (with $Error set) on unbalanced
	# delimiters or nested braces.
	sub _parseBit
	{
	    my $self   = shift ;
	    my $string = shift ;

	    my $out   = '';
	    my $depth = 0 ;     # nesting level of open "("

	    # Repeatedly bite off the literal prefix ($1) and the first
	    # unescaped comma or metacharacter ($2).
	    while ($string =~ s/(.*?)$noPreBS(,|$matchMetaRE)//)
	    {
	        # Copy the captures: the '[' branch runs another substitution
	        # that overwrites $1 and $2.
	        my ($literal, $meta) = ($1, $2);

	        $out .= quotemeta($literal) ;
	        $out .= $mapping{$meta} if defined $mapping{$meta};

	        ++ $self->{WildCount} if $wildCount{$meta} ;

	        if ($meta eq ',')
	        {
	            # A comma may not split an open parenthesised group.
	            return _unmatched("(")
	                if $depth ;

	            $out .= '|';
	        }
	        elsif ($meta eq '(')
	        {
	            ++ $depth ;
	        }
	        elsif ($meta eq ')')
	        {
	            return _unmatched(")")
	                if ! $depth ;

	            -- $depth ;
	        }
	        elsif ($meta eq '[')
	        {
	            # TODO -- quotemeta & check no '/'
	            # TODO -- check for \] & other \ within the []
	            # Copy the class body verbatim, then close the group that
	            # the '[' mapping opened.
	            $string =~ s#(.*?\])##
	                or return _unmatched("[");
	            $out .= "$1)" ;
	        }
	        elsif ($meta eq ']')
	        {
	            return _unmatched("]");
	        }
	        elsif ($meta eq '{' || $meta eq '}')
	        {
	            return _retError("Nested {} not allowed");
	        }
	    }

	    # Whatever is left holds no metacharacters.
	    $out .= quotemeta $string;

	    return _unmatched("(")
	        if $depth ;

	    return $out ;
	}

	# $self->_parseInputGlob()
	#
	# Turns $self->{InputGlob} into a Perl regular expression whose capture
	# groups correspond to the wildcards, character classes, brace
	# alternations and explicit parentheses of the fileglob.
	#
	# On success stores the regex text in $self->{InputPattern}, removes the
	# unescaped capture parentheses from $self->{InputGlob} and returns 1.
	# On failure sets $Error and returns a false value.
	sub _parseInputGlob
	{
	    my $self = shift ;

	    my $string = $self->{InputGlob} ;

	    # Multiple concatenated *'s don't make sense
	    #$string =~ s#\*\*+#*# ;

	    # TODO -- Allow space to delimit patterns?
	    #my @strings = split /\s+/, $string ;
	    #for my $str (@strings)
	    my $out   = '';
	    my $depth = 0 ;     # nesting level of open "("

	    # Repeatedly bite off the literal prefix ($1) and the first
	    # unescaped metacharacter ($2).
	    while ($string =~ s/(.*?)$noPreBS($matchMetaRE)//)
	    {
	        # Copy the captures: later branches run further substitutions
	        # that overwrite $1 and $2.
	        my ($literal, $meta) = ($1, $2);

	        $out .= quotemeta($literal) ;
	        $out .= $mapping{$meta} if defined $mapping{$meta};
	        ++ $self->{WildCount} if $wildCount{$meta} ;

	        if ($meta eq '(')
	        {
	            ++ $depth ;
	        }
	        elsif ($meta eq ')')
	        {
	            return _unmatched(")")
	                if ! $depth ;

	            -- $depth ;
	        }
	        elsif ($meta eq '[')
	        {
	            # TODO -- quotemeta & check no '/' or '(' or ')'
	            # TODO -- check for \] & other \ within the []
	            # Copy the class body verbatim, then close the group that
	            # the '[' mapping opened.
	            $string =~ s#(.*?\])##
	                or return _unmatched("[");
	            $out .= "$1)" ;
	        }
	        elsif ($meta eq ']')
	        {
	            return _unmatched("]");
	        }
	        elsif ($meta eq '}')
	        {
	            return _unmatched("}");
	        }
	        elsif ($meta eq '{')
	        {
	            # TODO -- check no '/' within the {}
	            # TODO -- check for \} & other \ within the {}

	            # Cut out everything up to the first unescaped '}' ...
	            unless ( $string =~ s/(.*?)${noPreBS}\}//)
	            {
	                return _unmatched("{");
	            }

	            # ... and translate it into a regex alternation.
	            my $alt = $self->_parseBit($1);
	            defined $alt or return 0 ;
	            $out .= "($alt)" ;

	            ++ $self->{Braces} ;
	        }
	    }

	    return _unmatched("(")
	        if $depth ;

	    $out .= quotemeta $string ;

	    # Capturing parentheses are an extension of this module, so remove
	    # the unescaped ones from the glob that is handed to the globber.
	    $self->{InputGlob} =~ s/${noPreBS}[()]//g;
	    $self->{InputPattern} = $out ;

	    #print "# INPUT '$self->{InputGlob}' => '$out'\n";

	    return 1 ;

	}

	# $self->_parseOutputGlob()
	#
	# Converts $self->{OutputGlob} into the text of a double-quoted Perl
	# string, stored in $self->{OutputPattern}:
	#
	#   #N  (N a single digit)  becomes  ${N}
	#   *                       becomes  ${inFile}
	#
	# A "#" or "*" preceded by a backslash is left alone.  When the
	# GLOB_TILDE flag is set a leading "~" or "~user" is expanded first.
	#
	# Croaks if an unescaped #N refers to a number larger than the WildCount
	# collected from the input fileglob.  Returns 1.
	sub _parseOutputGlob
	{
	    my $self = shift ;

	    my $string  = $self->{OutputGlob} ;
	    my $maxwild = $self->{WildCount};
	    my $noPreBS = '(?<!\\\)' ; # no preceding backslash

	    if ($self->{GlobFlags} & GLOB_TILDE)
	    #if (1)
	    {
	        $string =~ s{
	              ^ ~             # find a leading tilde
	              (               # save this in $1
	                  [^/]        # a non-slash character
	                        *     # repeated 0 or more times (0 means me)
	              )
	            }{
	              $1
	                  ? (getpwnam($1))[7]
	                  : ( $ENV{HOME} || $ENV{LOGDIR} )
	            }ex;

	    }

	    # max #1 must be == to max no of '*' in input
	    # Only unescaped references are checked, matching the substitution
	    # below, which also skips "\#N".
	    while ( $string =~ m/${noPreBS}#(\d)/g )
	    {
	        croak "Max wild is #$maxwild, you tried #$1"
	            if $1 > $maxwild ;
	    }

	    #$string =~ s/${noPreBS}\$(\d)/\${$1}/g;
	    $string =~ s/${noPreBS}#(\d)/\${$1}/g;
	    $string =~ s#${noPreBS}\*#\${inFile}#g;

	    # Wrap in double quotes so the text can be evaluated as a Perl
	    # string once the input pattern has matched.
	    $string = '"' . $string . '"';

	    #print "OUTPUT '$self->{OutputGlob}' => '$string'\n";
	    $self->{OutputPattern} = $string ;

	    return 1 ;
	}

	# $self->_getFiles()
	#
	# Derives the output filename for every entry of $self->{InputFiles}
	# and appends an [input, output] pair to $self->{Pairs}.  A filename
	# that occurs more than once is processed the first time only; a
	# filename that does not match $self->{InputPattern} is skipped.
	#
	# Returns 1 on success.  Returns undef with $Error set when two inputs
	# map to the same output, or when the output pattern cannot be
	# evaluated.
	sub _getFiles
	{
	    my $self = shift ;

	    my %outInMapping = ();
	    my %inFiles = () ;

	    # NB: the names $inFile and $outFile are referenced from inside the
	    # evaluated string below -- do not rename them.
	    foreach my $inFile (@{ $self->{InputFiles} })
	    {
	        next if $inFiles{$inFile} ++ ;

	        my $outFile = $inFile ;

	        if ( $inFile =~ m/$self->{InputPattern}/ )
	        {
	            # A reference to a group that did not take part in the
	            # match interpolates as undef.
	            no warnings 'uninitialized';

	            # SECURITY NOTE(review): OutputPattern is built from the
	            # caller-supplied output fileglob and is evaluated as Perl
	            # source, so anything Perl interpolates into a double-quoted
	            # string is executed.  Never pass untrusted output
	            # fileglobs.
	            #
	            # The captures of the match above are still current here,
	            # which is what gives ${1}, ${2}, ... their values.
	            local $@;
	            unless (eval "\$outFile = $self->{OutputPattern}; 1")
	            {
	                # Previously a pattern that failed to evaluate was
	                # ignored and the file silently mapped to itself.
	                $Error = "cannot evaluate output fileglob: $@";
	                return undef ;
	            }

	            if (defined $outInMapping{$outFile})
	            {
	                $Error = "multiple input files map to one output file";
	                return undef ;
	            }
	            $outInMapping{$outFile} = $inFile;
	            push @{ $self->{Pairs} }, [$inFile, $outFile];
	        }
	    }

	    return 1 ;
	}

	# Returns the reference to the object's list of [input, output]
	# filename pairs.  This is the stored array itself, not a copy.
	sub getFileMap
	{
	    my ($self) = @_;

	    return $self->{Pairs} ;
	}

	# Returns a reference to a new hash that maps each input filename to
	# its output filename, built from the object's list of pairs.
	sub getHash
	{
	    my ($self) = @_;

	    my %outputFor;
	    for my $pair (@{ $self->{Pairs} })
	    {
	        my ($from, $to) = @$pair;
	        $outputFor{$from} = $to;
	    }

	    return \%outputFor ;
	}

	1;

	__END__

	=head1 NAME

	File::GlobMapper - Extend File Glob to Allow Input and Output Files

	=head1 SYNOPSIS

	use File::GlobMapper qw( globmap );

	my $aref = globmap $input => $output
	or die $File::GlobMapper::Error ;

	my $gm = File::GlobMapper->new( $input => $output )
	or die $File::GlobMapper::Error ;


	=head1 DESCRIPTION

	This module needs Perl5.005 or better.

	This module takes the existing C<File::Glob> module as a starting point and
	extends it to allow new filenames to be derived from the files matched by
	C<File::Glob>.

	This can be useful when carrying out batch operations on multiple files that
	have both an input filename and output filename and the output file can be
	derived from the input filename. Examples of operations where this can be
	useful include, file renaming, file copying and file compression.


	=head2 Behind The Scenes

	To help explain what C<File::GlobMapper> does, consider what code you
	would write if you wanted to rename all files in the current directory
	that ended in C<.tar.gz> to C<.tgz>. So say these files are in the
	current directory

	alpha.tar.gz
	beta.tar.gz
	gamma.tar.gz

	and they need to be renamed to this

	alpha.tgz
	beta.tgz
	gamma.tgz

	Below is a possible implementation of a script to carry out the rename
	(error cases have been omitted)

	foreach my $old ( glob "*.tar.gz" )
	{
	my $new = $old;
	$new =~ s#(.*)\.tar\.gz$#$1.tgz# ;

	rename $old => $new
	or die "Cannot rename '$old' to '$new': $!\n";
	}

	Notice that a file glob pattern C<*.tar.gz> was used to match the
	C<.tar.gz> files, then a fairly similar regular expression was used in
	the substitute to allow the new filename to be created.

	Given that the file glob is just a cut-down regular expression and that it
	has already done a lot of the hard work in pattern matching the filenames,
	wouldn't it be handy to be able to use the patterns in the fileglob to
	drive the new filename?

	Well, that's I<exactly> what C<File::GlobMapper> does.

	Here is the same snippet of code rewritten using C<globmap>

	for my $pair ( @{ globmap('<*.tar.gz>' => '<#1.tgz>') } )
	{
	my ($from, $to) = @$pair;
	rename $from => $to
	or die "Cannot rename '$from' to '$to': $!\n";
	}

	So how does it work?

	Behind the scenes the C<globmap> function does a combination of a
	file glob to match existing filenames followed by a substitute
	to create the new filenames.

	Notice how both parameters to C<globmap> are strings that are delimited by <>.
	This is done to make them look more like file globs - it is just syntactic
	sugar, but it can be handy when you want the strings to be visually
	distinctive. The enclosing <> are optional, so you don't have to use them - in
	fact the first thing globmap will do is remove these delimiters if they are
	present.

	The first parameter to C<globmap>, C<*.tar.gz>, is an I<Input File Glob>.
	Once the enclosing "< ... >" is removed, this is passed (more or
	less) unchanged to C<File::Glob> to carry out a file match.

	Next the fileglob C<*.tar.gz> is transformed behind the scenes into a
	full Perl regular expression, with the additional step of wrapping each
	transformed wildcard metacharacter sequence in parentheses.

	In this case the input fileglob C<*.tar.gz> will be transformed into
	this Perl regular expression

	([^/]*)\.tar\.gz

	Wrapping with parentheses allows the wildcard parts of the Input File
	Glob to be referenced by the second parameter to C<globmap>, C<#1.tgz>,
	the I<Output File Glob>. This parameter operates just like the replacement
	part of a substitute command. The difference is that the C<#1> syntax
	is used to reference sub-patterns matched in the input fileglob, rather
	than the C<$1> syntax that is used with perl regular expressions. In
	this case C<#1> is used to refer to the text matched by the C<*> in the
	Input File Glob. This makes it easier to use this module where the
	parameters to C<globmap> are typed at the command line.

	The final step involves passing each filename matched by the C<*.tar.gz>
	file glob through the derived Perl regular expression in turn and
	expanding the output fileglob using it.

	The end result of all this is a list of pairs of filenames. By default
	that is what is returned by C<globmap>. In this example the data structure
	returned will look like this

	[ ['alpha.tar.gz' => 'alpha.tgz'],
	['beta.tar.gz' => 'beta.tgz' ],
	['gamma.tar.gz' => 'gamma.tgz']
	]


	Each pair is an array reference with two elements - namely the I<from>
	filename, that C<File::Glob> has matched, and a I<to> filename that is
	derived from the I<from> filename.



	=head2 Limitations

	C<File::GlobMapper> has been kept simple deliberately, so it isn't intended to
	solve all filename mapping operations. Under the hood C<File::Glob> (or for
	older versions of Perl, C<File::BSDGlob>) is used to match the files, so you
	will never have the flexibility of full Perl regular expression.

	=head2 Input File Glob

	The syntax for an Input FileGlob is identical to C<File::Glob>, except
	for the following

	=over 5

	=item 1.

	No nested {}

	=item 2.

	Whitespace does not delimit fileglobs.

	=item 3.

	Parentheses can be used to capture parts of the input filename.

	=item 4.

	If an Input glob matches the same file more than once, only the first
	will be used.

	=back

	The syntax

	=over 5

	=item B<~>

	=item B<~user>


	=item B<.>

	Matches a literal '.'.
	Equivalent to the Perl regular expression

	\.

	=item B<*>

	Matches zero or more characters, except '/'. Equivalent to the Perl
	regular expression

	[^/]*

	=item B<?>

	Matches exactly one character, except '/'. Equivalent to the Perl
	regular expression

	[^/]

	=item B<\>

	Backslash is used, as usual, to escape the next character.

	=item B<[]>

	Character class.

	=item B<{,}>

	Alternation

	=item B<()>

	Capturing parentheses that work just like Perl's.

	=back

	Any other character is taken literally.

	=head2 Output File Glob

	The Output File Glob is a normal string, with 2 glob-like features.

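	The first is the '*' metacharacter. This will be replaced by the complete
	filename matched by the input file glob. So

	*.c *.Z

	maps every file that matches C<*.c>, for example F<foo.c>, to the same
	name with C<.Z> appended, F<foo.c.Z>.

	The second is the C<#1> syntax (a C<#> followed by a single digit). It
	will be replaced by the text matched by the corresponding wildcard or
	parenthesised group in the input file glob.

	Output FileGlobs take the following metacharacters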

	=over 5

	=item "*"

	The "*" character will be replaced with the complete input filename.

	=item #1

	Patterns of the form /#\d/ will be replaced with the text matched by the
	corresponding wildcard or parenthesised group in the input file glob.

	=back

	=head2 Returned Data


	=head1 EXAMPLES

	=head2 A Rename script

	Below is a simple "rename" script that uses C<globmap> to determine the
	source and destination filenames.

	use File::GlobMapper qw(globmap) ;
	use File::Copy;

	die "rename: Usage rename 'from' 'to'\n"
	unless @ARGV == 2 ;

	my $fromGlob = shift @ARGV;
	my $toGlob = shift @ARGV;

	my $pairs = globmap($fromGlob, $toGlob)
	or die $File::GlobMapper::Error;

	for my $pair (@$pairs)
	{
	my ($from, $to) = @$pair;
	move $from => $to ;
	}



	Here is an example that renames all c files to cpp.

	$ rename '*.c' '#1.cpp'

	=head2 A few example globmaps

	Below are a few examples of globmaps

	To copy all your .c files to a backup directory

	'</my/home/*.c>' '</my/backup/#1.c>'

	If you want to compress all your .c and .h files

	'</my/home/*.[ch]>' '<*.gz>'

	To uncompress

	'</my/home/*.[ch].gz>' '</my/home/#1.#2>'

	=head1 SEE ALSO

	L<File::Glob|File::Glob>

	=head1 AUTHOR

	The I<File::GlobMapper> module was written by Paul Marquess, F<[email protected]>.

	=head1 COPYRIGHT AND LICENSE

	Copyright (c) 2005 Paul Marquess. All rights reserved.
	This program is free software; you can redistribute it and/or
	modify it under the same terms as Perl itself.