Spaces:
Running
Running
# Deprecated locale-aware comparison objects; see the POD below.
package I18N::Collate;

use strict;

our $VERSION = '1.02';

=head1 NAME

I18N::Collate - compare 8-bit scalar data according to the current locale

=head1 SYNOPSIS

    use I18N::Collate;
    setlocale(LC_COLLATE, 'locale-of-your-choice');
    $s1 = I18N::Collate->new("scalar_data_1");
    $s2 = I18N::Collate->new("scalar_data_2");


=head1 DESCRIPTION

  ***

  WARNING: starting from the Perl version 5.003_06
  the I18N::Collate interface for comparing 8-bit scalar data
  according to the current locale

        HAS BEEN DEPRECATED

  That is, please do not use it anymore for any new applications
  and please migrate the old applications away from it because its
  functionality was integrated into the Perl core language in the
  release 5.003_06.

  See the perllocale manual page for further information.

  ***


This module provides you with objects that will collate
according to your national character set, provided that the
POSIX setlocale() function is supported on your system.

You can compare $s1 and $s2 above with

    $s1 le $s2

To extract the data itself, you'll need a dereference: C<$$s1>.

This module uses POSIX::setlocale(). The basic collation conversion is
done by strxfrm() which, being a decent C routine, terminates at NUL
characters. collate_xfrm() handles embedded NUL characters gracefully.


The available locales depend on your operating system; check whether
C<locale -a> lists them, consult the man pages for "locale" or
"nlsinfo", or take the direct approach: C<ls /usr/lib/nls/loc>,
C<ls /usr/lib/nls> or C<ls /usr/lib/locale>. Not all the locales that
your vendor supports are necessarily installed: please consult your
operating system's documentation and possibly your local system
administration. The locale names are probably something like
C<xx_XX.(ISO)?8859-N> or C<xx_XX.(ISO)?8859N>; for example,
C<fr_CH.ISO8859-1> is the Swiss (CH) variant of French (fr), ISO Latin
(8859) 1 (-1), which is the Western European character set.

=cut

# I18N::Collate.pm | |
# | |
# Author: Jarkko Hietaniemi <F<[email protected]>> | |
# Helsinki University of Technology, Finland | |
# | |
# Acks: Guy Decoux <F<[email protected]>> understood | |
# overloading magic much deeper than I and told | |
# how to cut the size of this code by more than half. | |
# (my first version did overload all of lt gt eq le ge cmp) | |
# | |
# Purpose: compare 8-bit scalar data according to the current locale | |
# | |
# Requirements: Perl5 POSIX::setlocale() and POSIX::strxfrm() | |
# | |
# Exports: setlocale 1) | |
# collate_xfrm 2) | |
# | |
# Overloads: cmp # 3) | |
# | |
# Usage: use I18N::Collate; | |
# setlocale(LC_COLLATE, 'locale-of-your-choice'); # 4) | |
# $s1 = I18N::Collate->new("scalar_data_1");
# $s2 = I18N::Collate->new("scalar_data_2");
# | |
# now you can compare $s1 and $s2: $s1 le $s2 | |
# to extract the data itself, you need to deref: $$s1 | |
# | |
# Notes: | |
# 1) this uses POSIX::setlocale | |
# 2) the basic collation conversion is done by strxfrm() which | |
# terminates at NUL characters being a decent C routine. | |
# collate_xfrm handles embedded NUL characters gracefully. | |
# 3) due to cmp and overload magic, lt le eq ge gt work also | |
# 4) the available locales depend on your operating system; | |
# try whether "locale -a" shows them or man pages for | |
# "locale" or "nlsinfo" work or the more direct | |
# approach "ls /usr/lib/nls/loc" or "ls /usr/lib/nls". | |
# Not all the locales that your vendor supports | |
# are necessarily installed: please consult your | |
# operating system's documentation. | |
# The locale names are probably something like | |
# 'xx_XX.(ISO)?8859-N' or 'xx_XX.(ISO)?8859N', | |
# for example 'fr_CH.ISO8859-1' is the Swiss (CH) | |
# variant of French (fr), ISO Latin (8859) 1 (-1) | |
# which is the Western European character set. | |
# | |
# Updated: 19961005 | |
# | |
# --- | |
# strxfrm() does the actual collation transform; LC_COLLATE is imported so
# that it can be re-exported to callers of this module.
use POSIX ('strxfrm', 'LC_COLLATE');

# Registers a warnings category named after this package; new() uses it to
# emit the deprecation notice.
use warnings::register;

require Exporter;
our @ISA       = ('Exporter');
our @EXPORT    = ('collate_xfrm', 'setlocale', 'LC_COLLATE');
our @EXPORT_OK = ();

# Only "cmp" is overloaded (by method name); with fallback enabled the
# other string comparison operators are derived from it.
use overload
    'fallback' => 1,
    'cmp'      => 'collate_cmp';

# $LOCALE: name of the locale recorded by setlocale(), used as a cache key.
# $C:      cache of transformed strings, indexed by $LOCALE and then by the
#          original string (filled in by C()).
our ($LOCALE, $C);

# new() prints the deprecation notice only while this is false, and bumps
# it after the first notice.
our $please_use_I18N_Collate_even_if_deprecated = 0;
# new(CLASS, DATA)
#
# Constructor. Wraps a copy of DATA in a blessed scalar reference; the
# original data is available again by dereferencing the object ($$obj).
#
# The object is blessed into the invocant's class (so subclasses get
# objects of their own class). When invoked on an existing object its
# class is reused, and when no usable class is supplied the object is
# blessed into this package, which is what the historical one-argument
# bless always did.
#
# Side effect: if the package's warnings category is enabled, the
# deprecation notice is emitted once, and
# $please_use_I18N_Collate_even_if_deprecated is incremented so that later
# calls stay quiet. Setting that variable to a true value beforehand
# suppresses the notice entirely.
sub new {
    my ($class, $data) = @_;

    $class = ref($class) || $class;
    $class = __PACKAGE__ unless defined $class && length $class;

    if (warnings::enabled() && $] >= 5.003_06) {
        unless ($please_use_I18N_Collate_even_if_deprecated) {
            warnings::warn <<___EOD___;
***
WARNING: starting from the Perl version 5.003_06
the I18N::Collate interface for comparing 8-bit scalar data
according to the current locale
HAS BEEN DEPRECATED
That is, please do not use it anymore for any new applications
and please migrate the old applications away from it because its
functionality was integrated into the Perl core language in the
release 5.003_06.
See the perllocale manual page for further information.
***
___EOD___
            $please_use_I18N_Collate_even_if_deprecated++;
        }
    }

    return bless \$data, $class;
}
# setlocale(CATEGORY, LOCALE)
#
# Thin wrapper around POSIX::setlocale() that also records, in $LOCALE,
# the name of the locale now considered current. $LOCALE is the key under
# which C() caches transformed strings, so it has to name the locale that
# is really in effect.
#
# CATEGORY - a POSIX locale category such as LC_COLLATE; when undefined,
#            POSIX::setlocale() is not called and only $LOCALE is refreshed.
# LOCALE   - the locale name; when false (undef or ''), the name is taken
#            from the environment.
#
# Returns the new value of $LOCALE.
sub setlocale {
    my ($category, $locale) = @_;

    POSIX::setlocale($category, $locale) if defined $category;

    # Environment lookup follows the standard precedence: LC_ALL overrides
    # the per-category variable, and LANG is the last resort.
    $LOCALE = $locale
        || $ENV{'LC_ALL'}
        || $ENV{'LC_COLLATE'}
        || $ENV{'LANG'}
        || '';

    return $LOCALE;
}
# C(OBJECT)
#
# Returns the collation-transformed form of the string that OBJECT (a
# scalar reference) points to. Results are memoised in $C, keyed first by
# the current $LOCALE and then by the string itself, so each distinct
# string is transformed at most once per locale name.
sub C {
    my ($self) = @_;
    my $text = $$self;

    if (!defined $C->{$LOCALE}{$text}) {
        # first encounter under this locale: transform and remember
        $C->{$LOCALE}{$text} = collate_xfrm($text);
    }

    return $C->{$LOCALE}{$text};
}
# collate_xfrm(STRING)
#
# Returns STRING transformed with strxfrm(), one NUL-free piece at a time.
# The string is cut at runs of NUL characters (the runs are kept, thanks to
# the capturing group in the split pattern); every run of NULs is copied
# through unchanged, and every other piece is handed to strxfrm() with a
# terminating NUL appended. The results are concatenated in order.
sub collate_xfrm {
    my ($string) = @_;

    my @pieces = map { /^\000/ ? $_ : strxfrm("$_\000") }
                 split /(\000+)/, $string;

    return join '', @pieces;
}
# collate_cmp(LEFT, RIGHT, SWAPPED)
#
# Handler for the overloaded "cmp" operator (and, through fallback, for
# lt le eq ge gt). Compares the collation-transformed forms of the two
# operands and returns -1, 0 or 1.
#
# LEFT    - the object the handler was dispatched on.
# RIGHT   - the other operand. A reference is transformed through C(), as
#           before; a plain (non-reference) value is transformed directly
#           with collate_xfrm() instead of being dereferenced, so that
#           comparing an object with an ordinary string no longer dies
#           under "strict refs". An undefined value is treated as ''.
# SWAPPED - true when overload dispatch exchanged the operands (the
#           object was on the right-hand side of the operator); the
#           comparison is then evaluated in the original operand order.
sub collate_cmp {
    my ($left, $right, $swapped) = @_;

    my ($lhs, $rhs) = map {
        ref $_ ? C($_) : collate_xfrm(defined $_ ? "$_" : '')
    } ($left, $right);

    return $swapped ? ($rhs cmp $lhs) : ($lhs cmp $rhs);
}
# Prime $LOCALE at load time. No category is passed, so the process locale
# is not touched; only the recorded locale name is initialised.
I18N::Collate::setlocale();

1;    # a module file has to end with a true value