Spaces:
Sleeping
Sleeping
File size: 33,322 Bytes
1d777c4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 |
package ExtUtils::Constant::Base;
use strict;
use vars qw($VERSION);
use Carp;
use Text::Wrap;
use ExtUtils::Constant::Utils qw(C_stringify perl_stringify);
$VERSION = '0.06';
use constant is_perl56 => ($] < 5.007 && $] > 5.005_50);
=head1 NAME
ExtUtils::Constant::Base - base class for ExtUtils::Constant objects
=head1 SYNOPSIS
require ExtUtils::Constant::Base;
@ISA = 'ExtUtils::Constant::Base';
=head1 DESCRIPTION
ExtUtils::Constant::Base provides a base implementation of methods to
generate C code to give fast constant value lookup by named string. Currently
it's mostly used ExtUtils::Constant::XS, which generates the lookup code
for the constant() subroutine found in many XS modules.
=head1 USAGE
ExtUtils::Constant::Base exports no subroutines. The following methods are
available
=over 4
=cut
sub valid_type {
# Default to assuming that you don't need different types of return data.
1;
}
sub default_type {
'';
}
=item header
A method returning a scalar containing definitions needed, typically for a
C header file.
=cut
sub header {
''
}
# This might actually be a return statement. Note that you are responsible
# for any space you might need before your value, as it lets to perform
# "tricks" such as "return KEY_" and have strings appended.
sub assignment_clause_for_type;
# In which case this might be an empty string
sub return_statement_for_type {undef};
sub return_statement_for_notdef;
sub return_statement_for_notfound;
# "#if 1" is true to a C pre-processor
sub macro_from_name {
1;
}
sub macro_from_item {
1;
}
sub macro_to_ifdef {
my ($self, $macro) = @_;
if (ref $macro) {
return $macro->[0];
}
if (defined $macro && $macro ne "" && $macro ne "1") {
return $macro ? "#ifdef $macro\n" : "#if 0\n";
}
return "";
}
sub macro_to_ifndef {
my ($self, $macro) = @_;
if (ref $macro) {
# Can't invert these stylishly, so "bodge it"
return "$macro->[0]#else\n";
}
if (defined $macro && $macro ne "" && $macro ne "1") {
return $macro ? "#ifndef $macro\n" : "#if 1\n";
}
croak "Can't generate an ifndef for unconditional code";
}
sub macro_to_endif {
my ($self, $macro) = @_;
if (ref $macro) {
return $macro->[1];
}
if (defined $macro && $macro ne "" && $macro ne "1") {
return "#endif\n";
}
return "";
}
sub name_param {
'name';
}
# This is possibly buggy, in that it's not mandatory (below, in the main
# C_constant parameters, but is expected to exist here, if it's needed)
# Buggy because if you're definitely pure 8 bit only, and will never be
# presented with your constants in utf8, the default form of C_constant can't
# be told not to do the utf8 version.
sub is_utf8_param {
'utf8';
}
sub memEQ {
"!memcmp";
}
=item memEQ_clause args_hashref
A method to return a suitable C C<if> statement to check whether I<name>
is equal to the C variable C<name>. If I<checked_at> is defined, then it
is used to avoid C<memEQ> for short names, or to generate a comment to
highlight the position of the character in the C<switch> statement.
If i<checked_at> is a reference to a scalar, then instead it gives
the characters pre-checked at the beginning, (and the number of chars by
which the C variable name has been advanced. These need to be chopped from
the front of I<name>).
=cut
sub memEQ_clause {
# if (memEQ(name, "thingy", 6)) {
# Which could actually be a character comparison or even ""
my ($self, $args) = @_;
my ($name, $checked_at, $indent) = @{$args}{qw(name checked_at indent)};
$indent = ' ' x ($indent || 4);
my $front_chop;
if (ref $checked_at) {
# regexp won't work on 5.6.1 without use utf8; in turn that won't work
# on 5.005_03.
substr ($name, 0, length $$checked_at,) = '';
$front_chop = C_stringify ($$checked_at);
undef $checked_at;
}
my $len = length $name;
if ($len < 2) {
return $indent . "{\n"
if (defined $checked_at and $checked_at == 0) or $len == 0;
# We didn't switch, drop through to the code for the 2 character string
$checked_at = 1;
}
my $name_param = $self->name_param;
if ($len < 3 and defined $checked_at) {
my $check;
if ($checked_at == 1) {
$check = 0;
} elsif ($checked_at == 0) {
$check = 1;
}
if (defined $check) {
my $char = C_stringify (substr $name, $check, 1);
# Placate 5.005 with a break in the string. I can't see a good way of
# getting it to not take [ as introducing an array lookup, even with
# ${name_param}[$check]
return $indent . "if ($name_param" . "[$check] == '$char') {\n";
}
}
if (($len == 2 and !defined $checked_at)
or ($len == 3 and defined ($checked_at) and $checked_at == 2)) {
my $char1 = C_stringify (substr $name, 0, 1);
my $char2 = C_stringify (substr $name, 1, 1);
return $indent .
"if ($name_param" . "[0] == '$char1' && $name_param" . "[1] == '$char2') {\n";
}
if (($len == 3 and defined ($checked_at) and $checked_at == 1)) {
my $char1 = C_stringify (substr $name, 0, 1);
my $char2 = C_stringify (substr $name, 2, 1);
return $indent .
"if ($name_param" . "[0] == '$char1' && $name_param" . "[2] == '$char2') {\n";
}
my $pointer = '^';
my $have_checked_last = defined ($checked_at) && $len == $checked_at + 1;
if ($have_checked_last) {
# Checked at the last character, so no need to memEQ it.
$pointer = C_stringify (chop $name);
$len--;
}
$name = C_stringify ($name);
my $memEQ = $self->memEQ();
my $body = $indent . "if ($memEQ($name_param, \"$name\", $len)) {\n";
# Put a little ^ under the letter we checked at
# Screws up for non printable and non-7 bit stuff, but that's too hard to
# get right.
if (defined $checked_at) {
$body .= $indent . "/* " . (' ' x length $memEQ)
. (' ' x length $name_param)
. (' ' x $checked_at) . $pointer
. (' ' x ($len - $checked_at + length $len)) . " */\n";
} elsif (defined $front_chop) {
$body .= $indent . "/* $front_chop"
. (' ' x ($len + 1 + length $len)) . " */\n";
}
return $body;
}
=item dump_names arg_hashref, ITEM...
An internal function to generate the embedded perl code that will regenerate
the constant subroutines. I<default_type>, I<types> and I<ITEM>s are the
same as for C_constant. I<indent> is treated as number of spaces to indent
by. If C<declare_types> is true a C<$types> is always declared in the perl
code generated, if defined and false never declared, and if undefined C<$types>
is only declared if the values in I<types> as passed in cannot be inferred from
I<default_types> and the I<ITEM>s.
=cut
sub dump_names {
my ($self, $args, @items) = @_;
my ($default_type, $what, $indent, $declare_types)
= @{$args}{qw(default_type what indent declare_types)};
$indent = ' ' x ($indent || 0);
my $result;
my (@simple, @complex, %used_types);
foreach (@items) {
my $type;
if (ref $_) {
$type = $_->{type} || $default_type;
if ($_->{utf8}) {
# For simplicity always skip the bytes case, and reconstitute this entry
# from its utf8 twin.
next if $_->{utf8} eq 'no';
# Copy the hashref, as we don't want to mess with the caller's hashref.
$_ = {%$_};
unless (is_perl56) {
utf8::decode ($_->{name});
} else {
$_->{name} = pack 'U*', unpack 'U0U*', $_->{name};
}
delete $_->{utf8};
}
} else {
$_ = {name=>$_};
$type = $default_type;
}
$used_types{$type}++;
if ($type eq $default_type
# grr 5.6.1
and length $_->{name}
and length $_->{name} == ($_->{name} =~ tr/A-Za-z0-9_//)
and !defined ($_->{macro}) and !defined ($_->{value})
and !defined ($_->{default}) and !defined ($_->{pre})
and !defined ($_->{post}) and !defined ($_->{def_pre})
and !defined ($_->{def_post}) and !defined ($_->{weight})) {
# It's the default type, and the name consists only of A-Za-z0-9_
push @simple, $_->{name};
} else {
push @complex, $_;
}
}
if (!defined $declare_types) {
# Do they pass in any types we weren't already using?
foreach (keys %$what) {
next if $used_types{$_};
$declare_types++; # Found one in $what that wasn't used.
last; # And one is enough to terminate this loop
}
}
if ($declare_types) {
$result = $indent . 'my $types = {map {($_, 1)} qw('
. join (" ", sort keys %$what) . ")};\n";
}
local $Text::Wrap::huge = 'overflow';
local $Text::Wrap::columns = 80;
$result .= wrap ($indent . "my \@names = (qw(",
$indent . " ", join (" ", sort @simple) . ")");
if (@complex) {
foreach my $item (sort {$a->{name} cmp $b->{name}} @complex) {
my $name = perl_stringify $item->{name};
my $line = ",\n$indent {name=>\"$name\"";
$line .= ", type=>\"$item->{type}\"" if defined $item->{type};
foreach my $thing (qw (macro value default pre post def_pre def_post)) {
my $value = $item->{$thing};
if (defined $value) {
if (ref $value) {
$line .= ", $thing=>[\""
. join ('", "', map {perl_stringify $_} @$value) . '"]';
} else {
$line .= ", $thing=>\"" . perl_stringify($value) . "\"";
}
}
}
$line .= "}";
# Ensure that the enclosing C comment doesn't end
# by turning */ into *" . "/
$line =~ s!\*\/!\*" . "/!gs;
# gcc -Wall doesn't like finding /* inside a comment
$line =~ s!\/\*!/" . "\*!gs;
$result .= $line;
}
}
$result .= ");\n";
$result;
}
=item assign arg_hashref, VALUE...
A method to return a suitable assignment clause. If I<type> is aggregate
(eg I<PVN> expects both pointer and length) then there should be multiple
I<VALUE>s for the components. I<pre> and I<post> if defined give snippets
of C code to proceed and follow the assignment. I<pre> will be at the start
of a block, so variables may be defined in it.
=cut
# Hmm. value undef to do NOTDEF? value () to do NOTFOUND?
sub assign {
my $self = shift;
my $args = shift;
my ($indent, $type, $pre, $post, $item)
= @{$args}{qw(indent type pre post item)};
$post ||= '';
my $clause;
my $close;
if ($pre) {
chomp $pre;
$close = "$indent}\n";
$clause = $indent . "{\n";
$indent .= " ";
$clause .= "$indent$pre";
$clause .= ";" unless $pre =~ /;$/;
$clause .= "\n";
}
confess "undef \$type" unless defined $type;
confess "Can't generate code for type $type"
unless $self->valid_type($type);
$clause .= join '', map {"$indent$_\n"}
$self->assignment_clause_for_type({type=>$type,item=>$item}, @_);
chomp $post;
if (length $post) {
$clause .= "$post";
$clause .= ";" unless $post =~ /;$/;
$clause .= "\n";
}
my $return = $self->return_statement_for_type($type);
$clause .= "$indent$return\n" if defined $return;
$clause .= $close if $close;
return $clause;
}
=item return_clause arg_hashref, ITEM
A method to return a suitable C<#ifdef> clause. I<ITEM> is a hashref
(as passed to C<C_constant> and C<match_clause>. I<indent> is the number
of spaces to indent, defaulting to 6.
=cut
sub return_clause {
##ifdef thingy
# *iv_return = thingy;
# return PERL_constant_ISIV;
##else
# return PERL_constant_NOTDEF;
##endif
my ($self, $args, $item) = @_;
my $indent = $args->{indent};
my ($name, $value, $default, $pre, $post, $def_pre, $def_post, $type)
= @$item{qw (name value default pre post def_pre def_post type)};
$value = $name unless defined $value;
my $macro = $self->macro_from_item($item);
$indent = ' ' x ($indent || 6);
unless (defined $type) {
# use Data::Dumper; print STDERR Dumper ($item);
confess "undef \$type";
}
##ifdef thingy
my $clause = $self->macro_to_ifdef($macro);
# *iv_return = thingy;
# return PERL_constant_ISIV;
$clause
.= $self->assign ({indent=>$indent, type=>$type, pre=>$pre, post=>$post,
item=>$item}, ref $value ? @$value : $value);
if (defined $macro && $macro ne "" && $macro ne "1") {
##else
$clause .= "#else\n";
# return PERL_constant_NOTDEF;
if (!defined $default) {
my $notdef = $self->return_statement_for_notdef();
$clause .= "$indent$notdef\n" if defined $notdef;
} else {
my @default = ref $default ? @$default : $default;
$type = shift @default;
$clause .= $self->assign ({indent=>$indent, type=>$type, pre=>$pre,
post=>$post, item=>$item}, @default);
}
}
##endif
$clause .= $self->macro_to_endif($macro);
return $clause;
}
sub match_clause {
# $offset defined if we have checked an offset.
my ($self, $args, $item) = @_;
my ($offset, $indent) = @{$args}{qw(checked_at indent)};
$indent = ' ' x ($indent || 4);
my $body = '';
my ($no, $yes, $either, $name, $inner_indent);
if (ref $item eq 'ARRAY') {
($yes, $no) = @$item;
$either = $yes || $no;
confess "$item is $either expecting hashref in [0] || [1]"
unless ref $either eq 'HASH';
$name = $either->{name};
} else {
confess "$item->{name} has utf8 flag '$item->{utf8}', should be false"
if $item->{utf8};
$name = $item->{name};
$inner_indent = $indent;
}
$body .= $self->memEQ_clause ({name => $name, checked_at => $offset,
indent => length $indent});
# If we've been presented with an arrayref for $item, then the user string
# contains in the range 128-255, and we need to check whether it was utf8
# (or not).
# In the worst case we have two named constants, where one's name happens
# encoded in UTF8 happens to be the same byte sequence as the second's
# encoded in (say) ISO-8859-1.
# In this case, $yes and $no both have item hashrefs.
if ($yes) {
$body .= $indent . " if (" . $self->is_utf8_param . ") {\n";
} elsif ($no) {
$body .= $indent . " if (!" . $self->is_utf8_param . ") {\n";
}
if ($either) {
$body .= $self->return_clause ({indent=>4 + length $indent}, $either);
if ($yes and $no) {
$body .= $indent . " } else {\n";
$body .= $self->return_clause ({indent=>4 + length $indent}, $no);
}
$body .= $indent . " }\n";
} else {
$body .= $self->return_clause ({indent=>2 + length $indent}, $item);
}
$body .= $indent . "}\n";
}
=item switch_clause arg_hashref, NAMELEN, ITEMHASH, ITEM...
An internal method to generate a suitable C<switch> clause, called by
C<C_constant> I<ITEM>s are in the hash ref format as given in the description
of C<C_constant>, and must all have the names of the same length, given by
I<NAMELEN>. I<ITEMHASH> is a reference to a hash, keyed by name, values being
the hashrefs in the I<ITEM> list. (No parameters are modified, and there can
be keys in the I<ITEMHASH> that are not in the list of I<ITEM>s without
causing problems - the hash is passed in to save generating it afresh for
each call).
=cut
sub switch_clause {
my ($self, $args, $namelen, $items, @items) = @_;
my ($indent, $comment) = @{$args}{qw(indent comment)};
$indent = ' ' x ($indent || 2);
local $Text::Wrap::huge = 'overflow';
local $Text::Wrap::columns = 80;
my @names = sort map {$_->{name}} @items;
my $leader = $indent . '/* ';
my $follower = ' ' x length $leader;
my $body = $indent . "/* Names all of length $namelen. */\n";
if (defined $comment) {
$body = wrap ($leader, $follower, $comment) . "\n";
$leader = $follower;
}
my @safe_names = @names;
foreach (@safe_names) {
confess sprintf "Name '$_' is length %d, not $namelen", length
unless length == $namelen;
# Argh. 5.6.1
# next unless tr/A-Za-z0-9_//c;
next if tr/A-Za-z0-9_// == length;
$_ = '"' . perl_stringify ($_) . '"';
# Ensure that the enclosing C comment doesn't end
# by turning */ into *" . "/
s!\*\/!\*"."/!gs;
# gcc -Wall doesn't like finding /* inside a comment
s!\/\*!/"."\*!gs;
}
$body .= wrap ($leader, $follower, join (" ", @safe_names) . " */") . "\n";
# Figure out what to switch on.
# (RMS, Spread of jump table, Position, Hashref)
my @best = (1e38, ~0);
# Prefer the last character over the others. (As it lets us shorten the
# memEQ clause at no cost).
foreach my $i ($namelen - 1, 0 .. ($namelen - 2)) {
my ($min, $max) = (~0, 0);
my %spread;
if (is_perl56) {
# Need proper Unicode preserving hash keys for bytes in range 128-255
# here too, for some reason. grr 5.6.1 yet again.
tie %spread, 'ExtUtils::Constant::Aaargh56Hash';
}
foreach (@names) {
my $char = substr $_, $i, 1;
my $ord = ord $char;
confess "char $ord is out of range" if $ord > 255;
$max = $ord if $ord > $max;
$min = $ord if $ord < $min;
push @{$spread{$char}}, $_;
# warn "$_ $char";
}
# I'm going to pick the character to split on that minimises the root
# mean square of the number of names in each case. Normally this should
# be the one with the most keys, but it may pick a 7 where the 8 has
# one long linear search. I'm not sure if RMS or just sum of squares is
# actually better.
# $max and $min are for the tie-breaker if the root mean squares match.
# Assuming that the compiler may be building a jump table for the
# switch() then try to minimise the size of that jump table.
# Finally use < not <= so that if it still ties the earliest part of
# the string wins. Because if that passes but the memEQ fails, it may
# only need the start of the string to bin the choice.
# I think. But I'm micro-optimising. :-)
# OK. Trump that. Now favour the last character of the string, before the
# rest.
my $ss;
$ss += @$_ * @$_ foreach values %spread;
my $rms = sqrt ($ss / keys %spread);
if ($rms < $best[0] || ($rms == $best[0] && ($max - $min) < $best[1])) {
@best = ($rms, $max - $min, $i, \%spread);
}
}
confess "Internal error. Failed to pick a switch point for @names"
unless defined $best[2];
# use Data::Dumper; print Dumper (@best);
my ($offset, $best) = @best[2,3];
$body .= $indent . "/* Offset $offset gives the best switch position. */\n";
my $do_front_chop = $offset == 0 && $namelen > 2;
if ($do_front_chop) {
$body .= $indent . "switch (*" . $self->name_param() . "++) {\n";
} else {
$body .= $indent . "switch (" . $self->name_param() . "[$offset]) {\n";
}
foreach my $char (sort keys %$best) {
confess sprintf "'$char' is %d bytes long, not 1", length $char
if length ($char) != 1;
confess sprintf "char %#X is out of range", ord $char if ord ($char) > 255;
$body .= $indent . "case '" . C_stringify ($char) . "':\n";
foreach my $thisone (sort {
# Deal with the case of an item actually being an array ref to 1 or 2
# hashrefs. Don't assign to $a or $b, as they're aliases to the
# original
my $l = ref $a eq 'ARRAY' ? ($a->[0] || $->[1]) : $a;
my $r = ref $b eq 'ARRAY' ? ($b->[0] || $->[1]) : $b;
# Sort by weight first
($r->{weight} || 0) <=> ($l->{weight} || 0)
# Sort equal weights by name
or $l->{name} cmp $r->{name}}
# If this looks evil, maybe it is. $items is a
# hashref, and we're doing a hash slice on it
@{$items}{@{$best->{$char}}}) {
# warn "You are here";
if ($do_front_chop) {
$body .= $self->match_clause ({indent => 2 + length $indent,
checked_at => \$char}, $thisone);
} else {
$body .= $self->match_clause ({indent => 2 + length $indent,
checked_at => $offset}, $thisone);
}
}
$body .= $indent . " break;\n";
}
$body .= $indent . "}\n";
return $body;
}
sub C_constant_return_type {
"static int";
}
sub C_constant_prefix_param {
'';
}
sub C_constant_prefix_param_defintion {
'';
}
sub name_param_definition {
"const char *" . $_[0]->name_param;
}
sub namelen_param {
'len';
}
sub namelen_param_definition {
'size_t ' . $_[0]->namelen_param;
}
sub C_constant_other_params {
'';
}
sub C_constant_other_params_defintion {
'';
}
=item params WHAT
An "internal" method, subject to change, currently called to allow an
overriding class to cache information that will then be passed into all
the C<*param*> calls. (Yes, having to read the source to make sense of this is
considered a known bug). I<WHAT> is be a hashref of types the constant
function will return. In ExtUtils::Constant::XS this method is used to
returns a hashref keyed IV NV PV SV to show which combination of pointers will
be needed in the C argument list generated by
C_constant_other_params_definition and C_constant_other_params
=cut
sub params {
'';
}
=item dogfood arg_hashref, ITEM...
An internal function to generate the embedded perl code that will regenerate
the constant subroutines. Parameters are the same as for C_constant.
Currently the base class does nothing and returns an empty string.
=cut
sub dogfood {
''
}
=item normalise_items args, default_type, seen_types, seen_items, ITEM...
Convert the items to a normalised form. For 8 bit and Unicode values converts
the item to an array of 1 or 2 items, both 8 bit and UTF-8 encoded.
=cut
sub normalise_items
{
my $self = shift;
my $args = shift;
my $default_type = shift;
my $what = shift;
my $items = shift;
my @new_items;
foreach my $orig (@_) {
my ($name, $item);
if (ref $orig) {
# Make a copy which is a normalised version of the ref passed in.
$name = $orig->{name};
my ($type, $macro, $value) = @$orig{qw (type macro value)};
$type ||= $default_type;
$what->{$type} = 1;
$item = {name=>$name, type=>$type};
undef $macro if defined $macro and $macro eq $name;
$item->{macro} = $macro if defined $macro;
undef $value if defined $value and $value eq $name;
$item->{value} = $value if defined $value;
foreach my $key (qw(default pre post def_pre def_post weight
not_constant)) {
my $value = $orig->{$key};
$item->{$key} = $value if defined $value;
# warn "$key $value";
}
} else {
$name = $orig;
$item = {name=>$name, type=>$default_type};
$what->{$default_type} = 1;
}
warn +(ref ($self) || $self)
. "doesn't know how to handle values of type $_ used in macro $name"
unless $self->valid_type ($item->{type});
# tr///c is broken on 5.6.1 for utf8, so my original tr/\0-\177//c
# doesn't work. Upgrade to 5.8
# if ($name !~ tr/\0-\177//c || $] < 5.005_50) {
if ($name =~ tr/\0-\177// == length $name || $] < 5.005_50
|| $args->{disable_utf8_duplication}) {
# No characters outside 7 bit ASCII.
if (exists $items->{$name}) {
die "Multiple definitions for macro $name";
}
$items->{$name} = $item;
} else {
# No characters outside 8 bit. This is hardest.
if (exists $items->{$name} and ref $items->{$name} ne 'ARRAY') {
confess "Unexpected ASCII definition for macro $name";
}
# Again, 5.6.1 tr broken, so s/5\.6.*/5\.8\.0/;
# if ($name !~ tr/\0-\377//c) {
if ($name =~ tr/\0-\377// == length $name) {
# if ($] < 5.007) {
# $name = pack "C*", unpack "U*", $name;
# }
$item->{utf8} = 'no';
$items->{$name}[1] = $item;
push @new_items, $item;
# Copy item, to create the utf8 variant.
$item = {%$item};
}
# Encode the name as utf8 bytes.
unless (is_perl56) {
utf8::encode($name);
} else {
# warn "Was >$name< " . length ${name};
$name = pack 'C*', unpack 'C*', $name . pack 'U*';
# warn "Now '${name}' " . length ${name};
}
if ($items->{$name}[0]) {
die "Multiple definitions for macro $name";
}
$item->{utf8} = 'yes';
$item->{name} = $name;
$items->{$name}[0] = $item;
# We have need for the utf8 flag.
$what->{''} = 1;
}
push @new_items, $item;
}
@new_items;
}
=item C_constant arg_hashref, ITEM...
A function that returns a B<list> of C subroutine definitions that return
the value and type of constants when passed the name by the XS wrapper.
I<ITEM...> gives a list of constant names. Each can either be a string,
which is taken as a C macro name, or a reference to a hash with the following
keys
=over 8
=item name
The name of the constant, as seen by the perl code.
=item type
The type of the constant (I<IV>, I<NV> etc)
=item value
A C expression for the value of the constant, or a list of C expressions if
the type is aggregate. This defaults to the I<name> if not given.
=item macro
The C pre-processor macro to use in the C<#ifdef>. This defaults to the
I<name>, and is mainly used if I<value> is an C<enum>. If a reference an
array is passed then the first element is used in place of the C<#ifdef>
line, and the second element in place of the C<#endif>. This allows
pre-processor constructions such as
#if defined (foo)
#if !defined (bar)
...
#endif
#endif
to be used to determine if a constant is to be defined.
A "macro" 1 signals that the constant is always defined, so the C<#if>/C<#endif>
test is omitted.
=item default
Default value to use (instead of C<croak>ing with "your vendor has not
defined...") to return if the macro isn't defined. Specify a reference to
an array with type followed by value(s).
=item pre
C code to use before the assignment of the value of the constant. This allows
you to use temporary variables to extract a value from part of a C<struct>
and return this as I<value>. This C code is places at the start of a block,
so you can declare variables in it.
=item post
C code to place between the assignment of value (to a temporary) and the
return from the function. This allows you to clear up anything in I<pre>.
Rarely needed.
=item def_pre
=item def_post
Equivalents of I<pre> and I<post> for the default value.
=item utf8
Generated internally. Is zero or undefined if name is 7 bit ASCII,
"no" if the name is 8 bit (and so should only match if SvUTF8() is false),
"yes" if the name is utf8 encoded.
The internals automatically clone any name with characters 128-255 but none
256+ (ie one that could be either in bytes or utf8) into a second entry
which is utf8 encoded.
=item weight
Optional sorting weight for names, to determine the order of
linear testing when multiple names fall in the same case of a switch clause.
Higher comes earlier, undefined defaults to zero.
=back
In the argument hashref, I<package> is the name of the package, and is only
used in comments inside the generated C code. I<subname> defaults to
C<constant> if undefined.
I<default_type> is the type returned by C<ITEM>s that don't specify their
type. It defaults to the value of C<default_type()>. I<types> should be given
either as a comma separated list of types that the C subroutine I<subname>
will generate or as a reference to a hash. I<default_type> will be added to
the list if not present, as will any types given in the list of I<ITEM>s. The
resultant list should be the same list of types that C<XS_constant> is
given. [Otherwise C<XS_constant> and C<C_constant> may differ in the number of
parameters to the constant function. I<indent> is currently unused and
ignored. In future it may be used to pass in information used to change the C
indentation style used.] The best way to maintain consistency is to pass in a
hash reference and let this function update it.
I<breakout> governs when child functions of I<subname> are generated. If there
are I<breakout> or more I<ITEM>s with the same length of name, then the code
to switch between them is placed into a function named I<subname>_I<len>, for
example C<constant_5> for names 5 characters long. The default I<breakout> is
3. A single C<ITEM> is always inlined.
=cut
# The parameter now BREAKOUT was previously documented as:
#
# I<NAMELEN> if defined signals that all the I<name>s of the I<ITEM>s are of
# this length, and that the constant name passed in by perl is checked and
# also of this length. It is used during recursion, and should be C<undef>
# unless the caller has checked all the lengths during code generation, and
# the generated subroutine is only to be called with a name of this length.
#
# As you can see it now performs this function during recursion by being a
# scalar reference.
sub C_constant {
my ($self, $args, @items) = @_;
my ($package, $subname, $default_type, $what, $indent, $breakout) =
@{$args}{qw(package subname default_type types indent breakout)};
$package ||= 'Foo';
$subname ||= 'constant';
# I'm not using this. But a hashref could be used for full formatting without
# breaking this API
# $indent ||= 0;
my ($namelen, $items);
if (ref $breakout) {
# We are called recursively. We trust @items to be normalised, $what to
# be a hashref, and pinch %$items from our parent to save recalculation.
($namelen, $items) = @$breakout;
} else {
$items = {};
if (is_perl56) {
# Need proper Unicode preserving hash keys.
require ExtUtils::Constant::Aaargh56Hash;
tie %$items, 'ExtUtils::Constant::Aaargh56Hash';
}
$breakout ||= 3;
$default_type ||= $self->default_type();
if (!ref $what) {
# Convert line of the form IV,UV,NV to hash
$what = {map {$_ => 1} split /,\s*/, ($what || '')};
# Figure out what types we're dealing with, and assign all unknowns to the
# default type
}
@items = $self->normalise_items ({}, $default_type, $what, $items, @items);
# use Data::Dumper; print Dumper @items;
}
my $params = $self->params ($what);
# Probably "static int"
my ($body, @subs);
$body = $self->C_constant_return_type($params) . "\n$subname ("
# Eg "pTHX_ "
. $self->C_constant_prefix_param_defintion($params)
# Probably "const char *name"
. $self->name_param_definition($params);
# Something like ", STRLEN len"
$body .= ", " . $self->namelen_param_definition($params)
unless defined $namelen;
$body .= $self->C_constant_other_params_defintion($params);
$body .= ") {\n";
if (defined $namelen) {
# We are a child subroutine. Print the simple description
my $comment = 'When generated this function returned values for the list'
. ' of names given here. However, subsequent manual editing may have'
. ' added or removed some.';
$body .= $self->switch_clause ({indent=>2, comment=>$comment},
$namelen, $items, @items);
} else {
# We are the top level.
$body .= " /* Initially switch on the length of the name. */\n";
$body .= $self->dogfood ({package => $package, subname => $subname,
default_type => $default_type, what => $what,
indent => $indent, breakout => $breakout},
@items);
$body .= ' switch ('.$self->namelen_param().") {\n";
# Need to group names of the same length
my @by_length;
foreach (@items) {
push @{$by_length[length $_->{name}]}, $_;
}
foreach my $i (0 .. $#by_length) {
next unless $by_length[$i]; # None of this length
$body .= " case $i:\n";
if (@{$by_length[$i]} == 1) {
my $only_thing = $by_length[$i]->[0];
if ($only_thing->{utf8}) {
if ($only_thing->{utf8} eq 'yes') {
# With utf8 on flag item is passed in element 0
$body .= $self->match_clause (undef, [$only_thing]);
} else {
# With utf8 off flag item is passed in element 1
$body .= $self->match_clause (undef, [undef, $only_thing]);
}
} else {
$body .= $self->match_clause (undef, $only_thing);
}
} elsif (@{$by_length[$i]} < $breakout) {
$body .= $self->switch_clause ({indent=>4},
$i, $items, @{$by_length[$i]});
} else {
# Only use the minimal set of parameters actually needed by the types
# of the names of this length.
my $what = {};
foreach (@{$by_length[$i]}) {
$what->{$_->{type}} = 1;
$what->{''} = 1 if $_->{utf8};
}
$params = $self->params ($what);
push @subs, $self->C_constant ({package=>$package,
subname=>"${subname}_$i",
default_type => $default_type,
types => $what, indent => $indent,
breakout => [$i, $items]},
@{$by_length[$i]});
$body .= " return ${subname}_$i ("
# Eg "aTHX_ "
. $self->C_constant_prefix_param($params)
# Probably "name"
. $self->name_param($params);
$body .= $self->C_constant_other_params($params);
$body .= ");\n";
}
$body .= " break;\n";
}
$body .= " }\n";
}
my $notfound = $self->return_statement_for_notfound();
$body .= " $notfound\n" if $notfound;
$body .= "}\n";
return (@subs, $body);
}
1;
__END__
=back
=head1 BUGS
Not everything is documented yet.
Probably others.
=head1 AUTHOR
Nicholas Clark <[email protected]> based on the code in C<h2xs> by Larry Wall and
others
|