summaryrefslogtreecommitdiff
path: root/docs/docbook/scripts/collateindex.pl
diff options
context:
space:
mode:
Diffstat (limited to 'docs/docbook/scripts/collateindex.pl')
-rw-r--r--docs/docbook/scripts/collateindex.pl595
1 files changed, 0 insertions, 595 deletions
diff --git a/docs/docbook/scripts/collateindex.pl b/docs/docbook/scripts/collateindex.pl
deleted file mode 100644
index fd757edb32..0000000000
--- a/docs/docbook/scripts/collateindex.pl
+++ /dev/null
@@ -1,595 +0,0 @@
-# -*- Perl -*-
-#
-
-use Getopt::Std;
-
-$usage = "Usage: $0 <opts> file
-Where <opts> are:
- -p Link to points in the document. The default is to link
- to the closest containing section.
- -g Group terms with IndexDiv based on the first letter
- of the term (or its sortas attribute).
- (This probably doesn't handle i10n particularly well)
- -s name Name the IndexDiv that contains symbols. The default
- is 'Symbols'. Meaningless if -g is not used.
- -t name Title for the index.
- -P file Read a preamble from file. The content of file will
- be inserted before the <index> tag.
- -i id The ID for the <index> tag.
- -o file Output to file. Defaults to stdout.
- -S scope Scope of the index, must be 'all', 'local', or 'global'.
- If unspecified, 'all' is assumed.
- -I scope The implied scope, must be 'all', 'local', or 'global'.
- IndexTerms which do not specify a scope will have the
- implied scope. If unspecified, 'all' is assumed.
- -x Make a SetIndex.
- -f Force the output file to be written, even if it appears
- to have been edited by hand.
- -N New index (generates an empty index file).
- file The file containing index data generated by Jade
- with the DocBook HTML Stylesheet.\n";
-
-die $usage if ! getopts('Dfgi:NpP:s:o:S:I:t:x');
-
-$linkpoints = $opt_p;
-$lettergroups = $opt_g;
-$symbolsname = $opt_s || "Symbols";
-$title = $opt_t;
-$preamble = $opt_P;
-$outfile = $opt_o || '-';
-$indexid = $opt_i;
-$scope = uc($opt_S) || 'ALL';
-$impliedscope = uc($opt_I) || 'ALL';
-$setindex = $opt_x;
-$forceoutput = $opt_f;
-$newindex = $opt_N;
-$debug = $opt_D;
-
-$indextag = $setindex ? 'setindex' : 'index';
-
-if ($newindex) {
- safe_open(*OUT, $outfile);
- if ($indexid) {
- print OUT "<$indextag id='$indexid'>\n\n";
- } else {
- print OUT "<$indextag>\n\n";
- }
-
- print OUT "<!-- This file was produced by collateindex.pl. -->\n";
- print OUT "<!-- Remove this comment if you edit this file by hand! -->\n";
-
- print OUT "</$indextag>\n";
- exit 0;
-}
-
-$dat = shift @ARGV || die $usage;
-die "$0: cannot find $dat.\n" if ! -f $dat;
-
-%legal_scopes = ('ALL' => 1, 'LOCAL' => 1, 'GLOBAL' => 1);
-if ($scope && !$legal_scopes{$scope}) {
- die "Invalid scope.\n$usage\n";
-}
-if ($impliedscope && !$legal_scopes{$impliedscope}) {
- die "Invalid implied scope.\n$usage\n";
-}
-
-@term = ();
-%id = ();
-
-$termcount = 0;
-
-print STDERR "Processing $dat...\n";
-
-# Read the index file, creating an array of objects. Each object
-# represents and indexterm and has fields for the content of the
-# indexterm
-
-open (F, $dat);
-while (<F>) {
- chop;
-
- if (/^\/indexterm/i) {
- push (@term, $idx);
- next;
- }
-
- if (/^indexterm (.*)$/i) {
- $termcount++;
- $idx = {};
- $idx->{'zone'} = {};
- $idx->{'href'} = $1;
- $idx->{'count'} = $termcount;
- $idx->{'scope'} = $impliedscope;
- next;
- }
-
- if (/^indexpoint (.*)$/i) {
- $idx->{'hrefpoint'} = $1;
- next;
- }
-
- if (/^title (.*)$/i) {
- $idx->{'title'} = $1;
- next;
- }
-
- if (/^primary[\[ ](.*)$/i) {
- if (/^primary\[(.*?)\] (.*)$/i) {
- $idx->{'psortas'} = $1;
- $idx->{'primary'} = $2;
- } else {
- $idx->{'psortas'} = $1;
- $idx->{'primary'} = $1;
- }
- next;
- }
-
- if (/^secondary[\[ ](.*)$/i) {
- if (/^secondary\[(.*?)\] (.*)$/i) {
- $idx->{'ssortas'} = $1;
- $idx->{'secondary'} = $2;
- } else {
- $idx->{'ssortas'} = $1;
- $idx->{'secondary'} = $1;
- }
- next;
- }
-
- if (/^tertiary[\[ ](.*)$/i) {
- if (/^tertiary\[(.*?)\] (.*)$/i) {
- $idx->{'tsortas'} = $1;
- $idx->{'tertiary'} = $2;
- } else {
- $idx->{'tsortas'} = $1;
- $idx->{'tertiary'} = $1;
- }
- next;
- }
-
- if (/^see (.*)$/i) {
- $idx->{'see'} = $1;
- next;
- }
-
- if (/^seealso (.*)$/i) {
- $idx->{'seealso'} = $1;
- next;
- }
-
- if (/^significance (.*)$/i) {
- $idx->{'significance'} = $1;
- next;
- }
-
- if (/^class (.*)$/i) {
- $idx->{'class'} = $1;
- next;
- }
-
- if (/^scope (.*)$/i) {
- $idx->{'scope'} = uc($1);
- next;
- }
-
- if (/^startref (.*)$/i) {
- $idx->{'startref'} = $1;
- next;
- }
-
- if (/^id (.*)$/i) {
- $idx->{'id'} = $1;
- $id{$1} = $idx;
- next;
- }
-
- if (/^zone (.*)$/i) {
- my($href) = $1;
- $_ = scalar(<F>);
- chop;
- die "Bad zone: $_\n" if !/^title (.*)$/i;
- $idx->{'zone'}->{$href} = $1;
- next;
- }
-
- die "Unrecognized: $_\n";
-}
-close (F);
-
-print STDERR "$termcount entries loaded...\n";
-
-# Fixup the startrefs...
-# In DocBook, STARTREF is a #CONREF attribute; support this by copying
-# all of the fields from the indexterm with the id specified by STARTREF
-# to the indexterm that has the STARTREF.
-foreach $idx (@term) {
- my($ididx, $field);
- if ($idx->{'startref'}) {
- $ididx = $id{$idx->{'startref'}};
- foreach $field ('primary', 'secondary', 'tertiary', 'see', 'seealso',
- 'psortas', 'ssortas', 'tsortas', 'significance',
- 'class', 'scope') {
- $idx->{$field} = $ididx->{$field};
- }
- }
-}
-
-# Sort the index terms
-@term = sort termsort @term;
-
-# Move all of the non-alphabetic entries to the front of the index.
-@term = sortsymbols(@term);
-
-safe_open(*OUT, $outfile);
-
-# Write the index...
-if ($indexid) {
- print OUT "<$indextag id='$indexid'>\n\n";
-} else {
- print OUT "<$indextag>\n\n";
-}
-
-print OUT "<!-- This file was produced by collateindex.pl. -->\n";
-print OUT "<!-- Remove this comment if you edit this file by hand! -->\n";
-
-print OUT "<!-- ULINK is abused here.
-
- The URL attribute holds the URL that points from the index entry
- back to the appropriate place in the output produced by the HTML
- stylesheet. (It's much easier to calculate this URL in the first
- pass.)
-
- The Role attribute holds the ID (either real or manufactured) of
- the corresponding INDEXTERM. This is used by the print backends
- to produce page numbers.
-
- The entries below are sorted and collated into the correct order.
- Duplicates may be removed in the HTML backend, but in the print
- backends, it is impossible to suppress duplicate pages or coalesce
- sequences of pages into a range.
--->\n\n";
-
-print OUT "<title>$title</title>\n\n" if $title;
-
-$last = {}; # the last indexterm we processed
-$first = 1; # this is the first one
-$group = ""; # we're not in a group yet
-$lastout = ""; # we've not put anything out yet
-
-foreach $idx (@term) {
- next if $idx->{'startref'}; # no way to represent spans...
- next if ($idx->{'scope'} eq 'LOCAL') && ($scope eq 'GLOBAL');
- next if ($idx->{'scope'} eq 'GLOBAL') && ($scope eq 'LOCAL');
- next if &same($idx, $last); # suppress duplicates
-
- $termcount--;
-
- # If primary changes, output a whole new index term, otherwise just
- # output another secondary or tertiary, as appropriate. We know from
- # sorting that the terms will always be in the right order.
- if (!&tsame($last, $idx, 'primary')) {
- print "DIFF PRIM\n" if $debug;
- &end_entry() if not $first;
-
- if ($lettergroups) {
- # If we're grouping, make the right indexdivs
- $letter = $idx->{'psortas'};
- $letter = $idx->{'primary'} if !$letter;
- $letter = uc(substr($letter, 0, 1));
-
- # symbols are a special case
- if (($letter lt 'A') || ($letter gt 'Z')) {
- if (($group eq '')
- || (($group ge 'A') && ($group le 'Z'))) {
- print OUT "</indexdiv>\n" if !$first;
- print OUT "<indexdiv><title>$symbolsname</title>\n\n";
- $group = $letter;
- }
- } elsif (($group eq '') || ($group ne $letter)) {
- print OUT "</indexdiv>\n" if !$first;
- print OUT "<indexdiv><title>$letter</title>\n\n";
- $group = $letter;
- }
- }
-
- $first = 0; # there can only be on first ;-)
-
- print OUT "<indexentry>\n";
- print OUT " <primaryie>", $idx->{'primary'};
- $lastout = "primaryie";
-
- if ($idx->{'secondary'}) {
- print OUT "\n </primaryie>\n";
- print OUT " <secondaryie>", $idx->{'secondary'};
- $lastout = "secondaryie";
- };
-
- if ($idx->{'tertiary'}) {
- print OUT "\n </secondaryie>\n";
- print OUT " <tertiaryie>", $idx->{'tertiary'};
- $lastout = "tertiaryie";
- }
- } elsif (!&tsame($last, $idx, 'secondary')) {
- print "DIFF SEC\n" if $debug;
-
- print OUT "\n </$lastout>\n" if $lastout;
-
- print OUT " <secondaryie>", $idx->{'secondary'};
- $lastout = "secondaryie";
- if ($idx->{'tertiary'}) {
- print OUT "\n </secondaryie>\n";
- print OUT " <tertiaryie>", $idx->{'tertiary'};
- $lastout = "tertiaryie";
- }
- } elsif (!&tsame($last, $idx, 'tertiary')) {
- print "DIFF TERT\n" if $debug;
-
- print OUT "\n </$lastout>\n" if $lastout;
-
- if ($idx->{'tertiary'}) {
- print OUT " <tertiaryie>", $idx->{'tertiary'};
- $lastout = "tertiaryie";
- }
- }
-
- &print_term($idx);
-
- $last = $idx;
-}
-
-# Termcount is > 0 iff some entries were skipped.
-print STDERR "$termcount entries ignored...\n";
-
-&end_entry();
-
-print OUT "</indexdiv>\n" if $lettergroups;
-print OUT "</$indextag>\n";
-
-close (OUT);
-
-print STDERR "Done.\n";
-
-sub same {
- my($a) = shift;
- my($b) = shift;
-
- my($aP) = $a->{'psortas'} || $a->{'primary'};
- my($aS) = $a->{'ssortas'} || $a->{'secondary'};
- my($aT) = $a->{'tsortas'} || $a->{'tertiary'};
-
- my($bP) = $b->{'psortas'} || $b->{'primary'};
- my($bS) = $b->{'ssortas'} || $b->{'secondary'};
- my($bT) = $b->{'tsortas'} || $b->{'tertiary'};
-
- my($same);
-
- $aP =~ s/^\s*//; $aP =~ s/\s*$//; $aP = uc($aP);
- $aS =~ s/^\s*//; $aS =~ s/\s*$//; $aS = uc($aS);
- $aT =~ s/^\s*//; $aT =~ s/\s*$//; $aT = uc($aT);
- $bP =~ s/^\s*//; $bP =~ s/\s*$//; $bP = uc($bP);
- $bS =~ s/^\s*//; $bS =~ s/\s*$//; $bS = uc($bS);
- $bT =~ s/^\s*//; $bT =~ s/\s*$//; $bT = uc($bT);
-
-# print "[$aP]=[$bP]\n";
-# print "[$aS]=[$bS]\n";
-# print "[$aT]=[$bT]\n";
-
- # Two index terms are the same if:
- # 1. the primary, secondary, and tertiary entries are the same
- # (or have the same SORTAS)
- # AND
- # 2. They occur in the same titled section
- # AND
- # 3. They point to the same place
- #
- # Notes: Scope is used to suppress some entries, but can't be used
- # for comparing duplicates.
- # Interpretation of "the same place" depends on whether or
- # not $linkpoints is true.
-
- $same = (($aP eq $bP)
- && ($aS eq $bS)
- && ($aT eq $bT)
- && ($a->{'title'} eq $b->{'title'})
- && ($a->{'href'} eq $b->{'href'}));
-
- # If we're linking to points, they're only the same if they link
- # to exactly the same spot. (surely this is redundant?)
- $same = $same && ($a->{'hrefpoint'} eq $b->{'hrefpoint'})
- if $linkpoints;
-
- $same;
-}
-
-sub tsame {
- # Unlike same(), tsame only compares a single term
- my($a) = shift;
- my($b) = shift;
- my($term) = shift;
- my($sterm) = substr($term, 0, 1) . "sortas";
- my($A, $B);
-
- $A = $a->{$sterm} || $a->{$term};
- $B = $b->{$sterm} || $b->{$term};
-
- $A =~ s/^\s*//; $A =~ s/\s*$//; $A = uc($A);
- $B =~ s/^\s*//; $B =~ s/\s*$//; $B = uc($B);
-
- return $A eq $B;
-}
-
-sub end_entry {
- # End any open elements...
- print OUT "\n </$lastout>\n" if $lastout;
- print OUT "</indexentry>\n\n";
- $lastout = "";
-}
-
-sub print_term {
- # Print out the links for an indexterm. There can be more than
- # one if the term has a ZONE that points to more than one place.
- # (do we do the right thing in that case?)
- my($idx) = shift;
- my($key, $indent, @hrefs);
- my(%href) = ();
- my(%phref) = ();
-
- $indent = " ";
-
- if ($idx->{'see'}) {
- # it'd be nice to make this a link...
- if ($lastout) {
- print OUT "\n </$lastout>\n";
- $lastout = "";
- }
- print OUT $indent, "<seeie>", $idx->{'see'}, "</seeie>\n";
- return;
- }
-
- if ($idx->{'seealso'}) {
- # it'd be nice to make this a link...
- if ($lastout) {
- print OUT "\n </$lastout>\n";
- $lastout = "";
- }
- print OUT $indent, "<seealsoie>", $idx->{'seealso'}, "</seealsoie>\n";
- return;
- }
-
- if (keys %{$idx->{'zone'}}) {
- foreach $key (keys %{$idx->{'zone'}}) {
- $href{$key} = $idx->{'zone'}->{$key};
- $phref{$key} = $idx->{'zone'}->{$key};
- }
- } else {
- $href{$idx->{'href'}} = $idx->{'title'};
- $phref{$idx->{'href'}} = $idx->{'hrefpoint'};
- }
-
- # We can't use <LINK> because we don't know the ID of the term in the
- # original source (and, in fact, it might not have one).
- print OUT ",\n";
- @hrefs = keys %href;
- while (@hrefs) {
- my($linkend) = "";
- my($role) = "";
- $key = shift @hrefs;
- if ($linkpoints) {
- $linkend = $phref{$key};
- } else {
- $linkend = $key;
- }
-
- $role = $linkend;
- $role = $1 if $role =~ /\#(.*)$/;
-
- print OUT $indent;
- print OUT "<ulink url=\"$linkend\" role=\"$role\">";
- print OUT "<emphasis>" if ($idx->{'significance'} eq 'PREFERRED');
- print OUT $href{$key};
- print OUT "</emphasis>" if ($idx->{'significance'} eq 'PREFERRED');
- print OUT "</ulink>";
- }
-}
-
-sub termsort {
- my($aP) = $a->{'psortas'} || $a->{'primary'};
- my($aS) = $a->{'ssortas'} || $a->{'secondary'};
- my($aT) = $a->{'tsortas'} || $a->{'tertiary'};
- my($ap) = $a->{'count'};
-
- my($bP) = $b->{'psortas'} || $b->{'primary'};
- my($bS) = $b->{'ssortas'} || $b->{'secondary'};
- my($bT) = $b->{'tsortas'} || $b->{'tertiary'};
- my($bp) = $b->{'count'};
-
- $aP =~ s/^\s*//; $aP =~ s/\s*$//; $aP = uc($aP);
- $aS =~ s/^\s*//; $aS =~ s/\s*$//; $aS = uc($aS);
- $aT =~ s/^\s*//; $aT =~ s/\s*$//; $aT = uc($aT);
- $bP =~ s/^\s*//; $bP =~ s/\s*$//; $bP = uc($bP);
- $bS =~ s/^\s*//; $bS =~ s/\s*$//; $bS = uc($bS);
- $bT =~ s/^\s*//; $bT =~ s/\s*$//; $bT = uc($bT);
-
- if ($aP eq $bP) {
- if ($aS eq $bS) {
- if ($aT eq $bT) {
- # make sure seealso's always sort to the bottom
- return 1 if ($a->{'seealso'});
- return -1 if ($b->{'seealso'});
- # if everything else is the same, keep these elements
- # in document order (so the index links are in the right
- # order)
- return $ap <=> $bp;
- } else {
- return $aT cmp $bT;
- }
- } else {
- return $aS cmp $bS;
- }
- } else {
- return $aP cmp $bP;
- }
-}
-
-sub sortsymbols {
- my(@term) = @_;
- my(@new) = ();
- my(@sym) = ();
- my($letter);
- my($idx);
-
- # Move the non-letter things to the front. Should digits be thier
- # own group? Maybe...
- foreach $idx (@term) {
- $letter = $idx->{'psortas'};
- $letter = $idx->{'primary'} if !$letter;
- $letter = uc(substr($letter, 0, 1));
-
- if (($letter lt 'A') || ($letter gt 'Z')) {
- push (@sym, $idx);
- } else {
- push (@new, $idx);
- }
- }
-
- return (@sym, @new);
-}
-
-sub safe_open {
- local(*OUT) = shift;
- local(*F, $_);
-
- if (($outfile ne '-') && (!$forceoutput)) {
- my($handedit) = 1;
- if (open (OUT, $outfile)) {
- while (<OUT>) {
- if (/<!-- Remove this comment if you edit this file by hand! -->/){
- $handedit = 0;
- last;
- }
- }
- close (OUT);
- } else {
- $handedit = 0;
- }
-
- if ($handedit) {
- print "\n$outfile appears to have been edited by hand; use -f or\n";
- print " change the output file.\n";
- exit 1;
- }
- }
-
- open (OUT, ">$outfile") || die "$usage\nCannot write to $outfile.\n";
-
- if ($preamble) {
- # Copy the preamble
- if (open(F, $preamble)) {
- while (<F>) {
- print OUT $_;
- }
- close(F);
- } else {
- warn "$0: cannot open preamble $preamble.\n";
- }
- }
-}