From 55abd936a838a4410899db76cb5530b0c4694dc9 Mon Sep 17 00:00:00 2001 From: Gerald Carter Date: Wed, 10 Oct 2001 17:19:10 +0000 Subject: mega-merge from 2.2 (This used to be commit c76bf8ed3275e217d1b691879153fe9137bcbe38) --- docs/docbook/scripts/collateindex.pl | 595 +++++++++++++++++++++++++++++++++++ 1 file changed, 595 insertions(+) create mode 100644 docs/docbook/scripts/collateindex.pl (limited to 'docs/docbook/scripts/collateindex.pl') diff --git a/docs/docbook/scripts/collateindex.pl b/docs/docbook/scripts/collateindex.pl new file mode 100644 index 0000000000..fd757edb32 --- /dev/null +++ b/docs/docbook/scripts/collateindex.pl @@ -0,0 +1,595 @@ +# -*- Perl -*- +# + +use Getopt::Std; + +$usage = "Usage: $0 file +Where are: + -p Link to points in the document. The default is to link + to the closest containing section. + -g Group terms with IndexDiv based on the first letter + of the term (or its sortas attribute). + (This probably doesn't handle i10n particularly well) + -s name Name the IndexDiv that contains symbols. The default + is 'Symbols'. Meaningless if -g is not used. + -t name Title for the index. + -P file Read a preamble from file. The content of file will + be inserted before the tag. + -i id The ID for the tag. + -o file Output to file. Defaults to stdout. + -S scope Scope of the index, must be 'all', 'local', or 'global'. + If unspecified, 'all' is assumed. + -I scope The implied scope, must be 'all', 'local', or 'global'. + IndexTerms which do not specify a scope will have the + implied scope. If unspecified, 'all' is assumed. + -x Make a SetIndex. + -f Force the output file to be written, even if it appears + to have been edited by hand. + -N New index (generates an empty index file). + file The file containing index data generated by Jade + with the DocBook HTML Stylesheet.\n"; + +die $usage if ! getopts('Dfgi:NpP:s:o:S:I:t:x'); + +$linkpoints = $opt_p; +$lettergroups = $opt_g; +$symbolsname = $opt_s || "Symbols"; +$title = $opt_t; +$preamble = $opt_P; +$outfile = $opt_o || '-'; +$indexid = $opt_i; +$scope = uc($opt_S) || 'ALL'; +$impliedscope = uc($opt_I) || 'ALL'; +$setindex = $opt_x; +$forceoutput = $opt_f; +$newindex = $opt_N; +$debug = $opt_D; + +$indextag = $setindex ? 'setindex' : 'index'; + +if ($newindex) { + safe_open(*OUT, $outfile); + if ($indexid) { + print OUT "<$indextag id='$indexid'>\n\n"; + } else { + print OUT "<$indextag>\n\n"; + } + + print OUT "\n"; + print OUT "\n"; + + print OUT "\n"; + exit 0; +} + +$dat = shift @ARGV || die $usage; +die "$0: cannot find $dat.\n" if ! -f $dat; + +%legal_scopes = ('ALL' => 1, 'LOCAL' => 1, 'GLOBAL' => 1); +if ($scope && !$legal_scopes{$scope}) { + die "Invalid scope.\n$usage\n"; +} +if ($impliedscope && !$legal_scopes{$impliedscope}) { + die "Invalid implied scope.\n$usage\n"; +} + +@term = (); +%id = (); + +$termcount = 0; + +print STDERR "Processing $dat...\n"; + +# Read the index file, creating an array of objects. Each object +# represents and indexterm and has fields for the content of the +# indexterm + +open (F, $dat); +while () { + chop; + + if (/^\/indexterm/i) { + push (@term, $idx); + next; + } + + if (/^indexterm (.*)$/i) { + $termcount++; + $idx = {}; + $idx->{'zone'} = {}; + $idx->{'href'} = $1; + $idx->{'count'} = $termcount; + $idx->{'scope'} = $impliedscope; + next; + } + + if (/^indexpoint (.*)$/i) { + $idx->{'hrefpoint'} = $1; + next; + } + + if (/^title (.*)$/i) { + $idx->{'title'} = $1; + next; + } + + if (/^primary[\[ ](.*)$/i) { + if (/^primary\[(.*?)\] (.*)$/i) { + $idx->{'psortas'} = $1; + $idx->{'primary'} = $2; + } else { + $idx->{'psortas'} = $1; + $idx->{'primary'} = $1; + } + next; + } + + if (/^secondary[\[ ](.*)$/i) { + if (/^secondary\[(.*?)\] (.*)$/i) { + $idx->{'ssortas'} = $1; + $idx->{'secondary'} = $2; + } else { + $idx->{'ssortas'} = $1; + $idx->{'secondary'} = $1; + } + next; + } + + if (/^tertiary[\[ ](.*)$/i) { + if (/^tertiary\[(.*?)\] (.*)$/i) { + $idx->{'tsortas'} = $1; + $idx->{'tertiary'} = $2; + } else { + $idx->{'tsortas'} = $1; + $idx->{'tertiary'} = $1; + } + next; + } + + if (/^see (.*)$/i) { + $idx->{'see'} = $1; + next; + } + + if (/^seealso (.*)$/i) { + $idx->{'seealso'} = $1; + next; + } + + if (/^significance (.*)$/i) { + $idx->{'significance'} = $1; + next; + } + + if (/^class (.*)$/i) { + $idx->{'class'} = $1; + next; + } + + if (/^scope (.*)$/i) { + $idx->{'scope'} = uc($1); + next; + } + + if (/^startref (.*)$/i) { + $idx->{'startref'} = $1; + next; + } + + if (/^id (.*)$/i) { + $idx->{'id'} = $1; + $id{$1} = $idx; + next; + } + + if (/^zone (.*)$/i) { + my($href) = $1; + $_ = scalar(); + chop; + die "Bad zone: $_\n" if !/^title (.*)$/i; + $idx->{'zone'}->{$href} = $1; + next; + } + + die "Unrecognized: $_\n"; +} +close (F); + +print STDERR "$termcount entries loaded...\n"; + +# Fixup the startrefs... +# In DocBook, STARTREF is a #CONREF attribute; support this by copying +# all of the fields from the indexterm with the id specified by STARTREF +# to the indexterm that has the STARTREF. +foreach $idx (@term) { + my($ididx, $field); + if ($idx->{'startref'}) { + $ididx = $id{$idx->{'startref'}}; + foreach $field ('primary', 'secondary', 'tertiary', 'see', 'seealso', + 'psortas', 'ssortas', 'tsortas', 'significance', + 'class', 'scope') { + $idx->{$field} = $ididx->{$field}; + } + } +} + +# Sort the index terms +@term = sort termsort @term; + +# Move all of the non-alphabetic entries to the front of the index. +@term = sortsymbols(@term); + +safe_open(*OUT, $outfile); + +# Write the index... +if ($indexid) { + print OUT "<$indextag id='$indexid'>\n\n"; +} else { + print OUT "<$indextag>\n\n"; +} + +print OUT "\n"; +print OUT "\n"; + +print OUT "\n\n"; + +print OUT "$title\n\n" if $title; + +$last = {}; # the last indexterm we processed +$first = 1; # this is the first one +$group = ""; # we're not in a group yet +$lastout = ""; # we've not put anything out yet + +foreach $idx (@term) { + next if $idx->{'startref'}; # no way to represent spans... + next if ($idx->{'scope'} eq 'LOCAL') && ($scope eq 'GLOBAL'); + next if ($idx->{'scope'} eq 'GLOBAL') && ($scope eq 'LOCAL'); + next if &same($idx, $last); # suppress duplicates + + $termcount--; + + # If primary changes, output a whole new index term, otherwise just + # output another secondary or tertiary, as appropriate. We know from + # sorting that the terms will always be in the right order. + if (!&tsame($last, $idx, 'primary')) { + print "DIFF PRIM\n" if $debug; + &end_entry() if not $first; + + if ($lettergroups) { + # If we're grouping, make the right indexdivs + $letter = $idx->{'psortas'}; + $letter = $idx->{'primary'} if !$letter; + $letter = uc(substr($letter, 0, 1)); + + # symbols are a special case + if (($letter lt 'A') || ($letter gt 'Z')) { + if (($group eq '') + || (($group ge 'A') && ($group le 'Z'))) { + print OUT "\n" if !$first; + print OUT "$symbolsname\n\n"; + $group = $letter; + } + } elsif (($group eq '') || ($group ne $letter)) { + print OUT "\n" if !$first; + print OUT "$letter\n\n"; + $group = $letter; + } + } + + $first = 0; # there can only be on first ;-) + + print OUT "\n"; + print OUT " ", $idx->{'primary'}; + $lastout = "primaryie"; + + if ($idx->{'secondary'}) { + print OUT "\n \n"; + print OUT " ", $idx->{'secondary'}; + $lastout = "secondaryie"; + }; + + if ($idx->{'tertiary'}) { + print OUT "\n \n"; + print OUT " ", $idx->{'tertiary'}; + $lastout = "tertiaryie"; + } + } elsif (!&tsame($last, $idx, 'secondary')) { + print "DIFF SEC\n" if $debug; + + print OUT "\n \n" if $lastout; + + print OUT " ", $idx->{'secondary'}; + $lastout = "secondaryie"; + if ($idx->{'tertiary'}) { + print OUT "\n \n"; + print OUT " ", $idx->{'tertiary'}; + $lastout = "tertiaryie"; + } + } elsif (!&tsame($last, $idx, 'tertiary')) { + print "DIFF TERT\n" if $debug; + + print OUT "\n \n" if $lastout; + + if ($idx->{'tertiary'}) { + print OUT " ", $idx->{'tertiary'}; + $lastout = "tertiaryie"; + } + } + + &print_term($idx); + + $last = $idx; +} + +# Termcount is > 0 iff some entries were skipped. +print STDERR "$termcount entries ignored...\n"; + +&end_entry(); + +print OUT "\n" if $lettergroups; +print OUT "\n"; + +close (OUT); + +print STDERR "Done.\n"; + +sub same { + my($a) = shift; + my($b) = shift; + + my($aP) = $a->{'psortas'} || $a->{'primary'}; + my($aS) = $a->{'ssortas'} || $a->{'secondary'}; + my($aT) = $a->{'tsortas'} || $a->{'tertiary'}; + + my($bP) = $b->{'psortas'} || $b->{'primary'}; + my($bS) = $b->{'ssortas'} || $b->{'secondary'}; + my($bT) = $b->{'tsortas'} || $b->{'tertiary'}; + + my($same); + + $aP =~ s/^\s*//; $aP =~ s/\s*$//; $aP = uc($aP); + $aS =~ s/^\s*//; $aS =~ s/\s*$//; $aS = uc($aS); + $aT =~ s/^\s*//; $aT =~ s/\s*$//; $aT = uc($aT); + $bP =~ s/^\s*//; $bP =~ s/\s*$//; $bP = uc($bP); + $bS =~ s/^\s*//; $bS =~ s/\s*$//; $bS = uc($bS); + $bT =~ s/^\s*//; $bT =~ s/\s*$//; $bT = uc($bT); + +# print "[$aP]=[$bP]\n"; +# print "[$aS]=[$bS]\n"; +# print "[$aT]=[$bT]\n"; + + # Two index terms are the same if: + # 1. the primary, secondary, and tertiary entries are the same + # (or have the same SORTAS) + # AND + # 2. They occur in the same titled section + # AND + # 3. They point to the same place + # + # Notes: Scope is used to suppress some entries, but can't be used + # for comparing duplicates. + # Interpretation of "the same place" depends on whether or + # not $linkpoints is true. + + $same = (($aP eq $bP) + && ($aS eq $bS) + && ($aT eq $bT) + && ($a->{'title'} eq $b->{'title'}) + && ($a->{'href'} eq $b->{'href'})); + + # If we're linking to points, they're only the same if they link + # to exactly the same spot. (surely this is redundant?) + $same = $same && ($a->{'hrefpoint'} eq $b->{'hrefpoint'}) + if $linkpoints; + + $same; +} + +sub tsame { + # Unlike same(), tsame only compares a single term + my($a) = shift; + my($b) = shift; + my($term) = shift; + my($sterm) = substr($term, 0, 1) . "sortas"; + my($A, $B); + + $A = $a->{$sterm} || $a->{$term}; + $B = $b->{$sterm} || $b->{$term}; + + $A =~ s/^\s*//; $A =~ s/\s*$//; $A = uc($A); + $B =~ s/^\s*//; $B =~ s/\s*$//; $B = uc($B); + + return $A eq $B; +} + +sub end_entry { + # End any open elements... + print OUT "\n \n" if $lastout; + print OUT "\n\n"; + $lastout = ""; +} + +sub print_term { + # Print out the links for an indexterm. There can be more than + # one if the term has a ZONE that points to more than one place. + # (do we do the right thing in that case?) + my($idx) = shift; + my($key, $indent, @hrefs); + my(%href) = (); + my(%phref) = (); + + $indent = " "; + + if ($idx->{'see'}) { + # it'd be nice to make this a link... + if ($lastout) { + print OUT "\n \n"; + $lastout = ""; + } + print OUT $indent, "", $idx->{'see'}, "\n"; + return; + } + + if ($idx->{'seealso'}) { + # it'd be nice to make this a link... + if ($lastout) { + print OUT "\n \n"; + $lastout = ""; + } + print OUT $indent, "", $idx->{'seealso'}, "\n"; + return; + } + + if (keys %{$idx->{'zone'}}) { + foreach $key (keys %{$idx->{'zone'}}) { + $href{$key} = $idx->{'zone'}->{$key}; + $phref{$key} = $idx->{'zone'}->{$key}; + } + } else { + $href{$idx->{'href'}} = $idx->{'title'}; + $phref{$idx->{'href'}} = $idx->{'hrefpoint'}; + } + + # We can't use because we don't know the ID of the term in the + # original source (and, in fact, it might not have one). + print OUT ",\n"; + @hrefs = keys %href; + while (@hrefs) { + my($linkend) = ""; + my($role) = ""; + $key = shift @hrefs; + if ($linkpoints) { + $linkend = $phref{$key}; + } else { + $linkend = $key; + } + + $role = $linkend; + $role = $1 if $role =~ /\#(.*)$/; + + print OUT $indent; + print OUT ""; + print OUT "" if ($idx->{'significance'} eq 'PREFERRED'); + print OUT $href{$key}; + print OUT "" if ($idx->{'significance'} eq 'PREFERRED'); + print OUT ""; + } +} + +sub termsort { + my($aP) = $a->{'psortas'} || $a->{'primary'}; + my($aS) = $a->{'ssortas'} || $a->{'secondary'}; + my($aT) = $a->{'tsortas'} || $a->{'tertiary'}; + my($ap) = $a->{'count'}; + + my($bP) = $b->{'psortas'} || $b->{'primary'}; + my($bS) = $b->{'ssortas'} || $b->{'secondary'}; + my($bT) = $b->{'tsortas'} || $b->{'tertiary'}; + my($bp) = $b->{'count'}; + + $aP =~ s/^\s*//; $aP =~ s/\s*$//; $aP = uc($aP); + $aS =~ s/^\s*//; $aS =~ s/\s*$//; $aS = uc($aS); + $aT =~ s/^\s*//; $aT =~ s/\s*$//; $aT = uc($aT); + $bP =~ s/^\s*//; $bP =~ s/\s*$//; $bP = uc($bP); + $bS =~ s/^\s*//; $bS =~ s/\s*$//; $bS = uc($bS); + $bT =~ s/^\s*//; $bT =~ s/\s*$//; $bT = uc($bT); + + if ($aP eq $bP) { + if ($aS eq $bS) { + if ($aT eq $bT) { + # make sure seealso's always sort to the bottom + return 1 if ($a->{'seealso'}); + return -1 if ($b->{'seealso'}); + # if everything else is the same, keep these elements + # in document order (so the index links are in the right + # order) + return $ap <=> $bp; + } else { + return $aT cmp $bT; + } + } else { + return $aS cmp $bS; + } + } else { + return $aP cmp $bP; + } +} + +sub sortsymbols { + my(@term) = @_; + my(@new) = (); + my(@sym) = (); + my($letter); + my($idx); + + # Move the non-letter things to the front. Should digits be thier + # own group? Maybe... + foreach $idx (@term) { + $letter = $idx->{'psortas'}; + $letter = $idx->{'primary'} if !$letter; + $letter = uc(substr($letter, 0, 1)); + + if (($letter lt 'A') || ($letter gt 'Z')) { + push (@sym, $idx); + } else { + push (@new, $idx); + } + } + + return (@sym, @new); +} + +sub safe_open { + local(*OUT) = shift; + local(*F, $_); + + if (($outfile ne '-') && (!$forceoutput)) { + my($handedit) = 1; + if (open (OUT, $outfile)) { + while () { + if (//){ + $handedit = 0; + last; + } + } + close (OUT); + } else { + $handedit = 0; + } + + if ($handedit) { + print "\n$outfile appears to have been edited by hand; use -f or\n"; + print " change the output file.\n"; + exit 1; + } + } + + open (OUT, ">$outfile") || die "$usage\nCannot write to $outfile.\n"; + + if ($preamble) { + # Copy the preamble + if (open(F, $preamble)) { + while () { + print OUT $_; + } + close(F); + } else { + warn "$0: cannot open preamble $preamble.\n"; + } + } +} -- cgit