From 55abd936a838a4410899db76cb5530b0c4694dc9 Mon Sep 17 00:00:00 2001
From: Gerald Carter <jerry@samba.org>
Date: Wed, 10 Oct 2001 17:19:10 +0000
Subject: mega-merge from 2.2 (This used to be commit
 c76bf8ed3275e217d1b691879153fe9137bcbe38)

---
 docs/docbook/scripts/collateindex.pl | 595 +++++++++++++++++++++++++++++++++++
 1 file changed, 595 insertions(+)
 create mode 100644 docs/docbook/scripts/collateindex.pl

(limited to 'docs/docbook/scripts/collateindex.pl')
diff --git a/docs/docbook/scripts/collateindex.pl b/docs/docbook/scripts/collateindex.pl
new file mode 100644
index 0000000000..fd757edb32
--- /dev/null
+++ b/docs/docbook/scripts/collateindex.pl
@@ -0,0 +1,595 @@
+# -*- Perl -*-
+#
+
+use Getopt::Std;
+
+$usage = "Usage: $0 <opts> file
+Where <opts> are:
+       -p        Link to points in the document.  The default is to link
+                 to the closest containing section.
+       -g        Group terms with IndexDiv based on the first letter 
+                 of the term (or its sortas attribute).
+                 (This probably doesn't handle i10n particularly well)
+       -s name   Name the IndexDiv that contains symbols.  The default
+                 is 'Symbols'.  Meaningless if -g is not used.
+       -t name   Title for the index.
+       -P file   Read a preamble from file.  The content of file will
+                 be inserted before the <index> tag.
+       -i id     The ID for the <index> tag.
+       -o file   Output to file. Defaults to stdout.
+       -S scope  Scope of the index, must be 'all', 'local', or 'global'.
+                 If unspecified, 'all' is assumed.
+       -I scope  The implied scope, must be 'all', 'local', or 'global'.
+                 IndexTerms which do not specify a scope will have the
+                 implied scope.  If unspecified, 'all' is assumed.
+       -x        Make a SetIndex.
+       -f        Force the output file to be written, even if it appears
+                 to have been edited by hand.
+       -N        New index (generates an empty index file).
+       file      The file containing index data generated by Jade
+                 with the DocBook HTML Stylesheet.\n";
+
+die $usage if ! getopts('Dfgi:NpP:s:o:S:I:t:x');
+
+$linkpoints   = $opt_p;
+$lettergroups = $opt_g;
+$symbolsname  = $opt_s || "Symbols";
+$title        = $opt_t;
+$preamble     = $opt_P;
+$outfile      = $opt_o || '-';
+$indexid      = $opt_i;
+$scope        = uc($opt_S) || 'ALL';
+$impliedscope = uc($opt_I) || 'ALL';
+$setindex     = $opt_x;
+$forceoutput  = $opt_f;
+$newindex     = $opt_N;
+$debug        = $opt_D;
+
+$indextag     = $setindex ? 'setindex' : 'index';
+
+if ($newindex) {
+    safe_open(*OUT, $outfile);
+    if ($indexid) {
+	print OUT "<$indextag id='$indexid'>\n\n";
+    } else {
+	print OUT "<$indextag>\n\n";
+    }
+
+    print OUT "<!-- This file was produced by collateindex.pl.         -->\n";
+    print OUT "<!-- Remove this comment if you edit this file by hand! -->\n";
+
+    print OUT "</$indextag>\n";
+    exit 0;
+}
+
+$dat = shift @ARGV || die $usage;
+die "$0: cannot find $dat.\n" if ! -f $dat;
+
+%legal_scopes = ('ALL' => 1, 'LOCAL' => 1, 'GLOBAL' => 1);
+if ($scope && !$legal_scopes{$scope}) {
+    die "Invalid scope.\n$usage\n";
+}
+if ($impliedscope && !$legal_scopes{$impliedscope}) {
+    die "Invalid implied scope.\n$usage\n";
+}
+
+@term = ();
+%id   = ();
+
+$termcount = 0;
+
+print STDERR "Processing $dat...\n";
+
+# Read the index file, creating an array of objects.  Each object 
+# represents and indexterm and has fields for the content of the
+# indexterm
+
+open (F, $dat);
+while (<F>) {
+    chop;
+
+    if (/^\/indexterm/i) {
+	push (@term, $idx);
+	next;
+    }
+
+    if (/^indexterm (.*)$/i) {
+	$termcount++;
+	$idx = {};
+	$idx->{'zone'} = {};
+	$idx->{'href'} = $1;
+	$idx->{'count'} = $termcount;
+	$idx->{'scope'} = $impliedscope;
+	next;
+    }
+
+    if (/^indexpoint (.*)$/i) {
+	$idx->{'hrefpoint'} = $1;
+	next;
+    }
+
+    if (/^title (.*)$/i) {
+	$idx->{'title'} = $1;
+	next;
+    }
+
+    if (/^primary[\[ ](.*)$/i) {
+	if (/^primary\[(.*?)\] (.*)$/i) {
+	    $idx->{'psortas'} = $1;
+	    $idx->{'primary'} = $2;
+	} else {
+	    $idx->{'psortas'} = $1;
+	    $idx->{'primary'} = $1;
+	}
+	next;
+    }
+
+    if (/^secondary[\[ ](.*)$/i) {
+	if (/^secondary\[(.*?)\] (.*)$/i) {
+	    $idx->{'ssortas'} = $1;
+	    $idx->{'secondary'} = $2;
+	} else {
+	    $idx->{'ssortas'} = $1;
+	    $idx->{'secondary'} = $1;
+	}
+	next;
+    }
+
+    if (/^tertiary[\[ ](.*)$/i) {
+	if (/^tertiary\[(.*?)\] (.*)$/i) {
+	    $idx->{'tsortas'} = $1;
+	    $idx->{'tertiary'} = $2;
+	} else {
+	    $idx->{'tsortas'} = $1;
+	    $idx->{'tertiary'} = $1;
+	}
+	next;
+    }
+
+    if (/^see (.*)$/i) {
+	$idx->{'see'} = $1;
+	next;
+    }
+
+    if (/^seealso (.*)$/i) {
+	$idx->{'seealso'} = $1;
+	next;
+    }
+
+    if (/^significance (.*)$/i) {
+	$idx->{'significance'} = $1;
+	next;
+    }
+
+    if (/^class (.*)$/i) {
+	$idx->{'class'} = $1;
+	next;
+    }
+
+    if (/^scope (.*)$/i) {
+	$idx->{'scope'} = uc($1);
+	next;
+    }
+
+    if (/^startref (.*)$/i) {
+	$idx->{'startref'} = $1;
+	next;
+    }
+
+    if (/^id (.*)$/i) {
+	$idx->{'id'} = $1;
+	$id{$1} = $idx;
+	next;
+    }
+
+    if (/^zone (.*)$/i) {
+	my($href) = $1;
+	$_ = scalar(<F>);
+	chop;
+	die "Bad zone: $_\n" if !/^title (.*)$/i;
+	$idx->{'zone'}->{$href} = $1;
+	next;
+    }
+
+    die "Unrecognized: $_\n";
+}
+close (F);
+
+print STDERR "$termcount entries loaded...\n";
+
+# Fixup the startrefs...
+# In DocBook, STARTREF is a #CONREF attribute; support this by copying
+# all of the fields from the indexterm with the id specified by STARTREF
+# to the indexterm that has the STARTREF.
+foreach $idx (@term) {
+    my($ididx, $field);
+    if ($idx->{'startref'}) {
+	$ididx = $id{$idx->{'startref'}};
+	foreach $field ('primary', 'secondary', 'tertiary', 'see', 'seealso',
+		        'psortas', 'ssortas', 'tsortas', 'significance',
+		        'class', 'scope') {
+	    $idx->{$field} = $ididx->{$field};
+	}
+    }
+}
+
+# Sort the index terms
+@term = sort termsort @term;
+
+# Move all of the non-alphabetic entries to the front of the index.
+@term = sortsymbols(@term);
+
+safe_open(*OUT, $outfile);
+
+# Write the index...
+if ($indexid) {
+    print OUT "<$indextag id='$indexid'>\n\n";
+} else {
+    print OUT "<$indextag>\n\n";
+}
+
+print OUT "<!-- This file was produced by collateindex.pl.         -->\n";
+print OUT "<!-- Remove this comment if you edit this file by hand! -->\n";
+
+print OUT "<!-- ULINK is abused here.
+      
+      The URL attribute holds the URL that points from the index entry
+      back to the appropriate place in the output produced by the HTML
+      stylesheet. (It's much easier to calculate this URL in the first
+      pass.)
+
+      The Role attribute holds the ID (either real or manufactured) of
+      the corresponding INDEXTERM.  This is used by the print backends
+      to produce page numbers.
+
+      The entries below are sorted and collated into the correct order.
+      Duplicates may be removed in the HTML backend, but in the print
+      backends, it is impossible to suppress duplicate pages or coalesce
+      sequences of pages into a range.
+-->\n\n";
+
+print OUT "<title>$title</title>\n\n" if $title;
+
+$last = {};     # the last indexterm we processed
+$first = 1;     # this is the first one
+$group = "";    # we're not in a group yet
+$lastout = "";  # we've not put anything out yet
+
+foreach $idx (@term) {
+    next if $idx->{'startref'}; # no way to represent spans...
+    next if ($idx->{'scope'} eq 'LOCAL') && ($scope eq 'GLOBAL');
+    next if ($idx->{'scope'} eq 'GLOBAL') && ($scope eq 'LOCAL');
+    next if &same($idx, $last); # suppress duplicates
+
+    $termcount--;
+
+    # If primary changes, output a whole new index term, otherwise just
+    # output another secondary or tertiary, as appropriate.  We know from
+    # sorting that the terms will always be in the right order.
+    if (!&tsame($last, $idx, 'primary')) { 
+	print "DIFF PRIM\n" if $debug;
+	&end_entry() if not $first;
+
+	if ($lettergroups) {
+	    # If we're grouping, make the right indexdivs
+	    $letter = $idx->{'psortas'};
+	    $letter = $idx->{'primary'} if !$letter;
+	    $letter = uc(substr($letter, 0, 1));
+	    
+	    # symbols are a special case
+	    if (($letter lt 'A') || ($letter gt 'Z')) {
+		if (($group eq '')
+		    || (($group ge 'A') && ($group le 'Z'))) {
+		    print OUT "</indexdiv>\n" if !$first;
+		    print OUT "<indexdiv><title>$symbolsname</title>\n\n";
+		    $group = $letter;
+		}
+	    } elsif (($group eq '') || ($group ne $letter)) {
+		print OUT "</indexdiv>\n" if !$first;
+		print OUT "<indexdiv><title>$letter</title>\n\n";
+		$group = $letter;
+	    }
+	}
+
+	$first = 0; # there can only be on first ;-)
+
+	print OUT "<indexentry>\n";
+	print OUT "  <primaryie>", $idx->{'primary'};
+	$lastout = "primaryie";
+
+ 	if ($idx->{'secondary'}) {
+	    print OUT "\n  </primaryie>\n";
+	    print OUT "  <secondaryie>", $idx->{'secondary'};
+	    $lastout = "secondaryie";
+	};
+
+	if ($idx->{'tertiary'}) {
+	    print OUT "\n  </secondaryie>\n";
+	    print OUT "  <tertiaryie>", $idx->{'tertiary'};
+	    $lastout = "tertiaryie";
+	}
+    } elsif (!&tsame($last, $idx, 'secondary')) {
+	print "DIFF SEC\n" if $debug;
+
+	print OUT "\n  </$lastout>\n" if $lastout;
+
+	print OUT "  <secondaryie>", $idx->{'secondary'};
+	$lastout = "secondaryie";
+	if ($idx->{'tertiary'}) {
+	    print OUT "\n  </secondaryie>\n";
+	    print OUT "  <tertiaryie>", $idx->{'tertiary'};
+	    $lastout = "tertiaryie";
+	}
+    } elsif (!&tsame($last, $idx, 'tertiary')) {
+	print "DIFF TERT\n" if $debug;
+
+	print OUT "\n  </$lastout>\n" if $lastout;
+
+	if ($idx->{'tertiary'}) {
+	    print OUT "  <tertiaryie>", $idx->{'tertiary'};
+	    $lastout = "tertiaryie";
+	}
+    }
+
+    &print_term($idx);
+    
+    $last = $idx;
+}
+
+# Termcount is > 0 iff some entries were skipped.
+print STDERR "$termcount entries ignored...\n";
+
+&end_entry();
+
+print OUT "</indexdiv>\n" if $lettergroups;
+print OUT "</$indextag>\n";
+
+close (OUT);
+
+print STDERR "Done.\n";
+
+sub same {
+    my($a) = shift;
+    my($b) = shift;
+
+    my($aP) = $a->{'psortas'} || $a->{'primary'};   
+    my($aS) = $a->{'ssortas'} || $a->{'secondary'}; 
+    my($aT) = $a->{'tsortas'} || $a->{'tertiary'};  
+	                                            
+    my($bP) = $b->{'psortas'} || $b->{'primary'};   
+    my($bS) = $b->{'ssortas'} || $b->{'secondary'}; 
+    my($bT) = $b->{'tsortas'} || $b->{'tertiary'};  
+
+    my($same);
+
+    $aP =~ s/^\s*//; $aP =~ s/\s*$//; $aP = uc($aP);
+    $aS =~ s/^\s*//; $aS =~ s/\s*$//; $aS = uc($aS);
+    $aT =~ s/^\s*//; $aT =~ s/\s*$//; $aT = uc($aT);
+    $bP =~ s/^\s*//; $bP =~ s/\s*$//; $bP = uc($bP);
+    $bS =~ s/^\s*//; $bS =~ s/\s*$//; $bS = uc($bS);
+    $bT =~ s/^\s*//; $bT =~ s/\s*$//; $bT = uc($bT);
+
+#    print "[$aP]=[$bP]\n";
+#    print "[$aS]=[$bS]\n";
+#    print "[$aT]=[$bT]\n";
+
+    # Two index terms are the same if:
+    # 1. the primary, secondary, and tertiary entries are the same
+    #    (or have the same SORTAS)
+    # AND
+    # 2. They occur in the same titled section
+    # AND
+    # 3. They point to the same place
+    #
+    # Notes: Scope is used to suppress some entries, but can't be used
+    #          for comparing duplicates.
+    #        Interpretation of "the same place" depends on whether or
+    #          not $linkpoints is true.
+
+    $same = (($aP eq $bP)
+	     && ($aS eq $bS)
+	     && ($aT eq $bT)
+	     && ($a->{'title'} eq $b->{'title'})
+	     && ($a->{'href'} eq $b->{'href'}));
+
+    # If we're linking to points, they're only the same if they link
+    # to exactly the same spot.  (surely this is redundant?)
+    $same = $same && ($a->{'hrefpoint'} eq $b->{'hrefpoint'})
+	if $linkpoints;
+
+    $same;
+}
+
+sub tsame {
+    # Unlike same(), tsame only compares a single term
+    my($a) = shift;
+    my($b) = shift;
+    my($term) = shift;
+    my($sterm) = substr($term, 0, 1) . "sortas";
+    my($A, $B);
+
+    $A = $a->{$sterm} || $a->{$term};
+    $B = $b->{$sterm} || $b->{$term};
+
+    $A =~ s/^\s*//; $A =~ s/\s*$//; $A = uc($A);
+    $B =~ s/^\s*//; $B =~ s/\s*$//; $B = uc($B);
+
+    return $A eq $B;
+}
+
+sub end_entry {
+    # End any open elements...
+    print OUT "\n  </$lastout>\n" if $lastout;
+    print OUT "</indexentry>\n\n";
+    $lastout = "";
+}
+
+sub print_term {
+    # Print out the links for an indexterm.  There can be more than
+    # one if the term has a ZONE that points to more than one place.
+    # (do we do the right thing in that case?)
+    my($idx) = shift;
+    my($key, $indent, @hrefs);
+    my(%href) = ();
+    my(%phref) = ();
+
+    $indent = "    ";
+
+    if ($idx->{'see'}) {
+	# it'd be nice to make this a link...
+	if ($lastout) {
+	    print OUT "\n  </$lastout>\n";
+	    $lastout = "";
+	}
+	print OUT $indent, "<seeie>", $idx->{'see'}, "</seeie>\n";
+	return;
+    }
+
+    if ($idx->{'seealso'}) {
+	# it'd be nice to make this a link...
+	if ($lastout) {
+	    print OUT "\n  </$lastout>\n";
+	    $lastout = "";
+	}
+	print OUT $indent, "<seealsoie>", $idx->{'seealso'}, "</seealsoie>\n";
+	return;
+    }
+
+    if (keys %{$idx->{'zone'}}) {
+	foreach $key (keys %{$idx->{'zone'}}) {
+	    $href{$key} = $idx->{'zone'}->{$key};
+	    $phref{$key} = $idx->{'zone'}->{$key};
+	}
+    } else {
+	$href{$idx->{'href'}} = $idx->{'title'};
+	$phref{$idx->{'href'}} = $idx->{'hrefpoint'};
+    }
+
+    # We can't use <LINK> because we don't know the ID of the term in the
+    # original source (and, in fact, it might not have one).
+    print OUT ",\n";
+    @hrefs = keys %href;
+    while (@hrefs) {
+	my($linkend) = "";
+	my($role) = "";
+	$key = shift @hrefs;
+	if ($linkpoints) {
+	    $linkend = $phref{$key};
+	} else {
+	    $linkend = $key;
+	}
+
+	$role = $linkend;
+	$role = $1 if $role =~ /\#(.*)$/;
+
+	print OUT $indent;
+	print OUT "<ulink url=\"$linkend\" role=\"$role\">";
+	print OUT "<emphasis>" if ($idx->{'significance'} eq 'PREFERRED');
+	print OUT $href{$key};
+	print OUT "</emphasis>" if ($idx->{'significance'} eq 'PREFERRED');
+	print OUT "</ulink>";
+    }
+}
+
+sub termsort {
+    my($aP) = $a->{'psortas'} || $a->{'primary'};   
+    my($aS) = $a->{'ssortas'} || $a->{'secondary'}; 
+    my($aT) = $a->{'tsortas'} || $a->{'tertiary'};  
+    my($ap) = $a->{'count'};
+	                                            
+    my($bP) = $b->{'psortas'} || $b->{'primary'};   
+    my($bS) = $b->{'ssortas'} || $b->{'secondary'}; 
+    my($bT) = $b->{'tsortas'} || $b->{'tertiary'};  
+    my($bp) = $b->{'count'};
+
+    $aP =~ s/^\s*//; $aP =~ s/\s*$//; $aP = uc($aP);
+    $aS =~ s/^\s*//; $aS =~ s/\s*$//; $aS = uc($aS);
+    $aT =~ s/^\s*//; $aT =~ s/\s*$//; $aT = uc($aT);
+    $bP =~ s/^\s*//; $bP =~ s/\s*$//; $bP = uc($bP);
+    $bS =~ s/^\s*//; $bS =~ s/\s*$//; $bS = uc($bS);
+    $bT =~ s/^\s*//; $bT =~ s/\s*$//; $bT = uc($bT);
+
+    if ($aP eq $bP) {
+	if ($aS eq $bS) {
+	    if ($aT eq $bT) {
+		# make sure seealso's always sort to the bottom
+		return 1 if ($a->{'seealso'});
+		return -1  if ($b->{'seealso'});
+		# if everything else is the same, keep these elements
+		# in document order (so the index links are in the right
+		# order)
+		return $ap <=> $bp;
+	    } else {
+		return $aT cmp $bT;
+	    }
+	} else {
+	    return $aS cmp $bS;
+	}
+    } else {
+	return $aP cmp $bP;
+    }
+}
+
+sub sortsymbols {
+    my(@term) = @_;
+    my(@new) = ();
+    my(@sym) = ();
+    my($letter);
+    my($idx);
+
+    # Move the non-letter things to the front.  Should digits be thier
+    # own group?  Maybe...
+    foreach $idx (@term) {
+	$letter = $idx->{'psortas'};
+	$letter = $idx->{'primary'} if !$letter;
+	$letter = uc(substr($letter, 0, 1));
+
+	if (($letter lt 'A') || ($letter gt 'Z')) {
+	    push (@sym, $idx);
+	} else {
+	    push (@new, $idx);
+	}
+    }
+
+    return (@sym, @new);
+}
+
+sub safe_open {
+    local(*OUT) = shift;
+    local(*F, $_);
+
+    if (($outfile ne '-') && (!$forceoutput)) {
+	my($handedit) = 1;
+	if (open (OUT, $outfile)) {
+	    while (<OUT>) {
+		if (/<!-- Remove this comment if you edit this file by hand! -->/){
+		    $handedit = 0;
+		    last;
+		}
+	    } 
+	    close (OUT);
+	} else {
+	    $handedit = 0;
+	}
+	
+	if ($handedit) {
+	    print "\n$outfile appears to have been edited by hand; use -f or\n";
+	    print "      change the output file.\n";
+	    exit 1;
+	}
+    }
+
+    open (OUT, ">$outfile") || die "$usage\nCannot write to $outfile.\n";
+
+    if ($preamble) { 
+	# Copy the preamble
+	if (open(F, $preamble)) {
+	    while (<F>) {
+		print OUT $_;
+	    }
+	    close(F);
+	} else {
+	    warn "$0: cannot open preamble $preamble.\n";
+	}
+    }
+}
-- 
cgit