From 78c554474a00eff0388242266ebe166db56d2800 Mon Sep 17 00:00:00 2001 From: "Christopher R. Hertel" Date: Sun, 19 Sep 2004 21:25:24 +0000 Subject: r2420: Way back at the 1st SambaXP, Simo pointed out a subtle bug related to the interaction between the splay tree code and the code used to find a leaf node. The problem is rare, and with most sites using the newer hashing algorithm it's probably not important to fix it. I have fixed it, however. (This used to be commit b3997aeaf47d5cf0d5ed63038f3b22c9dfcdff4c) --- source3/ubiqx/ubi_BinTree.c | 68 +++++++++++++++++++++++++++++++++++---------- source3/ubiqx/ubi_BinTree.h | 31 ++++++++++++++++++--- 2 files changed, 81 insertions(+), 18 deletions(-) diff --git a/source3/ubiqx/ubi_BinTree.c b/source3/ubiqx/ubi_BinTree.c index 8a4d461280..e452ac10dc 100644 --- a/source3/ubiqx/ubi_BinTree.c +++ b/source3/ubiqx/ubi_BinTree.c @@ -26,7 +26,16 @@ * * -------------------------------------------------------------------------- ** * - * Log: ubi_BinTree.c,v + * Log: ubi_BinTree.c,v + * Revision 4.12 2004/06/06 04:51:56 crh + * Fixed a small typo in ubi_BinTree.c (leftover testing cruft). + * Did a small amount of formatting touchup to ubi_BinTree.h. + * + * Revision 4.11 2004/06/06 03:14:09 crh + * Rewrote the ubi_btLeafNode() function. It now takes several paths in an + * effort to find a deeper leaf node. There is a small amount of extra + * overhead, but it is limited. + * * Revision 4.10 2000/06/06 20:38:40 crh * In the ReplaceNode() function, the old node header was being copied * to the new node header using a byte-by-byte copy. This was causing @@ -181,14 +190,15 @@ #include "ubi_BinTree.h" /* Header for this module. */ + /* ========================================================================== ** * Static data. */ static char ModuleID[] = "ubi_BinTree\n\ -\tRevision: 4.10 \n\ -\tDate: 2000/06/06 20:38:40 \n\ -\tAuthor: crh \n"; +\tRevision: 4.12\n\ +\tDate: 2004/06/06 04:51:56\n\ +\tAuthor: crh\n"; /* ========================================================================== ** * Internal (private) functions. @@ -1061,8 +1071,8 @@ ubi_btNodePtr ubi_btLeafNode( ubi_btNodePtr leader ) * * Input: leader - Pointer to a node at which to start the descent. * - * Output: A pointer to a leaf node selected in a somewhat arbitrary - * manner. + * Output: A pointer to a leaf node, selected in a somewhat arbitrary + * manner but with an effort to dig deep. * * Notes: I wrote this function because I was using splay trees as a * database cache. The cache had a maximum size on it, and I @@ -1071,7 +1081,7 @@ ubi_btNodePtr ubi_btLeafNode( ubi_btNodePtr leader ) * tend toward the bottom of the tree, meaning that leaf nodes * are good candidates for removal. (I really can't think of * any other reason to use this function.) - * + In a simple binary tree or an AVL tree, the most recently + * + In a simple binary tree, or in an AVL tree, the most recently * added nodes tend to be nearer the bottom, making this a *bad* * way to choose which node to remove from the cache. * + Randomizing the traversal order is probably a good idea. You @@ -1079,25 +1089,55 @@ ubi_btNodePtr ubi_btLeafNode( ubi_btNodePtr leader ) * in pointers to nodes other than the root node each time. A * pointer to any node in the tree will do. Of course, if you * pass a pointer to a leaf node you'll get the same thing back. + * + In an unbalanced splay tree, if you simply traverse downward + * until you hit a leaf node it is possible to accidentally + * stumble onto a short path. The result will be a leaf node + * that is actually very high in the tree--possibly a very + * recently accessed node. Not good. This function can follow + * multiple paths in an effort to find a leaf node deeper + * in the tree. Following a single path, of course, is the + * fastest way to find a leaf node. A complete traversal would + * be sure to find the deepest leaf but would be very costly in + * terms of time. This function uses a compromise that has + * worked well in testing. * * ------------------------------------------------------------------------ ** */ { - ubi_btNodePtr follower = NULL; + #define MAXPATHS 4 /* Set higher for more maximum paths, lower for fewer. */ + ubi_trNodePtr p[MAXPATHS]; + ubi_trNodePtr q[MAXPATHS]; int whichway = ubi_trLEFT; + int paths; + int i, j; + + /* If the subtree is empty, return NULL. + */ + if( NULL == leader ) + return( NULL ); - while( NULL != leader ) + /* Initialize the p[] array with a pointer to the single node we've been + * given as a starting point. + */ + p[0] = leader; + paths = 1; + while( paths > 0 ) { - follower = leader; - leader = follower->Link[ whichway ]; - if( NULL == leader ) + for( i = 0; i < paths; i++ ) + q[i] = p[i]; + + for( i = j = 0; (i < paths) && (j < MAXPATHS); i++ ) { + if( NULL != q[i]->Link[whichway] ) + p[j++] = q[i]->Link[whichway]; whichway = ubi_trRevWay( whichway ); - leader = follower->Link[ whichway ]; + if( (j < MAXPATHS) && (NULL != q[i]->Link[whichway]) ) + p[j++] = q[i]->Link[whichway]; } + paths = j; } - return( follower ); + return( q[0] ); } /* ubi_btLeafNode */ int ubi_btModuleID( int size, char *list[] ) diff --git a/source3/ubiqx/ubi_BinTree.h b/source3/ubiqx/ubi_BinTree.h index c0c6d59309..43ca1a9871 100644 --- a/source3/ubiqx/ubi_BinTree.h +++ b/source3/ubiqx/ubi_BinTree.h @@ -28,7 +28,16 @@ * * -------------------------------------------------------------------------- ** * - * Log: ubi_BinTree.h,v + * Log: ubi_BinTree.h,v + * Revision 4.12 2004/06/06 04:51:56 crh + * Fixed a small typo in ubi_BinTree.c (leftover testing cruft). + * Did a small amount of formatting touchup to ubi_BinTree.h. + * + * Revision 4.11 2004/06/06 03:14:09 crh + * Rewrote the ubi_btLeafNode() function. It now takes several paths in an + * effort to find a deeper leaf node. There is a small amount of extra + * overhead, but it is limited. + * * Revision 4.10 2000/06/06 20:38:40 crh * In the ReplaceNode() function, the old node header was being copied * to the new node header using a byte-by-byte copy. This was causing @@ -260,6 +269,7 @@ typedef enum { * is left as is. * -------------------------------------------------------------------------- ** */ + #define ubi_trNormalize(W) ((char)( (W) - ubi_trEQUAL )) #define ubi_trAbNormal(W) ((char)( ((char)ubi_btSgn( (long)(W) )) \ + ubi_trEQUAL )) @@ -270,6 +280,7 @@ typedef enum { * DUPlicate KEY bits of the tree root flags field. * -------------------------------------------------------------------------- ** */ + #define ubi_trDups_OK(A) \ ((ubi_trDUPKEY & ((A)->flags))?(ubi_trTRUE):(ubi_trFALSE)) #define ubi_trOvwt_OK(A) \ @@ -337,6 +348,7 @@ typedef void *ubi_btItemPtr; /* A pointer to key data within a node. */ * * ------------------------------------------------------------------------- ** */ + typedef struct ubi_btNodeStruct { struct ubi_btNodeStruct *Link[ 3 ]; char gender; @@ -747,8 +759,8 @@ ubi_btNodePtr ubi_btLeafNode( ubi_btNodePtr leader ); * * Input: leader - Pointer to a node at which to start the descent. * - * Output: A pointer to a leaf node selected in a somewhat arbitrary - * manner. + * Output: A pointer to a leaf node, selected in a somewhat arbitrary + * manner but with an effort to dig deep. * * Notes: I wrote this function because I was using splay trees as a * database cache. The cache had a maximum size on it, and I @@ -757,7 +769,7 @@ ubi_btNodePtr ubi_btLeafNode( ubi_btNodePtr leader ); * tend toward the bottom of the tree, meaning that leaf nodes * are good candidates for removal. (I really can't think of * any other reason to use this function.) - * + In a simple binary tree or an AVL tree, the most recently + * + In a simple binary tree, or in an AVL tree, the most recently * added nodes tend to be nearer the bottom, making this a *bad* * way to choose which node to remove from the cache. * + Randomizing the traversal order is probably a good idea. You @@ -765,6 +777,17 @@ ubi_btNodePtr ubi_btLeafNode( ubi_btNodePtr leader ); * in pointers to nodes other than the root node each time. A * pointer to any node in the tree will do. Of course, if you * pass a pointer to a leaf node you'll get the same thing back. + * + In an unbalanced splay tree, if you simply traverse downward + * until you hit a leaf node it is possible to accidentally + * stumble onto a short path. The result will be a leaf node + * that is actually very high in the tree--possibly a very + * recently accessed node. Not good. This function can follow + * multiple paths in an effort to find a leaf node deeper + * in the tree. Following a single path, of course, is the + * fastest way to find a leaf node. A complete traversal would + * be sure to find the deepest leaf but would be very costly in + * terms of time. This function uses a compromise that has + * worked well in testing. * * ------------------------------------------------------------------------ ** */ -- cgit