r3029: implemented byte range lock timeouts.

This adds a pvfs_wait_message() routine which uses the new messaging system, event timers and talloc destructors to give a nice generic async event handling system with an easy to use interface. The extensions to pvfs_lock.c are based on calls to pvfs_wait_message() routines. We now pass all of our smbtorture locking tests, although while writing this code I have thought of some additional tests that should be added, particularly for lock cancel operations. I'll work on that soon. This commit also extends the smbtorture lock tests to test the rather weird 0xEEFFFFFF locking semantics that I have discovered in win2003. Win2003 treats the 0xEEFFFFFF boundary as special, and will give different error codes on either side of it. Locks on both sides are allowed; the only difference is which error code is given when a lock is denied. Anyone like to hazard a guess as to why? It has me stumped. (This used to be commit 4395c0557ab175d6a8dd99df03c266325949ffa5)
author: Andrew Tridgell <tridge@samba.org> 2004-10-18 07:40:17 +0000
committer: Gerald (Jerry) Carter <jerry@samba.org> 2007-10-10 13:00:00 -0500
commit: d0cc571e30bf49443ac7d1b1a0b896ee72d7d9a6 (patch)
tree: a0c43b9894da40ab6b78cdad8a6a6964d8067fe7 /source4/ntvfs/common
parent: d37acd0fe71923987a68377bf01040eaed245d9f (diff)
download: samba-d0cc571e30bf49443ac7d1b1a0b896ee72d7d9a6.tar.gz
samba-d0cc571e30bf49443ac7d1b1a0b896ee72d7d9a6.tar.bz2
samba-d0cc571e30bf49443ac7d1b1a0b896ee72d7d9a6.zip
1 files changed, 244 insertions, 28 deletions
diff --git a/source4/ntvfs/common/brlock.c b/source4/ntvfs/common/brlock.c
index 0eb644e943..792ee52ad5 100644
--- a/source4/ntvfs/common/brlock.c
+++ b/source4/ntvfs/common/brlock.c
@@ -27,12 +27,6 @@
 
 #include "includes.h"
 
-struct brl_context {
-	struct tdb_wrap *w;
-	servid_t server;
-	uint16_t tid;
-};
-
 /*
   in this module a "DATA_BLOB *file_key" is a blob that uniquely identifies
   a file. For a local posix filesystem this will usually be a combination
@@ -60,13 +54,25 @@ struct lock_struct {
 	uint64_t size;
 	uint16_t fnum;
 	enum brl_type lock_type;
+	void *notify_ptr;
+};
+
+struct brl_context {
+	struct tdb_wrap *w;
+	servid_t server;
+	uint16_t tid;
+	void *messaging_ctx;
+	struct lock_struct last_lock_failure;
 };
 
+
 /*
   Open up the brlock.tdb database. Close it down using
-  talloc_free()
+  talloc_free(). We need the messaging_ctx to allow for
+  pending lock notifications.
 */
-void *brl_init(TALLOC_CTX *mem_ctx, servid_t server, uint16_t tid)
+void *brl_init(TALLOC_CTX *mem_ctx, servid_t server, uint16_t tid, 
+	       void *messaging_ctx)
 {
 	char *path;
 	struct brl_context *brl;
@@ -88,6 +94,8 @@ void *brl_init(TALLOC_CTX *mem_ctx, servid_t server, uint16_t tid)
 
 	brl->server = server;
 	brl->tid = tid;
+	brl->messaging_ctx = messaging_ctx;
+	ZERO_STRUCT(brl->last_lock_failure);
 
 	return (void *)brl;
 }
@@ -104,11 +112,30 @@ static BOOL brl_same_context(struct lock_context *ctx1, struct lock_context *ctx
 }
 
 /*
+  see if lck1 and lck2 overlap
+*/
+static BOOL brl_overlap(struct lock_struct *lck1, 
+			struct lock_struct *lck2)
+{
+	if (lck1->start >= (lck2->start + lck2->size) ||
+	    lck2->start >= (lck1->start + lck1->size)) {
+		return False;
+	}
+	return True;
+} 
+
+/*
  See if lock2 can be added when lock1 is in place.
 */
 static BOOL brl_conflict(struct lock_struct *lck1, 
 			 struct lock_struct *lck2)
 {
+	/* pending locks don't conflict with anything */
+	if (lck1->lock_type >= PENDING_READ_LOCK ||
+	    lck2->lock_type >= PENDING_READ_LOCK) {
+		return False;
+	}
+
 	if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
 		return False;
 	}
@@ -118,12 +145,7 @@ static BOOL brl_conflict(struct lock_struct *lck1,
 		return False;
 	}
 
-	if (lck1->start >= (lck2->start + lck2->size) ||
-	    lck2->start >= (lck1->start + lck1->size)) {
-		return False;
-	}
-	    
-	return True;
+	return brl_overlap(lck1, lck2);
 } 
 
 
@@ -133,32 +155,68 @@ static BOOL brl_conflict(struct lock_struct *lck1,
 */
 static BOOL brl_conflict_other(struct lock_struct *lck1, struct lock_struct *lck2)
 {
+	/* pending locks don't conflict with anything */
+	if (lck1->lock_type >= PENDING_READ_LOCK ||
+	    lck2->lock_type >= PENDING_READ_LOCK) {
+		return False;
+	}
+
 	if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) 
 		return False;
 
+	/*
+	 * note that incoming write calls conflict with existing READ
+	 * locks even if the context is the same. JRA. See LOCKTEST7
+	 * in smbtorture.
+	 */
 	if (brl_same_context(&lck1->context, &lck2->context) &&
-	    lck1->fnum == lck2->fnum) {
+	    lck1->fnum == lck2->fnum &&
+	    (lck2->lock_type == READ_LOCK || lck1->lock_type == WRITE_LOCK)) {
 		return False;
 	}
 
-	if (lck1->start >= (lck2->start + lck2->size) ||
-	    lck2->start >= (lck1->start + lck1->size))
-		return False;
-	    
-	return True;
+	return brl_overlap(lck1, lck2);
 } 
 
 
+/*
+  amazingly enough, w2k3 "remembers" whether the last lock failure
+  is the same as this one and changes its error code. I wonder if any
+  app depends on this?
+*/
+static NTSTATUS brl_lock_failed(struct brl_context *brl, struct lock_struct *lock)
+{
+	if (brl_same_context(&lock->context, &brl->last_lock_failure.context) &&
+	    lock->fnum == brl->last_lock_failure.fnum &&
+	    lock->start == brl->last_lock_failure.start &&
+	    lock->size == brl->last_lock_failure.size) {
+		return NT_STATUS_FILE_LOCK_CONFLICT;
+	}
+	brl->last_lock_failure = *lock;
+	if (lock->start >= 0xEF000000) {
+		/* amazing the little things you learn with a test
+		   suite. Locks beyond this offset (as a 64 bit
+		   number!) always generate the conflict error
+		   code. */
+		return NT_STATUS_FILE_LOCK_CONFLICT;
+	}
+	return NT_STATUS_LOCK_NOT_GRANTED;
+}
 
 /*
- Lock a range of bytes.
+  Lock a range of bytes.  The lock_type can be a PENDING_*_LOCK, in
+  which case a real lock is first tried, and if that fails then a
+  pending lock is created. When the pending lock is triggered (by
+  someone else closing an overlapping lock range) a messaging
+  notification is sent, identified by the notify_ptr
 */
 NTSTATUS brl_lock(void *brl_ctx,
 		  DATA_BLOB *file_key, 
 		  uint16_t smbpid,
 		  uint16_t fnum, 
 		  uint64_t start, uint64_t size, 
-		  enum brl_type lock_type)
+		  enum brl_type lock_type,
+		  void *notify_ptr)
 {
 	struct brl_context *brl = brl_ctx;
 	TDB_DATA kbuf, dbuf;
@@ -174,6 +232,20 @@ NTSTATUS brl_lock(void *brl_ctx,
 		return NT_STATUS_INTERNAL_DB_CORRUPTION;
 	}
 
+	/* if this is a pending lock, then with the chainlock held we
+	   try to get the real lock. If we succeed then we don't need
+	   to make it pending. This prevents a possible race condition
+	   where the pending lock gets created after the lock that is
+	   preventing the real lock gets removed */
+	if (lock_type >= PENDING_READ_LOCK) {
+		enum brl_type rw = (lock_type==PENDING_READ_LOCK? READ_LOCK : WRITE_LOCK);
+		status = brl_lock(brl_ctx, file_key, smbpid, fnum, start, size, rw, NULL);
+		if (NT_STATUS_IS_OK(status)) {
+			tdb_chainunlock(brl->w->tdb, kbuf);
+			return NT_STATUS_OK;
+		}
+	}
+
 	dbuf = tdb_fetch(brl->w->tdb, kbuf);
 
 	lock.context.smbpid = smbpid;
@@ -183,6 +255,7 @@ NTSTATUS brl_lock(void *brl_ctx,
 	lock.size = size;
 	lock.fnum = fnum;
 	lock.lock_type = lock_type;
+	lock.notify_ptr = notify_ptr;
 
 	if (dbuf.dptr) {
 		/* there are existing locks - make sure they don't conflict */
@@ -190,7 +263,7 @@ NTSTATUS brl_lock(void *brl_ctx,
 		count = dbuf.dsize / sizeof(*locks);
 		for (i=0; i<count; i++) {
 			if (brl_conflict(&locks[i], &lock)) {
-				status = NT_STATUS_LOCK_NOT_GRANTED;
+				status = brl_lock_failed(brl, &lock);
 				goto fail;
 			}
 		}
@@ -214,6 +287,14 @@ NTSTATUS brl_lock(void *brl_ctx,
 
 	free(dbuf.dptr);
 	tdb_chainunlock(brl->w->tdb, kbuf);
+
+	/* the caller needs to know if the real lock was granted. If
+	   we have reached here then it must be a pending lock that
+	   was granted, so tell them the lock failed */
+	if (lock_type >= PENDING_READ_LOCK) {
+		return brl_lock_failed(brl, &lock);
+	}
+
 	return NT_STATUS_OK;
 
  fail:
@@ -225,6 +306,57 @@ NTSTATUS brl_lock(void *brl_ctx,
 
 
 /*
+  we are removing a lock that might be holding up a pending lock. Scan for pending
+  locks that cover this range and if we find any then notify the server that it should
+  retry the lock
+*/
+static void brl_notify_unlock(struct brl_context *brl,
+			      struct lock_struct *locks, int count, 
+			      struct lock_struct *removed_lock)
+{
+	int i, last_notice;
+
+	/* the last_notice logic is to prevent stampeding on a lock
+	   range. It prevents us sending hundreds of notifies on the
+	   same range of bytes. It doesn't prevent all possible
+	   stampedes, but it does prevent the most common problem */
+	last_notice = -1;
+
+	for (i=0;i<count;i++) {
+		if (locks[i].lock_type >= PENDING_READ_LOCK &&
+		    brl_overlap(&locks[i], removed_lock)) {
+			DATA_BLOB data;
+
+			if (last_notice != -1 && brl_overlap(&locks[i], &locks[last_notice])) {
+				continue;
+			}
+			last_notice = i;
+			data.data = (void *)&locks[i].notify_ptr;
+			data.length = sizeof(void *);
+			messaging_send(brl->messaging_ctx, locks[i].context.server, MSG_BRL_RETRY, &data);
+		}
+	}
+}
+
+
+/*
+  send notifications for all pending locks - the file is being closed by this
+  user
+*/
+static void brl_notify_all(struct brl_context *brl,
+			   struct lock_struct *locks, int count)
+{
+	int i;
+	for (i=0;i<count;i++) {
+		if (locks->lock_type >= PENDING_READ_LOCK) {
+			brl_notify_unlock(brl, locks, count, &locks[i]);
+		}
+	}
+}
+
+
+
+/*
  Unlock a range of bytes.
 */
 NTSTATUS brl_unlock(void *brl_ctx,
@@ -261,15 +393,92 @@ NTSTATUS brl_unlock(void *brl_ctx,
 	locks = (struct lock_struct *)dbuf.dptr;
 	count = dbuf.dsize / sizeof(*locks);
 
-	locks = (struct lock_struct *)dbuf.dptr;
-	count = dbuf.dsize / sizeof(*locks);
 	for (i=0; i<count; i++) {
 		struct lock_struct *lock = &locks[i];
 		
 		if (brl_same_context(&lock->context, &context) &&
 		    lock->fnum == fnum &&
 		    lock->start == start &&
-		    lock->size == size) {
+		    lock->size == size &&
+		    lock->notify_ptr == NULL) {
+			/* found it - delete it */
+			if (count == 1) {
+				if (tdb_delete(brl->w->tdb, kbuf) != 0) {
+					status = NT_STATUS_INTERNAL_DB_CORRUPTION;
+					goto fail;
+				}
+			} else {
+				struct lock_struct removed_lock = *lock;
+				if (i < count-1) {
+					memmove(&locks[i], &locks[i+1], 
+						sizeof(*locks)*((count-1) - i));
+				}
+				count--;
+
+				/* send notifications for any relevant pending locks */
+				brl_notify_unlock(brl, locks, count, &removed_lock);
+
+				dbuf.dsize = count * sizeof(*locks);
+
+				if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
+					status = NT_STATUS_INTERNAL_DB_CORRUPTION;
+					goto fail;
+				}
+			}
+			
+			free(dbuf.dptr);
+			tdb_chainunlock(brl->w->tdb, kbuf);
+			return NT_STATUS_OK;
+		}
+	}
+	
+	/* we didn't find it */
+	status = NT_STATUS_RANGE_NOT_LOCKED;
+
+ fail:
+	free(dbuf.dptr);
+	tdb_chainunlock(brl->w->tdb, kbuf);
+	return status;
+}
+
+
+/*
+  remove a pending lock. This is called when the caller has either
+  given up trying to establish a lock or when they have succeeded in
+  getting it. In either case they no longer need to be notified.
+*/
+NTSTATUS brl_remove_pending(void *brl_ctx,
+			    DATA_BLOB *file_key, 
+			    void *notify_ptr)
+{
+	struct brl_context *brl = brl_ctx;
+	TDB_DATA kbuf, dbuf;
+	int count, i;
+	struct lock_struct *locks;
+	NTSTATUS status;
+
+	kbuf.dptr = file_key->data;
+	kbuf.dsize = file_key->length;
+
+	if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
+		return NT_STATUS_INTERNAL_DB_CORRUPTION;
+	}
+
+	dbuf = tdb_fetch(brl->w->tdb, kbuf);
+	if (!dbuf.dptr) {
+		tdb_chainunlock(brl->w->tdb, kbuf);
+		return NT_STATUS_RANGE_NOT_LOCKED;
+	}
+
+	/* there are existing locks - find a match */
+	locks = (struct lock_struct *)dbuf.dptr;
+	count = dbuf.dsize / sizeof(*locks);
+
+	for (i=0; i<count; i++) {
+		struct lock_struct *lock = &locks[i];
+		
+		if (lock->notify_ptr == notify_ptr &&
+		    lock->context.server == brl->server) {
 			/* found it - delete it */
 			if (count == 1) {
 				if (tdb_delete(brl->w->tdb, kbuf) != 0) {
@@ -281,7 +490,8 @@ NTSTATUS brl_unlock(void *brl_ctx,
 					memmove(&locks[i], &locks[i+1], 
 						sizeof(*locks)*((count-1) - i));
 				}
-				dbuf.dsize -= sizeof(*locks);
+				count--;
+				dbuf.dsize = count * sizeof(*locks);
 				if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
 					status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 					goto fail;
@@ -404,7 +614,13 @@ NTSTATUS brl_close(void *brl_ctx,
 			status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 		}
 	} else if (dcount != 0) {
-		dbuf.dsize -= dcount * sizeof(*locks);
+		/* tell all pending lock holders for this file that
+		   they have a chance now. This is a bit indiscriminant,
+		   but works OK */
+		brl_notify_all(brl, locks, count);
+
+		dbuf.dsize = count * sizeof(*locks);
+
 		if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
 			status = NT_STATUS_INTERNAL_DB_CORRUPTION;
 		}
author	Andrew Tridgell <tridge@samba.org>	2004-10-18 07:40:17 +0000
committer	Gerald (Jerry) Carter <jerry@samba.org>	2007-10-10 13:00:00 -0500
commit	d0cc571e30bf49443ac7d1b1a0b896ee72d7d9a6 (patch)
tree	a0c43b9894da40ab6b78cdad8a6a6964d8067fe7 /source4/ntvfs/common
parent	d37acd0fe71923987a68377bf01040eaed245d9f (diff)
download	samba-d0cc571e30bf49443ac7d1b1a0b896ee72d7d9a6.tar.gz samba-d0cc571e30bf49443ac7d1b1a0b896ee72d7d9a6.tar.bz2 samba-d0cc571e30bf49443ac7d1b1a0b896ee72d7d9a6.zip