Diffstat (limited to 'source4/ntvfs/common/brlock.c')
-rw-r--r--	source4/ntvfs/common/brlock.c	272
1 file changed, 244 insertions(+), 28 deletions(-)
diff --git a/source4/ntvfs/common/brlock.c b/source4/ntvfs/common/brlock.c
index 0eb644e943..792ee52ad5 100644
--- a/source4/ntvfs/common/brlock.c
+++ b/source4/ntvfs/common/brlock.c
@@ -27,12 +27,6 @@
#include "includes.h"
-struct brl_context {
- struct tdb_wrap *w;
- servid_t server;
- uint16_t tid;
-};
-
/*
in this module a "DATA_BLOB *file_key" is a blob that uniquely identifies
a file. For a local posix filesystem this will usually be a combination
@@ -60,13 +54,25 @@ struct lock_struct {
uint64_t size;
uint16_t fnum;
enum brl_type lock_type;
+ void *notify_ptr;
+};
+
+struct brl_context {
+ struct tdb_wrap *w;
+ servid_t server;
+ uint16_t tid;
+ void *messaging_ctx;
+ struct lock_struct last_lock_failure;
};
+
/*
Open up the brlock.tdb database. Close it down using
- talloc_free()
+ talloc_free(). We need the messaging_ctx to allow for
+ pending lock notifications.
*/
-void *brl_init(TALLOC_CTX *mem_ctx, servid_t server, uint16_t tid)
+void *brl_init(TALLOC_CTX *mem_ctx, servid_t server, uint16_t tid,
+ void *messaging_ctx)
{
char *path;
struct brl_context *brl;
@@ -88,6 +94,8 @@ void *brl_init(TALLOC_CTX *mem_ctx, servid_t server, uint16_t tid)
brl->server = server;
brl->tid = tid;
+ brl->messaging_ctx = messaging_ctx;
+ ZERO_STRUCT(brl->last_lock_failure);
return (void *)brl;
}
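/* Editorial sketch, not part of this diff: one way a caller might create
   and later tear down the context.  mem_ctx, server_id, tcon_tid and
   msg_ctx are assumed to come from the surrounding ntvfs code; only
   brl_init() and the talloc_free() cleanup are taken from the comment
   above. */
static NTSTATUS example_setup_brlock(TALLOC_CTX *mem_ctx, servid_t server_id,
				     uint16_t tcon_tid, void *msg_ctx,
				     void **brl_ctx_out)
{
	void *brl_ctx = brl_init(mem_ctx, server_id, tcon_tid, msg_ctx);
	if (brl_ctx == NULL) {
		return NT_STATUS_NO_MEMORY;
	}
	*brl_ctx_out = brl_ctx;
	/* when the tree connect goes away, talloc_free(brl_ctx) closes
	   brlock.tdb again */
	return NT_STATUS_OK;
}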
@@ -104,11 +112,30 @@ static BOOL brl_same_context(struct lock_context *ctx1, struct lock_context *ctx
}
/*
+ see if lck1 and lck2 overlap
+*/
+static BOOL brl_overlap(struct lock_struct *lck1,
+ struct lock_struct *lck2)
+{
+ if (lck1->start >= (lck2->start + lck2->size) ||
+ lck2->start >= (lck1->start + lck1->size)) {
+ return False;
+ }
+ return True;
+}
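/* Editorial sketch, not part of this diff: a worked example of the
   half-open ranges brl_overlap() uses.  Only start and size matter to
   the check; the other fields of struct lock_struct are left zeroed. */
static void example_overlap_semantics(void)
{
	struct lock_struct a, b;

	ZERO_STRUCT(a);
	ZERO_STRUCT(b);

	a.start = 0;	a.size = 4;	/* covers bytes 0-3 */
	b.start = 4;	b.size = 4;	/* covers bytes 4-7 */
	/* ranges that merely touch do not overlap:
	   brl_overlap(&a, &b) == False */

	b.start = 3;			/* now covers bytes 3-6 */
	/* byte 3 is inside both ranges, so they do overlap:
	   brl_overlap(&a, &b) == True */
}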
+
+/*
See if lock2 can be added when lock1 is in place.
*/
static BOOL brl_conflict(struct lock_struct *lck1,
struct lock_struct *lck2)
{
+ /* pending locks don't conflict with anything */
+ if (lck1->lock_type >= PENDING_READ_LOCK ||
+ lck2->lock_type >= PENDING_READ_LOCK) {
+ return False;
+ }
+
if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) {
return False;
}
@@ -118,12 +145,7 @@ static BOOL brl_conflict(struct lock_struct *lck1,
return False;
}
- if (lck1->start >= (lck2->start + lck2->size) ||
- lck2->start >= (lck1->start + lck1->size)) {
- return False;
- }
-
- return True;
+ return brl_overlap(lck1, lck2);
}
@@ -133,32 +155,68 @@ static BOOL brl_conflict(struct lock_struct *lck1,
*/
static BOOL brl_conflict_other(struct lock_struct *lck1, struct lock_struct *lck2)
{
+ /* pending locks don't conflict with anything */
+ if (lck1->lock_type >= PENDING_READ_LOCK ||
+ lck2->lock_type >= PENDING_READ_LOCK) {
+ return False;
+ }
+
if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK)
return False;
+ /*
+ * note that incoming write calls conflict with existing READ
+ * locks even if the context is the same. JRA. See LOCKTEST7
+ * in smbtorture.
+ */
if (brl_same_context(&lck1->context, &lck2->context) &&
- lck1->fnum == lck2->fnum) {
+ lck1->fnum == lck2->fnum &&
+ (lck2->lock_type == READ_LOCK || lck1->lock_type == WRITE_LOCK)) {
return False;
}
- if (lck1->start >= (lck2->start + lck2->size) ||
- lck2->start >= (lck1->start + lck1->size))
- return False;
-
- return True;
+ return brl_overlap(lck1, lck2);
}
+/*
+ amazingly enough, w2k3 "remembers" whether the last lock failure
+ is the same as this one and changes its error code. I wonder if any
+ app depends on this?
+*/
+static NTSTATUS brl_lock_failed(struct brl_context *brl, struct lock_struct *lock)
+{
+ if (brl_same_context(&lock->context, &brl->last_lock_failure.context) &&
+ lock->fnum == brl->last_lock_failure.fnum &&
+ lock->start == brl->last_lock_failure.start &&
+ lock->size == brl->last_lock_failure.size) {
+ return NT_STATUS_FILE_LOCK_CONFLICT;
+ }
+ brl->last_lock_failure = *lock;
+ if (lock->start >= 0xEF000000) {
+ /* amazing the little things you learn with a test
+ suite. Locks beyond this offset (as a 64 bit
+ number!) always generate the conflict error
+ code. */
+ return NT_STATUS_FILE_LOCK_CONFLICT;
+ }
+ return NT_STATUS_LOCK_NOT_GRANTED;
+}
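/* Editorial note, not part of this diff: assuming another connection
   already holds a conflicting write lock, a caller therefore sees
   NT_STATUS_LOCK_NOT_GRANTED on its first failed attempt on a range,
   NT_STATUS_FILE_LOCK_CONFLICT if it immediately retries the identical
   range, and NT_STATUS_FILE_LOCK_CONFLICT straight away for any
   conflicting lock whose start is at or beyond 0xEF000000. */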
/*
- Lock a range of bytes.
+ Lock a range of bytes. The lock_type can be a PENDING_*_LOCK, in
+ which case a real lock is first tried, and if that fails then a
+ pending lock is created. When the pending lock is triggered (by
+ someone else removing an overlapping lock range) a messaging
+ notification is sent, identified by the notify_ptr.
*/
NTSTATUS brl_lock(void *brl_ctx,
DATA_BLOB *file_key,
uint16_t smbpid,
uint16_t fnum,
uint64_t start, uint64_t size,
- enum brl_type lock_type)
+ enum brl_type lock_type,
+ void *notify_ptr)
{
struct brl_context *brl = brl_ctx;
TDB_DATA kbuf, dbuf;
@@ -174,6 +232,20 @@ NTSTATUS brl_lock(void *brl_ctx,
return NT_STATUS_INTERNAL_DB_CORRUPTION;
}
+ /* if this is a pending lock, then with the chainlock held we
+ try to get the real lock. If we succeed then we don't need
+ to make it pending. This prevents a possible race condition
+ where the pending lock gets created after the lock that is
+ preventing the real lock gets removed */
+ if (lock_type >= PENDING_READ_LOCK) {
+ enum brl_type rw = (lock_type==PENDING_READ_LOCK? READ_LOCK : WRITE_LOCK);
+ status = brl_lock(brl_ctx, file_key, smbpid, fnum, start, size, rw, NULL);
+ if (NT_STATUS_IS_OK(status)) {
+ tdb_chainunlock(brl->w->tdb, kbuf);
+ return NT_STATUS_OK;
+ }
+ }
+
dbuf = tdb_fetch(brl->w->tdb, kbuf);
lock.context.smbpid = smbpid;
@@ -183,6 +255,7 @@ NTSTATUS brl_lock(void *brl_ctx,
lock.size = size;
lock.fnum = fnum;
lock.lock_type = lock_type;
+ lock.notify_ptr = notify_ptr;
if (dbuf.dptr) {
/* there are existing locks - make sure they don't conflict */
@@ -190,7 +263,7 @@ NTSTATUS brl_lock(void *brl_ctx,
count = dbuf.dsize / sizeof(*locks);
for (i=0; i<count; i++) {
if (brl_conflict(&locks[i], &lock)) {
- status = NT_STATUS_LOCK_NOT_GRANTED;
+ status = brl_lock_failed(brl, &lock);
goto fail;
}
}
@@ -214,6 +287,14 @@ NTSTATUS brl_lock(void *brl_ctx,
free(dbuf.dptr);
tdb_chainunlock(brl->w->tdb, kbuf);
+
+ /* the caller needs to know if the real lock was granted. If
+ we have reached here then only the pending lock was added,
+ so tell the caller that the real lock failed */
+ if (lock_type >= PENDING_READ_LOCK) {
+ return brl_lock_failed(brl, &lock);
+ }
+
return NT_STATUS_OK;
fail:
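/* Editorial sketch, not part of this diff: how a caller might request a
   blocking byte range lock using the pending mechanism described above
   brl_lock().  struct example_request and its fields are assumptions;
   brl_lock(), PENDING_WRITE_LOCK and the status codes come from this
   file.  Returns True if the caller should park the request and wait
   for MSG_BRL_RETRY, False if it can reply immediately. */
struct example_request {		/* stand-in for the real request state */
	uint16_t smbpid, fnum;
	uint64_t offset, count;
};

static BOOL example_blocking_lock(void *brl_ctx, DATA_BLOB *file_key,
				  struct example_request *req)
{
	NTSTATUS status = brl_lock(brl_ctx, file_key, req->smbpid, req->fnum,
				   req->offset, req->count,
				   PENDING_WRITE_LOCK, req);
	if (NT_STATUS_IS_OK(status)) {
		/* the real lock was granted straight away */
		return False;
	}
	/* on a lock conflict brl_lock() has recorded a pending lock keyed
	   on req (the notify_ptr); wait for a MSG_BRL_RETRY naming it */
	return True;
}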
@@ -225,6 +306,57 @@ NTSTATUS brl_lock(void *brl_ctx,
/*
+ we are removing a lock that might be holding up a pending lock. Scan for pending
+ locks that cover this range and if we find any then notify the server that it should
+ retry the lock
+*/
+static void brl_notify_unlock(struct brl_context *brl,
+ struct lock_struct *locks, int count,
+ struct lock_struct *removed_lock)
+{
+ int i, last_notice;
+
+ /* the last_notice logic is to prevent stampeding on a lock
+ range. It prevents us sending hundreds of notifies on the
+ same range of bytes. It doesn't prevent all possible
+ stampedes, but it does prevent the most common problem */
+ last_notice = -1;
+
+ for (i=0;i<count;i++) {
+ if (locks[i].lock_type >= PENDING_READ_LOCK &&
+ brl_overlap(&locks[i], removed_lock)) {
+ DATA_BLOB data;
+
+ if (last_notice != -1 && brl_overlap(&locks[i], &locks[last_notice])) {
+ continue;
+ }
+ last_notice = i;
+ data.data = (void *)&locks[i].notify_ptr;
+ data.length = sizeof(void *);
+ messaging_send(brl->messaging_ctx, locks[i].context.server, MSG_BRL_RETRY, &data);
+ }
+ }
+}
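/* Editorial sketch, not part of this diff: what a MSG_BRL_RETRY receiver
   might do with the blob packed by brl_notify_unlock() above.  How the
   message reaches this function is an assumption, as is
   struct example_request (declared in the earlier sketch);
   brl_remove_pending(), brl_lock() and the blob layout (a raw
   notify_ptr) come from this file. */
static void example_brl_retry(void *brl_ctx, DATA_BLOB *file_key,
			      struct example_request *req, DATA_BLOB *data)
{
	void *notify_ptr;
	NTSTATUS status;

	if (data->length != sizeof(void *)) {
		return;
	}
	memcpy(&notify_ptr, data->data, sizeof(void *));
	if (notify_ptr != req) {
		/* notification is for some other pending lock */
		return;
	}

	/* retry the real lock now that an overlapping lock has gone away */
	status = brl_lock(brl_ctx, file_key, req->smbpid, req->fnum,
			  req->offset, req->count, WRITE_LOCK, NULL);
	if (NT_STATUS_IS_OK(status)) {
		/* got it: the pending record is no longer needed */
		brl_remove_pending(brl_ctx, file_key, req);
		/* ... reply to the parked request here ... */
		return;
	}
	/* still blocked: leave the pending lock in place and keep waiting */
}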
+
+
+/*
+ send notifications for all pending locks - the file is being closed by this
+ user
+*/
+static void brl_notify_all(struct brl_context *brl,
+ struct lock_struct *locks, int count)
+{
+ int i;
+ for (i=0;i<count;i++) {
+ if (locks[i].lock_type >= PENDING_READ_LOCK) {
+ brl_notify_unlock(brl, locks, count, &locks[i]);
+ }
+ }
+}
+
+
+
+/*
Unlock a range of bytes.
*/
NTSTATUS brl_unlock(void *brl_ctx,
@@ -261,15 +393,92 @@ NTSTATUS brl_unlock(void *brl_ctx,
locks = (struct lock_struct *)dbuf.dptr;
count = dbuf.dsize / sizeof(*locks);
- locks = (struct lock_struct *)dbuf.dptr;
- count = dbuf.dsize / sizeof(*locks);
for (i=0; i<count; i++) {
struct lock_struct *lock = &locks[i];
if (brl_same_context(&lock->context, &context) &&
lock->fnum == fnum &&
lock->start == start &&
- lock->size == size) {
+ lock->size == size &&
+ lock->notify_ptr == NULL) {
+ /* found it - delete it */
+ if (count == 1) {
+ if (tdb_delete(brl->w->tdb, kbuf) != 0) {
+ status = NT_STATUS_INTERNAL_DB_CORRUPTION;
+ goto fail;
+ }
+ } else {
+ struct lock_struct removed_lock = *lock;
+ if (i < count-1) {
+ memmove(&locks[i], &locks[i+1],
+ sizeof(*locks)*((count-1) - i));
+ }
+ count--;
+
+ /* send notifications for any relevant pending locks */
+ brl_notify_unlock(brl, locks, count, &removed_lock);
+
+ dbuf.dsize = count * sizeof(*locks);
+
+ if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
+ status = NT_STATUS_INTERNAL_DB_CORRUPTION;
+ goto fail;
+ }
+ }
+
+ free(dbuf.dptr);
+ tdb_chainunlock(brl->w->tdb, kbuf);
+ return NT_STATUS_OK;
+ }
+ }
+
+ /* we didn't find it */
+ status = NT_STATUS_RANGE_NOT_LOCKED;
+
+ fail:
+ free(dbuf.dptr);
+ tdb_chainunlock(brl->w->tdb, kbuf);
+ return status;
+}
+
+
+/*
+ remove a pending lock. This is called when the caller has either
+ given up trying to establish a lock or when they have succeeded in
+ getting it. In either case they no longer need to be notified.
+*/
+NTSTATUS brl_remove_pending(void *brl_ctx,
+ DATA_BLOB *file_key,
+ void *notify_ptr)
+{
+ struct brl_context *brl = brl_ctx;
+ TDB_DATA kbuf, dbuf;
+ int count, i;
+ struct lock_struct *locks;
+ NTSTATUS status;
+
+ kbuf.dptr = file_key->data;
+ kbuf.dsize = file_key->length;
+
+ if (tdb_chainlock(brl->w->tdb, kbuf) != 0) {
+ return NT_STATUS_INTERNAL_DB_CORRUPTION;
+ }
+
+ dbuf = tdb_fetch(brl->w->tdb, kbuf);
+ if (!dbuf.dptr) {
+ tdb_chainunlock(brl->w->tdb, kbuf);
+ return NT_STATUS_RANGE_NOT_LOCKED;
+ }
+
+ /* there are existing locks - find a match */
+ locks = (struct lock_struct *)dbuf.dptr;
+ count = dbuf.dsize / sizeof(*locks);
+
+ for (i=0; i<count; i++) {
+ struct lock_struct *lock = &locks[i];
+
+ if (lock->notify_ptr == notify_ptr &&
+ lock->context.server == brl->server) {
/* found it - delete it */
if (count == 1) {
if (tdb_delete(brl->w->tdb, kbuf) != 0) {
@@ -281,7 +490,8 @@ NTSTATUS brl_unlock(void *brl_ctx,
memmove(&locks[i], &locks[i+1],
sizeof(*locks)*((count-1) - i));
}
- dbuf.dsize -= sizeof(*locks);
+ count--;
+ dbuf.dsize = count * sizeof(*locks);
if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
status = NT_STATUS_INTERNAL_DB_CORRUPTION;
goto fail;
@@ -404,7 +614,13 @@ NTSTATUS brl_close(void *brl_ctx,
status = NT_STATUS_INTERNAL_DB_CORRUPTION;
}
} else if (dcount != 0) {
- dbuf.dsize -= dcount * sizeof(*locks);
+ /* tell all pending lock holders for this file that
+ they have a chance now. This is a bit indiscriminate,
+ but works OK */
+ brl_notify_all(brl, locks, count);
+
+ dbuf.dsize = count * sizeof(*locks);
+
if (tdb_store(brl->w->tdb, kbuf, dbuf, TDB_REPLACE) != 0) {
status = NT_STATUS_INTERNAL_DB_CORRUPTION;
}