Diffstat (limited to 'source3/locking')
-rw-r--r-- | source3/locking/brlock.c  | 1807 |
-rw-r--r-- | source3/locking/locking.c | 1511 |
-rw-r--r-- | source3/locking/posix.c   | 1309 |
3 files changed, 4627 insertions, 0 deletions
diff --git a/source3/locking/brlock.c b/source3/locking/brlock.c new file mode 100644 index 0000000000..032aaa56b6 --- /dev/null +++ b/source3/locking/brlock.c @@ -0,0 +1,1807 @@ +/* + Unix SMB/CIFS implementation. + byte range locking code + Updated to handle range splits/merges. + + Copyright (C) Andrew Tridgell 1992-2000 + Copyright (C) Jeremy Allison 1992-2000 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* This module implements a tdb based byte range locking service, + replacing the fcntl() based byte range locking previously + used. This allows us to provide the same semantics as NT */ + +#include "includes.h" + +#undef DBGC_CLASS +#define DBGC_CLASS DBGC_LOCKING + +#define ZERO_ZERO 0 + +/* The open brlock.tdb database. */ + +static struct db_context *brlock_db; + +/**************************************************************************** + Debug info at level 10 for lock struct. +****************************************************************************/ + +static void print_lock_struct(unsigned int i, struct lock_struct *pls) +{ + DEBUG(10,("[%u]: smbpid = %u, tid = %u, pid = %u, ", + i, + (unsigned int)pls->context.smbpid, + (unsigned int)pls->context.tid, + (unsigned int)procid_to_pid(&pls->context.pid) )); + + DEBUG(10,("start = %.0f, size = %.0f, fnum = %d, %s %s\n", + (double)pls->start, + (double)pls->size, + pls->fnum, + lock_type_name(pls->lock_type), + lock_flav_name(pls->lock_flav) )); +} + +/**************************************************************************** + See if two locking contexts are equal. +****************************************************************************/ + +bool brl_same_context(const struct lock_context *ctx1, + const struct lock_context *ctx2) +{ + return (procid_equal(&ctx1->pid, &ctx2->pid) && + (ctx1->smbpid == ctx2->smbpid) && + (ctx1->tid == ctx2->tid)); +} + +/**************************************************************************** + See if lck1 and lck2 overlap. +****************************************************************************/ + +static bool brl_overlap(const struct lock_struct *lck1, + const struct lock_struct *lck2) +{ + /* this extra check is not redundent - it copes with locks + that go beyond the end of 64 bit file space */ + if (lck1->size != 0 && + lck1->start == lck2->start && + lck1->size == lck2->size) { + return True; + } + + if (lck1->start >= (lck2->start+lck2->size) || + lck2->start >= (lck1->start+lck1->size)) { + return False; + } + return True; +} + +/**************************************************************************** + See if lock2 can be added when lock1 is in place. +****************************************************************************/ + +static bool brl_conflict(const struct lock_struct *lck1, + const struct lock_struct *lck2) +{ + /* Ignore PENDING locks. */ + if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type)) + return False; + + /* Read locks never conflict. 
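+ Two read locks always coexist, whatever their ranges; only a
+ write lock can create a conflict.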
*/ + if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) { + return False; + } + + if (brl_same_context(&lck1->context, &lck2->context) && + lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) { + return False; + } + + return brl_overlap(lck1, lck2); +} + +/**************************************************************************** + See if lock2 can be added when lock1 is in place - when both locks are POSIX + flavour. POSIX locks ignore fnum - they only care about dev/ino which we + know already match. +****************************************************************************/ + +static bool brl_conflict_posix(const struct lock_struct *lck1, + const struct lock_struct *lck2) +{ +#if defined(DEVELOPER) + SMB_ASSERT(lck1->lock_flav == POSIX_LOCK); + SMB_ASSERT(lck2->lock_flav == POSIX_LOCK); +#endif + + /* Ignore PENDING locks. */ + if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type)) + return False; + + /* Read locks never conflict. */ + if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) { + return False; + } + + /* Locks on the same context con't conflict. Ignore fnum. */ + if (brl_same_context(&lck1->context, &lck2->context)) { + return False; + } + + /* One is read, the other write, or the context is different, + do they overlap ? */ + return brl_overlap(lck1, lck2); +} + +#if ZERO_ZERO +static bool brl_conflict1(const struct lock_struct *lck1, + const struct lock_struct *lck2) +{ + if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type)) + return False; + + if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) { + return False; + } + + if (brl_same_context(&lck1->context, &lck2->context) && + lck2->lock_type == READ_LOCK && lck1->fnum == lck2->fnum) { + return False; + } + + if (lck2->start == 0 && lck2->size == 0 && lck1->size != 0) { + return True; + } + + if (lck1->start >= (lck2->start + lck2->size) || + lck2->start >= (lck1->start + lck1->size)) { + return False; + } + + return True; +} +#endif + +/**************************************************************************** + Check to see if this lock conflicts, but ignore our own locks on the + same fnum only. This is the read/write lock check code path. + This is never used in the POSIX lock case. +****************************************************************************/ + +static bool brl_conflict_other(const struct lock_struct *lck1, const struct lock_struct *lck2) +{ + if (IS_PENDING_LOCK(lck1->lock_type) || IS_PENDING_LOCK(lck2->lock_type)) + return False; + + if (lck1->lock_type == READ_LOCK && lck2->lock_type == READ_LOCK) + return False; + + /* POSIX flavour locks never conflict here - this is only called + in the read/write path. */ + + if (lck1->lock_flav == POSIX_LOCK && lck2->lock_flav == POSIX_LOCK) + return False; + + /* + * Incoming WRITE locks conflict with existing READ locks even + * if the context is the same. JRA. See LOCKTEST7 in smbtorture. + */ + + if (!(lck2->lock_type == WRITE_LOCK && lck1->lock_type == READ_LOCK)) { + if (brl_same_context(&lck1->context, &lck2->context) && + lck1->fnum == lck2->fnum) + return False; + } + + return brl_overlap(lck1, lck2); +} + +/**************************************************************************** + Check if an unlock overlaps a pending lock. 
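+ If they overlap, the process waiting on the pending lock must be
+ woken with MSG_SMB_UNLOCK so it can retry its blocked request.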
+****************************************************************************/ + +static bool brl_pending_overlap(const struct lock_struct *lock, const struct lock_struct *pend_lock) +{ + if ((lock->start <= pend_lock->start) && (lock->start + lock->size > pend_lock->start)) + return True; + if ((lock->start >= pend_lock->start) && (lock->start <= pend_lock->start + pend_lock->size)) + return True; + return False; +} + +/**************************************************************************** + Amazingly enough, w2k3 "remembers" whether the last lock failure on a fnum + is the same as this one and changes its error code. I wonder if any + app depends on this ? +****************************************************************************/ + +static NTSTATUS brl_lock_failed(files_struct *fsp, const struct lock_struct *lock, bool blocking_lock) +{ + if (lock->start >= 0xEF000000 && (lock->start >> 63) == 0) { + /* amazing the little things you learn with a test + suite. Locks beyond this offset (as a 64 bit + number!) always generate the conflict error code, + unless the top bit is set */ + if (!blocking_lock) { + fsp->last_lock_failure = *lock; + } + return NT_STATUS_FILE_LOCK_CONFLICT; + } + + if (procid_equal(&lock->context.pid, &fsp->last_lock_failure.context.pid) && + lock->context.tid == fsp->last_lock_failure.context.tid && + lock->fnum == fsp->last_lock_failure.fnum && + lock->start == fsp->last_lock_failure.start) { + return NT_STATUS_FILE_LOCK_CONFLICT; + } + + if (!blocking_lock) { + fsp->last_lock_failure = *lock; + } + return NT_STATUS_LOCK_NOT_GRANTED; +} + +/**************************************************************************** + Open up the brlock.tdb database. +****************************************************************************/ + +void brl_init(bool read_only) +{ + if (brlock_db) { + return; + } + brlock_db = db_open(NULL, lock_path("brlock.tdb"), + lp_open_files_db_hash_size(), + TDB_DEFAULT|TDB_VOLATILE|TDB_CLEAR_IF_FIRST, + read_only?O_RDONLY:(O_RDWR|O_CREAT), 0644 ); + if (!brlock_db) { + DEBUG(0,("Failed to open byte range locking database %s\n", + lock_path("brlock.tdb"))); + return; + } +} + +/**************************************************************************** + Close down the brlock.tdb database. +****************************************************************************/ + +void brl_shutdown(void) +{ + TALLOC_FREE(brlock_db); +} + +#if ZERO_ZERO +/**************************************************************************** + Compare two locks for sorting. +****************************************************************************/ + +static int lock_compare(const struct lock_struct *lck1, + const struct lock_struct *lck2) +{ + if (lck1->start != lck2->start) { + return (lck1->start - lck2->start); + } + if (lck2->size != lck1->size) { + return ((int)lck1->size - (int)lck2->size); + } + return 0; +} +#endif + +/**************************************************************************** + Lock a range of bytes - Windows lock semantics. +****************************************************************************/ + +static NTSTATUS brl_lock_windows(struct byte_range_lock *br_lck, + struct lock_struct *plock, bool blocking_lock) +{ + unsigned int i; + files_struct *fsp = br_lck->fsp; + struct lock_struct *locks = br_lck->lock_data; + + for (i=0; i < br_lck->num_locks; i++) { + /* Do any Windows or POSIX locks conflict ? */ + if (brl_conflict(&locks[i], plock)) { + /* Remember who blocked us. 
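+ The conflicting lock's smbpid is copied into the request so the
+ caller can report which smbpid holds the blocking lock.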
*/ + plock->context.smbpid = locks[i].context.smbpid; + return brl_lock_failed(fsp,plock,blocking_lock); + } +#if ZERO_ZERO + if (plock->start == 0 && plock->size == 0 && + locks[i].size == 0) { + break; + } +#endif + } + + /* We can get the Windows lock, now see if it needs to + be mapped into a lower level POSIX one, and if so can + we get it ? */ + + if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(fsp->conn->params)) { + int errno_ret; + if (!set_posix_lock_windows_flavour(fsp, + plock->start, + plock->size, + plock->lock_type, + &plock->context, + locks, + br_lck->num_locks, + &errno_ret)) { + + /* We don't know who blocked us. */ + plock->context.smbpid = 0xFFFFFFFF; + + if (errno_ret == EACCES || errno_ret == EAGAIN) { + return NT_STATUS_FILE_LOCK_CONFLICT; + } else { + return map_nt_error_from_unix(errno); + } + } + } + + /* no conflicts - add it to the list of locks */ + locks = (struct lock_struct *)SMB_REALLOC(locks, (br_lck->num_locks + 1) * sizeof(*locks)); + if (!locks) { + return NT_STATUS_NO_MEMORY; + } + + memcpy(&locks[br_lck->num_locks], plock, sizeof(struct lock_struct)); + br_lck->num_locks += 1; + br_lck->lock_data = locks; + br_lck->modified = True; + + return NT_STATUS_OK; +} + +/**************************************************************************** + Cope with POSIX range splits and merges. +****************************************************************************/ + +static unsigned int brlock_posix_split_merge(struct lock_struct *lck_arr, /* Output array. */ + const struct lock_struct *ex, /* existing lock. */ + const struct lock_struct *plock, /* proposed lock. */ + bool *lock_was_added) +{ + bool lock_types_differ = (ex->lock_type != plock->lock_type); + + /* We can't merge non-conflicting locks on different context - ignore fnum. */ + + if (!brl_same_context(&ex->context, &plock->context)) { + /* Just copy. */ + memcpy(&lck_arr[0], ex, sizeof(struct lock_struct)); + return 1; + } + + /* We now know we have the same context. */ + + /* Did we overlap ? */ + +/********************************************* + +---------+ + | ex | + +---------+ + +-------+ + | plock | + +-------+ +OR.... + +---------+ + | ex | + +---------+ +**********************************************/ + + if ( (ex->start > (plock->start + plock->size)) || + (plock->start > (ex->start + ex->size))) { + /* No overlap with this lock - copy existing. */ + memcpy(&lck_arr[0], ex, sizeof(struct lock_struct)); + return 1; + } + +/********************************************* + +---------------------------+ + | ex | + +---------------------------+ + +---------------------------+ + | plock | -> replace with plock. + +---------------------------+ +**********************************************/ + + if ( (ex->start >= plock->start) && + (ex->start + ex->size <= plock->start + plock->size) ) { + memcpy(&lck_arr[0], plock, sizeof(struct lock_struct)); + *lock_was_added = True; + return 1; + } + +/********************************************* + +-----------------------+ + | ex | + +-----------------------+ + +---------------+ + | plock | + +---------------+ +OR.... + +-------+ + | ex | + +-------+ + +---------------+ + | plock | + +---------------+ + +BECOMES.... + +---------------+-------+ + | plock | ex | - different lock types. + +---------------+-------+ +OR.... (merge) + +-----------------------+ + | ex | - same lock type. 
+ +-----------------------+ +**********************************************/ + + if ( (ex->start >= plock->start) && + (ex->start <= plock->start + plock->size) && + (ex->start + ex->size > plock->start + plock->size) ) { + + *lock_was_added = True; + + /* If the lock types are the same, we merge, if different, we + add the new lock before the old. */ + + if (lock_types_differ) { + /* Add new. */ + memcpy(&lck_arr[0], plock, sizeof(struct lock_struct)); + memcpy(&lck_arr[1], ex, sizeof(struct lock_struct)); + /* Adjust existing start and size. */ + lck_arr[1].start = plock->start + plock->size; + lck_arr[1].size = (ex->start + ex->size) - (plock->start + plock->size); + return 2; + } else { + /* Merge. */ + memcpy(&lck_arr[0], plock, sizeof(struct lock_struct)); + /* Set new start and size. */ + lck_arr[0].start = plock->start; + lck_arr[0].size = (ex->start + ex->size) - plock->start; + return 1; + } + } + +/********************************************* + +-----------------------+ + | ex | + +-----------------------+ + +---------------+ + | plock | + +---------------+ +OR.... + +-------+ + | ex | + +-------+ + +---------------+ + | plock | + +---------------+ +BECOMES.... + +-------+---------------+ + | ex | plock | - different lock types + +-------+---------------+ + +OR.... (merge) + +-----------------------+ + | ex | - same lock type. + +-----------------------+ + +**********************************************/ + + if ( (ex->start < plock->start) && + (ex->start + ex->size >= plock->start) && + (ex->start + ex->size <= plock->start + plock->size) ) { + + *lock_was_added = True; + + /* If the lock types are the same, we merge, if different, we + add the new lock after the old. */ + + if (lock_types_differ) { + memcpy(&lck_arr[0], ex, sizeof(struct lock_struct)); + memcpy(&lck_arr[1], plock, sizeof(struct lock_struct)); + /* Adjust existing size. */ + lck_arr[0].size = plock->start - ex->start; + return 2; + } else { + /* Merge. */ + memcpy(&lck_arr[0], ex, sizeof(struct lock_struct)); + /* Adjust existing size. */ + lck_arr[0].size = (plock->start + plock->size) - ex->start; + return 1; + } + } + +/********************************************* + +---------------------------+ + | ex | + +---------------------------+ + +---------+ + | plock | + +---------+ +BECOMES..... + +-------+---------+---------+ + | ex | plock | ex | - different lock types. + +-------+---------+---------+ +OR + +---------------------------+ + | ex | - same lock type. + +---------------------------+ +**********************************************/ + + if ( (ex->start < plock->start) && (ex->start + ex->size > plock->start + plock->size) ) { + *lock_was_added = True; + + if (lock_types_differ) { + + /* We have to split ex into two locks here. */ + + memcpy(&lck_arr[0], ex, sizeof(struct lock_struct)); + memcpy(&lck_arr[1], plock, sizeof(struct lock_struct)); + memcpy(&lck_arr[2], ex, sizeof(struct lock_struct)); + + /* Adjust first existing size. */ + lck_arr[0].size = plock->start - ex->start; + + /* Adjust second existing start and size. */ + lck_arr[2].start = plock->start + plock->size; + lck_arr[2].size = (ex->start + ex->size) - (plock->start + plock->size); + return 3; + } else { + /* Just eat plock. */ + memcpy(&lck_arr[0], ex, sizeof(struct lock_struct)); + return 1; + } + } + + /* Never get here. */ + smb_panic("brlock_posix_split_merge"); + /* Notreached. */ + + /* Keep some compilers happy. 
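+ smb_panic() never returns, but not every compiler knows that.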
*/ + return 0; +} + +/**************************************************************************** + Lock a range of bytes - POSIX lock semantics. + We must cope with range splits and merges. +****************************************************************************/ + +static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx, + struct byte_range_lock *br_lck, + struct lock_struct *plock) +{ + unsigned int i, count; + struct lock_struct *locks = br_lck->lock_data; + struct lock_struct *tp; + bool lock_was_added = False; + bool signal_pending_read = False; + + /* No zero-zero locks for POSIX. */ + if (plock->start == 0 && plock->size == 0) { + return NT_STATUS_INVALID_PARAMETER; + } + + /* Don't allow 64-bit lock wrap. */ + if (plock->start + plock->size < plock->start || + plock->start + plock->size < plock->size) { + return NT_STATUS_INVALID_PARAMETER; + } + + /* The worst case scenario here is we have to split an + existing POSIX lock range into two, and add our lock, + so we need at most 2 more entries. */ + + tp = SMB_MALLOC_ARRAY(struct lock_struct, (br_lck->num_locks + 2)); + if (!tp) { + return NT_STATUS_NO_MEMORY; + } + + count = 0; + for (i=0; i < br_lck->num_locks; i++) { + struct lock_struct *curr_lock = &locks[i]; + + /* If we have a pending read lock, a lock downgrade should + trigger a lock re-evaluation. */ + if (curr_lock->lock_type == PENDING_READ_LOCK && + brl_pending_overlap(plock, curr_lock)) { + signal_pending_read = True; + } + + if (curr_lock->lock_flav == WINDOWS_LOCK) { + /* Do any Windows flavour locks conflict ? */ + if (brl_conflict(curr_lock, plock)) { + /* No games with error messages. */ + SAFE_FREE(tp); + /* Remember who blocked us. */ + plock->context.smbpid = curr_lock->context.smbpid; + return NT_STATUS_FILE_LOCK_CONFLICT; + } + /* Just copy the Windows lock into the new array. */ + memcpy(&tp[count], curr_lock, sizeof(struct lock_struct)); + count++; + } else { + /* POSIX conflict semantics are different. */ + if (brl_conflict_posix(curr_lock, plock)) { + /* Can't block ourselves with POSIX locks. */ + /* No games with error messages. */ + SAFE_FREE(tp); + /* Remember who blocked us. */ + plock->context.smbpid = curr_lock->context.smbpid; + return NT_STATUS_FILE_LOCK_CONFLICT; + } + + /* Work out overlaps. */ + count += brlock_posix_split_merge(&tp[count], curr_lock, plock, &lock_was_added); + } + } + + if (!lock_was_added) { + memcpy(&tp[count], plock, sizeof(struct lock_struct)); + count++; + } + + /* We can get the POSIX lock, now see if it needs to + be mapped into a lower level POSIX one, and if so can + we get it ? */ + + if (!IS_PENDING_LOCK(plock->lock_type) && lp_posix_locking(br_lck->fsp->conn->params)) { + int errno_ret; + + /* The lower layer just needs to attempt to + get the system POSIX lock. We've weeded out + any conflicts above. */ + + if (!set_posix_lock_posix_flavour(br_lck->fsp, + plock->start, + plock->size, + plock->lock_type, + &errno_ret)) { + + /* We don't know who blocked us. */ + plock->context.smbpid = 0xFFFFFFFF; + + if (errno_ret == EACCES || errno_ret == EAGAIN) { + SAFE_FREE(tp); + return NT_STATUS_FILE_LOCK_CONFLICT; + } else { + SAFE_FREE(tp); + return map_nt_error_from_unix(errno); + } + } + } + + /* Realloc so we don't leak entries per lock call. 
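+ tp was sized for the worst case (num_locks + 2); trim it to the
+ number of entries actually used.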
*/ + tp = (struct lock_struct *)SMB_REALLOC(tp, count * sizeof(*locks)); + if (!tp) { + return NT_STATUS_NO_MEMORY; + } + br_lck->num_locks = count; + SAFE_FREE(br_lck->lock_data); + br_lck->lock_data = tp; + locks = tp; + br_lck->modified = True; + + /* A successful downgrade from write to read lock can trigger a lock + re-evalutation where waiting readers can now proceed. */ + + if (signal_pending_read) { + /* Send unlock messages to any pending read waiters that overlap. */ + for (i=0; i < br_lck->num_locks; i++) { + struct lock_struct *pend_lock = &locks[i]; + + /* Ignore non-pending locks. */ + if (!IS_PENDING_LOCK(pend_lock->lock_type)) { + continue; + } + + if (pend_lock->lock_type == PENDING_READ_LOCK && + brl_pending_overlap(plock, pend_lock)) { + DEBUG(10,("brl_lock_posix: sending unlock message to pid %s\n", + procid_str_static(&pend_lock->context.pid ))); + + messaging_send(msg_ctx, pend_lock->context.pid, + MSG_SMB_UNLOCK, &data_blob_null); + } + } + } + + return NT_STATUS_OK; +} + +/**************************************************************************** + Lock a range of bytes. +****************************************************************************/ + +NTSTATUS brl_lock(struct messaging_context *msg_ctx, + struct byte_range_lock *br_lck, + uint32 smbpid, + struct server_id pid, + br_off start, + br_off size, + enum brl_type lock_type, + enum brl_flavour lock_flav, + bool blocking_lock, + uint32 *psmbpid) +{ + NTSTATUS ret; + struct lock_struct lock; + +#if !ZERO_ZERO + if (start == 0 && size == 0) { + DEBUG(0,("client sent 0/0 lock - please report this\n")); + } +#endif + +#ifdef DEVELOPER + /* Quieten valgrind on test. */ + memset(&lock, '\0', sizeof(lock)); +#endif + + lock.context.smbpid = smbpid; + lock.context.pid = pid; + lock.context.tid = br_lck->fsp->conn->cnum; + lock.start = start; + lock.size = size; + lock.fnum = br_lck->fsp->fnum; + lock.lock_type = lock_type; + lock.lock_flav = lock_flav; + + if (lock_flav == WINDOWS_LOCK) { + ret = brl_lock_windows(br_lck, &lock, blocking_lock); + } else { + ret = brl_lock_posix(msg_ctx, br_lck, &lock); + } + +#if ZERO_ZERO + /* sort the lock list */ + qsort(br_lck->lock_data, (size_t)br_lck->num_locks, sizeof(lock), lock_compare); +#endif + + /* If we're returning an error, return who blocked us. */ + if (!NT_STATUS_IS_OK(ret) && psmbpid) { + *psmbpid = lock.context.smbpid; + } + return ret; +} + +/**************************************************************************** + Unlock a range of bytes - Windows semantics. +****************************************************************************/ + +static bool brl_unlock_windows(struct messaging_context *msg_ctx, + struct byte_range_lock *br_lck, + const struct lock_struct *plock) +{ + unsigned int i, j; + struct lock_struct *locks = br_lck->lock_data; + enum brl_type deleted_lock_type = READ_LOCK; /* shut the compiler up.... */ + +#if ZERO_ZERO + /* Delete write locks by preference... The lock list + is sorted in the zero zero case. */ + + for (i = 0; i < br_lck->num_locks; i++) { + struct lock_struct *lock = &locks[i]; + + if (lock->lock_type == WRITE_LOCK && + brl_same_context(&lock->context, &plock->context) && + lock->fnum == plock->fnum && + lock->lock_flav == WINDOWS_LOCK && + lock->start == plock->start && + lock->size == plock->size) { + + /* found it - delete it */ + deleted_lock_type = lock->lock_type; + break; + } + } + + if (i != br_lck->num_locks) { + /* We found it - don't search again. 
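+ Skip the generic search below and go straight to the delete.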
*/ + goto unlock_continue; + } +#endif + + for (i = 0; i < br_lck->num_locks; i++) { + struct lock_struct *lock = &locks[i]; + + /* Only remove our own locks that match in start, size, and flavour. */ + if (brl_same_context(&lock->context, &plock->context) && + lock->fnum == plock->fnum && + lock->lock_flav == WINDOWS_LOCK && + lock->start == plock->start && + lock->size == plock->size ) { + deleted_lock_type = lock->lock_type; + break; + } + } + + if (i == br_lck->num_locks) { + /* we didn't find it */ + return False; + } + +#if ZERO_ZERO + unlock_continue: +#endif + + /* Actually delete the lock. */ + if (i < br_lck->num_locks - 1) { + memmove(&locks[i], &locks[i+1], + sizeof(*locks)*((br_lck->num_locks-1) - i)); + } + + br_lck->num_locks -= 1; + br_lck->modified = True; + + /* Unlock the underlying POSIX regions. */ + if(lp_posix_locking(br_lck->fsp->conn->params)) { + release_posix_lock_windows_flavour(br_lck->fsp, + plock->start, + plock->size, + deleted_lock_type, + &plock->context, + locks, + br_lck->num_locks); + } + + /* Send unlock messages to any pending waiters that overlap. */ + for (j=0; j < br_lck->num_locks; j++) { + struct lock_struct *pend_lock = &locks[j]; + + /* Ignore non-pending locks. */ + if (!IS_PENDING_LOCK(pend_lock->lock_type)) { + continue; + } + + /* We could send specific lock info here... */ + if (brl_pending_overlap(plock, pend_lock)) { + DEBUG(10,("brl_unlock: sending unlock message to pid %s\n", + procid_str_static(&pend_lock->context.pid ))); + + messaging_send(msg_ctx, pend_lock->context.pid, + MSG_SMB_UNLOCK, &data_blob_null); + } + } + + return True; +} + +/**************************************************************************** + Unlock a range of bytes - POSIX semantics. +****************************************************************************/ + +static bool brl_unlock_posix(struct messaging_context *msg_ctx, + struct byte_range_lock *br_lck, + const struct lock_struct *plock) +{ + unsigned int i, j, count; + struct lock_struct *tp; + struct lock_struct *locks = br_lck->lock_data; + bool overlap_found = False; + + /* No zero-zero locks for POSIX. */ + if (plock->start == 0 && plock->size == 0) { + return False; + } + + /* Don't allow 64-bit lock wrap. */ + if (plock->start + plock->size < plock->start || + plock->start + plock->size < plock->size) { + DEBUG(10,("brl_unlock_posix: lock wrap\n")); + return False; + } + + /* The worst case scenario here is we have to split an + existing POSIX lock range into two, so we need at most + 1 more entry. */ + + tp = SMB_MALLOC_ARRAY(struct lock_struct, (br_lck->num_locks + 1)); + if (!tp) { + DEBUG(10,("brl_unlock_posix: malloc fail\n")); + return False; + } + + count = 0; + for (i = 0; i < br_lck->num_locks; i++) { + struct lock_struct *lock = &locks[i]; + struct lock_struct tmp_lock[3]; + bool lock_was_added = False; + unsigned int tmp_count; + + /* Only remove our own locks - ignore fnum. */ + if (IS_PENDING_LOCK(lock->lock_type) || + !brl_same_context(&lock->context, &plock->context)) { + memcpy(&tp[count], lock, sizeof(struct lock_struct)); + count++; + continue; + } + + /* Work out overlaps. */ + tmp_count = brlock_posix_split_merge(&tmp_lock[0], &locks[i], plock, &lock_was_added); + + if (tmp_count == 1) { + /* Ether the locks didn't overlap, or the unlock completely + overlapped this lock. If it didn't overlap, then there's + no change in the locks. */ + if (tmp_lock[0].lock_type != UNLOCK_LOCK) { + SMB_ASSERT(tmp_lock[0].lock_type == locks[i].lock_type); + /* No change in this lock. 
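+ The unlock range missed this lock entirely, so copy it through
+ unchanged.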
*/ + memcpy(&tp[count], &tmp_lock[0], sizeof(struct lock_struct)); + count++; + } else { + SMB_ASSERT(tmp_lock[0].lock_type == UNLOCK_LOCK); + overlap_found = True; + } + continue; + } else if (tmp_count == 2) { + /* The unlock overlapped an existing lock. Copy the truncated + lock into the lock array. */ + if (tmp_lock[0].lock_type != UNLOCK_LOCK) { + SMB_ASSERT(tmp_lock[0].lock_type == locks[i].lock_type); + SMB_ASSERT(tmp_lock[1].lock_type == UNLOCK_LOCK); + memcpy(&tp[count], &tmp_lock[0], sizeof(struct lock_struct)); + if (tmp_lock[0].size != locks[i].size) { + overlap_found = True; + } + } else { + SMB_ASSERT(tmp_lock[0].lock_type == UNLOCK_LOCK); + SMB_ASSERT(tmp_lock[1].lock_type == locks[i].lock_type); + memcpy(&tp[count], &tmp_lock[1], sizeof(struct lock_struct)); + if (tmp_lock[1].start != locks[i].start) { + overlap_found = True; + } + } + count++; + continue; + } else { + /* tmp_count == 3 - (we split a lock range in two). */ + SMB_ASSERT(tmp_lock[0].lock_type == locks[i].lock_type); + SMB_ASSERT(tmp_lock[1].lock_type == UNLOCK_LOCK); + SMB_ASSERT(tmp_lock[2].lock_type == locks[i].lock_type); + + memcpy(&tp[count], &tmp_lock[0], sizeof(struct lock_struct)); + count++; + memcpy(&tp[count], &tmp_lock[2], sizeof(struct lock_struct)); + count++; + overlap_found = True; + /* Optimisation... */ + /* We know we're finished here as we can't overlap any + more POSIX locks. Copy the rest of the lock array. */ + if (i < br_lck->num_locks - 1) { + memcpy(&tp[count], &locks[i+1], + sizeof(*locks)*((br_lck->num_locks-1) - i)); + count += ((br_lck->num_locks-1) - i); + } + break; + } + } + + if (!overlap_found) { + /* Just ignore - no change. */ + SAFE_FREE(tp); + DEBUG(10,("brl_unlock_posix: No overlap - unlocked.\n")); + return True; + } + + /* Unlock any POSIX regions. */ + if(lp_posix_locking(br_lck->fsp->conn->params)) { + release_posix_lock_posix_flavour(br_lck->fsp, + plock->start, + plock->size, + &plock->context, + tp, + count); + } + + /* Realloc so we don't leak entries per unlock call. */ + if (count) { + tp = (struct lock_struct *)SMB_REALLOC(tp, count * sizeof(*locks)); + if (!tp) { + DEBUG(10,("brl_unlock_posix: realloc fail\n")); + return False; + } + } else { + /* We deleted the last lock. */ + SAFE_FREE(tp); + tp = NULL; + } + + br_lck->num_locks = count; + SAFE_FREE(br_lck->lock_data); + locks = tp; + br_lck->lock_data = tp; + br_lck->modified = True; + + /* Send unlock messages to any pending waiters that overlap. */ + + for (j=0; j < br_lck->num_locks; j++) { + struct lock_struct *pend_lock = &locks[j]; + + /* Ignore non-pending locks. */ + if (!IS_PENDING_LOCK(pend_lock->lock_type)) { + continue; + } + + /* We could send specific lock info here... */ + if (brl_pending_overlap(plock, pend_lock)) { + DEBUG(10,("brl_unlock: sending unlock message to pid %s\n", + procid_str_static(&pend_lock->context.pid ))); + + messaging_send(msg_ctx, pend_lock->context.pid, + MSG_SMB_UNLOCK, &data_blob_null); + } + } + + return True; +} + +/**************************************************************************** + Unlock a range of bytes. 
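+ Dispatches on lock flavour to the Windows or POSIX logic above.
+ A minimal caller sketch (this mirrors do_unlock() in locking.c;
+ fsp and msg_ctx are assumed valid, error handling elided):
+
+   struct byte_range_lock *br_lck = brl_get_locks(talloc_tos(), fsp);
+   bool ok = brl_unlock(msg_ctx, br_lck, lock_pid, procid_self(),
+                        offset, count, WINDOWS_LOCK);
+   TALLOC_FREE(br_lck); /* destructor stores the record and unlocks it */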
+****************************************************************************/ + +bool brl_unlock(struct messaging_context *msg_ctx, + struct byte_range_lock *br_lck, + uint32 smbpid, + struct server_id pid, + br_off start, + br_off size, + enum brl_flavour lock_flav) +{ + struct lock_struct lock; + + lock.context.smbpid = smbpid; + lock.context.pid = pid; + lock.context.tid = br_lck->fsp->conn->cnum; + lock.start = start; + lock.size = size; + lock.fnum = br_lck->fsp->fnum; + lock.lock_type = UNLOCK_LOCK; + lock.lock_flav = lock_flav; + + if (lock_flav == WINDOWS_LOCK) { + return brl_unlock_windows(msg_ctx, br_lck, &lock); + } else { + return brl_unlock_posix(msg_ctx, br_lck, &lock); + } +} + +/**************************************************************************** + Test if we could add a lock if we wanted to. + Returns True if the region required is currently unlocked, False if locked. +****************************************************************************/ + +bool brl_locktest(struct byte_range_lock *br_lck, + uint32 smbpid, + struct server_id pid, + br_off start, + br_off size, + enum brl_type lock_type, + enum brl_flavour lock_flav) +{ + bool ret = True; + unsigned int i; + struct lock_struct lock; + const struct lock_struct *locks = br_lck->lock_data; + files_struct *fsp = br_lck->fsp; + + lock.context.smbpid = smbpid; + lock.context.pid = pid; + lock.context.tid = br_lck->fsp->conn->cnum; + lock.start = start; + lock.size = size; + lock.fnum = fsp->fnum; + lock.lock_type = lock_type; + lock.lock_flav = lock_flav; + + /* Make sure existing locks don't conflict */ + for (i=0; i < br_lck->num_locks; i++) { + /* + * Our own locks don't conflict. + */ + if (brl_conflict_other(&locks[i], &lock)) { + return False; + } + } + + /* + * There is no lock held by an SMB daemon, check to + * see if there is a POSIX lock from a UNIX or NFS process. + * This only conflicts with Windows locks, not POSIX locks. + */ + + if(lp_posix_locking(fsp->conn->params) && (lock_flav == WINDOWS_LOCK)) { + ret = is_posix_locked(fsp, &start, &size, &lock_type, WINDOWS_LOCK); + + DEBUG(10,("brl_locktest: posix start=%.0f len=%.0f %s for fnum %d file %s\n", + (double)start, (double)size, ret ? "locked" : "unlocked", + fsp->fnum, fsp->fsp_name )); + + /* We need to return the inverse of is_posix_locked. */ + ret = !ret; + } + + /* no conflicts - we could have added it */ + return ret; +} + +/**************************************************************************** + Query for existing locks. 
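+ On conflict, the blocking lock's smbpid, range and type are written
+ back through the in/out pointers and NT_STATUS_LOCK_NOT_GRANTED is
+ returned. A minimal caller sketch (mirrors query_lock() in
+ locking.c; the local variables are illustrative assumptions):
+
+   struct byte_range_lock *br_lck = brl_get_locks_readonly(talloc_tos(), fsp);
+   status = brl_lockquery(br_lck, &smbpid, procid_self(),
+                          &offset, &count, &lock_type, POSIX_LOCK);
+   TALLOC_FREE(br_lck);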
+****************************************************************************/ + +NTSTATUS brl_lockquery(struct byte_range_lock *br_lck, + uint32 *psmbpid, + struct server_id pid, + br_off *pstart, + br_off *psize, + enum brl_type *plock_type, + enum brl_flavour lock_flav) +{ + unsigned int i; + struct lock_struct lock; + const struct lock_struct *locks = br_lck->lock_data; + files_struct *fsp = br_lck->fsp; + + lock.context.smbpid = *psmbpid; + lock.context.pid = pid; + lock.context.tid = br_lck->fsp->conn->cnum; + lock.start = *pstart; + lock.size = *psize; + lock.fnum = fsp->fnum; + lock.lock_type = *plock_type; + lock.lock_flav = lock_flav; + + /* Make sure existing locks don't conflict */ + for (i=0; i < br_lck->num_locks; i++) { + const struct lock_struct *exlock = &locks[i]; + bool conflict = False; + + if (exlock->lock_flav == WINDOWS_LOCK) { + conflict = brl_conflict(exlock, &lock); + } else { + conflict = brl_conflict_posix(exlock, &lock); + } + + if (conflict) { + *psmbpid = exlock->context.smbpid; + *pstart = exlock->start; + *psize = exlock->size; + *plock_type = exlock->lock_type; + return NT_STATUS_LOCK_NOT_GRANTED; + } + } + + /* + * There is no lock held by an SMB daemon, check to + * see if there is a POSIX lock from a UNIX or NFS process. + */ + + if(lp_posix_locking(fsp->conn->params)) { + bool ret = is_posix_locked(fsp, pstart, psize, plock_type, POSIX_LOCK); + + DEBUG(10,("brl_lockquery: posix start=%.0f len=%.0f %s for fnum %d file %s\n", + (double)*pstart, (double)*psize, ret ? "locked" : "unlocked", + fsp->fnum, fsp->fsp_name )); + + if (ret) { + /* Hmmm. No clue what to set smbpid to - use -1. */ + *psmbpid = 0xFFFF; + return NT_STATUS_LOCK_NOT_GRANTED; + } + } + + return NT_STATUS_OK; +} + +/**************************************************************************** + Remove a particular pending lock. +****************************************************************************/ + +bool brl_lock_cancel(struct byte_range_lock *br_lck, + uint32 smbpid, + struct server_id pid, + br_off start, + br_off size, + enum brl_flavour lock_flav) +{ + unsigned int i; + struct lock_struct *locks = br_lck->lock_data; + struct lock_context context; + + context.smbpid = smbpid; + context.pid = pid; + context.tid = br_lck->fsp->conn->cnum; + + for (i = 0; i < br_lck->num_locks; i++) { + struct lock_struct *lock = &locks[i]; + + /* For pending locks we *always* care about the fnum. */ + if (brl_same_context(&lock->context, &context) && + lock->fnum == br_lck->fsp->fnum && + IS_PENDING_LOCK(lock->lock_type) && + lock->lock_flav == lock_flav && + lock->start == start && + lock->size == size) { + break; + } + } + + if (i == br_lck->num_locks) { + /* Didn't find it. */ + return False; + } + + if (i < br_lck->num_locks - 1) { + /* Found this particular pending lock - delete it */ + memmove(&locks[i], &locks[i+1], + sizeof(*locks)*((br_lck->num_locks-1) - i)); + } + + br_lck->num_locks -= 1; + br_lck->modified = True; + return True; +} + +/**************************************************************************** + Remove any locks associated with a open file. + We return True if this process owns any other Windows locks on this + fd and so we should not immediately close the fd. 
+****************************************************************************/ + +void brl_close_fnum(struct messaging_context *msg_ctx, + struct byte_range_lock *br_lck) +{ + files_struct *fsp = br_lck->fsp; + uint16 tid = fsp->conn->cnum; + int fnum = fsp->fnum; + unsigned int i, j, dcount=0; + int num_deleted_windows_locks = 0; + struct lock_struct *locks = br_lck->lock_data; + struct server_id pid = procid_self(); + bool unlock_individually = False; + + if(lp_posix_locking(fsp->conn->params)) { + + /* Check if there are any Windows locks associated with this dev/ino + pair that are not this fnum. If so we need to call unlock on each + one in order to release the system POSIX locks correctly. */ + + for (i=0; i < br_lck->num_locks; i++) { + struct lock_struct *lock = &locks[i]; + + if (!procid_equal(&lock->context.pid, &pid)) { + continue; + } + + if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) { + continue; /* Ignore pending. */ + } + + if (lock->context.tid != tid || lock->fnum != fnum) { + unlock_individually = True; + break; + } + } + + if (unlock_individually) { + struct lock_struct *locks_copy; + unsigned int num_locks_copy; + + /* Copy the current lock array. */ + if (br_lck->num_locks) { + locks_copy = (struct lock_struct *)TALLOC_MEMDUP(br_lck, locks, br_lck->num_locks * sizeof(struct lock_struct)); + if (!locks_copy) { + smb_panic("brl_close_fnum: talloc failed"); + } + } else { + locks_copy = NULL; + } + + num_locks_copy = br_lck->num_locks; + + for (i=0; i < num_locks_copy; i++) { + struct lock_struct *lock = &locks_copy[i]; + + if (lock->context.tid == tid && procid_equal(&lock->context.pid, &pid) && + (lock->fnum == fnum)) { + brl_unlock(msg_ctx, + br_lck, + lock->context.smbpid, + pid, + lock->start, + lock->size, + lock->lock_flav); + } + } + return; + } + } + + /* We can bulk delete - any POSIX locks will be removed when the fd closes. */ + + /* Remove any existing locks for this fnum (or any fnum if they're POSIX). */ + + for (i=0; i < br_lck->num_locks; i++) { + struct lock_struct *lock = &locks[i]; + bool del_this_lock = False; + + if (lock->context.tid == tid && procid_equal(&lock->context.pid, &pid)) { + if ((lock->lock_flav == WINDOWS_LOCK) && (lock->fnum == fnum)) { + del_this_lock = True; + num_deleted_windows_locks++; + } else if (lock->lock_flav == POSIX_LOCK) { + del_this_lock = True; + } + } + + if (del_this_lock) { + /* Send unlock messages to any pending waiters that overlap. */ + for (j=0; j < br_lck->num_locks; j++) { + struct lock_struct *pend_lock = &locks[j]; + + /* Ignore our own or non-pending locks. */ + if (!IS_PENDING_LOCK(pend_lock->lock_type)) { + continue; + } + + /* Optimisation - don't send to this fnum as we're + closing it. */ + if (pend_lock->context.tid == tid && + procid_equal(&pend_lock->context.pid, &pid) && + pend_lock->fnum == fnum) { + continue; + } + + /* We could send specific lock info here... */ + if (brl_pending_overlap(lock, pend_lock)) { + messaging_send(msg_ctx, pend_lock->context.pid, + MSG_SMB_UNLOCK, &data_blob_null); + } + } + + /* found it - delete it */ + if (br_lck->num_locks > 1 && i < br_lck->num_locks - 1) { + memmove(&locks[i], &locks[i+1], + sizeof(*locks)*((br_lck->num_locks-1) - i)); + } + br_lck->num_locks--; + br_lck->modified = True; + i--; + dcount++; + } + } + + if(lp_posix_locking(fsp->conn->params) && num_deleted_windows_locks) { + /* Reduce the Windows lock POSIX reference count on this dev/ino pair. 
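+ The count tracks how many Windows locks on this file are backed by
+ system POSIX locks; drop it by the number we just deleted.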
*/ + reduce_windows_lock_ref_count(fsp, num_deleted_windows_locks); + } +} + +/**************************************************************************** + Ensure this set of lock entries is valid. +****************************************************************************/ + +static bool validate_lock_entries(unsigned int *pnum_entries, struct lock_struct **pplocks) +{ + unsigned int i; + unsigned int num_valid_entries = 0; + struct lock_struct *locks = *pplocks; + + for (i = 0; i < *pnum_entries; i++) { + struct lock_struct *lock_data = &locks[i]; + if (!process_exists(lock_data->context.pid)) { + /* This process no longer exists - mark this + entry as invalid by zeroing it. */ + ZERO_STRUCTP(lock_data); + } else { + num_valid_entries++; + } + } + + if (num_valid_entries != *pnum_entries) { + struct lock_struct *new_lock_data = NULL; + + if (num_valid_entries) { + new_lock_data = SMB_MALLOC_ARRAY(struct lock_struct, num_valid_entries); + if (!new_lock_data) { + DEBUG(3, ("malloc fail\n")); + return False; + } + + num_valid_entries = 0; + for (i = 0; i < *pnum_entries; i++) { + struct lock_struct *lock_data = &locks[i]; + if (lock_data->context.smbpid && + lock_data->context.tid) { + /* Valid (nonzero) entry - copy it. */ + memcpy(&new_lock_data[num_valid_entries], + lock_data, sizeof(struct lock_struct)); + num_valid_entries++; + } + } + } + + SAFE_FREE(*pplocks); + *pplocks = new_lock_data; + *pnum_entries = num_valid_entries; + } + + return True; +} + +struct brl_forall_cb { + void (*fn)(struct file_id id, struct server_id pid, + enum brl_type lock_type, + enum brl_flavour lock_flav, + br_off start, br_off size, + void *private_data); + void *private_data; +}; + +/**************************************************************************** + Traverse the whole database with this function, calling traverse_callback + on each lock. +****************************************************************************/ + +static int traverse_fn(struct db_record *rec, void *state) +{ + struct brl_forall_cb *cb = (struct brl_forall_cb *)state; + struct lock_struct *locks; + struct file_id *key; + unsigned int i; + unsigned int num_locks = 0; + unsigned int orig_num_locks = 0; + + /* In a traverse function we must make a copy of + dbuf before modifying it. */ + + locks = (struct lock_struct *)memdup(rec->value.dptr, + rec->value.dsize); + if (!locks) { + return -1; /* Terminate traversal. */ + } + + key = (struct file_id *)rec->key.dptr; + orig_num_locks = num_locks = rec->value.dsize/sizeof(*locks); + + /* Ensure the lock db is clean of entries from invalid processes. */ + + if (!validate_lock_entries(&num_locks, &locks)) { + SAFE_FREE(locks); + return -1; /* Terminate traversal */ + } + + if (orig_num_locks != num_locks) { + if (num_locks) { + TDB_DATA data; + data.dptr = (uint8_t *)locks; + data.dsize = num_locks*sizeof(struct lock_struct); + rec->store(rec, data, TDB_REPLACE); + } else { + rec->delete_rec(rec); + } + } + + if (cb->fn) { + for ( i=0; i<num_locks; i++) { + cb->fn(*key, + locks[i].context.pid, + locks[i].lock_type, + locks[i].lock_flav, + locks[i].start, + locks[i].size, + cb->private_data); + } + } + + SAFE_FREE(locks); + return 0; +} + +/******************************************************************* + Call the specified function on each lock in the database. 
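+ fn may be NULL, in which case the traverse still validates the
+ records and cleans out entries left by dead processes.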
+********************************************************************/ + +int brl_forall(void (*fn)(struct file_id id, struct server_id pid, + enum brl_type lock_type, + enum brl_flavour lock_flav, + br_off start, br_off size, + void *private_data), + void *private_data) +{ + struct brl_forall_cb cb; + + if (!brlock_db) { + return 0; + } + cb.fn = fn; + cb.private_data = private_data; + return brlock_db->traverse(brlock_db, traverse_fn, &cb); +} + +/******************************************************************* + Store a potentially modified set of byte range lock data back into + the database. + Unlock the record. +********************************************************************/ + +static int byte_range_lock_destructor(struct byte_range_lock *br_lck) +{ + if (br_lck->read_only) { + SMB_ASSERT(!br_lck->modified); + } + + if (!br_lck->modified) { + goto done; + } + + if (br_lck->num_locks == 0) { + /* No locks - delete this entry. */ + NTSTATUS status = br_lck->record->delete_rec(br_lck->record); + if (!NT_STATUS_IS_OK(status)) { + DEBUG(0, ("delete_rec returned %s\n", + nt_errstr(status))); + smb_panic("Could not delete byte range lock entry"); + } + } else { + TDB_DATA data; + NTSTATUS status; + + data.dptr = (uint8 *)br_lck->lock_data; + data.dsize = br_lck->num_locks * sizeof(struct lock_struct); + + status = br_lck->record->store(br_lck->record, data, + TDB_REPLACE); + if (!NT_STATUS_IS_OK(status)) { + DEBUG(0, ("store returned %s\n", nt_errstr(status))); + smb_panic("Could not store byte range mode entry"); + } + } + + done: + + SAFE_FREE(br_lck->lock_data); + TALLOC_FREE(br_lck->record); + return 0; +} + +/******************************************************************* + Fetch a set of byte range lock data from the database. + Leave the record locked. + TALLOC_FREE(brl) will release the lock in the destructor. +********************************************************************/ + +static struct byte_range_lock *brl_get_locks_internal(TALLOC_CTX *mem_ctx, + files_struct *fsp, bool read_only) +{ + TDB_DATA key, data; + struct byte_range_lock *br_lck = TALLOC_P(mem_ctx, struct byte_range_lock); + + if (br_lck == NULL) { + return NULL; + } + + br_lck->fsp = fsp; + br_lck->num_locks = 0; + br_lck->modified = False; + memset(&br_lck->key, '\0', sizeof(struct file_id)); + br_lck->key = fsp->file_id; + + key.dptr = (uint8 *)&br_lck->key; + key.dsize = sizeof(struct file_id); + + if (!fsp->lockdb_clean) { + /* We must be read/write to clean + the dead entries. 
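+ The first access through this fsp runs validate_lock_entries()
+ below, and any cleanup must be storable when the destructor fires.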
*/ + read_only = False; + } + + if (read_only) { + if (brlock_db->fetch(brlock_db, br_lck, key, &data) == -1) { + DEBUG(3, ("Could not fetch byte range lock record\n")); + TALLOC_FREE(br_lck); + return NULL; + } + br_lck->record = NULL; + } + else { + br_lck->record = brlock_db->fetch_locked(brlock_db, br_lck, key); + + if (br_lck->record == NULL) { + DEBUG(3, ("Could not lock byte range lock entry\n")); + TALLOC_FREE(br_lck); + return NULL; + } + + data = br_lck->record->value; + } + + br_lck->read_only = read_only; + br_lck->lock_data = NULL; + + talloc_set_destructor(br_lck, byte_range_lock_destructor); + + br_lck->num_locks = data.dsize / sizeof(struct lock_struct); + + if (br_lck->num_locks != 0) { + br_lck->lock_data = SMB_MALLOC_ARRAY(struct lock_struct, + br_lck->num_locks); + if (br_lck->lock_data == NULL) { + DEBUG(0, ("malloc failed\n")); + TALLOC_FREE(br_lck); + return NULL; + } + + memcpy(br_lck->lock_data, data.dptr, data.dsize); + } + + if (!fsp->lockdb_clean) { + int orig_num_locks = br_lck->num_locks; + + /* This is the first time we've accessed this. */ + /* Go through and ensure all entries exist - remove any that don't. */ + /* Makes the lockdb self cleaning at low cost. */ + + if (!validate_lock_entries(&br_lck->num_locks, + &br_lck->lock_data)) { + SAFE_FREE(br_lck->lock_data); + TALLOC_FREE(br_lck); + return NULL; + } + + /* Ensure invalid locks are cleaned up in the destructor. */ + if (orig_num_locks != br_lck->num_locks) { + br_lck->modified = True; + } + + /* Mark the lockdb as "clean" as seen from this open file. */ + fsp->lockdb_clean = True; + } + + if (DEBUGLEVEL >= 10) { + unsigned int i; + struct lock_struct *locks = br_lck->lock_data; + DEBUG(10,("brl_get_locks_internal: %u current locks on file_id %s\n", + br_lck->num_locks, + file_id_string_tos(&fsp->file_id))); + for( i = 0; i < br_lck->num_locks; i++) { + print_lock_struct(i, &locks[i]); + } + } + return br_lck; +} + +struct byte_range_lock *brl_get_locks(TALLOC_CTX *mem_ctx, + files_struct *fsp) +{ + return brl_get_locks_internal(mem_ctx, fsp, False); +} + +struct byte_range_lock *brl_get_locks_readonly(TALLOC_CTX *mem_ctx, + files_struct *fsp) +{ + return brl_get_locks_internal(mem_ctx, fsp, True); +} + +struct brl_revalidate_state { + ssize_t array_size; + uint32 num_pids; + struct server_id *pids; +}; + +/* + * Collect PIDs of all processes with pending entries + */ + +static void brl_revalidate_collect(struct file_id id, struct server_id pid, + enum brl_type lock_type, + enum brl_flavour lock_flav, + br_off start, br_off size, + void *private_data) +{ + struct brl_revalidate_state *state = + (struct brl_revalidate_state *)private_data; + + if (!IS_PENDING_LOCK(lock_type)) { + return; + } + + add_to_large_array(state, sizeof(pid), (void *)&pid, + &state->pids, &state->num_pids, + &state->array_size); +} + +/* + * qsort callback to sort the processes + */ + +static int compare_procids(const void *p1, const void *p2) +{ + const struct server_id *i1 = (struct server_id *)p1; + const struct server_id *i2 = (struct server_id *)p2; + + if (i1->pid < i2->pid) return -1; + if (i2->pid > i2->pid) return 1; + return 0; +} + +/* + * Send a MSG_SMB_UNLOCK message to all processes with pending byte range + * locks so that they retry. Mainly used in the cluster code after a node has + * died. + * + * Done in two steps to avoid double-sends: First we collect all entries in an + * array, then qsort that array and only send to non-dupes. 
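+ Sorting brings duplicate pids together, so one pass comparing
+ against the previously sent pid filters them out.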
+ */ + +static void brl_revalidate(struct messaging_context *msg_ctx, + void *private_data, + uint32_t msg_type, + struct server_id server_id, + DATA_BLOB *data) +{ + struct brl_revalidate_state *state; + uint32 i; + struct server_id last_pid; + + if (!(state = TALLOC_ZERO_P(NULL, struct brl_revalidate_state))) { + DEBUG(0, ("talloc failed\n")); + return; + } + + brl_forall(brl_revalidate_collect, state); + + if (state->array_size == -1) { + DEBUG(0, ("talloc failed\n")); + goto done; + } + + if (state->num_pids == 0) { + goto done; + } + + qsort(state->pids, state->num_pids, sizeof(state->pids[0]), + compare_procids); + + ZERO_STRUCT(last_pid); + + for (i=0; i<state->num_pids; i++) { + if (procid_equal(&last_pid, &state->pids[i])) { + /* + * We've seen that one already + */ + continue; + } + + messaging_send(msg_ctx, state->pids[i], MSG_SMB_UNLOCK, + &data_blob_null); + last_pid = state->pids[i]; + } + + done: + TALLOC_FREE(state); + return; +} + +void brl_register_msgs(struct messaging_context *msg_ctx) +{ + messaging_register(msg_ctx, NULL, MSG_SMB_BRL_VALIDATE, + brl_revalidate); +} diff --git a/source3/locking/locking.c b/source3/locking/locking.c new file mode 100644 index 0000000000..368ab1687c --- /dev/null +++ b/source3/locking/locking.c @@ -0,0 +1,1511 @@ +/* + Unix SMB/CIFS implementation. + Locking functions + Copyright (C) Andrew Tridgell 1992-2000 + Copyright (C) Jeremy Allison 1992-2006 + Copyright (C) Volker Lendecke 2005 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + + Revision History: + + 12 aug 96: Erik.Devriendt@te6.siemens.be + added support for shared memory implementation of share mode locking + + May 1997. Jeremy Allison (jallison@whistle.com). Modified share mode + locking to deal with multiple share modes per open file. + + September 1997. Jeremy Allison (jallison@whistle.com). Added oplock + support. + + rewrtten completely to use new tdb code. Tridge, Dec '99 + + Added POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000. + Added Unix Extensions POSIX locking support. Jeremy Allison Mar 2006. +*/ + +#include "includes.h" + +#undef DBGC_CLASS +#define DBGC_CLASS DBGC_LOCKING + +#define NO_LOCKING_COUNT (-1) + +/* the locking database handle */ +static struct db_context *lock_db; + +/**************************************************************************** + Debugging aids :-). +****************************************************************************/ + +const char *lock_type_name(enum brl_type lock_type) +{ + switch (lock_type) { + case READ_LOCK: + return "READ"; + case WRITE_LOCK: + return "WRITE"; + case PENDING_READ_LOCK: + return "PENDING_READ"; + case PENDING_WRITE_LOCK: + return "PENDING_WRITE"; + default: + return "other"; + } +} + +const char *lock_flav_name(enum brl_flavour lock_flav) +{ + return (lock_flav == WINDOWS_LOCK) ? 
"WINDOWS_LOCK" : "POSIX_LOCK"; +} + +/**************************************************************************** + Utility function called to see if a file region is locked. + Called in the read/write codepath. +****************************************************************************/ + +bool is_locked(files_struct *fsp, + uint32 smbpid, + SMB_BIG_UINT count, + SMB_BIG_UINT offset, + enum brl_type lock_type) +{ + int strict_locking = lp_strict_locking(fsp->conn->params); + enum brl_flavour lock_flav = lp_posix_cifsu_locktype(fsp); + bool ret = True; + + if (count == 0) { + return False; + } + + if (!lp_locking(fsp->conn->params) || !strict_locking) { + return False; + } + + if (strict_locking == Auto) { + if (EXCLUSIVE_OPLOCK_TYPE(fsp->oplock_type) && (lock_type == READ_LOCK || lock_type == WRITE_LOCK)) { + DEBUG(10,("is_locked: optimisation - exclusive oplock on file %s\n", fsp->fsp_name )); + ret = False; + } else if ((fsp->oplock_type == LEVEL_II_OPLOCK) && + (lock_type == READ_LOCK)) { + DEBUG(10,("is_locked: optimisation - level II oplock on file %s\n", fsp->fsp_name )); + ret = False; + } else { + struct byte_range_lock *br_lck = brl_get_locks_readonly(talloc_tos(), fsp); + if (!br_lck) { + return False; + } + ret = !brl_locktest(br_lck, + smbpid, + procid_self(), + offset, + count, + lock_type, + lock_flav); + TALLOC_FREE(br_lck); + } + } else { + struct byte_range_lock *br_lck = brl_get_locks_readonly(talloc_tos(), fsp); + if (!br_lck) { + return False; + } + ret = !brl_locktest(br_lck, + smbpid, + procid_self(), + offset, + count, + lock_type, + lock_flav); + TALLOC_FREE(br_lck); + } + + DEBUG(10,("is_locked: flavour = %s brl start=%.0f len=%.0f %s for fnum %d file %s\n", + lock_flav_name(lock_flav), + (double)offset, (double)count, ret ? "locked" : "unlocked", + fsp->fnum, fsp->fsp_name )); + + return ret; +} + +/**************************************************************************** + Find out if a lock could be granted - return who is blocking us if we can't. +****************************************************************************/ + +NTSTATUS query_lock(files_struct *fsp, + uint32 *psmbpid, + SMB_BIG_UINT *pcount, + SMB_BIG_UINT *poffset, + enum brl_type *plock_type, + enum brl_flavour lock_flav) +{ + struct byte_range_lock *br_lck = NULL; + NTSTATUS status = NT_STATUS_LOCK_NOT_GRANTED; + + if (!fsp->can_lock) { + return fsp->is_directory ? NT_STATUS_INVALID_DEVICE_REQUEST : NT_STATUS_INVALID_HANDLE; + } + + if (!lp_locking(fsp->conn->params)) { + return NT_STATUS_OK; + } + + br_lck = brl_get_locks_readonly(talloc_tos(), fsp); + if (!br_lck) { + return NT_STATUS_NO_MEMORY; + } + + status = brl_lockquery(br_lck, + psmbpid, + procid_self(), + poffset, + pcount, + plock_type, + lock_flav); + + TALLOC_FREE(br_lck); + return status; +} + +/**************************************************************************** + Utility function called by locking requests. +****************************************************************************/ + +struct byte_range_lock *do_lock(struct messaging_context *msg_ctx, + files_struct *fsp, + uint32 lock_pid, + SMB_BIG_UINT count, + SMB_BIG_UINT offset, + enum brl_type lock_type, + enum brl_flavour lock_flav, + bool blocking_lock, + NTSTATUS *perr, + uint32 *plock_pid) +{ + struct byte_range_lock *br_lck = NULL; + + if (!fsp->can_lock) { + *perr = fsp->is_directory ? 
NT_STATUS_INVALID_DEVICE_REQUEST : NT_STATUS_INVALID_HANDLE; + return NULL; + } + + if (!lp_locking(fsp->conn->params)) { + *perr = NT_STATUS_OK; + return NULL; + } + + /* NOTE! 0 byte long ranges ARE allowed and should be stored */ + + DEBUG(10,("do_lock: lock flavour %s lock type %s start=%.0f len=%.0f requested for fnum %d file %s\n", + lock_flav_name(lock_flav), lock_type_name(lock_type), + (double)offset, (double)count, fsp->fnum, fsp->fsp_name )); + + br_lck = brl_get_locks(talloc_tos(), fsp); + if (!br_lck) { + *perr = NT_STATUS_NO_MEMORY; + return NULL; + } + + *perr = brl_lock(msg_ctx, + br_lck, + lock_pid, + procid_self(), + offset, + count, + lock_type, + lock_flav, + blocking_lock, + plock_pid); + + if (lock_flav == WINDOWS_LOCK && + fsp->current_lock_count != NO_LOCKING_COUNT) { + /* blocking ie. pending, locks also count here, + * as this is an efficiency counter to avoid checking + * the lock db. on close. JRA. */ + + fsp->current_lock_count++; + } else { + /* Notice that this has had a POSIX lock request. + * We can't count locks after this so forget them. + */ + fsp->current_lock_count = NO_LOCKING_COUNT; + } + + return br_lck; +} + +/**************************************************************************** + Utility function called by unlocking requests. +****************************************************************************/ + +NTSTATUS do_unlock(struct messaging_context *msg_ctx, + files_struct *fsp, + uint32 lock_pid, + SMB_BIG_UINT count, + SMB_BIG_UINT offset, + enum brl_flavour lock_flav) +{ + bool ok = False; + struct byte_range_lock *br_lck = NULL; + + if (!fsp->can_lock) { + return fsp->is_directory ? NT_STATUS_INVALID_DEVICE_REQUEST : NT_STATUS_INVALID_HANDLE; + } + + if (!lp_locking(fsp->conn->params)) { + return NT_STATUS_OK; + } + + DEBUG(10,("do_unlock: unlock start=%.0f len=%.0f requested for fnum %d file %s\n", + (double)offset, (double)count, fsp->fnum, fsp->fsp_name )); + + br_lck = brl_get_locks(talloc_tos(), fsp); + if (!br_lck) { + return NT_STATUS_NO_MEMORY; + } + + ok = brl_unlock(msg_ctx, + br_lck, + lock_pid, + procid_self(), + offset, + count, + lock_flav); + + TALLOC_FREE(br_lck); + + if (!ok) { + DEBUG(10,("do_unlock: returning ERRlock.\n" )); + return NT_STATUS_RANGE_NOT_LOCKED; + } + + if (lock_flav == WINDOWS_LOCK && + fsp->current_lock_count != NO_LOCKING_COUNT) { + SMB_ASSERT(fsp->current_lock_count > 0); + fsp->current_lock_count--; + } + + return NT_STATUS_OK; +} + +/**************************************************************************** + Cancel any pending blocked locks. +****************************************************************************/ + +NTSTATUS do_lock_cancel(files_struct *fsp, + uint32 lock_pid, + SMB_BIG_UINT count, + SMB_BIG_UINT offset, + enum brl_flavour lock_flav) +{ + bool ok = False; + struct byte_range_lock *br_lck = NULL; + + if (!fsp->can_lock) { + return fsp->is_directory ? 
+ NT_STATUS_INVALID_DEVICE_REQUEST : NT_STATUS_INVALID_HANDLE;
+ }
+
+ if (!lp_locking(fsp->conn->params)) {
+ return NT_STATUS_DOS(ERRDOS, ERRcancelviolation);
+ }
+
+ DEBUG(10,("do_lock_cancel: cancel start=%.0f len=%.0f requested for fnum %d file %s\n",
+ (double)offset, (double)count, fsp->fnum, fsp->fsp_name ));
+
+ br_lck = brl_get_locks(talloc_tos(), fsp);
+ if (!br_lck) {
+ return NT_STATUS_NO_MEMORY;
+ }
+
+ ok = brl_lock_cancel(br_lck,
+ lock_pid,
+ procid_self(),
+ offset,
+ count,
+ lock_flav);
+
+ TALLOC_FREE(br_lck);
+
+ if (!ok) {
+ DEBUG(10,("do_lock_cancel: returning ERRcancelviolation.\n" ));
+ return NT_STATUS_DOS(ERRDOS, ERRcancelviolation);
+ }
+
+ if (lock_flav == WINDOWS_LOCK &&
+ fsp->current_lock_count != NO_LOCKING_COUNT) {
+ SMB_ASSERT(fsp->current_lock_count > 0);
+ fsp->current_lock_count--;
+ }
+
+ return NT_STATUS_OK;
+}
+
+/****************************************************************************
+ Remove any locks on this fd. Called from file_close().
+****************************************************************************/
+
+void locking_close_file(struct messaging_context *msg_ctx,
+ files_struct *fsp)
+{
+ struct byte_range_lock *br_lck;
+
+ if (!lp_locking(fsp->conn->params)) {
+ return;
+ }
+
+ /* If we have no outstanding locks or pending
+ * locks then we don't need to look in the lock db.
+ */
+
+ if (fsp->current_lock_count == 0) {
+ return;
+ }
+
+ br_lck = brl_get_locks(talloc_tos(),fsp);
+
+ if (br_lck) {
+ cancel_pending_lock_requests_by_fid(fsp, br_lck);
+ brl_close_fnum(msg_ctx, br_lck);
+ TALLOC_FREE(br_lck);
+ }
+}
+
+/****************************************************************************
+ Initialise the locking functions.
+****************************************************************************/
+
+static bool locking_init_internal(bool read_only)
+{
+ brl_init(read_only);
+
+ if (lock_db)
+ return True;
+
+ lock_db = db_open(NULL, lock_path("locking.tdb"),
+ lp_open_files_db_hash_size(),
+ TDB_DEFAULT|TDB_VOLATILE|TDB_CLEAR_IF_FIRST,
+ read_only?O_RDONLY:O_RDWR|O_CREAT, 0644);
+
+ if (!lock_db) {
+ DEBUG(0,("ERROR: Failed to initialise locking database\n"));
+ return False;
+ }
+
+ if (!posix_locking_init(read_only))
+ return False;
+
+ return True;
+}
+
+bool locking_init(void)
+{
+ return locking_init_internal(false);
+}
+
+bool locking_init_readonly(void)
+{
+ return locking_init_internal(true);
+}
+
+/*******************************************************************
+ Deinitialise the share_mode management.
+******************************************************************/
+
+bool locking_end(void)
+{
+ brl_shutdown();
+ TALLOC_FREE(lock_db);
+ return true;
+}
+
+/*******************************************************************
+ Form a static locking key for a dev/inode pair.
+******************************************************************/
+
+static TDB_DATA locking_key(const struct file_id *id, struct file_id *tmp)
+{
+ *tmp = *id;
+ return make_tdb_data((const uint8_t *)tmp, sizeof(*tmp));
+}
+
+/*******************************************************************
+ Print out a share mode. 
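 (Illustrative usage with hypothetical variable names - only the
 signature below is authoritative: the returned string is talloc'ed
 off the supplied context and is the caller's to free, e.g.

	char *str = share_mode_str(talloc_tos(), i, &entry);
	DEBUG(10,("%s\n", str ? str : ""));
	TALLOC_FREE(str);

 which is the pattern print_share_mode_table() below follows.)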
+********************************************************************/ + +char *share_mode_str(TALLOC_CTX *ctx, int num, const struct share_mode_entry *e) +{ + return talloc_asprintf(ctx, "share_mode_entry[%d]: %s " + "pid = %s, share_access = 0x%x, private_options = 0x%x, " + "access_mask = 0x%x, mid = 0x%x, type= 0x%x, gen_id = %lu, " + "uid = %u, flags = %u, file_id %s", + num, + e->op_type == UNUSED_SHARE_MODE_ENTRY ? "UNUSED" : "", + procid_str_static(&e->pid), + e->share_access, e->private_options, + e->access_mask, e->op_mid, e->op_type, e->share_file_id, + (unsigned int)e->uid, (unsigned int)e->flags, + file_id_string_tos(&e->id)); +} + +/******************************************************************* + Print out a share mode table. +********************************************************************/ + +static void print_share_mode_table(struct locking_data *data) +{ + int num_share_modes = data->u.s.num_share_mode_entries; + struct share_mode_entry *shares = + (struct share_mode_entry *)(data + 1); + int i; + + for (i = 0; i < num_share_modes; i++) { + struct share_mode_entry entry; + char *str; + + /* + * We need to memcpy the entry here due to alignment + * restrictions that are not met when directly accessing + * shares[i] + */ + + memcpy(&entry, &shares[i], sizeof(struct share_mode_entry)); + str = share_mode_str(talloc_tos(), i, &entry); + + DEBUG(10,("print_share_mode_table: %s\n", str ? str : "")); + TALLOC_FREE(str); + } +} + +/******************************************************************* + Get all share mode entries for a dev/inode pair. +********************************************************************/ + +static bool parse_share_modes(TDB_DATA dbuf, struct share_mode_lock *lck) +{ + struct locking_data data; + int i; + + if (dbuf.dsize < sizeof(struct locking_data)) { + smb_panic("parse_share_modes: buffer too short"); + } + + memcpy(&data, dbuf.dptr, sizeof(data)); + + lck->delete_on_close = data.u.s.delete_on_close; + lck->old_write_time = data.u.s.old_write_time; + lck->changed_write_time = data.u.s.changed_write_time; + lck->num_share_modes = data.u.s.num_share_mode_entries; + + DEBUG(10, ("parse_share_modes: delete_on_close: %d, owrt: %s, " + "cwrt: %s, tok: %u, num_share_modes: %d\n", + lck->delete_on_close, + timestring(debug_ctx(), + convert_timespec_to_time_t(lck->old_write_time)), + timestring(debug_ctx(), + convert_timespec_to_time_t( + lck->changed_write_time)), + (unsigned int)data.u.s.delete_token_size, + lck->num_share_modes)); + + if ((lck->num_share_modes < 0) || (lck->num_share_modes > 1000000)) { + DEBUG(0, ("invalid number of share modes: %d\n", + lck->num_share_modes)); + smb_panic("parse_share_modes: invalid number of share modes"); + } + + lck->share_modes = NULL; + + if (lck->num_share_modes != 0) { + + if (dbuf.dsize < (sizeof(struct locking_data) + + (lck->num_share_modes * + sizeof(struct share_mode_entry)))) { + smb_panic("parse_share_modes: buffer too short"); + } + + lck->share_modes = (struct share_mode_entry *) + TALLOC_MEMDUP(lck, + dbuf.dptr+sizeof(struct locking_data), + lck->num_share_modes * + sizeof(struct share_mode_entry)); + + if (lck->share_modes == NULL) { + smb_panic("parse_share_modes: talloc failed"); + } + } + + /* Get any delete token. 
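   Layout sketch, inferred from the memcpy calls below: the token is
   packed as one uid_t, one gid_t, then ngroups further gid_t values,
   so any valid size satisfies

	delete_token_size == sizeof(uid_t) + (1 + ngroups) * sizeof(gid_t)

   with ngroups >= 0, which is exactly what the size and modulo
   checks below enforce.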
*/ + if (data.u.s.delete_token_size) { + uint8 *p = dbuf.dptr + sizeof(struct locking_data) + + (lck->num_share_modes * + sizeof(struct share_mode_entry)); + + if ((data.u.s.delete_token_size < sizeof(uid_t) + sizeof(gid_t)) || + ((data.u.s.delete_token_size - sizeof(uid_t)) % sizeof(gid_t)) != 0) { + DEBUG(0, ("parse_share_modes: invalid token size %d\n", + data.u.s.delete_token_size)); + smb_panic("parse_share_modes: invalid token size"); + } + + lck->delete_token = TALLOC_P(lck, UNIX_USER_TOKEN); + if (!lck->delete_token) { + smb_panic("parse_share_modes: talloc failed"); + } + + /* Copy out the uid and gid. */ + memcpy(&lck->delete_token->uid, p, sizeof(uid_t)); + p += sizeof(uid_t); + memcpy(&lck->delete_token->gid, p, sizeof(gid_t)); + p += sizeof(gid_t); + + /* Any supplementary groups ? */ + lck->delete_token->ngroups = (data.u.s.delete_token_size > (sizeof(uid_t) + sizeof(gid_t))) ? + ((data.u.s.delete_token_size - + (sizeof(uid_t) + sizeof(gid_t)))/sizeof(gid_t)) : 0; + + if (lck->delete_token->ngroups) { + /* Make this a talloc child of lck->delete_token. */ + lck->delete_token->groups = TALLOC_ARRAY(lck->delete_token, gid_t, + lck->delete_token->ngroups); + if (!lck->delete_token) { + smb_panic("parse_share_modes: talloc failed"); + } + + for (i = 0; i < lck->delete_token->ngroups; i++) { + memcpy(&lck->delete_token->groups[i], p, sizeof(gid_t)); + p += sizeof(gid_t); + } + } + + } else { + lck->delete_token = NULL; + } + + /* Save off the associated service path and filename. */ + lck->servicepath = (const char *)dbuf.dptr + sizeof(struct locking_data) + + (lck->num_share_modes * sizeof(struct share_mode_entry)) + + data.u.s.delete_token_size; + + lck->filename = (const char *)dbuf.dptr + sizeof(struct locking_data) + + (lck->num_share_modes * sizeof(struct share_mode_entry)) + + data.u.s.delete_token_size + + strlen(lck->servicepath) + 1; + + /* + * Ensure that each entry has a real process attached. + */ + + for (i = 0; i < lck->num_share_modes; i++) { + struct share_mode_entry *entry_p = &lck->share_modes[i]; + char *str = NULL; + if (DEBUGLEVEL >= 10) { + str = share_mode_str(NULL, i, entry_p); + } + DEBUG(10,("parse_share_modes: %s\n", + str ? str : "")); + if (!process_exists(entry_p->pid)) { + DEBUG(10,("parse_share_modes: deleted %s\n", + str ? str : "")); + entry_p->op_type = UNUSED_SHARE_MODE_ENTRY; + lck->modified = True; + } + TALLOC_FREE(str); + } + + return True; +} + +static TDB_DATA unparse_share_modes(struct share_mode_lock *lck) +{ + TDB_DATA result; + int num_valid = 0; + int i; + struct locking_data *data; + ssize_t offset; + ssize_t sp_len; + uint32 delete_token_size; + + result.dptr = NULL; + result.dsize = 0; + + for (i=0; i<lck->num_share_modes; i++) { + if (!is_unused_share_mode_entry(&lck->share_modes[i])) { + num_valid += 1; + } + } + + if (num_valid == 0) { + return result; + } + + sp_len = strlen(lck->servicepath); + delete_token_size = (lck->delete_token ? 
+ (sizeof(uid_t) + sizeof(gid_t) + (lck->delete_token->ngroups*sizeof(gid_t))) : 0); + + result.dsize = sizeof(*data) + + lck->num_share_modes * sizeof(struct share_mode_entry) + + delete_token_size + + sp_len + 1 + + strlen(lck->filename) + 1; + result.dptr = TALLOC_ARRAY(lck, uint8, result.dsize); + + if (result.dptr == NULL) { + smb_panic("talloc failed"); + } + + data = (struct locking_data *)result.dptr; + ZERO_STRUCTP(data); + data->u.s.num_share_mode_entries = lck->num_share_modes; + data->u.s.delete_on_close = lck->delete_on_close; + data->u.s.old_write_time = lck->old_write_time; + data->u.s.changed_write_time = lck->changed_write_time; + data->u.s.delete_token_size = delete_token_size; + + DEBUG(10,("unparse_share_modes: del: %d, owrt: %s cwrt: %s, tok: %u, " + "num: %d\n", data->u.s.delete_on_close, + timestring(debug_ctx(), + convert_timespec_to_time_t(lck->old_write_time)), + timestring(debug_ctx(), + convert_timespec_to_time_t( + lck->changed_write_time)), + (unsigned int)data->u.s.delete_token_size, + data->u.s.num_share_mode_entries)); + + memcpy(result.dptr + sizeof(*data), lck->share_modes, + sizeof(struct share_mode_entry)*lck->num_share_modes); + offset = sizeof(*data) + + sizeof(struct share_mode_entry)*lck->num_share_modes; + + /* Store any delete on close token. */ + if (lck->delete_token) { + uint8 *p = result.dptr + offset; + + memcpy(p, &lck->delete_token->uid, sizeof(uid_t)); + p += sizeof(uid_t); + + memcpy(p, &lck->delete_token->gid, sizeof(gid_t)); + p += sizeof(gid_t); + + for (i = 0; i < lck->delete_token->ngroups; i++) { + memcpy(p, &lck->delete_token->groups[i], sizeof(gid_t)); + p += sizeof(gid_t); + } + offset = p - result.dptr; + } + + safe_strcpy((char *)result.dptr + offset, lck->servicepath, + result.dsize - offset - 1); + offset += sp_len + 1; + safe_strcpy((char *)result.dptr + offset, lck->filename, + result.dsize - offset - 1); + + if (DEBUGLEVEL >= 10) { + print_share_mode_table(data); + } + + return result; +} + +static int share_mode_lock_destructor(struct share_mode_lock *lck) +{ + NTSTATUS status; + TDB_DATA data; + + if (!lck->modified) { + return 0; + } + + data = unparse_share_modes(lck); + + if (data.dptr == NULL) { + if (!lck->fresh) { + /* There has been an entry before, delete it */ + + status = lck->record->delete_rec(lck->record); + if (!NT_STATUS_IS_OK(status)) { + DEBUG(0, ("delete_rec returned %s\n", + nt_errstr(status))); + smb_panic("could not delete share entry"); + } + } + goto done; + } + + status = lck->record->store(lck->record, data, TDB_REPLACE); + if (!NT_STATUS_IS_OK(status)) { + DEBUG(0, ("store returned %s\n", nt_errstr(status))); + smb_panic("could not store share mode entry"); + } + + done: + + return 0; +} + +static bool fill_share_mode_lock(struct share_mode_lock *lck, + struct file_id id, + const char *servicepath, + const char *fname, + TDB_DATA share_mode_data, + const struct timespec *old_write_time) +{ + /* Ensure we set every field here as the destructor must be + valid even if parse_share_modes fails. 
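   In practice share_mode_lock_destructor() above returns immediately
   unless lck->modified is True, and modified defaults to False below,
   so a partially constructed lock is never written back to the tdb.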
*/ + + lck->servicepath = NULL; + lck->filename = NULL; + lck->id = id; + lck->num_share_modes = 0; + lck->share_modes = NULL; + lck->delete_token = NULL; + lck->delete_on_close = False; + ZERO_STRUCT(lck->old_write_time); + ZERO_STRUCT(lck->changed_write_time); + lck->fresh = False; + lck->modified = False; + + lck->fresh = (share_mode_data.dptr == NULL); + + if (lck->fresh) { + if (fname == NULL || servicepath == NULL + || old_write_time == NULL) { + return False; + } + lck->filename = talloc_strdup(lck, fname); + lck->servicepath = talloc_strdup(lck, servicepath); + if (lck->filename == NULL || lck->servicepath == NULL) { + DEBUG(0, ("talloc failed\n")); + return False; + } + lck->old_write_time = *old_write_time; + } else { + if (!parse_share_modes(share_mode_data, lck)) { + DEBUG(0, ("Could not parse share modes\n")); + return False; + } + } + + return True; +} + +struct share_mode_lock *get_share_mode_lock(TALLOC_CTX *mem_ctx, + const struct file_id id, + const char *servicepath, + const char *fname, + const struct timespec *old_write_time) +{ + struct share_mode_lock *lck; + struct file_id tmp; + TDB_DATA key = locking_key(&id, &tmp); + + if (!(lck = TALLOC_P(mem_ctx, struct share_mode_lock))) { + DEBUG(0, ("talloc failed\n")); + return NULL; + } + + if (!(lck->record = lock_db->fetch_locked(lock_db, lck, key))) { + DEBUG(3, ("Could not lock share entry\n")); + TALLOC_FREE(lck); + return NULL; + } + + if (!fill_share_mode_lock(lck, id, servicepath, fname, + lck->record->value, old_write_time)) { + DEBUG(3, ("fill_share_mode_lock failed\n")); + TALLOC_FREE(lck); + return NULL; + } + + talloc_set_destructor(lck, share_mode_lock_destructor); + + return lck; +} + +struct share_mode_lock *fetch_share_mode_unlocked(TALLOC_CTX *mem_ctx, + const struct file_id id, + const char *servicepath, + const char *fname) +{ + struct share_mode_lock *lck; + struct file_id tmp; + TDB_DATA key = locking_key(&id, &tmp); + TDB_DATA data; + + if (!(lck = TALLOC_P(mem_ctx, struct share_mode_lock))) { + DEBUG(0, ("talloc failed\n")); + return NULL; + } + + if (lock_db->fetch(lock_db, lck, key, &data) == -1) { + DEBUG(3, ("Could not fetch share entry\n")); + TALLOC_FREE(lck); + return NULL; + } + + if (!fill_share_mode_lock(lck, id, servicepath, fname, data, NULL)) { + DEBUG(3, ("fill_share_mode_lock failed\n")); + TALLOC_FREE(lck); + return NULL; + } + + return lck; +} + +/******************************************************************* + Sets the service name and filename for rename. + At this point we emit "file renamed" messages to all + process id's that have this file open. + Based on an initial code idea from SATOH Fumiyasu <fumiya@samba.gr.jp> +********************************************************************/ + +bool rename_share_filename(struct messaging_context *msg_ctx, + struct share_mode_lock *lck, + const char *servicepath, + const char *newname) +{ + size_t sp_len; + size_t fn_len; + size_t msg_len; + char *frm = NULL; + int i; + + DEBUG(10, ("rename_share_filename: servicepath %s newname %s\n", + servicepath, newname)); + + /* + * rename_internal_fsp() and rename_internals() add './' to + * head of newname if newname does not contain a '/'. + */ + while (newname[0] && newname[1] && newname[0] == '.' 
&& newname[1] == '/') { + newname += 2; + } + + lck->servicepath = talloc_strdup(lck, servicepath); + lck->filename = talloc_strdup(lck, newname); + if (lck->filename == NULL || lck->servicepath == NULL) { + DEBUG(0, ("rename_share_filename: talloc failed\n")); + return False; + } + lck->modified = True; + + sp_len = strlen(lck->servicepath); + fn_len = strlen(lck->filename); + + msg_len = MSG_FILE_RENAMED_MIN_SIZE + sp_len + 1 + fn_len + 1; + + /* Set up the name changed message. */ + frm = TALLOC_ARRAY(lck, char, msg_len); + if (!frm) { + return False; + } + + push_file_id_16(frm, &lck->id); + + DEBUG(10,("rename_share_filename: msg_len = %u\n", (unsigned int)msg_len )); + + safe_strcpy(&frm[16], lck->servicepath, sp_len); + safe_strcpy(&frm[16 + sp_len + 1], lck->filename, fn_len); + + /* Send the messages. */ + for (i=0; i<lck->num_share_modes; i++) { + struct share_mode_entry *se = &lck->share_modes[i]; + if (!is_valid_share_mode_entry(se)) { + continue; + } + /* But not to ourselves... */ + if (procid_is_me(&se->pid)) { + continue; + } + + DEBUG(10,("rename_share_filename: sending rename message to pid %s " + "file_id %s sharepath %s newname %s\n", + procid_str_static(&se->pid), + file_id_string_tos(&lck->id), + lck->servicepath, lck->filename )); + + messaging_send_buf(msg_ctx, se->pid, MSG_SMB_FILE_RENAME, + (uint8 *)frm, msg_len); + } + + return True; +} + +void get_file_infos(struct file_id id, + bool *delete_on_close, + struct timespec *write_time) +{ + struct share_mode_lock *lck; + + if (delete_on_close) { + *delete_on_close = false; + } + + if (write_time) { + ZERO_STRUCTP(write_time); + } + + if (!(lck = fetch_share_mode_unlocked(talloc_tos(), id, NULL, NULL))) { + return; + } + + if (delete_on_close) { + *delete_on_close = lck->delete_on_close; + } + + if (write_time) { + struct timespec wt; + + wt = lck->changed_write_time; + if (null_timespec(wt)) { + wt = lck->old_write_time; + } + + *write_time = wt; + } + + TALLOC_FREE(lck); +} + +bool is_valid_share_mode_entry(const struct share_mode_entry *e) +{ + int num_props = 0; + + if (e->op_type == UNUSED_SHARE_MODE_ENTRY) { + /* cope with dead entries from the process not + existing. These should not be considered valid, + otherwise we end up doing zero timeout sharing + violation */ + return False; + } + + num_props += ((e->op_type == NO_OPLOCK) ? 1 : 0); + num_props += (EXCLUSIVE_OPLOCK_TYPE(e->op_type) ? 1 : 0); + num_props += (LEVEL_II_OPLOCK_TYPE(e->op_type) ? 1 : 0); + + SMB_ASSERT(num_props <= 1); + return (num_props != 0); +} + +bool is_deferred_open_entry(const struct share_mode_entry *e) +{ + return (e->op_type == DEFERRED_OPEN_ENTRY); +} + +bool is_unused_share_mode_entry(const struct share_mode_entry *e) +{ + return (e->op_type == UNUSED_SHARE_MODE_ENTRY); +} + +/******************************************************************* + Fill a share mode entry. +********************************************************************/ + +static void fill_share_mode_entry(struct share_mode_entry *e, + files_struct *fsp, + uid_t uid, uint16 mid, uint16 op_type) +{ + ZERO_STRUCTP(e); + e->pid = procid_self(); + e->share_access = fsp->share_access; + e->private_options = fsp->fh->private_options; + e->access_mask = fsp->access_mask; + e->op_mid = mid; + e->op_type = op_type; + e->time.tv_sec = fsp->open_time.tv_sec; + e->time.tv_usec = fsp->open_time.tv_usec; + e->id = fsp->file_id; + e->share_file_id = fsp->fh->gen_id; + e->uid = (uint32)uid; + e->flags = fsp->posix_open ? 
SHARE_MODE_FLAG_POSIX_OPEN : 0;
+}
+
+static void fill_deferred_open_entry(struct share_mode_entry *e,
+ const struct timeval request_time,
+ struct file_id id, uint16 mid)
+{
+ ZERO_STRUCTP(e);
+ e->pid = procid_self();
+ e->op_mid = mid;
+ e->op_type = DEFERRED_OPEN_ENTRY;
+ e->time.tv_sec = request_time.tv_sec;
+ e->time.tv_usec = request_time.tv_usec;
+ e->id = id;
+ e->uid = (uint32)-1;
+ e->flags = 0;
+}
+
+static void add_share_mode_entry(struct share_mode_lock *lck,
+ const struct share_mode_entry *entry)
+{
+ int i;
+
+ for (i=0; i<lck->num_share_modes; i++) {
+ struct share_mode_entry *e = &lck->share_modes[i];
+ if (is_unused_share_mode_entry(e)) {
+ *e = *entry;
+ break;
+ }
+ }
+
+ if (i == lck->num_share_modes) {
+ /* No unused entry found */
+ ADD_TO_ARRAY(lck, struct share_mode_entry, *entry,
+ &lck->share_modes, &lck->num_share_modes);
+ }
+ lck->modified = True;
+}
+
+void set_share_mode(struct share_mode_lock *lck, files_struct *fsp,
+ uid_t uid, uint16 mid, uint16 op_type, bool initial_delete_on_close_allowed)
+{
+ struct share_mode_entry entry;
+ fill_share_mode_entry(&entry, fsp, uid, mid, op_type);
+ if (initial_delete_on_close_allowed) {
+ entry.flags |= SHARE_MODE_ALLOW_INITIAL_DELETE_ON_CLOSE;
+ }
+ add_share_mode_entry(lck, &entry);
+}
+
+void add_deferred_open(struct share_mode_lock *lck, uint16 mid,
+ struct timeval request_time,
+ struct file_id id)
+{
+ struct share_mode_entry entry;
+ fill_deferred_open_entry(&entry, request_time, id, mid);
+ add_share_mode_entry(lck, &entry);
+}
+
+/*******************************************************************
+ Check if two share mode entries are identical, ignoring oplock
+ and mid info and desired_access. (Removed paranoia test - it's
+ not automatically a logic error if they are identical. JRA.)
+********************************************************************/
+
+static bool share_modes_identical(struct share_mode_entry *e1,
+ struct share_mode_entry *e2)
+{
+ /* We used to check for e1->share_access == e2->share_access here
+ as well as the other fields but 2 different DOS or FCB opens
+ sharing the same share mode entry may validly differ in
+ fsp->share_access field. */
+
+ return (procid_equal(&e1->pid, &e2->pid) &&
+ file_id_equal(&e1->id, &e2->id) &&
+ e1->share_file_id == e2->share_file_id );
+}
+
+static bool deferred_open_identical(struct share_mode_entry *e1,
+ struct share_mode_entry *e2)
+{
+ return (procid_equal(&e1->pid, &e2->pid) &&
+ (e1->op_mid == e2->op_mid) &&
+ file_id_equal(&e1->id, &e2->id));
+}
+
+static struct share_mode_entry *find_share_mode_entry(struct share_mode_lock *lck,
+ struct share_mode_entry *entry)
+{
+ int i;
+
+ for (i=0; i<lck->num_share_modes; i++) {
+ struct share_mode_entry *e = &lck->share_modes[i];
+ if (is_valid_share_mode_entry(entry) &&
+ is_valid_share_mode_entry(e) &&
+ share_modes_identical(e, entry)) {
+ return e;
+ }
+ if (is_deferred_open_entry(entry) &&
+ is_deferred_open_entry(e) &&
+ deferred_open_identical(e, entry)) {
+ return e;
+ }
+ }
+ return NULL;
+}
+
+/*******************************************************************
+ Delete the share mode entry of a file for this process. Returns True
+ if a matching entry was found and marked unused, False otherwise.
+********************************************************************/
+
+bool del_share_mode(struct share_mode_lock *lck, files_struct *fsp)
+{
+ struct share_mode_entry entry, *e;
+
+ /* Don't care about the pid owner being correct here - just a search. 
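   Only e->pid (procid_self() via fill_share_mode_entry), e->id and
   e->share_file_id take part in share_modes_identical() above, so the
   dummy (uid_t)-1 uid, mid of 0 and NO_OPLOCK values are never
   compared.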
*/
+ fill_share_mode_entry(&entry, fsp, (uid_t)-1, 0, NO_OPLOCK);
+
+ e = find_share_mode_entry(lck, &entry);
+ if (e == NULL) {
+ return False;
+ }
+
+ e->op_type = UNUSED_SHARE_MODE_ENTRY;
+ lck->modified = True;
+ return True;
+}
+
+void del_deferred_open_entry(struct share_mode_lock *lck, uint16 mid)
+{
+ struct share_mode_entry entry, *e;
+
+ fill_deferred_open_entry(&entry, timeval_zero(),
+ lck->id, mid);
+
+ e = find_share_mode_entry(lck, &entry);
+ if (e == NULL) {
+ return;
+ }
+
+ e->op_type = UNUSED_SHARE_MODE_ENTRY;
+ lck->modified = True;
+}
+
+/*******************************************************************
+ Remove an oplock mid and mode entry from a share mode.
+********************************************************************/
+
+bool remove_share_oplock(struct share_mode_lock *lck, files_struct *fsp)
+{
+ struct share_mode_entry entry, *e;
+
+ /* Don't care about the pid owner being correct here - just a search. */
+ fill_share_mode_entry(&entry, fsp, (uid_t)-1, 0, NO_OPLOCK);
+
+ e = find_share_mode_entry(lck, &entry);
+ if (e == NULL) {
+ return False;
+ }
+
+ e->op_mid = 0;
+ e->op_type = NO_OPLOCK;
+ lck->modified = True;
+ return True;
+}
+
+/*******************************************************************
+ Downgrade an oplock type from exclusive to level II.
+********************************************************************/
+
+bool downgrade_share_oplock(struct share_mode_lock *lck, files_struct *fsp)
+{
+ struct share_mode_entry entry, *e;
+
+ /* Don't care about the pid owner being correct here - just a search. */
+ fill_share_mode_entry(&entry, fsp, (uid_t)-1, 0, NO_OPLOCK);
+
+ e = find_share_mode_entry(lck, &entry);
+ if (e == NULL) {
+ return False;
+ }
+
+ e->op_type = LEVEL_II_OPLOCK;
+ lck->modified = True;
+ return True;
+}
+
+/****************************************************************************
+ Deal with the internal needs of setting the delete on close flag. Note that
+ as the tdb locking is recursive, it is safe to call this from within
+ open_file_ntcreate. JRA.
+****************************************************************************/
+
+NTSTATUS can_set_delete_on_close(files_struct *fsp, bool delete_on_close,
+ uint32 dosmode)
+{
+ if (!delete_on_close) {
+ return NT_STATUS_OK;
+ }
+
+ /*
+ * Only allow delete on close for writable files.
+ */
+
+ if ((dosmode & aRONLY) &&
+ !lp_delete_readonly(SNUM(fsp->conn))) {
+ DEBUG(10,("can_set_delete_on_close: file %s delete on close "
+ "flag set but file attribute is readonly.\n",
+ fsp->fsp_name ));
+ return NT_STATUS_CANNOT_DELETE;
+ }
+
+ /*
+ * Only allow delete on close for writable shares.
+ */
+
+ if (!CAN_WRITE(fsp->conn)) {
+ DEBUG(10,("can_set_delete_on_close: file %s delete on "
+ "close flag set but write access denied on share.\n",
+ fsp->fsp_name ));
+ return NT_STATUS_ACCESS_DENIED;
+ }
+
+ /*
+ * Only allow delete on close for files/directories opened with delete
+ * intent.
+ */
+
+ if (!(fsp->access_mask & DELETE_ACCESS)) {
+ DEBUG(10,("can_set_delete_on_close: file %s delete on "
+ "close flag set but delete access denied.\n",
+ fsp->fsp_name ));
+ return NT_STATUS_ACCESS_DENIED;
+ }
+
+ /* Don't allow delete on close for non-empty directories. */
+ if (fsp->is_directory) {
+ return can_delete_directory(fsp->conn, fsp->fsp_name);
+ }
+
+ return NT_STATUS_OK;
+}
+
+/****************************************************************************
+ Do we have an open file handle that created this entry ? 
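 (Hypothetical caller sketch - the surrounding logic is illustrative,
 assuming a share mode lock already held in the open path:

	if (lck && can_set_initial_delete_on_close(lck)) {
		... honour a delete-on-close request at create time ...
	}

 the scan below simply looks for any entry carrying
 SHARE_MODE_ALLOW_INITIAL_DELETE_ON_CLOSE.)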
+****************************************************************************/ + +bool can_set_initial_delete_on_close(const struct share_mode_lock *lck) +{ + int i; + + for (i=0; i<lck->num_share_modes; i++) { + if (lck->share_modes[i].flags & SHARE_MODE_ALLOW_INITIAL_DELETE_ON_CLOSE) { + return True; + } + } + return False; +} + +/************************************************************************* + Return a talloced copy of a UNIX_USER_TOKEN. NULL on fail. + (Should this be in locking.c.... ?). +*************************************************************************/ + +static UNIX_USER_TOKEN *copy_unix_token(TALLOC_CTX *ctx, UNIX_USER_TOKEN *tok) +{ + UNIX_USER_TOKEN *cpy; + + if (tok == NULL) { + return NULL; + } + + cpy = TALLOC_P(ctx, UNIX_USER_TOKEN); + if (!cpy) { + return NULL; + } + + cpy->uid = tok->uid; + cpy->gid = tok->gid; + cpy->ngroups = tok->ngroups; + if (tok->ngroups) { + /* Make this a talloc child of cpy. */ + cpy->groups = TALLOC_ARRAY(cpy, gid_t, tok->ngroups); + if (!cpy->groups) { + return NULL; + } + memcpy(cpy->groups, tok->groups, tok->ngroups * sizeof(gid_t)); + } + return cpy; +} + +/**************************************************************************** + Replace the delete on close token. +****************************************************************************/ + +void set_delete_on_close_token(struct share_mode_lock *lck, UNIX_USER_TOKEN *tok) +{ + TALLOC_FREE(lck->delete_token); /* Also deletes groups... */ + + /* Copy the new token (can be NULL). */ + lck->delete_token = copy_unix_token(lck, tok); + lck->modified = True; +} + +/**************************************************************************** + Sets the delete on close flag over all share modes on this file. + Modify the share mode entry for all files open + on this device and inode to tell other smbds we have + changed the delete on close flag. This will be noticed + in the close code, the last closer will delete the file + if flag is set. + This makes a copy of any UNIX_USER_TOKEN into the + lck entry. This function is used when the lock is already granted. +****************************************************************************/ + +void set_delete_on_close_lck(struct share_mode_lock *lck, bool delete_on_close, UNIX_USER_TOKEN *tok) +{ + if (lck->delete_on_close != delete_on_close) { + set_delete_on_close_token(lck, tok); + lck->delete_on_close = delete_on_close; + if (delete_on_close) { + SMB_ASSERT(lck->delete_token != NULL); + } + lck->modified = True; + } +} + +bool set_delete_on_close(files_struct *fsp, bool delete_on_close, UNIX_USER_TOKEN *tok) +{ + struct share_mode_lock *lck; + + DEBUG(10,("set_delete_on_close: %s delete on close flag for " + "fnum = %d, file %s\n", + delete_on_close ? "Adding" : "Removing", fsp->fnum, + fsp->fsp_name )); + + lck = get_share_mode_lock(talloc_tos(), fsp->file_id, NULL, NULL, + NULL); + if (lck == NULL) { + return False; + } + + set_delete_on_close_lck(lck, delete_on_close, tok); + + if (fsp->is_directory) { + send_stat_cache_delete_message(fsp->fsp_name); + } + + TALLOC_FREE(lck); + return True; +} + +/**************************************************************************** + Sets the allow initial delete on close flag for this share mode. 
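 (Note the distinction: set_delete_on_close_lck() above changes the
 per-file lck->delete_on_close state, whereas the function below only
 flips SHARE_MODE_ALLOW_INITIAL_DELETE_ON_CLOSE in the flags of this
 opener's own share mode entry.)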
+****************************************************************************/ + +bool set_allow_initial_delete_on_close(struct share_mode_lock *lck, files_struct *fsp, bool delete_on_close) +{ + struct share_mode_entry entry, *e; + + /* Don't care about the pid owner being correct here - just a search. */ + fill_share_mode_entry(&entry, fsp, (uid_t)-1, 0, NO_OPLOCK); + + e = find_share_mode_entry(lck, &entry); + if (e == NULL) { + return False; + } + + if (delete_on_close) { + e->flags |= SHARE_MODE_ALLOW_INITIAL_DELETE_ON_CLOSE; + } else { + e->flags &= ~SHARE_MODE_ALLOW_INITIAL_DELETE_ON_CLOSE; + } + lck->modified = True; + return True; +} + +bool set_sticky_write_time(struct file_id fileid, struct timespec write_time) +{ + struct share_mode_lock *lck; + + DEBUG(5,("set_sticky_write_time: %s id=%s\n", + timestring(debug_ctx(), + convert_timespec_to_time_t(write_time)), + file_id_string_tos(&fileid))); + + lck = get_share_mode_lock(NULL, fileid, NULL, NULL, NULL); + if (lck == NULL) { + return False; + } + + if (timespec_compare(&lck->changed_write_time, &write_time) != 0) { + lck->modified = True; + lck->changed_write_time = write_time; + } + + TALLOC_FREE(lck); + return True; +} + +bool set_write_time(struct file_id fileid, struct timespec write_time) +{ + struct share_mode_lock *lck; + + DEBUG(5,("set_write_time: %s id=%s\n", + timestring(debug_ctx(), + convert_timespec_to_time_t(write_time)), + file_id_string_tos(&fileid))); + + lck = get_share_mode_lock(NULL, fileid, NULL, NULL, NULL); + if (lck == NULL) { + return False; + } + + if (timespec_compare(&lck->old_write_time, &write_time) != 0) { + lck->modified = True; + lck->old_write_time = write_time; + } + + TALLOC_FREE(lck); + return True; +} + + +struct forall_state { + void (*fn)(const struct share_mode_entry *entry, + const char *sharepath, + const char *fname, + void *private_data); + void *private_data; +}; + +static int traverse_fn(struct db_record *rec, void *_state) +{ + struct forall_state *state = (struct forall_state *)_state; + struct locking_data *data; + struct share_mode_entry *shares; + const char *sharepath; + const char *fname; + int i; + + /* Ensure this is a locking_key record. */ + if (rec->key.dsize != sizeof(struct file_id)) + return 0; + + data = (struct locking_data *)rec->value.dptr; + shares = (struct share_mode_entry *)(rec->value.dptr + sizeof(*data)); + sharepath = (const char *)rec->value.dptr + sizeof(*data) + + data->u.s.num_share_mode_entries*sizeof(*shares) + + data->u.s.delete_token_size; + fname = (const char *)rec->value.dptr + sizeof(*data) + + data->u.s.num_share_mode_entries*sizeof(*shares) + + data->u.s.delete_token_size + + strlen(sharepath) + 1; + + for (i=0;i<data->u.s.num_share_mode_entries;i++) { + state->fn(&shares[i], sharepath, fname, + state->private_data); + } + return 0; +} + +/******************************************************************* + Call the specified function on each entry under management by the + share mode system. 
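 (Illustrative traversal callback - the names are hypothetical, only
 the function signature is taken from the declaration below:

	static void count_cb(const struct share_mode_entry *e,
			     const char *sharepath, const char *fname,
			     void *private_data)
	{
		*(int *)private_data += 1;
	}

	int n = 0;
	share_mode_forall(count_cb, &n);

 would count every share mode entry currently in locking.tdb.)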
+********************************************************************/ + +int share_mode_forall(void (*fn)(const struct share_mode_entry *, const char *, + const char *, void *), + void *private_data) +{ + struct forall_state state; + + if (lock_db == NULL) + return 0; + + state.fn = fn; + state.private_data = private_data; + + return lock_db->traverse_read(lock_db, traverse_fn, (void *)&state); +} diff --git a/source3/locking/posix.c b/source3/locking/posix.c new file mode 100644 index 0000000000..32e1ee9fbf --- /dev/null +++ b/source3/locking/posix.c @@ -0,0 +1,1309 @@ +/* + Unix SMB/CIFS implementation. + Locking functions + Copyright (C) Jeremy Allison 1992-2006 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + + Revision History: + + POSIX locking support. Jeremy Allison (jeremy@valinux.com), Apr. 2000. +*/ + +#include "includes.h" + +#undef DBGC_CLASS +#define DBGC_CLASS DBGC_LOCKING + +/* + * The pending close database handle. + */ + +static struct db_context *posix_pending_close_db; + +/**************************************************************************** + First - the functions that deal with the underlying system locks - these + functions are used no matter if we're mapping CIFS Windows locks or CIFS + POSIX locks onto POSIX. +****************************************************************************/ + +/**************************************************************************** + Utility function to map a lock type correctly depending on the open + mode of a file. +****************************************************************************/ + +static int map_posix_lock_type( files_struct *fsp, enum brl_type lock_type) +{ + if((lock_type == WRITE_LOCK) && !fsp->can_write) { + /* + * Many UNIX's cannot get a write lock on a file opened read-only. + * Win32 locking semantics allow this. + * Do the best we can and attempt a read-only lock. + */ + DEBUG(10,("map_posix_lock_type: Downgrading write lock to read due to read-only file.\n")); + return F_RDLCK; + } + + /* + * This return should be the most normal, as we attempt + * to always open files read/write. + */ + + return (lock_type == READ_LOCK) ? F_RDLCK : F_WRLCK; +} + +/**************************************************************************** + Debugging aid :-). +****************************************************************************/ + +static const char *posix_lock_type_name(int lock_type) +{ + return (lock_type == F_RDLCK) ? "READ" : "WRITE"; +} + +/**************************************************************************** + Check to see if the given unsigned lock range is within the possible POSIX + range. Modifies the given args to be in range if possible, just returns + False if not. 
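 (Worked example for the 32 bit case below, where the maximum positive
 lock offset is 0x7FFFFFFF: a Windows lock of u_offset = 0 and
 u_count = 0xFFFFFFFF has its count truncated to 0x7FFFFFFF and is
 mapped, while any lock whose u_offset itself exceeds 0x7FFFFFFF
 cannot be represented and is ignored with a False return.)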
+****************************************************************************/ + +static bool posix_lock_in_range(SMB_OFF_T *offset_out, SMB_OFF_T *count_out, + SMB_BIG_UINT u_offset, SMB_BIG_UINT u_count) +{ + SMB_OFF_T offset = (SMB_OFF_T)u_offset; + SMB_OFF_T count = (SMB_OFF_T)u_count; + + /* + * For the type of system we are, attempt to + * find the maximum positive lock offset as an SMB_OFF_T. + */ + +#if defined(MAX_POSITIVE_LOCK_OFFSET) /* Some systems have arbitrary limits. */ + + SMB_OFF_T max_positive_lock_offset = (MAX_POSITIVE_LOCK_OFFSET); + +#elif defined(LARGE_SMB_OFF_T) && !defined(HAVE_BROKEN_FCNTL64_LOCKS) + + /* + * In this case SMB_OFF_T is 64 bits, + * and the underlying system can handle 64 bit signed locks. + */ + + SMB_OFF_T mask2 = ((SMB_OFF_T)0x4) << (SMB_OFF_T_BITS-4); + SMB_OFF_T mask = (mask2<<1); + SMB_OFF_T max_positive_lock_offset = ~mask; + +#else /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */ + + /* + * In this case either SMB_OFF_T is 32 bits, + * or the underlying system cannot handle 64 bit signed locks. + * All offsets & counts must be 2^31 or less. + */ + + SMB_OFF_T max_positive_lock_offset = 0x7FFFFFFF; + +#endif /* !LARGE_SMB_OFF_T || HAVE_BROKEN_FCNTL64_LOCKS */ + + /* + * POSIX locks of length zero mean lock to end-of-file. + * Win32 locks of length zero are point probes. Ignore + * any Win32 locks of length zero. JRA. + */ + + if (count == (SMB_OFF_T)0) { + DEBUG(10,("posix_lock_in_range: count = 0, ignoring.\n")); + return False; + } + + /* + * If the given offset was > max_positive_lock_offset then we cannot map this at all + * ignore this lock. + */ + + if (u_offset & ~((SMB_BIG_UINT)max_positive_lock_offset)) { + DEBUG(10,("posix_lock_in_range: (offset = %.0f) offset > %.0f and we cannot handle this. Ignoring lock.\n", + (double)u_offset, (double)((SMB_BIG_UINT)max_positive_lock_offset) )); + return False; + } + + /* + * We must truncate the count to less than max_positive_lock_offset. + */ + + if (u_count & ~((SMB_BIG_UINT)max_positive_lock_offset)) { + count = max_positive_lock_offset; + } + + /* + * Truncate count to end at max lock offset. + */ + + if (offset + count < 0 || offset + count > max_positive_lock_offset) { + count = max_positive_lock_offset - offset; + } + + /* + * If we ate all the count, ignore this lock. + */ + + if (count == 0) { + DEBUG(10,("posix_lock_in_range: Count = 0. Ignoring lock u_offset = %.0f, u_count = %.0f\n", + (double)u_offset, (double)u_count )); + return False; + } + + /* + * The mapping was successful. + */ + + DEBUG(10,("posix_lock_in_range: offset_out = %.0f, count_out = %.0f\n", + (double)offset, (double)count )); + + *offset_out = offset; + *count_out = count; + + return True; +} + +/**************************************************************************** + Actual function that does POSIX locks. Copes with 64 -> 32 bit cruft and + broken NFS implementations. +****************************************************************************/ + +static bool posix_fcntl_lock(files_struct *fsp, int op, SMB_OFF_T offset, SMB_OFF_T count, int type) +{ + bool ret; + + DEBUG(8,("posix_fcntl_lock %d %d %.0f %.0f %d\n",fsp->fh->fd,op,(double)offset,(double)count,type)); + + ret = SMB_VFS_LOCK(fsp, op, offset, count, type); + + if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno == EINVAL))) { + + DEBUG(0,("posix_fcntl_lock: WARNING: lock request at offset %.0f, length %.0f returned\n", + (double)offset,(double)count)); + DEBUGADD(0,("an %s error. 
This can happen when using 64 bit lock offsets\n", strerror(errno))); + DEBUGADD(0,("on 32 bit NFS mounted file systems.\n")); + + /* + * If the offset is > 0x7FFFFFFF then this will cause problems on + * 32 bit NFS mounted filesystems. Just ignore it. + */ + + if (offset & ~((SMB_OFF_T)0x7fffffff)) { + DEBUG(0,("Offset greater than 31 bits. Returning success.\n")); + return True; + } + + if (count & ~((SMB_OFF_T)0x7fffffff)) { + /* 32 bit NFS file system, retry with smaller offset */ + DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n")); + errno = 0; + count &= 0x7fffffff; + ret = SMB_VFS_LOCK(fsp, op, offset, count, type); + } + } + + DEBUG(8,("posix_fcntl_lock: Lock call %s\n", ret ? "successful" : "failed")); + return ret; +} + +/**************************************************************************** + Actual function that gets POSIX locks. Copes with 64 -> 32 bit cruft and + broken NFS implementations. +****************************************************************************/ + +static bool posix_fcntl_getlock(files_struct *fsp, SMB_OFF_T *poffset, SMB_OFF_T *pcount, int *ptype) +{ + pid_t pid; + bool ret; + + DEBUG(8,("posix_fcntl_getlock %d %.0f %.0f %d\n", + fsp->fh->fd,(double)*poffset,(double)*pcount,*ptype)); + + ret = SMB_VFS_GETLOCK(fsp, poffset, pcount, ptype, &pid); + + if (!ret && ((errno == EFBIG) || (errno == ENOLCK) || (errno == EINVAL))) { + + DEBUG(0,("posix_fcntl_getlock: WARNING: lock request at offset %.0f, length %.0f returned\n", + (double)*poffset,(double)*pcount)); + DEBUGADD(0,("an %s error. This can happen when using 64 bit lock offsets\n", strerror(errno))); + DEBUGADD(0,("on 32 bit NFS mounted file systems.\n")); + + /* + * If the offset is > 0x7FFFFFFF then this will cause problems on + * 32 bit NFS mounted filesystems. Just ignore it. + */ + + if (*poffset & ~((SMB_OFF_T)0x7fffffff)) { + DEBUG(0,("Offset greater than 31 bits. Returning success.\n")); + return True; + } + + if (*pcount & ~((SMB_OFF_T)0x7fffffff)) { + /* 32 bit NFS file system, retry with smaller offset */ + DEBUG(0,("Count greater than 31 bits - retrying with 31 bit truncated length.\n")); + errno = 0; + *pcount &= 0x7fffffff; + ret = SMB_VFS_GETLOCK(fsp,poffset,pcount,ptype,&pid); + } + } + + DEBUG(8,("posix_fcntl_getlock: Lock query call %s\n", ret ? "successful" : "failed")); + return ret; +} + +/**************************************************************************** + POSIX function to see if a file region is locked. Returns True if the + region is locked, False otherwise. +****************************************************************************/ + +bool is_posix_locked(files_struct *fsp, + SMB_BIG_UINT *pu_offset, + SMB_BIG_UINT *pu_count, + enum brl_type *plock_type, + enum brl_flavour lock_flav) +{ + SMB_OFF_T offset; + SMB_OFF_T count; + int posix_lock_type = map_posix_lock_type(fsp,*plock_type); + + DEBUG(10,("is_posix_locked: File %s, offset = %.0f, count = %.0f, type = %s\n", + fsp->fsp_name, (double)*pu_offset, (double)*pu_count, posix_lock_type_name(*plock_type) )); + + /* + * If the requested lock won't fit in the POSIX range, we will + * never set it, so presume it is not locked. + */ + + if(!posix_lock_in_range(&offset, &count, *pu_offset, *pu_count)) { + return False; + } + + if (!posix_fcntl_getlock(fsp,&offset,&count,&posix_lock_type)) { + return False; + } + + if (posix_lock_type == F_UNLCK) { + return False; + } + + if (lock_flav == POSIX_LOCK) { + /* Only POSIX lock queries need to know the details. 
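   Callers testing a WINDOWS_LOCK range only need the boolean result,
   so *pu_offset, *pu_count and *plock_type are deliberately left
   untouched for that flavour.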
*/ + *pu_offset = (SMB_BIG_UINT)offset; + *pu_count = (SMB_BIG_UINT)count; + *plock_type = (posix_lock_type == F_RDLCK) ? READ_LOCK : WRITE_LOCK; + } + return True; +} + +/**************************************************************************** + Next - the functions that deal with in memory database storing representations + of either Windows CIFS locks or POSIX CIFS locks. +****************************************************************************/ + +/* The key used in the in-memory POSIX databases. */ + +struct lock_ref_count_key { + struct file_id id; + char r; +}; + +/******************************************************************* + Form a static locking key for a dev/inode pair for the lock ref count +******************************************************************/ + +static TDB_DATA locking_ref_count_key_fsp(files_struct *fsp, + struct lock_ref_count_key *tmp) +{ + ZERO_STRUCTP(tmp); + tmp->id = fsp->file_id; + tmp->r = 'r'; + return make_tdb_data((uint8_t *)tmp, sizeof(*tmp)); +} + +/******************************************************************* + Convenience function to get an fd_array key from an fsp. +******************************************************************/ + +static TDB_DATA fd_array_key_fsp(files_struct *fsp) +{ + return make_tdb_data((uint8 *)&fsp->file_id, sizeof(fsp->file_id)); +} + +/******************************************************************* + Create the in-memory POSIX lock databases. +********************************************************************/ + +bool posix_locking_init(bool read_only) +{ + if (posix_pending_close_db != NULL) { + return true; + } + + posix_pending_close_db = db_open_rbt(NULL); + + if (posix_pending_close_db == NULL) { + DEBUG(0,("Failed to open POSIX pending close database.\n")); + return false; + } + + return true; +} + +/******************************************************************* + Delete the in-memory POSIX lock databases. +********************************************************************/ + +bool posix_locking_end(void) +{ + /* + * Shouldn't we close all fd's here? + */ + TALLOC_FREE(posix_pending_close_db); + return true; +} + +/**************************************************************************** + Next - the functions that deal with storing fd's that have outstanding + POSIX locks when closed. +****************************************************************************/ + +/**************************************************************************** + The records in posix_pending_close_tdb are composed of an array of ints + keyed by dev/ino pair. + The first int is a reference count of the number of outstanding locks on + all open fd's on this dev/ino pair. Any subsequent ints are the fd's that + were open on this dev/ino pair that should have been closed, but can't as + the lock ref count is non zero. +****************************************************************************/ + +/**************************************************************************** + Keep a reference count of the number of Windows locks open on this dev/ino + pair. Creates entry if it doesn't exist. 
+****************************************************************************/ + +static void increment_windows_lock_ref_count(files_struct *fsp) +{ + struct lock_ref_count_key tmp; + struct db_record *rec; + int lock_ref_count = 0; + NTSTATUS status; + + rec = posix_pending_close_db->fetch_locked( + posix_pending_close_db, talloc_tos(), + locking_ref_count_key_fsp(fsp, &tmp)); + + SMB_ASSERT(rec != NULL); + + if (rec->value.dptr != NULL) { + SMB_ASSERT(rec->value.dsize == sizeof(lock_ref_count)); + memcpy(&lock_ref_count, rec->value.dptr, + sizeof(lock_ref_count)); + } + + lock_ref_count++; + + status = rec->store(rec, make_tdb_data((uint8 *)&lock_ref_count, + sizeof(lock_ref_count)), 0); + + SMB_ASSERT(NT_STATUS_IS_OK(status)); + + TALLOC_FREE(rec); + + DEBUG(10,("increment_windows_lock_ref_count for file now %s = %d\n", + fsp->fsp_name, lock_ref_count )); +} + +/**************************************************************************** + Bulk delete - subtract as many locks as we've just deleted. +****************************************************************************/ + +void reduce_windows_lock_ref_count(files_struct *fsp, unsigned int dcount) +{ + struct lock_ref_count_key tmp; + struct db_record *rec; + int lock_ref_count = 0; + NTSTATUS status; + + rec = posix_pending_close_db->fetch_locked( + posix_pending_close_db, talloc_tos(), + locking_ref_count_key_fsp(fsp, &tmp)); + + SMB_ASSERT((rec != NULL) + && (rec->value.dptr != NULL) + && (rec->value.dsize == sizeof(lock_ref_count))); + + memcpy(&lock_ref_count, rec->value.dptr, sizeof(lock_ref_count)); + + SMB_ASSERT(lock_ref_count > 0); + + lock_ref_count -= dcount; + + status = rec->store(rec, make_tdb_data((uint8 *)&lock_ref_count, + sizeof(lock_ref_count)), 0); + + SMB_ASSERT(NT_STATUS_IS_OK(status)); + + TALLOC_FREE(rec); + + DEBUG(10,("reduce_windows_lock_ref_count for file now %s = %d\n", + fsp->fsp_name, lock_ref_count )); +} + +static void decrement_windows_lock_ref_count(files_struct *fsp) +{ + reduce_windows_lock_ref_count(fsp, 1); +} + +/**************************************************************************** + Fetch the lock ref count. +****************************************************************************/ + +static int get_windows_lock_ref_count(files_struct *fsp) +{ + struct lock_ref_count_key tmp; + TDB_DATA dbuf; + int res; + int lock_ref_count = 0; + + res = posix_pending_close_db->fetch( + posix_pending_close_db, talloc_tos(), + locking_ref_count_key_fsp(fsp, &tmp), &dbuf); + + SMB_ASSERT(res == 0); + + if (dbuf.dsize != 0) { + SMB_ASSERT(dbuf.dsize == sizeof(lock_ref_count)); + memcpy(&lock_ref_count, dbuf.dptr, sizeof(lock_ref_count)); + TALLOC_FREE(dbuf.dptr); + } + + DEBUG(10,("get_windows_lock_count for file %s = %d\n", + fsp->fsp_name, lock_ref_count )); + + return lock_ref_count; +} + +/**************************************************************************** + Delete a lock_ref_count entry. +****************************************************************************/ + +static void delete_windows_lock_ref_count(files_struct *fsp) +{ + struct lock_ref_count_key tmp; + struct db_record *rec; + + rec = posix_pending_close_db->fetch_locked( + posix_pending_close_db, talloc_tos(), + locking_ref_count_key_fsp(fsp, &tmp)); + + SMB_ASSERT(rec != NULL); + + /* Not a bug if it doesn't exist - no locks were ever granted. 
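   The ref count record only exists once
   increment_windows_lock_ref_count() has run, i.e. after the first
   Windows lock is granted on this dev/ino pair, so closing a file
   that was never locked legitimately finds nothing to delete here.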
*/ + + rec->delete_rec(rec); + TALLOC_FREE(rec); + + DEBUG(10,("delete_windows_lock_ref_count for file %s\n", + fsp->fsp_name)); +} + +/**************************************************************************** + Add an fd to the pending close tdb. +****************************************************************************/ + +static void add_fd_to_close_entry(files_struct *fsp) +{ + struct db_record *rec; + uint8_t *new_data; + NTSTATUS status; + + rec = posix_pending_close_db->fetch_locked( + posix_pending_close_db, talloc_tos(), + fd_array_key_fsp(fsp)); + + SMB_ASSERT(rec != NULL); + + new_data = TALLOC_ARRAY( + rec, uint8_t, rec->value.dsize + sizeof(fsp->fh->fd)); + + SMB_ASSERT(new_data != NULL); + + memcpy(new_data, rec->value.dptr, rec->value.dsize); + memcpy(new_data + rec->value.dsize, + &fsp->fh->fd, sizeof(fsp->fh->fd)); + + status = rec->store( + rec, make_tdb_data(new_data, + rec->value.dsize + sizeof(fsp->fh->fd)), 0); + + SMB_ASSERT(NT_STATUS_IS_OK(status)); + + TALLOC_FREE(rec); + + DEBUG(10,("add_fd_to_close_entry: added fd %d file %s\n", + fsp->fh->fd, fsp->fsp_name )); +} + +/**************************************************************************** + Remove all fd entries for a specific dev/inode pair from the tdb. +****************************************************************************/ + +static void delete_close_entries(files_struct *fsp) +{ + struct db_record *rec; + + rec = posix_pending_close_db->fetch_locked( + posix_pending_close_db, talloc_tos(), + fd_array_key_fsp(fsp)); + + SMB_ASSERT(rec != NULL); + rec->delete_rec(rec); + TALLOC_FREE(rec); +} + +/**************************************************************************** + Get the array of POSIX pending close records for an open fsp. Returns number + of entries. +****************************************************************************/ + +static size_t get_posix_pending_close_entries(TALLOC_CTX *mem_ctx, + files_struct *fsp, int **entries) +{ + TDB_DATA dbuf; + int res; + + res = posix_pending_close_db->fetch( + posix_pending_close_db, mem_ctx, fd_array_key_fsp(fsp), + &dbuf); + + SMB_ASSERT(res == 0); + + if (dbuf.dsize == 0) { + *entries = NULL; + return 0; + } + + *entries = (int *)dbuf.dptr; + return (size_t)(dbuf.dsize / sizeof(int)); +} + +/**************************************************************************** + Deal with pending closes needed by POSIX locking support. + Note that posix_locking_close_file() is expected to have been called + to delete all locks on this fsp before this function is called. +****************************************************************************/ + +int fd_close_posix(struct files_struct *fsp) +{ + int saved_errno = 0; + int ret; + int *fd_array = NULL; + size_t count, i; + + if (!lp_locking(fsp->conn->params) || + !lp_posix_locking(fsp->conn->params)) + { + /* + * No locking or POSIX to worry about or we want POSIX semantics + * which will lose all locks on all fd's open on this dev/inode, + * just close. + */ + return close(fsp->fh->fd); + } + + if (get_windows_lock_ref_count(fsp)) { + + /* + * There are outstanding locks on this dev/inode pair on + * other fds. Add our fd to the pending close tdb and set + * fsp->fh->fd to -1. + */ + + add_fd_to_close_entry(fsp); + return 0; + } + + /* + * No outstanding locks. Get the pending close fd's + * from the tdb and close them all. 
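	 * (The full protocol, pieced together from the helpers above:
	 * while get_windows_lock_ref_count() is non-zero a close only
	 * parks the fd via add_fd_to_close_entry(); the last closer -
	 * this path - drains the parked fds, deletes the fd array and
	 * the ref count record, then closes its own fd, remembering the
	 * errno of any failed close so the caller still sees an error.)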
+ */ + + count = get_posix_pending_close_entries(talloc_tos(), fsp, &fd_array); + + if (count) { + DEBUG(10,("fd_close_posix: doing close on %u fd's.\n", + (unsigned int)count)); + + for(i = 0; i < count; i++) { + if (close(fd_array[i]) == -1) { + saved_errno = errno; + } + } + + /* + * Delete all fd's stored in the tdb + * for this dev/inode pair. + */ + + delete_close_entries(fsp); + } + + TALLOC_FREE(fd_array); + + /* Don't need a lock ref count on this dev/ino anymore. */ + delete_windows_lock_ref_count(fsp); + + /* + * Finally close the fd associated with this fsp. + */ + + ret = close(fsp->fh->fd); + + if (ret == 0 && saved_errno != 0) { + errno = saved_errno; + ret = -1; + } + + return ret; +} + +/**************************************************************************** + Next - the functions that deal with the mapping CIFS Windows locks onto + the underlying system POSIX locks. +****************************************************************************/ + +/* + * Structure used when splitting a lock range + * into a POSIX lock range. Doubly linked list. + */ + +struct lock_list { + struct lock_list *next; + struct lock_list *prev; + SMB_OFF_T start; + SMB_OFF_T size; +}; + +/**************************************************************************** + Create a list of lock ranges that don't overlap a given range. Used in calculating + POSIX locks and unlocks. This is a difficult function that requires ASCII art to + understand it :-). +****************************************************************************/ + +static struct lock_list *posix_lock_list(TALLOC_CTX *ctx, + struct lock_list *lhead, + const struct lock_context *lock_ctx, /* Lock context lhead belongs to. */ + files_struct *fsp, + const struct lock_struct *plocks, + int num_locks) +{ + int i; + + /* + * Check the current lock list on this dev/inode pair. + * Quit if the list is deleted. + */ + + DEBUG(10,("posix_lock_list: curr: start=%.0f,size=%.0f\n", + (double)lhead->start, (double)lhead->size )); + + for (i=0; i<num_locks && lhead; i++) { + const struct lock_struct *lock = &plocks[i]; + struct lock_list *l_curr; + + /* Ignore all but read/write locks. */ + if (lock->lock_type != READ_LOCK && lock->lock_type != WRITE_LOCK) { + continue; + } + + /* Ignore locks not owned by this process. */ + if (!procid_equal(&lock->context.pid, &lock_ctx->pid)) { + continue; + } + + /* + * Walk the lock list, checking for overlaps. Note that + * the lock list can expand within this loop if the current + * range being examined needs to be split. + */ + + for (l_curr = lhead; l_curr;) { + + DEBUG(10,("posix_lock_list: lock: fnum=%d: start=%.0f,size=%.0f:type=%s", lock->fnum, + (double)lock->start, (double)lock->size, posix_lock_type_name(lock->lock_type) )); + + if ( (l_curr->start >= (lock->start + lock->size)) || + (lock->start >= (l_curr->start + l_curr->size))) { + + /* No overlap with existing lock - leave this range alone. */ +/********************************************* + +---------+ + | l_curr | + +---------+ + +-------+ + | lock | + +-------+ +OR.... + +---------+ + | l_curr | + +---------+ +**********************************************/ + + DEBUG(10,(" no overlap case.\n" )); + + l_curr = l_curr->next; + + } else if ( (l_curr->start >= lock->start) && + (l_curr->start + l_curr->size <= lock->start + lock->size) ) { + + /* + * This range is completely overlapped by this existing lock range + * and thus should have no effect. Delete it from the list. 
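	 * (Concrete example: if this process already holds a lock over
	 * bytes [8,20) and the range being built up is [10,15), that
	 * range is dropped here - whatever fcntl operation this list is
	 * being assembled for must skip bytes an existing lock still
	 * covers.)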
+ */ +/********************************************* + +---------+ + | l_curr | + +---------+ + +---------------------------+ + | lock | + +---------------------------+ +**********************************************/ + /* Save the next pointer */ + struct lock_list *ul_next = l_curr->next; + + DEBUG(10,(" delete case.\n" )); + + DLIST_REMOVE(lhead, l_curr); + if(lhead == NULL) { + break; /* No more list... */ + } + + l_curr = ul_next; + + } else if ( (l_curr->start >= lock->start) && + (l_curr->start < lock->start + lock->size) && + (l_curr->start + l_curr->size > lock->start + lock->size) ) { + + /* + * This range overlaps the existing lock range at the high end. + * Truncate by moving start to existing range end and reducing size. + */ +/********************************************* + +---------------+ + | l_curr | + +---------------+ + +---------------+ + | lock | + +---------------+ +BECOMES.... + +-------+ + | l_curr| + +-------+ +**********************************************/ + + l_curr->size = (l_curr->start + l_curr->size) - (lock->start + lock->size); + l_curr->start = lock->start + lock->size; + + DEBUG(10,(" truncate high case: start=%.0f,size=%.0f\n", + (double)l_curr->start, (double)l_curr->size )); + + l_curr = l_curr->next; + + } else if ( (l_curr->start < lock->start) && + (l_curr->start + l_curr->size > lock->start) && + (l_curr->start + l_curr->size <= lock->start + lock->size) ) { + + /* + * This range overlaps the existing lock range at the low end. + * Truncate by reducing size. + */ +/********************************************* + +---------------+ + | l_curr | + +---------------+ + +---------------+ + | lock | + +---------------+ +BECOMES.... + +-------+ + | l_curr| + +-------+ +**********************************************/ + + l_curr->size = lock->start - l_curr->start; + + DEBUG(10,(" truncate low case: start=%.0f,size=%.0f\n", + (double)l_curr->start, (double)l_curr->size )); + + l_curr = l_curr->next; + + } else if ( (l_curr->start < lock->start) && + (l_curr->start + l_curr->size > lock->start + lock->size) ) { + /* + * Worst case scenario. Range completely overlaps an existing + * lock range. Split the request into two, push the new (upper) request + * into the dlink list, and continue with the entry after l_new (as we + * know that l_new will not overlap with this lock). + */ +/********************************************* + +---------------------------+ + | l_curr | + +---------------------------+ + +---------+ + | lock | + +---------+ +BECOMES..... + +-------+ +---------+ + | l_curr| | l_new | + +-------+ +---------+ +**********************************************/ + struct lock_list *l_new = TALLOC_P(ctx, struct lock_list); + + if(l_new == NULL) { + DEBUG(0,("posix_lock_list: talloc fail.\n")); + return NULL; /* The talloc_destroy takes care of cleanup. */ + } + + ZERO_STRUCTP(l_new); + l_new->start = lock->start + lock->size; + l_new->size = l_curr->start + l_curr->size - l_new->start; + + /* Truncate the l_curr. */ + l_curr->size = lock->start - l_curr->start; + + DEBUG(10,(" split case: curr: start=%.0f,size=%.0f \ +new: start=%.0f,size=%.0f\n", (double)l_curr->start, (double)l_curr->size, + (double)l_new->start, (double)l_new->size )); + + /* + * Add into the dlink list after the l_curr point - NOT at lhead. + * Note we can't use DLINK_ADD here as this inserts at the head of the given list. + */ + + l_new->prev = l_curr; + l_new->next = l_curr->next; + l_curr->next = l_new; + + /* And move after the link we added. 
*/
+ l_curr = l_new->next;
+
+ } else {
+
+ /*
+ * This logic case should never happen. Ensure this is the
+ * case by forcing an abort.... Remove in production.
+ */
+ char *msg = NULL;
+
+ /* Don't check if alloc succeeds here - we're
+ * forcing a core dump anyway. */
+
+ asprintf(&msg, "logic flaw in cases: l_curr: start = %.0f, size = %.0f : \
+lock: start = %.0f, size = %.0f", (double)l_curr->start, (double)l_curr->size, (double)lock->start, (double)lock->size );
+
+ smb_panic(msg);
+ }
+ } /* end for ( l_curr = lhead; l_curr;) */
+ } /* end for (i=0; i<num_locks && lhead; i++) */
+
+ return lhead;
+}
+
+/****************************************************************************
+ POSIX function to acquire a lock. Returns True if the
+ lock could be granted, False if not.
+****************************************************************************/
+
+bool set_posix_lock_windows_flavour(files_struct *fsp,
+ SMB_BIG_UINT u_offset,
+ SMB_BIG_UINT u_count,
+ enum brl_type lock_type,
+ const struct lock_context *lock_ctx,
+ const struct lock_struct *plocks,
+ int num_locks,
+ int *errno_ret)
+{
+ SMB_OFF_T offset;
+ SMB_OFF_T count;
+ int posix_lock_type = map_posix_lock_type(fsp,lock_type);
+ bool ret = True;
+ size_t lock_count;
+ TALLOC_CTX *l_ctx = NULL;
+ struct lock_list *llist = NULL;
+ struct lock_list *ll = NULL;
+
+ DEBUG(5,("set_posix_lock_windows_flavour: File %s, offset = %.0f, count = %.0f, type = %s\n",
+ fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
+
+ /*
+ * If the requested lock won't fit in the POSIX range, we will
+ * pretend it was successful.
+ */
+
+ if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
+ increment_windows_lock_ref_count(fsp);
+ return True;
+ }
+
+ /*
+ * Windows is very strange. It allows read locks to be overlaid
+ * (even over a write lock), but leaves the write lock in force until the first
+ * unlock. It also reference counts the locks. This means the following sequence :
+ *
+ * process1                                   process2
+ * ------------------------------------------------------------------------
+ * WRITE LOCK : start = 2, len = 10
+ *                                            READ LOCK: start = 0, len = 10 - FAIL
+ * READ LOCK : start = 0, len = 14
+ *                                            READ LOCK: start = 0, len = 10 - FAIL
+ * UNLOCK : start = 2, len = 10
+ *                                            READ LOCK: start = 0, len = 10 - OK
+ *
+ * Under POSIX, process1's first two locks would not be reference counted, but
+ * would leave a single read lock over the 0-14 region.
+ */
+
+ if ((l_ctx = talloc_init("set_posix_lock")) == NULL) {
+ DEBUG(0,("set_posix_lock_windows_flavour: unable to init talloc context.\n"));
+ return False;
+ }
+
+ if ((ll = TALLOC_P(l_ctx, struct lock_list)) == NULL) {
+ DEBUG(0,("set_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
+ talloc_destroy(l_ctx);
+ return False;
+ }
+
+ /*
+ * Create the initial list entry containing the
+ * lock we want to add.
+ */
+
+ ZERO_STRUCTP(ll);
+ ll->start = offset;
+ ll->size = count;
+
+ DLIST_ADD(llist, ll);
+
+ /*
+ * The following call determines whether there are any
+ * overlapping locks held by this process on
+ * fd's open on the same file and splits this list
+ * into a list of lock ranges that do not overlap with existing
+ * POSIX locks.
+ */
+
+ llist = posix_lock_list(l_ctx,
+ llist,
+ lock_ctx, /* Lock context llist belongs to. */
+ fsp,
+ plocks,
+ num_locks);
+
+ /*
+ * Add the POSIX locks on the list of ranges returned. 
+ * As the lock is supposed to be added atomically, we need to
+ * back out all the locks if any one of these calls fails.
+ */
+
+ for (lock_count = 0, ll = llist; ll; ll = ll->next, lock_count++) {
+ offset = ll->start;
+ count = ll->size;
+
+ DEBUG(5,("set_posix_lock_windows_flavour: Real lock: Type = %s: offset = %.0f, count = %.0f\n",
+ posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
+
+ if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
+ *errno_ret = errno;
+ DEBUG(5,("set_posix_lock_windows_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
+ posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
+ ret = False;
+ break;
+ }
+ }
+
+ if (!ret) {
+
+ /*
+ * Back out all the POSIX locks we have on failure.
+ */
+
+ for (ll = llist; lock_count; ll = ll->next, lock_count--) {
+ offset = ll->start;
+ count = ll->size;
+
+ DEBUG(5,("set_posix_lock_windows_flavour: Backing out locks: Type = %s: offset = %.0f, count = %.0f\n",
+ posix_lock_type_name(posix_lock_type), (double)offset, (double)count ));
+
+ posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK);
+ }
+ } else {
+ /* Remember the number of Windows locks we have on this dev/ino pair. */
+ increment_windows_lock_ref_count(fsp);
+ }
+
+ talloc_destroy(l_ctx);
+ return ret;
+}
+
+/****************************************************************************
+ POSIX function to release a lock. Returns True if the
+ lock could be released, False if not.
+****************************************************************************/
+
+bool release_posix_lock_windows_flavour(files_struct *fsp,
+ SMB_BIG_UINT u_offset,
+ SMB_BIG_UINT u_count,
+ enum brl_type deleted_lock_type,
+ const struct lock_context *lock_ctx,
+ const struct lock_struct *plocks,
+ int num_locks)
+{
+ SMB_OFF_T offset;
+ SMB_OFF_T count;
+ bool ret = True;
+ TALLOC_CTX *ul_ctx = NULL;
+ struct lock_list *ulist = NULL;
+ struct lock_list *ul = NULL;
+
+ DEBUG(5,("release_posix_lock_windows_flavour: File %s, offset = %.0f, count = %.0f\n",
+ fsp->fsp_name, (double)u_offset, (double)u_count ));
+
+ /* Remember the number of Windows locks we have on this dev/ino pair. */
+ decrement_windows_lock_ref_count(fsp);
+
+ /*
+ * If the requested lock won't fit in the POSIX range, we will
+ * pretend it was successful.
+ */
+
+ if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
+ return True;
+ }
+
+ if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
+ DEBUG(0,("release_posix_lock_windows_flavour: unable to init talloc context.\n"));
+ return False;
+ }
+
+ if ((ul = TALLOC_P(ul_ctx, struct lock_list)) == NULL) {
+ DEBUG(0,("release_posix_lock_windows_flavour: unable to talloc unlock list.\n"));
+ talloc_destroy(ul_ctx);
+ return False;
+ }
+
+ /*
+ * Create the initial list entry containing the
+ * lock we want to remove.
+ */
+
+ ZERO_STRUCTP(ul);
+ ul->start = offset;
+ ul->size = count;
+
+ DLIST_ADD(ulist, ul);
+
+ /*
+ * The following call determines whether there are any
+ * overlapping locks held by this process on
+ * fd's open on the same file and creates a
+ * list of unlock ranges that will allow
+ * POSIX lock ranges to remain on the file whilst the
+ * unlocks are performed.
+ */
+
+ ulist = posix_lock_list(ul_ctx,
+ ulist,
+ lock_ctx, /* Lock context ulist belongs to. 
*/
+ fsp,
+ plocks,
+ num_locks);
+
+ /*
+ * If there were any overlapped entries (the list has more than one entry,
+ * or the remaining entry's start or size has changed), and the lock type
+ * we just deleted from the upper layer tdb was a write lock, then before
+ * doing the unlock we need to downgrade the POSIX lock to a read lock.
+ * This allows any overlapping read locks to be atomically maintained.
+ */
+
+ if (deleted_lock_type == WRITE_LOCK &&
+ (!ulist || ulist->next != NULL || ulist->start != offset || ulist->size != count)) {
+
+ DEBUG(5,("release_posix_lock_windows_flavour: downgrading lock to READ: offset = %.0f, count = %.0f\n",
+ (double)offset, (double)count ));
+
+ if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_RDLCK)) {
+ DEBUG(0,("release_posix_lock_windows_flavour: downgrade of lock failed with error %s !\n", strerror(errno) ));
+ talloc_destroy(ul_ctx);
+ return False;
+ }
+ }
+
+ /*
+ * Release the POSIX locks on the list of ranges returned.
+ */
+
+ for(; ulist; ulist = ulist->next) {
+ offset = ulist->start;
+ count = ulist->size;
+
+ DEBUG(5,("release_posix_lock_windows_flavour: Real unlock: offset = %.0f, count = %.0f\n",
+ (double)offset, (double)count ));
+
+ if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK)) {
+ ret = False;
+ }
+ }
+
+ talloc_destroy(ul_ctx);
+ return ret;
+}
+
+/****************************************************************************
+ Next - the functions that deal with mapping CIFS POSIX locks onto
+ the underlying system POSIX locks.
+****************************************************************************/
+
+/****************************************************************************
+ POSIX function to acquire a lock. Returns True if the
+ lock could be granted, False if not.
+ As POSIX locks don't stack or conflict (they just overwrite)
+ we can map the requested lock directly onto a system one. We
+ know it doesn't conflict with locks on other contexts as the
+ upper layer would have refused it.
+****************************************************************************/
+
+bool set_posix_lock_posix_flavour(files_struct *fsp,
+ SMB_BIG_UINT u_offset,
+ SMB_BIG_UINT u_count,
+ enum brl_type lock_type,
+ int *errno_ret)
+{
+ SMB_OFF_T offset;
+ SMB_OFF_T count;
+ int posix_lock_type = map_posix_lock_type(fsp,lock_type);
+
+ DEBUG(5,("set_posix_lock_posix_flavour: File %s, offset = %.0f, count = %.0f, type = %s\n",
+ fsp->fsp_name, (double)u_offset, (double)u_count, posix_lock_type_name(lock_type) ));
+
+ /*
+ * If the requested lock won't fit in the POSIX range, we will
+ * pretend it was successful.
+ */
+
+ if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
+ return True;
+ }
+
+ if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,posix_lock_type)) {
+ *errno_ret = errno;
+ DEBUG(5,("set_posix_lock_posix_flavour: Lock fail !: Type = %s: offset = %.0f, count = %.0f. Errno = %s\n",
+ posix_lock_type_name(posix_lock_type), (double)offset, (double)count, strerror(errno) ));
+ return False;
+ }
+ return True;
+}
+
+/****************************************************************************
+ POSIX function to release a lock. Returns True if the
+ lock could be released, False if not.
+ The upper layer passes us the complete lock state, i.e. what the lock
+ state should be after the unlock has been done. We punch holes in the
+ unlock range wherever locks owned by this process
+ have a different lock context. 
+****************************************************************************/
+
+bool release_posix_lock_posix_flavour(files_struct *fsp,
+ SMB_BIG_UINT u_offset,
+ SMB_BIG_UINT u_count,
+ const struct lock_context *lock_ctx,
+ const struct lock_struct *plocks,
+ int num_locks)
+{
+ bool ret = True;
+ SMB_OFF_T offset;
+ SMB_OFF_T count;
+ TALLOC_CTX *ul_ctx = NULL;
+ struct lock_list *ulist = NULL;
+ struct lock_list *ul = NULL;
+
+ DEBUG(5,("release_posix_lock_posix_flavour: File %s, offset = %.0f, count = %.0f\n",
+ fsp->fsp_name, (double)u_offset, (double)u_count ));
+
+ /*
+ * If the requested lock won't fit in the POSIX range, we will
+ * pretend it was successful.
+ */
+
+ if(!posix_lock_in_range(&offset, &count, u_offset, u_count)) {
+ return True;
+ }
+
+ if ((ul_ctx = talloc_init("release_posix_lock")) == NULL) {
+ DEBUG(0,("release_posix_lock_posix_flavour: unable to init talloc context.\n"));
+ return False;
+ }
+
+ if ((ul = TALLOC_P(ul_ctx, struct lock_list)) == NULL) {
+ DEBUG(0,("release_posix_lock_posix_flavour: unable to talloc unlock list.\n"));
+ talloc_destroy(ul_ctx);
+ return False;
+ }
+
+ /*
+ * Create the initial list entry containing the
+ * lock we want to remove.
+ */
+
+ ZERO_STRUCTP(ul);
+ ul->start = offset;
+ ul->size = count;
+
+ DLIST_ADD(ulist, ul);
+
+ /*
+ * Walk the given array creating a linked list
+ * of unlock requests.
+ */
+
+ ulist = posix_lock_list(ul_ctx,
+ ulist,
+ lock_ctx, /* Lock context ulist belongs to. */
+ fsp,
+ plocks,
+ num_locks);
+
+ /*
+ * Release the POSIX locks on the list of ranges returned.
+ */
+
+ for(; ulist; ulist = ulist->next) {
+ offset = ulist->start;
+ count = ulist->size;
+
+ DEBUG(5,("release_posix_lock_posix_flavour: Real unlock: offset = %.0f, count = %.0f\n",
+ (double)offset, (double)count ));
+
+ if (!posix_fcntl_lock(fsp,SMB_F_SETLK,offset,count,F_UNLCK)) {
+ ret = False;
+ }
+ }
+
+ talloc_destroy(ul_ctx);
+ return ret;
+}
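
The five overlap cases that posix_lock_list() walks through above are easier to follow in isolation. Below is a minimal, self-contained sketch - illustrative only, not part of the diff - that applies the same case analysis to plain off_t ranges. The names struct range and split_out are hypothetical; a singly linked list stands in for Samba's lock_list, and malloc/free stand in for talloc and the DLIST macros.

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>

struct range {
	struct range *next;
	off_t start;
	off_t size;
};

/* Remove the span [lock_start, lock_start + lock_size) from every range
 * in *head, splitting ranges where necessary. Returns 0, or -1 on ENOMEM. */
static int split_out(struct range **head, off_t lock_start, off_t lock_size)
{
	struct range **pp = head;

	while (*pp != NULL) {
		struct range *r = *pp;
		off_t r_end = r->start + r->size;
		off_t l_end = lock_start + lock_size;

		if (r->start >= l_end || lock_start >= r_end) {
			/* Case 1: no overlap - leave this range alone. */
			pp = &r->next;
		} else if (r->start >= lock_start && r_end <= l_end) {
			/* Case 2: completely covered - delete this range. */
			*pp = r->next;
			free(r);
		} else if (r->start >= lock_start) {
			/* Case 3: head covered - keep only the tail. */
			r->size = r_end - l_end;
			r->start = l_end;
			pp = &r->next;
		} else if (r_end <= l_end) {
			/* Case 4: tail covered - keep only the head. */
			r->size = lock_start - r->start;
			pp = &r->next;
		} else {
			/* Case 5: lock strictly inside - split into two. */
			struct range *upper = calloc(1, sizeof(*upper));
			if (upper == NULL) {
				return -1;
			}
			upper->start = l_end;
			upper->size = r_end - l_end;
			upper->next = r->next;
			r->size = lock_start - r->start;
			r->next = upper;
			pp = &upper->next; /* upper cannot overlap this lock again */
		}
	}
	return 0;
}

int main(void)
{
	/* Start with the single range [0, 100) and punch out [20, 30). */
	struct range *head = calloc(1, sizeof(*head));
	struct range *r;

	if (head == NULL) {
		return 1;
	}
	head->size = 100;

	if (split_out(&head, 20, 10) == 0) {
		for (r = head; r != NULL; r = r->next) {
			printf("range: start=%lld size=%lld\n",
			       (long long)r->start, (long long)r->size);
		}
	}
	return 0;
}

Running it shows the split case in action: the initial range [0, 100) becomes [0, 20) and [30, 100), matching the "BECOMES" diagram above.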
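
The downgrade performed in release_posix_lock_windows_flavour() above can also be demonstrated with raw fcntl(). The following sketch - again illustrative only, with a hypothetical temp-file name and helper set_lock() - holds a write lock, downgrades it to a read lock with a single F_SETLK call, and then shows from a child process (a different lock owner as far as POSIX is concerned) that read access is now granted where the original write lock would have refused it.

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

static int set_lock(int fd, short type, off_t start, off_t len)
{
	struct flock fl;

	memset(&fl, 0, sizeof(fl));
	fl.l_type = type;
	fl.l_whence = SEEK_SET;
	fl.l_start = start;
	fl.l_len = len;
	return fcntl(fd, F_SETLK, &fl);
}

int main(void)
{
	char path[] = "/tmp/posix-downgrade-XXXXXX";
	int fd = mkstemp(path);
	pid_t pid;

	if (fd == -1) {
		perror("mkstemp");
		return 1;
	}
	unlink(path);

	/* The POSIX write lock backing a Windows write lock on [2, 12). */
	set_lock(fd, F_WRLCK, 2, 10);

	/* The Windows write lock is deleted, but a Windows read lock still
	 * covers the region, so downgrade instead of unlocking. POSIX locks
	 * don't stack: this one call atomically replaces the write lock
	 * with a read lock over the same range. */
	set_lock(fd, F_RDLCK, 2, 10);

	/* A child process is a different lock owner, so its attempt is
	 * checked against our lock: a read lock is now granted, where it
	 * would have failed against the original write lock. */
	pid = fork();
	if (pid == 0) {
		int ok = (set_lock(fd, F_RDLCK, 2, 10) == 0);
		printf("child read lock on [2, 12): %s\n",
		       ok ? "granted" : "denied");
		_exit(0);
	}
	waitpid(pid, NULL, 0);
	return 0;
}

Doing the downgrade as one F_SETLK rather than an unlock followed by a re-lock matters: there is never a window in which another process could slip a write lock onto the range.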