summaryrefslogtreecommitdiff
path: root/source3
diff options
context:
space:
mode:
Diffstat (limited to 'source3')
-rw-r--r--source3/include/proto.h10
-rw-r--r--source3/include/smb.h14
-rw-r--r--source3/lib/dummysmbd.c19
-rw-r--r--source3/locking/brlock.c85
-rw-r--r--source3/smbd/aio.c4
-rw-r--r--source3/smbd/fileio.c4
-rw-r--r--source3/smbd/oplock.c25
-rw-r--r--source3/smbd/oplock_irix.c6
-rw-r--r--source3/smbd/oplock_linux.c6
-rw-r--r--source3/smbd/reply.c11
-rw-r--r--source3/smbd/vfs.c33
11 files changed, 178 insertions, 39 deletions
diff --git a/source3/include/proto.h b/source3/include/proto.h
index 1a1f8bef69..ceea97bf56 100644
--- a/source3/include/proto.h
+++ b/source3/include/proto.h
@@ -6899,7 +6899,15 @@ void release_file_oplock(files_struct *fsp);
bool remove_oplock(files_struct *fsp);
bool downgrade_oplock(files_struct *fsp);
void reply_to_oplock_break_requests(files_struct *fsp);
-void release_level_2_oplocks_on_change(files_struct *fsp);
+void process_oplock_async_level2_break_message(struct messaging_context *msg_ctx,
+ void *private_data,
+ uint32_t msg_type,
+ struct server_id src,
+ DATA_BLOB *data);
+void contend_level2_oplocks_begin(files_struct *fsp,
+ enum level2_contention_type type);
+void contend_level2_oplocks_end(files_struct *fsp,
+ enum level2_contention_type type);
void share_mode_entry_to_message(char *msg, const struct share_mode_entry *e);
void message_to_share_mode_entry(struct share_mode_entry *e, char *msg);
bool init_oplocks(struct messaging_context *msg_ctx);
diff --git a/source3/include/smb.h b/source3/include/smb.h
index 96cd3b7d85..557489f211 100644
--- a/source3/include/smb.h
+++ b/source3/include/smb.h
@@ -1695,6 +1695,16 @@ struct kernel_oplocks {
void *private_data;
};
+enum level2_contention_type {
+ LEVEL2_CONTEND_ALLOC_SHRINK,
+ LEVEL2_CONTEND_ALLOC_GROW,
+ LEVEL2_CONTEND_SET_FILE_LEN,
+ LEVEL2_CONTEND_FILL_SPARSE,
+ LEVEL2_CONTEND_WRITE,
+ LEVEL2_CONTEND_WINDOWS_BRL,
+ LEVEL2_CONTEND_POSIX_BRL
+};
+
/* if a kernel does support oplocks then a structure of the following
typee is used to describe how to interact with the kernel */
struct kernel_oplocks_ops {
@@ -1702,6 +1712,10 @@ struct kernel_oplocks_ops {
files_struct *fsp, int oplock_type);
void (*release_oplock)(struct kernel_oplocks *ctx,
files_struct *fsp, int oplock_type);
+ void (*contend_level2_oplocks_begin)(files_struct *fsp,
+ enum level2_contention_type type);
+ void (*contend_level2_oplocks_end)(files_struct *fsp,
+ enum level2_contention_type type);
};
#include "smb_macros.h"
diff --git a/source3/lib/dummysmbd.c b/source3/lib/dummysmbd.c
index 5c624bdebf..a41e6dc033 100644
--- a/source3/lib/dummysmbd.c
+++ b/source3/lib/dummysmbd.c
@@ -66,3 +66,22 @@ struct messaging_context *smbd_messaging_context(void)
{
return NULL;
}
+
+/**
+ * The following two functions need to be called from inside the low-level BRL
+ * code for oplocks correctness in smbd. Since other utility binaries also
+ * link in some of the brl code directly, these dummy functions are necessary
+ * to avoid needing to link in the oplocks code and its dependencies to all of
+ * the utility binaries.
+ */
+void contend_level2_oplocks_begin(files_struct *fsp,
+ enum level2_contention_type type)
+{
+ return;
+}
+
+void contend_level2_oplocks_end(files_struct *fsp,
+ enum level2_contention_type type)
+{
+ return;
+}
diff --git a/source3/locking/brlock.c b/source3/locking/brlock.c
index 032aaa56b6..76496d11fb 100644
--- a/source3/locking/brlock.c
+++ b/source3/locking/brlock.c
@@ -311,6 +311,7 @@ static NTSTATUS brl_lock_windows(struct byte_range_lock *br_lck,
unsigned int i;
files_struct *fsp = br_lck->fsp;
struct lock_struct *locks = br_lck->lock_data;
+ NTSTATUS status;
for (i=0; i < br_lck->num_locks; i++) {
/* Do any Windows or POSIX locks conflict ? */
@@ -327,6 +328,10 @@ static NTSTATUS brl_lock_windows(struct byte_range_lock *br_lck,
#endif
}
+ if (!IS_PENDING_LOCK(plock->lock_type)) {
+ contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_WINDOWS_BRL);
+ }
+
/* We can get the Windows lock, now see if it needs to
be mapped into a lower level POSIX one, and if so can
we get it ? */
@@ -346,9 +351,11 @@ static NTSTATUS brl_lock_windows(struct byte_range_lock *br_lck,
plock->context.smbpid = 0xFFFFFFFF;
if (errno_ret == EACCES || errno_ret == EAGAIN) {
- return NT_STATUS_FILE_LOCK_CONFLICT;
+ status = NT_STATUS_FILE_LOCK_CONFLICT;
+ goto fail;
} else {
- return map_nt_error_from_unix(errno);
+ status = map_nt_error_from_unix(errno);
+ goto fail;
}
}
}
@@ -356,7 +363,8 @@ static NTSTATUS brl_lock_windows(struct byte_range_lock *br_lck,
/* no conflicts - add it to the list of locks */
locks = (struct lock_struct *)SMB_REALLOC(locks, (br_lck->num_locks + 1) * sizeof(*locks));
if (!locks) {
- return NT_STATUS_NO_MEMORY;
+ status = NT_STATUS_NO_MEMORY;
+ goto fail;
}
memcpy(&locks[br_lck->num_locks], plock, sizeof(struct lock_struct));
@@ -365,6 +373,11 @@ static NTSTATUS brl_lock_windows(struct byte_range_lock *br_lck,
br_lck->modified = True;
return NT_STATUS_OK;
+ fail:
+ if (!IS_PENDING_LOCK(plock->lock_type)) {
+ contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_WINDOWS_BRL);
+ }
+ return status;
}
/****************************************************************************
@@ -587,11 +600,13 @@ static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx,
struct byte_range_lock *br_lck,
struct lock_struct *plock)
{
- unsigned int i, count;
+ unsigned int i, count, posix_count;
struct lock_struct *locks = br_lck->lock_data;
struct lock_struct *tp;
bool lock_was_added = False;
bool signal_pending_read = False;
+ bool break_oplocks = false;
+ NTSTATUS status;
/* No zero-zero locks for POSIX. */
if (plock->start == 0 && plock->size == 0) {
@@ -613,7 +628,7 @@ static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx,
return NT_STATUS_NO_MEMORY;
}
- count = 0;
+ count = posix_count = 0;
for (i=0; i < br_lck->num_locks; i++) {
struct lock_struct *curr_lock = &locks[i];
@@ -637,6 +652,8 @@ static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx,
memcpy(&tp[count], curr_lock, sizeof(struct lock_struct));
count++;
} else {
+ unsigned int tmp_count;
+
/* POSIX conflict semantics are different. */
if (brl_conflict_posix(curr_lock, plock)) {
/* Can't block ourselves with POSIX locks. */
@@ -648,10 +665,27 @@ static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx,
}
/* Work out overlaps. */
- count += brlock_posix_split_merge(&tp[count], curr_lock, plock, &lock_was_added);
+ tmp_count += brlock_posix_split_merge(&tp[count], curr_lock, plock, &lock_was_added);
+ posix_count += tmp_count;
+ count += tmp_count;
}
}
+ /*
+ * Break oplocks while we hold a brl. Since lock() and unlock() calls
+ * are not symetric with POSIX semantics, we cannot guarantee our
+ * contend_level2_oplocks_begin/end calls will be acquired and
+ * released one-for-one as with Windows semantics. Therefore we only
+ * call contend_level2_oplocks_begin if this is the first POSIX brl on
+ * the file.
+ */
+ break_oplocks = (!IS_PENDING_LOCK(plock->lock_type) &&
+ posix_count == 0);
+ if (break_oplocks) {
+ contend_level2_oplocks_begin(br_lck->fsp,
+ LEVEL2_CONTEND_POSIX_BRL);
+ }
+
if (!lock_was_added) {
memcpy(&tp[count], plock, sizeof(struct lock_struct));
count++;
@@ -679,10 +713,12 @@ static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx,
if (errno_ret == EACCES || errno_ret == EAGAIN) {
SAFE_FREE(tp);
- return NT_STATUS_FILE_LOCK_CONFLICT;
+ status = NT_STATUS_FILE_LOCK_CONFLICT;
+ goto fail;
} else {
SAFE_FREE(tp);
- return map_nt_error_from_unix(errno);
+ status = map_nt_error_from_unix(errno);
+ goto fail;
}
}
}
@@ -690,7 +726,8 @@ static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx,
/* Realloc so we don't leak entries per lock call. */
tp = (struct lock_struct *)SMB_REALLOC(tp, count * sizeof(*locks));
if (!tp) {
- return NT_STATUS_NO_MEMORY;
+ status = NT_STATUS_NO_MEMORY;
+ goto fail;
}
br_lck->num_locks = count;
SAFE_FREE(br_lck->lock_data);
@@ -723,6 +760,12 @@ static NTSTATUS brl_lock_posix(struct messaging_context *msg_ctx,
}
return NT_STATUS_OK;
+ fail:
+ if (break_oplocks) {
+ contend_level2_oplocks_end(br_lck->fsp,
+ LEVEL2_CONTEND_POSIX_BRL);
+ }
+ return status;
}
/****************************************************************************
@@ -881,6 +924,7 @@ static bool brl_unlock_windows(struct messaging_context *msg_ctx,
}
}
+ contend_level2_oplocks_end(br_lck->fsp, LEVEL2_CONTEND_WINDOWS_BRL);
return True;
}
@@ -892,7 +936,7 @@ static bool brl_unlock_posix(struct messaging_context *msg_ctx,
struct byte_range_lock *br_lck,
const struct lock_struct *plock)
{
- unsigned int i, j, count;
+ unsigned int i, j, count, posix_count;
struct lock_struct *tp;
struct lock_struct *locks = br_lck->lock_data;
bool overlap_found = False;
@@ -919,7 +963,7 @@ static bool brl_unlock_posix(struct messaging_context *msg_ctx,
return False;
}
- count = 0;
+ count = posix_count = 0;
for (i = 0; i < br_lck->num_locks; i++) {
struct lock_struct *lock = &locks[i];
struct lock_struct tmp_lock[3];
@@ -946,6 +990,7 @@ static bool brl_unlock_posix(struct messaging_context *msg_ctx,
/* No change in this lock. */
memcpy(&tp[count], &tmp_lock[0], sizeof(struct lock_struct));
count++;
+ posix_count++;
} else {
SMB_ASSERT(tmp_lock[0].lock_type == UNLOCK_LOCK);
overlap_found = True;
@@ -970,6 +1015,7 @@ static bool brl_unlock_posix(struct messaging_context *msg_ctx,
}
}
count++;
+ posix_count++;
continue;
} else {
/* tmp_count == 3 - (we split a lock range in two). */
@@ -979,8 +1025,10 @@ static bool brl_unlock_posix(struct messaging_context *msg_ctx,
memcpy(&tp[count], &tmp_lock[0], sizeof(struct lock_struct));
count++;
+ posix_count++;
memcpy(&tp[count], &tmp_lock[2], sizeof(struct lock_struct));
count++;
+ posix_count++;
overlap_found = True;
/* Optimisation... */
/* We know we're finished here as we can't overlap any
@@ -1024,6 +1072,11 @@ static bool brl_unlock_posix(struct messaging_context *msg_ctx,
tp = NULL;
}
+ if (posix_count == 0) {
+ contend_level2_oplocks_end(br_lck->fsp,
+ LEVEL2_CONTEND_POSIX_BRL);
+ }
+
br_lck->num_locks = count;
SAFE_FREE(br_lck->lock_data);
locks = tp;
@@ -1276,6 +1329,7 @@ void brl_close_fnum(struct messaging_context *msg_ctx,
struct lock_struct *locks = br_lck->lock_data;
struct server_id pid = procid_self();
bool unlock_individually = False;
+ bool posix_level2_contention_ended = false;
if(lp_posix_locking(fsp->conn->params)) {
@@ -1346,8 +1400,17 @@ void brl_close_fnum(struct messaging_context *msg_ctx,
if ((lock->lock_flav == WINDOWS_LOCK) && (lock->fnum == fnum)) {
del_this_lock = True;
num_deleted_windows_locks++;
+ contend_level2_oplocks_end(br_lck->fsp,
+ LEVEL2_CONTEND_WINDOWS_BRL);
} else if (lock->lock_flav == POSIX_LOCK) {
del_this_lock = True;
+
+ /* Only end level2 contention once for posix */
+ if (!posix_level2_contention_ended) {
+ posix_level2_contention_ended = true;
+ contend_level2_oplocks_end(br_lck->fsp,
+ LEVEL2_CONTEND_POSIX_BRL);
+ }
}
}
diff --git a/source3/smbd/aio.c b/source3/smbd/aio.c
index 75ce07d41a..6b19e098e5 100644
--- a/source3/smbd/aio.c
+++ b/source3/smbd/aio.c
@@ -291,7 +291,9 @@ bool schedule_aio_write_and_X(connection_struct *conn,
aio_ex->req = talloc_move(aio_ex, &req);
- release_level_2_oplocks_on_change(fsp);
+ /* This should actually be improved to span the write. */
+ contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_WRITE);
+ contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_WRITE);
if (!write_through && !lp_syncalways(SNUM(fsp->conn))
&& fsp->aio_write_behind) {
diff --git a/source3/smbd/fileio.c b/source3/smbd/fileio.c
index 3e3f0943b3..a9a97a2d14 100644
--- a/source3/smbd/fileio.c
+++ b/source3/smbd/fileio.c
@@ -318,7 +318,9 @@ ssize_t write_file(struct smb_request *req,
* the shared memory area whilst doing this.
*/
- release_level_2_oplocks_on_change(fsp);
+ /* This should actually be improved to span the write. */
+ contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_WRITE);
+ contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_WRITE);
#ifdef WITH_PROFILE
if (profile_p && profile_p->writecache_total_writes % 500 == 0) {
diff --git a/source3/smbd/oplock.c b/source3/smbd/oplock.c
index 0945ac6677..4c84fd41ed 100644
--- a/source3/smbd/oplock.c
+++ b/source3/smbd/oplock.c
@@ -329,7 +329,7 @@ static void add_oplock_timeout_handler(files_struct *fsp)
the client for LEVEL2.
*******************************************************************/
-static void process_oplock_async_level2_break_message(struct messaging_context *msg_ctx,
+void process_oplock_async_level2_break_message(struct messaging_context *msg_ctx,
void *private_data,
uint32_t msg_type,
struct server_id src,
@@ -699,7 +699,8 @@ static void process_open_retry_message(struct messaging_context *msg_ctx,
none.
****************************************************************************/
-void release_level_2_oplocks_on_change(files_struct *fsp)
+static void contend_level2_oplocks_begin_default(files_struct *fsp,
+ enum level2_contention_type type)
{
int i;
struct share_mode_lock *lck;
@@ -799,6 +800,26 @@ void release_level_2_oplocks_on_change(files_struct *fsp)
TALLOC_FREE(lck);
}
+void contend_level2_oplocks_begin(files_struct *fsp,
+ enum level2_contention_type type)
+{
+ if (koplocks && koplocks->ops->contend_level2_oplocks_begin) {
+ koplocks->ops->contend_level2_oplocks_begin(fsp, type);
+ return;
+ }
+
+ contend_level2_oplocks_begin_default(fsp, type);
+}
+
+void contend_level2_oplocks_end(files_struct *fsp,
+ enum level2_contention_type type)
+{
+ /* Only kernel oplocks implement this so far */
+ if (koplocks && koplocks->ops->contend_level2_oplocks_end) {
+ koplocks->ops->contend_level2_oplocks_end(fsp, type);
+ }
+}
+
/****************************************************************************
Linearize a share mode entry struct to an internal oplock break message.
****************************************************************************/
diff --git a/source3/smbd/oplock_irix.c b/source3/smbd/oplock_irix.c
index 23b2fa9081..bbc9132a08 100644
--- a/source3/smbd/oplock_irix.c
+++ b/source3/smbd/oplock_irix.c
@@ -270,8 +270,10 @@ static void irix_oplocks_read_fde_handler(struct event_context *ev,
****************************************************************************/
static const struct kernel_oplocks_ops irix_koplocks = {
- .set_oplock = irix_set_kernel_oplock,
- .release_oplock = irix_release_kernel_oplock,
+ .set_oplock = irix_set_kernel_oplock,
+ .release_oplock = irix_release_kernel_oplock,
+ .contend_level2_oplocks_begin = NULL,
+ .contend_level2_oplocks_end = NULL,
};
struct kernel_oplocks *irix_init_kernel_oplocks(TALLOC_CTX *mem_ctx)
diff --git a/source3/smbd/oplock_linux.c b/source3/smbd/oplock_linux.c
index 5840ff0851..273fbfdc01 100644
--- a/source3/smbd/oplock_linux.c
+++ b/source3/smbd/oplock_linux.c
@@ -179,8 +179,10 @@ static bool linux_oplocks_available(void)
****************************************************************************/
static const struct kernel_oplocks_ops linux_koplocks = {
- .set_oplock = linux_set_kernel_oplock,
- .release_oplock = linux_release_kernel_oplock,
+ .set_oplock = linux_set_kernel_oplock,
+ .release_oplock = linux_release_kernel_oplock,
+ .contend_level2_oplocks_begin = NULL,
+ .contend_level2_oplocks_end = NULL,
};
struct kernel_oplocks *linux_init_kernel_oplocks(TALLOC_CTX *mem_ctx)
diff --git a/source3/smbd/reply.c b/source3/smbd/reply.c
index 60e2e5dc7a..151f9d0827 100644
--- a/source3/smbd/reply.c
+++ b/source3/smbd/reply.c
@@ -3010,8 +3010,6 @@ void reply_lockread(struct smb_request *req)
return;
}
- release_level_2_oplocks_on_change(fsp);
-
numtoread = SVAL(req->vwv+1, 0);
startpos = IVAL_TO_SMB_OFF_T(req->vwv+2, 0);
@@ -4497,8 +4495,6 @@ void reply_lock(struct smb_request *req)
return;
}
- release_level_2_oplocks_on_change(fsp);
-
count = (uint64_t)IVAL(req->vwv+1, 0);
offset = (uint64_t)IVAL(req->vwv+3, 0);
@@ -6870,13 +6866,6 @@ void reply_lockingX(struct smb_request *req)
}
}
- /*
- * We do this check *after* we have checked this is not a oplock break
- * response message. JRA.
- */
-
- release_level_2_oplocks_on_change(fsp);
-
if (req->buflen <
(num_ulocks + num_locks) * (large_file_format ? 20 : 10)) {
reply_nterror(req, NT_STATUS_INVALID_PARAMETER);
diff --git a/source3/smbd/vfs.c b/source3/smbd/vfs.c
index 515e557f67..8d82ca550c 100644
--- a/source3/smbd/vfs.c
+++ b/source3/smbd/vfs.c
@@ -514,8 +514,6 @@ int vfs_allocate_file_space(files_struct *fsp, uint64_t len)
uint64_t space_avail;
uint64_t bsize,dfree,dsize;
- release_level_2_oplocks_on_change(fsp);
-
/*
* Actually try and commit the space on disk....
*/
@@ -541,15 +539,23 @@ int vfs_allocate_file_space(files_struct *fsp, uint64_t len)
DEBUG(10,("vfs_allocate_file_space: file %s, shrink. Current size %.0f\n",
fsp->fsp_name, (double)st.st_size ));
+ contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_ALLOC_SHRINK);
+
flush_write_cache(fsp, SIZECHANGE_FLUSH);
if ((ret = SMB_VFS_FTRUNCATE(fsp, (SMB_OFF_T)len)) != -1) {
set_filelen_write_cache(fsp, len);
}
+
+ contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_ALLOC_SHRINK);
+
return ret;
}
/* Grow - we need to test if we have enough space. */
+ contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_ALLOC_GROW);
+ contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_ALLOC_GROW);
+
if (!lp_strict_allocate(SNUM(fsp->conn)))
return 0;
@@ -581,7 +587,8 @@ int vfs_set_filelen(files_struct *fsp, SMB_OFF_T len)
{
int ret;
- release_level_2_oplocks_on_change(fsp);
+ contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_SET_FILE_LEN);
+
DEBUG(10,("vfs_set_filelen: ftruncate %s to len %.0f\n", fsp->fsp_name, (double)len));
flush_write_cache(fsp, SIZECHANGE_FLUSH);
if ((ret = SMB_VFS_FTRUNCATE(fsp, len)) != -1) {
@@ -592,6 +599,8 @@ int vfs_set_filelen(files_struct *fsp, SMB_OFF_T len)
fsp->fsp_name);
}
+ contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_SET_FILE_LEN);
+
return ret;
}
@@ -613,7 +622,6 @@ int vfs_fill_sparse(files_struct *fsp, SMB_OFF_T len)
size_t num_to_write;
ssize_t pwrite_ret;
- release_level_2_oplocks_on_change(fsp);
ret = SMB_VFS_FSTAT(fsp, &st);
if (ret == -1) {
return ret;
@@ -626,13 +634,16 @@ int vfs_fill_sparse(files_struct *fsp, SMB_OFF_T len)
DEBUG(10,("vfs_fill_sparse: write zeros in file %s from len %.0f to len %.0f (%.0f bytes)\n",
fsp->fsp_name, (double)st.st_size, (double)len, (double)(len - st.st_size)));
+ contend_level2_oplocks_begin(fsp, LEVEL2_CONTEND_FILL_SPARSE);
+
flush_write_cache(fsp, SIZECHANGE_FLUSH);
if (!sparse_buf) {
sparse_buf = SMB_CALLOC_ARRAY(char, SPARSE_BUF_WRITE_SIZE);
if (!sparse_buf) {
errno = ENOMEM;
- return -1;
+ ret = -1;
+ goto out;
}
}
@@ -647,17 +658,23 @@ int vfs_fill_sparse(files_struct *fsp, SMB_OFF_T len)
if (pwrite_ret == -1) {
DEBUG(10,("vfs_fill_sparse: SMB_VFS_PWRITE for file %s failed with error %s\n",
fsp->fsp_name, strerror(errno) ));
- return -1;
+ ret = -1;
+ goto out;
}
if (pwrite_ret == 0) {
- return 0;
+ ret = 0;
+ goto out;
}
total += pwrite_ret;
}
set_filelen_write_cache(fsp, len);
- return 0;
+
+ ret = 0;
+ out:
+ contend_level2_oplocks_end(fsp, LEVEL2_CONTEND_FILL_SPARSE);
+ return ret;
}
/****************************************************************************