From dde07058075d357cfdc63624c8dcaa67ebd40add Mon Sep 17 00:00:00 2001 From: Andrew Tridgell Date: Wed, 3 Nov 2004 10:09:48 +0000 Subject: r3507: - added deferred replies on sharing violation in pvfs open. The deferred reply is short-circuited immediately when the file is closed by another user, allowing it to be opened by the waiting user. - added a sane set of timeval manipulation routines - converted all the events code and code that uses it to use struct timeval instead of time_t, which allows for microsecond resolution instead of 1 second resolution. This was needed for doing the pvfs deferred open code, and is why the patch is so big. (This used to be commit 0d51511d408d91eb5f68a35e980e0875299b1831) --- source4/ntvfs/cifs/vfs_cifs.c | 5 +- source4/ntvfs/common/brlock.c | 7 +- source4/ntvfs/common/opendb.c | 122 +++++++++++++++++++ source4/ntvfs/posix/pvfs_lock.c | 7 +- source4/ntvfs/posix/pvfs_open.c | 254 +++++++++++++++++++++++++++++++--------- source4/ntvfs/posix/pvfs_wait.c | 12 +- source4/ntvfs/posix/vfs_posix.h | 2 + 7 files changed, 338 insertions(+), 71 deletions(-) (limited to 'source4/ntvfs') diff --git a/source4/ntvfs/cifs/vfs_cifs.c b/source4/ntvfs/cifs/vfs_cifs.c index 4fd5650f9b..3e9899cb8c 100644 --- a/source4/ntvfs/cifs/vfs_cifs.c +++ b/source4/ntvfs/cifs/vfs_cifs.c @@ -78,7 +78,8 @@ static BOOL oplock_handler(struct smbcli_transport *transport, uint16_t tid, uin /* a handler for read events on a connection to a backend server */ -static void cifs_socket_handler(struct event_context *ev, struct fd_event *fde, time_t t, uint16_t flags) +static void cifs_socket_handler(struct event_context *ev, struct fd_event *fde, + struct timeval t, uint16_t flags) { struct cvfs_private *private = fde->private; struct smbsrv_tcon *tcon = private->tcon; @@ -149,7 +150,7 @@ static NTSTATUS cvfs_connect(struct ntvfs_module_context *ntvfs, /* we need to receive oplock break requests from the server */ smbcli_oplock_handler(private->transport, oplock_handler, 
private); - smbcli_transport_idle_handler(private->transport, idle_func, 1, private); + smbcli_transport_idle_handler(private->transport, idle_func, 50000, private); private->transport->event.fde->handler = cifs_socket_handler; private->transport->event.fde->private = private; diff --git a/source4/ntvfs/common/brlock.c b/source4/ntvfs/common/brlock.c index d1df0413ce..6fae7c6e4c 100644 --- a/source4/ntvfs/common/brlock.c +++ b/source4/ntvfs/common/brlock.c @@ -333,17 +333,14 @@ static void brl_notify_unlock(struct brl_context *brl, for (i=0;i<count;i++) { if (locks[i].lock_type >= PENDING_READ_LOCK && brl_overlap(&locks[i], removed_lock)) { - DATA_BLOB data; - if (last_notice != -1 && brl_overlap(&locks[i], &locks[last_notice])) { continue; } if (locks[i].lock_type == PENDING_WRITE_LOCK) { last_notice = i; } - data.data = (void *)&locks[i].notify_ptr; - data.length = sizeof(void *); - messaging_send(brl->messaging_ctx, locks[i].context.server, MSG_BRL_RETRY, &data); + messaging_send_ptr(brl->messaging_ctx, locks[i].context.server, + MSG_BRL_RETRY, locks[i].notify_ptr); } } } diff --git a/source4/ntvfs/common/opendb.c b/source4/ntvfs/common/opendb.c index 5dc68e5382..39d4f37ec2 100644 --- a/source4/ntvfs/common/opendb.c +++ b/source4/ntvfs/common/opendb.c @@ -39,6 +39,7 @@ */ #include "includes.h" +#include "messages.h" struct odb_context { struct tdb_wrap *w; @@ -58,6 +59,8 @@ struct odb_entry { uint32_t share_access; uint32_t create_options; uint32_t access_mask; + void *notify_ptr; + BOOL pending; }; @@ -152,6 +155,8 @@ static BOOL share_conflict(struct odb_entry *e1, struct odb_entry *e2) { #define CHECK_MASK(am, sa, right, share) if (((am) & (right)) && !((sa) & (share))) return True + if (e1->pending || e2->pending) return False; + /* if either open involves no read.write or delete access then it can't conflict */ if (!(e1->access_mask & (SA_RIGHT_FILE_WRITE_APPEND | @@ -219,6 +224,8 @@ NTSTATUS odb_open_file(struct odb_lock *lck, uint16_t fnum, e.share_access = share_access; e.create_options = 
create_options; e.access_mask = access_mask; + e.notify_ptr = NULL; + e.pending = False; /* check the existing file opens to see if they conflict */ @@ -254,6 +261,56 @@ NTSTATUS odb_open_file(struct odb_lock *lck, uint16_t fnum, } +/* + register a pending open file in the open files database +*/ +NTSTATUS odb_open_file_pending(struct odb_lock *lck, void *private) +{ + struct odb_context *odb = lck->odb; + TDB_DATA dbuf; + struct odb_entry e; + char *tp; + struct odb_entry *elist; + int count; + + dbuf = tdb_fetch(odb->w->tdb, lck->key); + + e.server = odb->server; + e.tid = odb->tid; + e.fnum = 0; + e.share_access = 0; + e.create_options = 0; + e.access_mask = 0; + e.notify_ptr = private; + e.pending = True; + + /* check the existing file opens to see if they + conflict */ + elist = (struct odb_entry *)dbuf.dptr; + count = dbuf.dsize / sizeof(struct odb_entry); + + tp = Realloc(dbuf.dptr, (count+1) * sizeof(struct odb_entry)); + if (tp == NULL) { + if (dbuf.dptr) free(dbuf.dptr); + return NT_STATUS_NO_MEMORY; + } + + dbuf.dptr = tp; + dbuf.dsize = (count+1) * sizeof(struct odb_entry); + + memcpy(dbuf.dptr + (count*sizeof(struct odb_entry)), + &e, sizeof(struct odb_entry)); + + if (tdb_store(odb->w->tdb, lck->key, dbuf, TDB_REPLACE) != 0) { + free(dbuf.dptr); + return NT_STATUS_INTERNAL_DB_CORRUPTION; + } + + free(dbuf.dptr); + return NT_STATUS_OK; +} + + /* remove a opendb entry */ @@ -274,6 +331,15 @@ NTSTATUS odb_close_file(struct odb_lock *lck, uint16_t fnum) elist = (struct odb_entry *)dbuf.dptr; count = dbuf.dsize / sizeof(struct odb_entry); + /* send any pending notifications */ + for (i=0;i<count;i++) { + if (elist[i].pending) { + messaging_send_ptr(odb->messaging_ctx, elist[i].server, + MSG_PVFS_RETRY_OPEN, elist[i].notify_ptr); + + } + } + /* find the entry, and delete it */ for (i=0;iodb; + TDB_DATA dbuf; + struct odb_entry *elist; + int i, count; + NTSTATUS status; + + dbuf = tdb_fetch(odb->w->tdb, lck->key); + + if (dbuf.dptr == NULL) { + return NT_STATUS_UNSUCCESSFUL; + } + + elist = (struct odb_entry *)dbuf.dptr; + 
count = dbuf.dsize / sizeof(struct odb_entry); + + /* find the entry, and delete it */ + for (i=0;iserver == elist[i].server && + odb->tid == elist[i].tid) { + if (i < count-1) { + memmove(elist+i, elist+i+1, + (count - (i+1)) * sizeof(struct odb_entry)); + } + break; + } + } + + status = NT_STATUS_OK; + + if (i == count) { + status = NT_STATUS_UNSUCCESSFUL; + } else if (count == 1) { + if (tdb_delete(odb->w->tdb, lck->key) != 0) { + status = NT_STATUS_INTERNAL_DB_CORRUPTION; + } + } else { + dbuf.dsize = (count-1) * sizeof(struct odb_entry); + if (tdb_store(odb->w->tdb, lck->key, dbuf, TDB_REPLACE) != 0) { + status = NT_STATUS_INTERNAL_DB_CORRUPTION; + } + } + + free(dbuf.dptr); + + return status; +} + + /* update create options on an open file */ @@ -386,6 +506,8 @@ NTSTATUS odb_can_open(struct odb_context *odb, DATA_BLOB *key, e.share_access = share_access; e.create_options = create_options; e.access_mask = access_mask; + e.notify_ptr = NULL; + e.pending = False; for (i=0;if = f; pending->req = req; - /* round up to the nearest second */ - pending->end_time = time(NULL) + ((lck->lockx.in.timeout+999)/1000); + pending->end_time = + timeval_current_ofs(lck->lockx.in.timeout/1000, + 1000*(lck->lockx.in.timeout%1000)); } if (lck->lockx.in.mode & LOCKING_ANDX_SHARED_LOCK) { diff --git a/source4/ntvfs/posix/pvfs_open.c b/source4/ntvfs/posix/pvfs_open.c index f3ef72f4ed..8ad6ad0389 100644 --- a/source4/ntvfs/posix/pvfs_open.c +++ b/source4/ntvfs/posix/pvfs_open.c @@ -25,6 +25,7 @@ #include "system/time.h" #include "system/filesys.h" #include "dlinklist.h" +#include "messages.h" /* create file handles with convenient numbers for sniffers @@ -33,6 +34,8 @@ #define PVFS_MIN_NEW_FNUM 0x200 #define PVFS_MIN_DIR_FNUM 0x300 +#define SHARING_VIOLATION_DELAY 1000000 + /* find open file handle given fnum */ @@ -125,7 +128,6 @@ static NTSTATUS pvfs_open_directory(struct pvfs_state *pvfs, fnum = idr_get_new_above(pvfs->idtree_fnum, f, PVFS_MIN_DIR_FNUM, UINT16_MAX); if (fnum == 
-1) { - talloc_free(f); return NT_STATUS_TOO_MANY_OPENED_FILES; } @@ -228,10 +230,12 @@ static int pvfs_fd_destructor(void *p) return 0; } - status = odb_close_file(lck, f->fnum); - if (!NT_STATUS_IS_OK(status)) { - DEBUG(0,("Unable to remove opendb entry for '%s' - %s\n", - f->name->full_name, nt_errstr(status))); + if (f->have_opendb_entry) { + status = odb_close_file(lck, f->fnum); + if (!NT_STATUS_IS_OK(status)) { + DEBUG(0,("Unable to remove opendb entry for '%s' - %s\n", + f->name->full_name, nt_errstr(status))); + } } talloc_free(lck); @@ -370,6 +374,7 @@ static NTSTATUS pvfs_create_file(struct pvfs_state *pvfs, f->access_mask = access_mask; f->seek_offset = 0; f->position = 0; + f->have_opendb_entry = True; DLIST_ADD(pvfs->open_files, f); @@ -398,6 +403,166 @@ static NTSTATUS pvfs_create_file(struct pvfs_state *pvfs, } +/* + open an existing file - called from both the open retry code + and the main open code +*/ +NTSTATUS pvfs_open_existing(struct pvfs_file *f, + union smb_open *io, + int open_flags) +{ + int fd; + NTSTATUS status; + + /* do the actual open */ + fd = open(f->name->full_name, open_flags); + if (fd == -1) { + return pvfs_map_errno(f->pvfs, errno); + } + + f->fd = fd; + + /* re-resolve the open fd */ + status = pvfs_resolve_name_fd(f->pvfs, fd, f->name); + if (!NT_STATUS_IS_OK(status)) { + return status; + } + + io->generic.out.oplock_level = NO_OPLOCK; + io->generic.out.fnum = f->fnum; + io->generic.out.create_action = NTCREATEX_ACTION_EXISTED; + io->generic.out.create_time = f->name->dos.create_time; + io->generic.out.access_time = f->name->dos.access_time; + io->generic.out.write_time = f->name->dos.write_time; + io->generic.out.change_time = f->name->dos.change_time; + io->generic.out.attrib = f->name->dos.attrib; + io->generic.out.alloc_size = f->name->dos.alloc_size; + io->generic.out.size = f->name->st.st_size; + io->generic.out.file_type = FILE_TYPE_DISK; + io->generic.out.ipc_state = 0; + io->generic.out.is_directory = 0; + + /* 
success - keep the file handle */ + talloc_steal(f->pvfs, f); + + return NT_STATUS_OK; +} + +/* + state of a pending open retry +*/ +struct pvfs_open_retry { + union smb_open *io; + struct pvfs_file *f; + struct smbsrv_request *req; + void *wait_handle; + struct timeval end_time; + int open_flags; +}; + +/* destroy a pending open request */ +static int pvfs_retry_destructor(void *ptr) +{ + struct pvfs_open_retry *r = ptr; + struct odb_lock *lck; + lck = odb_lock(r->req, r->f->pvfs->odb_context, &r->f->locking_key); + if (lck != NULL) { + odb_remove_pending(lck, r); + } + return 0; +} + +/* + retry an open +*/ +static void pvfs_open_retry(void *private, BOOL timed_out) +{ + struct pvfs_open_retry *r = private; + struct odb_lock *lck; + struct pvfs_file *f = r->f; + struct smbsrv_request *req = r->req; + NTSTATUS status; + + lck = odb_lock(req, f->pvfs->odb_context, &f->locking_key); + if (lck == NULL) { + req->async_states->status = NT_STATUS_INTERNAL_DB_CORRUPTION; + req->async_states->send_fn(req); + return; + } + + /* see if we are allowed to open at the same time as existing opens */ + status = odb_open_file(lck, f->fnum, f->share_access, + f->create_options, f->access_mask); + if (NT_STATUS_EQUAL(status, NT_STATUS_SHARING_VIOLATION) && !timed_out) { + talloc_free(lck); + return; + } + + talloc_free(r->wait_handle); + + if (!NT_STATUS_IS_OK(status)) { + req->async_states->status = status; + req->async_states->send_fn(req); + return; + } + + f->have_opendb_entry = True; + + /* do the rest of the open work */ + status = pvfs_open_existing(f, r->io, r->open_flags); + + if (NT_STATUS_IS_OK(status)) { + talloc_steal(f->pvfs, f); + } + + req->async_states->status = status; + req->async_states->send_fn(req); +} + +/* + setup for a open retry after a sharing violation +*/ +static NTSTATUS pvfs_open_setup_retry(struct smbsrv_request *req, + union smb_open *io, + struct pvfs_file *f, + struct odb_lock *lck, + int open_flags) +{ + struct pvfs_open_retry *r; + struct 
pvfs_state *pvfs = f->pvfs; + NTSTATUS status; + + r = talloc_p(req, struct pvfs_open_retry); + if (r == NULL) { + return NT_STATUS_NO_MEMORY; + } + + r->io = io; + r->f = f; + r->req = req; + r->end_time = timeval_current_ofs(0, SHARING_VIOLATION_DELAY); + r->open_flags = open_flags; + + /* setup a pending lock */ + status = odb_open_file_pending(lck, r); + if (!NT_STATUS_IS_OK(status)) { + return status; + } + + r->wait_handle = pvfs_wait_message(pvfs, req, MSG_PVFS_RETRY_OPEN, r->end_time, + pvfs_open_retry, r); + if (r->wait_handle == NULL) { + return NT_STATUS_NO_MEMORY; + } + + talloc_free(lck); + talloc_steal(pvfs, req); + + talloc_set_destructor(r, pvfs_retry_destructor); + + return NT_STATUS_OK; +} + /* open a file */ @@ -405,7 +570,7 @@ NTSTATUS pvfs_open(struct ntvfs_module_context *ntvfs, struct smbsrv_request *req, union smb_open *io) { struct pvfs_state *pvfs = ntvfs->private_data; - int fd, flags; + int flags; struct pvfs_filename *name; struct pvfs_file *f; NTSTATUS status; @@ -539,11 +704,26 @@ NTSTATUS pvfs_open(struct ntvfs_module_context *ntvfs, return NT_STATUS_TOO_MANY_OPENED_FILES; } + f->fnum = fnum; + f->fd = -1; + f->name = talloc_steal(f, name); + f->session = req->session; + f->smbpid = req->smbpid; + f->pvfs = pvfs; + f->pending_list = NULL; + f->lock_count = 0; + f->create_options = io->generic.in.create_options; + f->share_access = io->generic.in.share_access; + f->access_mask = access_mask; + f->seek_offset = 0; + f->position = 0; + f->have_opendb_entry = False; + /* form the lock context used for byte range locking and opendb locking */ status = pvfs_locking_key(name, f, &f->locking_key); if (!NT_STATUS_IS_OK(status)) { - idr_remove(pvfs->idtree_fnum, fnum); + idr_remove(pvfs->idtree_fnum, f->fnum); return status; } @@ -558,65 +738,31 @@ NTSTATUS pvfs_open(struct ntvfs_module_context *ntvfs, return NT_STATUS_INTERNAL_DB_CORRUPTION; } - /* see if we are allowed to open at the same time as existing opens */ - status = 
odb_open_file(lck, fnum, share_access, create_options, access_mask); - if (!NT_STATUS_IS_OK(status)) { - idr_remove(pvfs->idtree_fnum, fnum); - return status; - } - - f->fnum = fnum; - f->fd = -1; - f->name = talloc_steal(f, name); - f->session = req->session; - f->smbpid = req->smbpid; - f->pvfs = pvfs; - f->pending_list = NULL; - f->lock_count = 0; - f->create_options = io->generic.in.create_options; - f->share_access = io->generic.in.share_access; - f->access_mask = access_mask; - f->seek_offset = 0; - f->position = 0; - DLIST_ADD(pvfs->open_files, f); /* setup a destructor to avoid file descriptor leaks on abnormal termination */ talloc_set_destructor(f, pvfs_fd_destructor); - /* do the actual open */ - fd = open(name->full_name, flags); - if (fd == -1) { - return pvfs_map_errno(pvfs, errno); - } - f->fd = fd; + /* see if we are allowed to open at the same time as existing opens */ + status = odb_open_file(lck, f->fnum, share_access, create_options, access_mask); + + /* on a sharing violation we need to retry when the file is closed by + the other user, or after 1 second */ + if (NT_STATUS_EQUAL(status, NT_STATUS_SHARING_VIOLATION) && + (req->async_states->state & NTVFS_ASYNC_STATE_MAY_ASYNC)) { + return pvfs_open_setup_retry(req, io, f, lck, flags); + } - /* re-resolve the open fd */ - status = pvfs_resolve_name_fd(pvfs, fd, name); if (!NT_STATUS_IS_OK(status)) { return status; } - io->generic.out.oplock_level = NO_OPLOCK; - io->generic.out.fnum = f->fnum; - io->generic.out.create_action = NTCREATEX_ACTION_EXISTED; - io->generic.out.create_time = name->dos.create_time; - io->generic.out.access_time = name->dos.access_time; - io->generic.out.write_time = name->dos.write_time; - io->generic.out.change_time = name->dos.change_time; - io->generic.out.attrib = name->dos.attrib; - io->generic.out.alloc_size = name->dos.alloc_size; - io->generic.out.size = name->st.st_size; - io->generic.out.file_type = FILE_TYPE_DISK; - io->generic.out.ipc_state = 0; - 
io->generic.out.is_directory = 0; - - /* success - keep the file handle */ - talloc_steal(pvfs, f); + f->have_opendb_entry = True; - return NT_STATUS_OK; + /* do the rest of the open work */ + return pvfs_open_existing(f, io, flags); } @@ -677,7 +823,6 @@ NTSTATUS pvfs_logoff(struct ntvfs_module_context *ntvfs, for (f=pvfs->open_files;f;f=next) { next = f->next; if (f->session == req->session) { - DLIST_REMOVE(pvfs->open_files, f); talloc_free(f); } } @@ -698,7 +843,6 @@ NTSTATUS pvfs_exit(struct ntvfs_module_context *ntvfs, for (f=pvfs->open_files;f;f=next) { next = f->next; if (f->smbpid == req->smbpid) { - DLIST_REMOVE(pvfs->open_files, f); talloc_free(f); } } diff --git a/source4/ntvfs/posix/pvfs_wait.c b/source4/ntvfs/posix/pvfs_wait.c index f01bd0ea18..0faab8ef55 100644 --- a/source4/ntvfs/posix/pvfs_wait.c +++ b/source4/ntvfs/posix/pvfs_wait.c @@ -58,10 +58,9 @@ static void pvfs_wait_dispatch(struct messaging_context *msg, void *private, uin struct pvfs_wait *pwait = private; struct smbsrv_request *req; - /* we need to check that this one is for us. This sender sends - the private pointer as the body of the message. This might - seem a little unusual, but as the pointer is guaranteed - unique for this server, it is a good token */ + /* we need to check that this one is for us. See + messaging_send_ptr() for the other side of this. 
+ */ if (data->length != sizeof(void *) || *(void **)data->data != pwait->private) { return; @@ -82,7 +81,8 @@ static void pvfs_wait_dispatch(struct messaging_context *msg, void *private, uin /* receive a timeout on a message wait */ -static void pvfs_wait_timeout(struct event_context *ev, struct timed_event *te, time_t t) +static void pvfs_wait_timeout(struct event_context *ev, + struct timed_event *te, struct timeval t) { struct pvfs_wait *pwait = te->private; struct smbsrv_request *req = pwait->req; @@ -116,7 +116,7 @@ static int pvfs_wait_destructor(void *ptr) void *pvfs_wait_message(struct pvfs_state *pvfs, struct smbsrv_request *req, int msg_type, - time_t end_time, + struct timeval end_time, void (*fn)(void *, BOOL), void *private) { diff --git a/source4/ntvfs/posix/vfs_posix.h b/source4/ntvfs/posix/vfs_posix.h index 530a2deae3..265649f5a3 100644 --- a/source4/ntvfs/posix/vfs_posix.h +++ b/source4/ntvfs/posix/vfs_posix.h @@ -112,6 +112,8 @@ struct pvfs_file { /* yes, we need 2 independent positions ... */ uint64_t seek_offset; uint64_t position; + + BOOL have_opendb_entry; }; -- cgit