summaryrefslogtreecommitdiff
path: root/source3
diff options
context:
space:
mode:
authorJeremy Allison <jra@samba.org>2012-04-10 15:45:55 -0700
committerJeremy Allison <jra@samba.org>2012-04-11 02:29:04 +0200
commitfe707f6549292ccb681ccd0c596cbd17525522f3 (patch)
tree5667fd10fd30c77965870a172e90427015ac57b4 /source3
parent224379ba70a6939bd6a92012f023caeb7e43d6b7 (diff)
downloadsamba-fe707f6549292ccb681ccd0c596cbd17525522f3.tar.gz
samba-fe707f6549292ccb681ccd0c596cbd17525522f3.tar.bz2
samba-fe707f6549292ccb681ccd0c596cbd17525522f3.zip
Add a new module, aio_linux which implements Linux kernel aio support. Docs to follow.
Autobuild-User: Jeremy Allison <jra@samba.org> Autobuild-Date: Wed Apr 11 02:29:04 CEST 2012 on sn-devel-104
Diffstat (limited to 'source3')
-rw-r--r--source3/Makefile.in5
-rw-r--r--source3/configure.in15
-rw-r--r--source3/modules/vfs_aio_linux.c730
-rw-r--r--source3/modules/wscript_build10
-rw-r--r--source3/wscript26
5 files changed, 786 insertions, 0 deletions
diff --git a/source3/Makefile.in b/source3/Makefile.in
index e1d8770f24..ff223d9fdc 100644
--- a/source3/Makefile.in
+++ b/source3/Makefile.in
@@ -873,6 +873,7 @@ VFS_TSMSM_OBJ = modules/vfs_tsmsm.o
VFS_FILEID_OBJ = modules/vfs_fileid.o
VFS_AIO_FORK_OBJ = modules/vfs_aio_fork.o
VFS_AIO_PTHREAD_OBJ = modules/vfs_aio_pthread.o
+VFS_AIO_LINUX_OBJ = modules/vfs_aio_linux.o
VFS_PREOPEN_OBJ = modules/vfs_preopen.o
VFS_SYNCOPS_OBJ = modules/vfs_syncops.o
VFS_ACL_XATTR_OBJ = modules/vfs_acl_xattr.o
@@ -3066,6 +3067,10 @@ bin/aio_pthread.@SHLIBEXT@: $(BINARY_PREREQS) $(VFS_AIO_PTHREAD_OBJ)
@echo "Building plugin $@"
@$(SHLD_MODULE) $(VFS_AIO_PTHREAD_OBJ)
+bin/aio_linux.@SHLIBEXT@: $(BINARY_PREREQS) $(VFS_AIO_LINUX_OBJ)
+ @echo "Building plugin $@"
+ @$(SHLD_MODULE) $(VFS_AIO_LINUX_OBJ)
+
bin/preopen.@SHLIBEXT@: $(BINARY_PREREQS) $(VFS_PREOPEN_OBJ)
@echo "Building plugin $@"
@$(SHLD_MODULE) $(VFS_PREOPEN_OBJ)
diff --git a/source3/configure.in b/source3/configure.in
index bf777a16f5..0470a18c1c 100644
--- a/source3/configure.in
+++ b/source3/configure.in
@@ -5567,6 +5567,20 @@ if test x"$samba_cv_HAVE_AIO" = x"yes"; then
x"$samba_cv_msghdr_msg_acctright" = x"yes"; then
default_shared_modules="$default_shared_modules vfs_aio_fork"
fi
+
+# Check for Linux kernel aio support.
+ case "$host_os" in
+ *linux*)
+ AC_MSG_CHECKING(for Linux kernel asynchronous io support)
+ AC_CHECK_LIB(aio,io_submit,
+ [AIO_LIBS="$LIBS -laio";
+ AC_DEFINE(HAVE_LINUX_KERNEL_AIO, 1, Define to 1 if there is support for Linux kernel asynchronous io)],
+ [])
+ if test x"$ac_cv_lib_aio_io_submit" = x"yes"; then
+ default_shared_modules="$default_shared_modules vfs_aio_linux"
+ fi
+ ;;
+ esac
fi
#################################################
@@ -6519,6 +6533,7 @@ SMB_MODULE(vfs_tsmsm, \$(VFS_TSMSM_OBJ), "bin/tsmsm.$SHLIBEXT", VFS)
SMB_MODULE(vfs_fileid, \$(VFS_FILEID_OBJ), "bin/fileid.$SHLIBEXT", VFS)
SMB_MODULE(vfs_aio_fork, \$(VFS_AIO_FORK_OBJ), "bin/aio_fork.$SHLIBEXT", VFS)
SMB_MODULE(vfs_aio_pthread, \$(VFS_AIO_PTHREAD_OBJ), "bin/aio_pthread.$SHLIBEXT", VFS)
+SMB_MODULE(vfs_aio_linux, \$(VFS_AIO_LINUX_OBJ), "bin/aio_linux.$SHLIBEXT", VFS)
SMB_MODULE(vfs_preopen, \$(VFS_PREOPEN_OBJ), "bin/preopen.$SHLIBEXT", VFS)
SMB_MODULE(vfs_syncops, \$(VFS_SYNCOPS_OBJ), "bin/syncops.$SHLIBEXT", VFS)
SMB_MODULE(vfs_zfsacl, \$(VFS_ZFSACL_OBJ), "bin/zfsacl.$SHLIBEXT", VFS)
diff --git a/source3/modules/vfs_aio_linux.c b/source3/modules/vfs_aio_linux.c
new file mode 100644
index 0000000000..f6fa80a355
--- /dev/null
+++ b/source3/modules/vfs_aio_linux.c
@@ -0,0 +1,730 @@
+/*
+ * Simulate Posix AIO using Linux kernel AIO.
+ *
+ * Copyright (C) Jeremy Allison 2012
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "includes.h"
+#include "system/filesys.h"
+#include "smbd/smbd.h"
+#include "smbd/globals.h"
+#include <sys/eventfd.h>
+#include <libaio.h>
+
+struct aio_extra;
+static int event_fd = -1;
+static io_context_t io_ctx;
+static int aio_linux_requestid;
+static struct io_event *io_recv_events;
+static struct fd_event *aio_read_event;
+
+struct aio_private_data {
+ struct aio_private_data *prev, *next;
+ int requestid;
+ SMB_STRUCT_AIOCB *aiocb;
+ struct iocb *event_iocb;
+ ssize_t ret_size;
+ int ret_errno;
+ bool cancelled;
+};
+
+/* List of outstanding requests we have. */
+static struct aio_private_data *pd_list;
+
+static void aio_linux_handle_completion(struct event_context *event_ctx,
+ struct fd_event *event,
+ uint16 flags,
+ void *p);
+
+/************************************************************************
+ Housekeeping. Cleanup if no activity for 30 seconds.
+***********************************************************************/
+
+static void aio_linux_housekeeping(struct tevent_context *event_ctx,
+ struct tevent_timer *te,
+ struct timeval now,
+ void *private_data)
+{
+ /* Remove this timed event handler. */
+ TALLOC_FREE(te);
+
+ if (pd_list != NULL) {
+ /* Still busy. Look again in 30 seconds. */
+ (void)tevent_add_timer(event_ctx,
+ NULL,
+ timeval_current_ofs(30, 0),
+ aio_linux_housekeeping,
+ NULL);
+ return;
+ }
+
+ /* No activity for 30 seconds. Close out kernel resources. */
+ io_queue_release(io_ctx);
+ memset(&io_ctx, '\0', sizeof(io_ctx));
+
+ if (event_fd != -1) {
+ close(event_fd);
+ event_fd = -1;
+ }
+
+ TALLOC_FREE(aio_read_event);
+ TALLOC_FREE(io_recv_events);
+}
+
+/************************************************************************
+ Ensure event fd and aio context are initialized.
+***********************************************************************/
+
+static bool init_aio_linux(struct vfs_handle_struct *handle)
+{
+ struct tevent_timer *te = NULL;
+
+ if (event_fd != -1) {
+ /* Already initialized. */
+ return true;
+ }
+
+ /* Shedule a shutdown event for 30 seconds from now. */
+ te = tevent_add_timer(server_event_context(),
+ NULL,
+ timeval_current_ofs(30, 0),
+ aio_linux_housekeeping,
+ NULL);
+
+ if (te == NULL) {
+ goto fail;
+ }
+
+ /* Ensure we have enough space for aio_pending_size events. */
+ io_recv_events = talloc_zero_array(NULL,
+ struct io_event,
+ aio_pending_size);
+ if (io_recv_events == NULL) {
+ goto fail;
+ }
+
+ event_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+ if (event_fd == -1) {
+ goto fail;
+ }
+
+ aio_read_event = tevent_add_fd(server_event_context(),
+ NULL,
+ event_fd,
+ TEVENT_FD_READ,
+ aio_linux_handle_completion,
+ NULL);
+ if (aio_read_event == NULL) {
+ goto fail;
+ }
+
+ if (io_queue_init(aio_pending_size, &io_ctx)) {
+ goto fail;
+ }
+
+ DEBUG(10,("init_aio_linux: initialized with up to %d events\n",
+ aio_pending_size));
+
+ return true;
+
+ fail:
+
+ DEBUG(10,("init_aio_linux: initialization failed\n"));
+
+ TALLOC_FREE(te);
+ TALLOC_FREE(io_recv_events);
+ TALLOC_FREE(aio_read_event);
+ if (event_fd != -1) {
+ close(event_fd);
+ event_fd = -1;
+ }
+ memset(&io_ctx, '\0', sizeof(io_ctx));
+ return false;
+}
+
+/************************************************************************
+ Private data destructor.
+***********************************************************************/
+
+static int pd_destructor(struct aio_private_data *pd)
+{
+ DLIST_REMOVE(pd_list, pd);
+ return 0;
+}
+
+/************************************************************************
+ Create and initialize a private data struct.
+***********************************************************************/
+
+static struct aio_private_data *create_private_data(TALLOC_CTX *ctx,
+ SMB_STRUCT_AIOCB *aiocb)
+{
+ struct aio_private_data *pd = talloc_zero(ctx, struct aio_private_data);
+ if (!pd) {
+ return NULL;
+ }
+ pd->event_iocb = talloc_zero(pd, struct iocb);
+ pd->requestid = aio_linux_requestid++;
+ pd->aiocb = aiocb;
+ pd->ret_size = -1;
+ pd->ret_errno = EINPROGRESS;
+ talloc_set_destructor(pd, pd_destructor);
+ DLIST_ADD_END(pd_list, pd, struct aio_private_data *);
+ return pd;
+}
+
+/************************************************************************
+ Initiate an asynchronous pread call.
+***********************************************************************/
+
+static int aio_linux_read(struct vfs_handle_struct *handle,
+ struct files_struct *fsp,
+ SMB_STRUCT_AIOCB *aiocb)
+{
+ struct aio_extra *aio_ex = (struct aio_extra *)aiocb->aio_sigevent.sigev_value.sival_ptr;
+ struct aio_private_data *pd = NULL;
+ int ret;
+
+ if (!init_aio_linux(handle)) {
+ return -1;
+ }
+
+ pd = create_private_data(aio_ex, aiocb);
+ if (pd == NULL) {
+ DEBUG(10, ("aio_linux_read: Could not create private data.\n"));
+ return -1;
+ }
+
+ io_prep_pread(pd->event_iocb,
+ pd->aiocb->aio_fildes,
+ discard_const(pd->aiocb->aio_buf),
+ pd->aiocb->aio_nbytes,
+ pd->aiocb->aio_offset);
+ io_set_eventfd(pd->event_iocb, event_fd);
+ /* Use the callback pointer as a private data ptr. */
+ io_set_callback(pd->event_iocb, (io_callback_t)pd);
+
+ ret = io_submit(io_ctx, 1, &pd->event_iocb);
+ if (ret < 0) {
+ errno = ret;
+ return -1;
+ }
+
+ DEBUG(10, ("aio_linux_read: requestid=%d read requested "
+ "of %llu bytes at offset %llu\n",
+ pd->requestid,
+ (unsigned long long)pd->aiocb->aio_nbytes,
+ (unsigned long long)pd->aiocb->aio_offset));
+
+ return 0;
+}
+
+/************************************************************************
+ Initiate an asynchronous pwrite call.
+***********************************************************************/
+
+static int aio_linux_write(struct vfs_handle_struct *handle,
+ struct files_struct *fsp,
+ SMB_STRUCT_AIOCB *aiocb)
+{
+ struct aio_extra *aio_ex = (struct aio_extra *)aiocb->aio_sigevent.sigev_value.sival_ptr;
+ struct aio_private_data *pd = NULL;
+ int ret;
+
+ if (!init_aio_linux(handle)) {
+ return -1;
+ }
+
+ pd = create_private_data(aio_ex, aiocb);
+ if (pd == NULL) {
+ DEBUG(10, ("aio_linux_write: Could not create private data.\n"));
+ return -1;
+ }
+
+ io_prep_pwrite(pd->event_iocb,
+ pd->aiocb->aio_fildes,
+ discard_const(pd->aiocb->aio_buf),
+ pd->aiocb->aio_nbytes,
+ pd->aiocb->aio_offset);
+ io_set_eventfd(pd->event_iocb, event_fd);
+ /* Use the callback pointer as a private data ptr. */
+ io_set_callback(pd->event_iocb, (io_callback_t)pd);
+
+ ret = io_submit(io_ctx, 1, &pd->event_iocb);
+ if (ret < 0) {
+ errno = ret;
+ return -1;
+ }
+
+ DEBUG(10, ("aio_linux_write: requestid=%d pwrite requested "
+ "of %llu bytes at offset %llu\n",
+ pd->requestid,
+ (unsigned long long)pd->aiocb->aio_nbytes,
+ (unsigned long long)pd->aiocb->aio_offset));
+
+ return 0;
+}
+
+/************************************************************************
+ Handle a single finished io.
+***********************************************************************/
+
+static void aio_linux_handle_io_finished(struct io_event *ioev)
+{
+ struct aio_extra *aio_ex = NULL;
+ struct aio_private_data *pd = (struct aio_private_data *)ioev->data;
+
+ /* ioev->res2 contains the -errno if error. */
+ /* ioev->res contains the number of bytes sent/received. */
+ if (ioev->res2) {
+ pd->ret_size = -1;
+ pd->ret_errno = -ioev->res2;
+ } else {
+ pd->ret_size = ioev->res;
+ pd->ret_errno = 0;
+ }
+
+ aio_ex = (struct aio_extra *)pd->aiocb->aio_sigevent.sigev_value.sival_ptr;
+ smbd_aio_complete_aio_ex(aio_ex);
+
+ DEBUG(10,("aio_linux_handle_io_finished: requestid %d completed\n",
+ pd->requestid ));
+ TALLOC_FREE(aio_ex);
+}
+
+/************************************************************************
+ Callback when multiple IOs complete.
+***********************************************************************/
+
+static void aio_linux_handle_completion(struct event_context *event_ctx,
+ struct fd_event *event,
+ uint16 flags,
+ void *p)
+{
+ uint64_t num_events = 0;
+
+ DEBUG(10, ("aio_linux_handle_completion called with flags=%d\n",
+ (int)flags));
+
+ if ((flags & EVENT_FD_READ) == 0) {
+ return;
+ }
+
+ /* Read the number of events available. */
+ if (sys_read(event_fd, &num_events, sizeof(num_events)) !=
+ sizeof(num_events)) {
+ smb_panic("aio_linux_handle_completion: invalid read");
+ }
+
+ while (num_events > 0) {
+ uint64_t events_to_read = MIN(num_events, aio_pending_size);
+ struct timespec ts;
+ int i;
+ int ret;
+
+ ts.tv_sec = 0;
+ ts.tv_nsec = 0;
+
+ ret = io_getevents(io_ctx,
+ 1,
+ (long)events_to_read,
+ io_recv_events,
+ &ts);
+
+ if (ret < 0) {
+ errno = -ret;
+ DEBUG(1, ("aio_linux_handle_completion: "
+ "io_getevents error %s\n",
+ strerror(errno) ));
+ return;
+ }
+
+ if (ret == 0) {
+ DEBUG(10, ("aio_linux_handle_completion: "
+ "io_getevents returned 0\n"));
+ continue;
+ }
+
+ /* ret is positive. */
+ for (i = 0; i < ret; i++) {
+ aio_linux_handle_io_finished(&io_recv_events[i]);
+ }
+
+ num_events -= ret;
+ }
+}
+
+/************************************************************************
+ Find the private data by aiocb.
+***********************************************************************/
+
+static struct aio_private_data *find_private_data_by_aiocb(SMB_STRUCT_AIOCB *aiocb)
+{
+ struct aio_private_data *pd;
+
+ for (pd = pd_list; pd != NULL; pd = pd->next) {
+ if (pd->aiocb == aiocb) {
+ return pd;
+ }
+ }
+
+ return NULL;
+}
+
+/************************************************************************
+ Called to return the result of a completed AIO.
+ Should only be called if aio_error returns something other than EINPROGRESS.
+ Returns:
+ Any other value - return from IO operation.
+***********************************************************************/
+
+static ssize_t aio_linux_return_fn(struct vfs_handle_struct *handle,
+ struct files_struct *fsp,
+ SMB_STRUCT_AIOCB *aiocb)
+{
+ struct aio_private_data *pd = find_private_data_by_aiocb(aiocb);
+
+ if (pd == NULL) {
+ errno = EINVAL;
+ DEBUG(0, ("aio_linux_return_fn: returning EINVAL\n"));
+ return -1;
+ }
+
+ pd->aiocb = NULL;
+
+ if (pd->ret_size == -1) {
+ errno = pd->ret_errno;
+ }
+
+ return pd->ret_size;
+}
+
+/************************************************************************
+ Called to check the result of an AIO.
+ Returns:
+ EINPROGRESS - still in progress.
+ EINVAL - invalid aiocb.
+ ECANCELED - request was cancelled.
+ 0 - request completed successfully.
+ Any other value - errno from IO operation.
+***********************************************************************/
+
+static int aio_linux_error_fn(struct vfs_handle_struct *handle,
+ struct files_struct *fsp,
+ SMB_STRUCT_AIOCB *aiocb)
+{
+ struct aio_private_data *pd = find_private_data_by_aiocb(aiocb);
+
+ if (pd == NULL) {
+ return EINVAL;
+ }
+ if (pd->cancelled) {
+ return ECANCELED;
+ }
+ return pd->ret_errno;
+}
+
+/************************************************************************
+ Called to request the cancel of an AIO, or all of them on a specific
+ fsp if aiocb == NULL.
+***********************************************************************/
+
+static int aio_linux_cancel(struct vfs_handle_struct *handle,
+ struct files_struct *fsp,
+ SMB_STRUCT_AIOCB *aiocb)
+{
+ struct aio_private_data *pd = NULL;
+
+ for (pd = pd_list; pd != NULL; pd = pd->next) {
+ if (pd->aiocb == NULL) {
+ continue;
+ }
+ if (pd->aiocb->aio_fildes != fsp->fh->fd) {
+ continue;
+ }
+ if ((aiocb != NULL) && (pd->aiocb != aiocb)) {
+ continue;
+ }
+
+ /*
+ * We let the kernel do its job, but we discard the result when
+ * it's finished. NB. Should I call io_cancel here ?
+ */
+
+ pd->cancelled = true;
+ }
+
+ return AIO_CANCELED;
+}
+
+/************************************************************************
+ Callback for a previously detected job completion deferred to the main
+ loop.
+***********************************************************************/
+
+static void aio_linux_handle_immediate(struct tevent_context *ctx,
+ struct tevent_immediate *im,
+ void *private_data)
+{
+ struct io_event *ioev = (struct io_event *)private_data;
+
+ aio_linux_handle_io_finished(ioev);
+ TALLOC_FREE(ioev);
+}
+
+/************************************************************************
+ Private data struct used in suspend completion code.
+***********************************************************************/
+
+struct suspend_private {
+ int num_entries;
+ int num_finished;
+ const SMB_STRUCT_AIOCB * const *aiocb_array;
+};
+
+/************************************************************************
+ Handle a single finished io from suspend.
+***********************************************************************/
+
+static void aio_linux_handle_suspend_io_finished(struct suspend_private *sp,
+ struct io_event *ioev)
+{
+ struct aio_private_data *pd = (struct aio_private_data *)ioev->data;
+ struct io_event *new_ioev = NULL;
+ struct tevent_immediate *im = NULL;
+ int i;
+
+ /* Is this a requestid with an aiocb we're interested in ? */
+ for (i = 0; i < sp->num_entries; i++) {
+ if (sp->aiocb_array[i] == pd->aiocb) {
+ sp->num_finished++;
+ aio_linux_handle_io_finished(ioev);
+ return;
+ }
+ }
+
+ /* Jobid completed we weren't waiting for.
+ We must reshedule this as an immediate event
+ on the main event context. */
+ im = tevent_create_immediate(NULL);
+ if (!im) {
+ exit_server_cleanly("aio_linux_handle_suspend_completion: no memory");
+ }
+
+ new_ioev = (struct io_event *)talloc_memdup(NULL,
+ ioev,
+ sizeof(struct io_event));
+ if (!new_ioev) {
+ exit_server_cleanly("aio_linux_handle_suspend_completion: no memory");
+ }
+
+ DEBUG(10,("aio_linux_handle_suspend_completion: "
+ "re-scheduling requestid %d\n",
+ pd->requestid));
+
+ tevent_schedule_immediate(im,
+ server_event_context(),
+ aio_linux_handle_immediate,
+ (void *)new_ioev);
+}
+
+/************************************************************************
+ Callback when an IO completes from a suspend call.
+***********************************************************************/
+
+static void aio_linux_handle_suspend_completion(struct event_context *event_ctx,
+ struct fd_event *event,
+ uint16 flags,
+ void *p)
+{
+ struct suspend_private *sp = (struct suspend_private *)p;
+ uint64_t remaining_events = sp->num_entries - sp->num_finished;
+ uint64_t num_events = 0;
+
+ DEBUG(10, ("aio_linux_handle_suspend_completion called with flags=%d\n",
+ (int)flags));
+
+ if ((flags & EVENT_FD_READ) == 0) {
+ return;
+ }
+
+ /* Read the number of events available. */
+ if (sys_read(event_fd, &num_events, sizeof(num_events)) !=
+ sizeof(num_events)) {
+ smb_panic("aio_linux_handle_completion: invalid read");
+ }
+
+ while (num_events > 0) {
+ uint64_t events_to_read = MIN(num_events, remaining_events);
+ struct timespec ts;
+ int i;
+ int ret;
+
+ ts.tv_sec = 0;
+ ts.tv_nsec = 0;
+
+ ret = io_getevents(io_ctx,
+ 1,
+ (long)events_to_read,
+ io_recv_events,
+ &ts);
+
+ if (ret < 0) {
+ errno = -ret;
+ DEBUG(1, ("aio_linux_handle_suspend_completion: "
+ "io_getevents error %s\n",
+ strerror(errno) ));
+ return;
+ }
+
+ if (ret == 0) {
+ DEBUG(10, ("aio_linux_handle_suspend_completion: "
+ "io_getevents returned 0\n"));
+ continue;
+ }
+
+ /* ret is positive. */
+ for (i = 0; i < ret; i++) {
+ aio_linux_handle_suspend_io_finished(sp,
+ &io_recv_events[i]);
+ }
+
+ num_events -= ret;
+ }
+}
+
+static void aio_linux_suspend_timed_out(struct tevent_context *event_ctx,
+ struct tevent_timer *te,
+ struct timeval now,
+ void *private_data)
+{
+ bool *timed_out = (bool *)private_data;
+ /* Remove this timed event handler. */
+ TALLOC_FREE(te);
+ *timed_out = true;
+}
+
+/************************************************************************
+ Called to request everything to stop until all IO is completed.
+***********************************************************************/
+
+static int aio_linux_suspend(struct vfs_handle_struct *handle,
+ struct files_struct *fsp,
+ const SMB_STRUCT_AIOCB * const aiocb_array[],
+ int n,
+ const struct timespec *timeout)
+{
+ struct event_context *ev = NULL;
+ struct fd_event *sock_event = NULL;
+ int ret = -1;
+ struct suspend_private sp;
+ bool timed_out = false;
+ TALLOC_CTX *frame = talloc_stackframe();
+
+ /* This is a blocking call, and has to use a sub-event loop. */
+ ev = event_context_init(frame);
+ if (ev == NULL) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ if (timeout) {
+ struct timeval tv = convert_timespec_to_timeval(*timeout);
+ struct tevent_timer *te = tevent_add_timer(ev,
+ frame,
+ timeval_current_ofs(tv.tv_sec,
+ tv.tv_usec),
+ aio_linux_suspend_timed_out,
+ &timed_out);
+ if (!te) {
+ errno = ENOMEM;
+ goto out;
+ }
+ }
+
+ ZERO_STRUCT(sp);
+ sp.num_entries = n;
+ sp.aiocb_array = aiocb_array;
+ sp.num_finished = 0;
+
+ sock_event = tevent_add_fd(ev,
+ frame,
+ event_fd,
+ TEVENT_FD_READ,
+ aio_linux_handle_suspend_completion,
+ (void *)&sp);
+ if (sock_event == NULL) {
+ goto out;
+ }
+ /*
+ * We're going to cheat here. We know that smbd/aio.c
+ * only calls this when it's waiting for every single
+ * outstanding call to finish on a close, so just wait
+ * individually for each IO to complete. We don't care
+ * what order they finish - only that they all do. JRA.
+ */
+ while (sp.num_entries != sp.num_finished) {
+ if (tevent_loop_once(ev) == -1) {
+ goto out;
+ }
+
+ if (timed_out) {
+ errno = EAGAIN;
+ goto out;
+ }
+ }
+
+ ret = 0;
+
+ out:
+
+ TALLOC_FREE(frame);
+ return ret;
+}
+
+static int aio_linux_connect(vfs_handle_struct *handle, const char *service,
+ const char *user)
+{
+ /*********************************************************************
+ * How many io_events to initialize ?
+ * 128 per process seems insane as a default until you realize that
+ * (a) Throttling is done in SMB2 via the crediting algorithm.
+ * (b) SMB1 clients are limited to max_mux (50) outstanding
+ * requests and Windows clients don't use this anyway.
+ * Essentially we want this to be unlimited unless smb.conf
+ * says different.
+ *********************************************************************/
+ aio_pending_size = lp_parm_int(
+ SNUM(handle->conn), "aio_linux", "aio num events", 128);
+ return SMB_VFS_NEXT_CONNECT(handle, service, user);
+}
+
+static struct vfs_fn_pointers vfs_aio_linux_fns = {
+ .connect_fn = aio_linux_connect,
+ .aio_read_fn = aio_linux_read,
+ .aio_write_fn = aio_linux_write,
+ .aio_return_fn = aio_linux_return_fn,
+ .aio_cancel_fn = aio_linux_cancel,
+ .aio_error_fn = aio_linux_error_fn,
+ .aio_suspend_fn = aio_linux_suspend,
+};
+
+NTSTATUS vfs_aio_linux_init(void)
+{
+ return smb_register_vfs(SMB_VFS_INTERFACE_VERSION,
+ "aio_linux", &vfs_aio_linux_fns);
+}
diff --git a/source3/modules/wscript_build b/source3/modules/wscript_build
index 32c32fa224..090ca1b7d9 100644
--- a/source3/modules/wscript_build
+++ b/source3/modules/wscript_build
@@ -37,6 +37,7 @@ VFS_TSMSM_SRC = 'vfs_tsmsm.c'
VFS_FILEID_SRC = 'vfs_fileid.c'
VFS_AIO_FORK_SRC = 'vfs_aio_fork.c'
VFS_AIO_PTHREAD_SRC = 'vfs_aio_pthread.c'
+VFS_AIO_LINUX_SRC = 'vfs_aio_linux.c'
VFS_PREOPEN_SRC = 'vfs_preopen.c'
VFS_SYNCOPS_SRC = 'vfs_syncops.c'
VFS_ACL_XATTR_SRC = 'vfs_acl_xattr.c'
@@ -366,6 +367,15 @@ bld.SAMBA3_MODULE('vfs_aio_pthread',
enabled=bld.SAMBA3_IS_ENABLED_MODULE('vfs_aio_pthread'),
allow_undefined_symbols=True)
+bld.SAMBA3_MODULE('vfs_aio_linux',
+ subsystem='vfs',
+ source=VFS_AIO_LINUX_SRC,
+ deps='samba-util aio',
+ init_function='',
+ internal_module=bld.SAMBA3_IS_STATIC_MODULE('vfs_aio_linux'),
+ enabled=bld.SAMBA3_IS_ENABLED_MODULE('vfs_aio_linux'),
+ allow_undefined_symbols=True)
+
bld.SAMBA3_MODULE('vfs_preopen',
subsystem='vfs',
source=VFS_PREOPEN_SRC,
diff --git a/source3/wscript b/source3/wscript
index 24924cca00..0c2a51ede4 100644
--- a/source3/wscript
+++ b/source3/wscript
@@ -380,6 +380,29 @@ return acl_get_perm_np(permset_d, perm);
conf.CHECK_CODE('struct aiocb a; return aio_error(&a);', 'HAVE_AIO_ERROR', msg='Checking for aio_error', headers='aio.h', lib='aio rt')
conf.CHECK_CODE('struct aiocb a; return aio_cancel(1, &a);', 'HAVE_AIO_CANCEL', msg='Checking for aio_cancel', headers='aio.h', lib='aio rt')
conf.CHECK_CODE('struct aiocb a; return aio_suspend(&a, 1, NULL);', 'HAVE_AIO_SUSPEND', msg='Checking for aio_suspend', headers='aio.h', lib='aio rt')
+ if host_os.rfind('linux') > -1:
+ conf.CHECK_FUNCS_IN('io_submit', 'aio')
+ conf.CHECK_CODE('''
+struct io_event ioev;
+struct iocb *ioc;
+io_context_t ctx;
+struct timespec ts;
+int fd;
+char *buf;
+fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+io_queue_init(128,&ctx);
+io_prep_pwrite(ioc, 1, buf, 1, 0);
+io_prep_pread(ioc, 1, buf, 1, 0);
+io_set_eventfd(ioc, fd);
+io_set_callback(ioc, (io_callback_t)(0));
+io_submit(ctx, 1, &ioc);
+io_getevents(ctx, 1, 1, &ioev, &ts);
+''',
+ 'HAVE_LINUX_KERNEL_AIO',
+ msg='Checking for linux kernel asynchronous io support',
+ headers='unistd.h stdlib.h sys/types.h fcntl.h sys/eventfd.h libaio.h',
+ lib='aio')
+
if not conf.CONFIG_SET('HAVE_AIO'):
conf.DEFINE('HAVE_NO_AIO', '1')
else:
@@ -1576,6 +1599,9 @@ main() {
if conf.CONFIG_SET('HAVE_AIO') and Options.options.with_pthreadpool:
default_shared_modules.extend(TO_LIST('vfs_aio_pthread'))
+ if conf.CONFIG_SET('HAVE_AIO') and conf.CONFIG_SET('HAVE_LINUX_KERNEL_AIO'):
+ default_shared_modules.extend(TO_LIST('vfs_aio_linux'))
+
if conf.CONFIG_SET('HAVE_LDAP'):
default_static_modules.extend(TO_LIST('pdb_ldap idmap_ldap'))