summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--source3/modules/vfs_aio_linux.c643
1 files changed, 100 insertions, 543 deletions
diff --git a/source3/modules/vfs_aio_linux.c b/source3/modules/vfs_aio_linux.c
index 7b739429e4..b685cdcad2 100644
--- a/source3/modules/vfs_aio_linux.c
+++ b/source3/modules/vfs_aio_linux.c
@@ -2,6 +2,7 @@
* Simulate Posix AIO using Linux kernel AIO.
*
* Copyright (C) Jeremy Allison 2012
+ * Copyright (C) Volker Lendecke 2012
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -22,33 +23,19 @@
#include "system/filesys.h"
#include "smbd/smbd.h"
#include "smbd/globals.h"
+#include "lib/util/tevent_unix.h"
#include <sys/eventfd.h>
#include <libaio.h>
-struct aio_extra;
static int event_fd = -1;
static io_context_t io_ctx;
-static int aio_linux_requestid;
-static struct io_event *io_recv_events;
static struct fd_event *aio_read_event;
+static bool used;
+static unsigned num_busy;
-struct aio_private_data {
- struct aio_private_data *prev, *next;
- int requestid;
- SMB_STRUCT_AIOCB *aiocb;
- struct iocb *event_iocb;
- ssize_t ret_size;
- int ret_errno;
- bool cancelled;
-};
-
-/* List of outstanding requests we have. */
-static struct aio_private_data *pd_list;
-
-static void aio_linux_handle_completion(struct event_context *event_ctx,
- struct fd_event *event,
- uint16 flags,
- void *p);
+static void aio_linux_done(struct event_context *event_ctx,
+ struct fd_event *event,
+ uint16 flags, void *private_data);
/************************************************************************
Housekeeping. Cleanup if no activity for 30 seconds.
@@ -62,7 +49,9 @@ static void aio_linux_housekeeping(struct tevent_context *event_ctx,
/* Remove this timed event handler. */
TALLOC_FREE(te);
- if (pd_list != NULL) {
+ if ((num_busy != 0) || used) {
+ used = false;
+
/* Still busy. Look again in 30 seconds. */
(void)tevent_add_timer(event_ctx,
NULL,
@@ -82,7 +71,6 @@ static void aio_linux_housekeeping(struct tevent_context *event_ctx,
}
TALLOC_FREE(aio_read_event);
- TALLOC_FREE(io_recv_events);
}
/************************************************************************
@@ -99,7 +87,7 @@ static bool init_aio_linux(struct vfs_handle_struct *handle)
}
/* Schedule a shutdown event for 30 seconds from now. */
- te = tevent_add_timer(server_event_context(),
+ te = tevent_add_timer(handle->conn->sconn->ev_ctx,
NULL,
timeval_current_ofs(30, 0),
aio_linux_housekeeping,
@@ -109,14 +97,6 @@ static bool init_aio_linux(struct vfs_handle_struct *handle)
goto fail;
}
- /* Ensure we have enough space for aio_pending_size events. */
- io_recv_events = talloc_zero_array(NULL,
- struct io_event,
- aio_pending_size);
- if (io_recv_events == NULL) {
- goto fail;
- }
-
event_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
if (event_fd == -1) {
goto fail;
@@ -126,7 +106,7 @@ static bool init_aio_linux(struct vfs_handle_struct *handle)
NULL,
event_fd,
TEVENT_FD_READ,
- aio_linux_handle_completion,
+ aio_linux_done,
NULL);
if (aio_read_event == NULL) {
goto fail;
@@ -146,7 +126,6 @@ static bool init_aio_linux(struct vfs_handle_struct *handle)
DEBUG(10,("init_aio_linux: initialization failed\n"));
TALLOC_FREE(te);
- TALLOC_FREE(io_recv_events);
TALLOC_FREE(aio_read_event);
if (event_fd != -1) {
close(event_fd);
@@ -156,183 +135,91 @@ static bool init_aio_linux(struct vfs_handle_struct *handle)
return false;
}
-/************************************************************************
- Private data destructor.
-***********************************************************************/
+struct aio_linux_state {
+ struct iocb event_iocb;
+ ssize_t ret;
+ int err;
+};
-static int pd_destructor(struct aio_private_data *pd)
+static struct tevent_req *aio_linux_pread_send(
+ struct vfs_handle_struct *handle, TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev, struct files_struct *fsp,
+ void *data, size_t n, off_t offset)
{
- DLIST_REMOVE(pd_list, pd);
- return 0;
-}
-
-/************************************************************************
- Create and initialize a private data struct.
-***********************************************************************/
+ struct tevent_req *req;
+ struct aio_linux_state *state;
+ struct iocb *piocb;
+ int ret;
-static struct aio_private_data *create_private_data(TALLOC_CTX *ctx,
- SMB_STRUCT_AIOCB *aiocb)
-{
- struct aio_private_data *pd = talloc_zero(ctx, struct aio_private_data);
- if (!pd) {
+ req = tevent_req_create(mem_ctx, &state, struct aio_linux_state);
+ if (req == NULL) {
return NULL;
}
- pd->event_iocb = talloc_zero(pd, struct iocb);
- pd->requestid = aio_linux_requestid++;
- pd->aiocb = aiocb;
- pd->ret_size = -1;
- pd->ret_errno = EINPROGRESS;
- talloc_set_destructor(pd, pd_destructor);
- DLIST_ADD_END(pd_list, pd, struct aio_private_data *);
- return pd;
-}
-
-/************************************************************************
- Initiate an asynchronous pread call.
-***********************************************************************/
-
-static int aio_linux_read(struct vfs_handle_struct *handle,
- struct files_struct *fsp,
- SMB_STRUCT_AIOCB *aiocb)
-{
- struct aio_extra *aio_ex = (struct aio_extra *)aiocb->aio_sigevent.sigev_value.sival_ptr;
- struct aio_private_data *pd = NULL;
- int ret;
-
if (!init_aio_linux(handle)) {
- return -1;
+ tevent_req_error(req, EIO);
+ return tevent_req_post(req, ev);
}
- pd = create_private_data(aio_ex, aiocb);
- if (pd == NULL) {
- DEBUG(10, ("aio_linux_read: Could not create private data.\n"));
- return -1;
- }
+ io_prep_pread(&state->event_iocb, fsp->fh->fd, data, n, offset);
+ io_set_eventfd(&state->event_iocb, event_fd);
+ state->event_iocb.data = req;
- io_prep_pread(pd->event_iocb,
- pd->aiocb->aio_fildes,
- discard_const(pd->aiocb->aio_buf),
- pd->aiocb->aio_nbytes,
- pd->aiocb->aio_offset);
- io_set_eventfd(pd->event_iocb, event_fd);
- /* Use the callback pointer as a private data ptr. */
- io_set_callback(pd->event_iocb, (io_callback_t)pd);
+ piocb = &state->event_iocb;
- ret = io_submit(io_ctx, 1, &pd->event_iocb);
+ ret = io_submit(io_ctx, 1, &piocb);
if (ret < 0) {
- errno = ret;
- return -1;
+ tevent_req_error(req, -ret);
+ return tevent_req_post(req, ev);
}
-
- DEBUG(10, ("aio_linux_read: requestid=%d read requested "
- "of %llu bytes at offset %llu\n",
- pd->requestid,
- (unsigned long long)pd->aiocb->aio_nbytes,
- (unsigned long long)pd->aiocb->aio_offset));
-
- return 0;
+ num_busy += 1;
+ used = true;
+ return req;
}
-/************************************************************************
- Initiate an asynchronous pwrite call.
-***********************************************************************/
-
-static int aio_linux_write(struct vfs_handle_struct *handle,
- struct files_struct *fsp,
- SMB_STRUCT_AIOCB *aiocb)
+static struct tevent_req *aio_linux_pwrite_send(
+ struct vfs_handle_struct *handle, TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev, struct files_struct *fsp,
+ const void *data, size_t n, off_t offset)
{
- struct aio_extra *aio_ex = (struct aio_extra *)aiocb->aio_sigevent.sigev_value.sival_ptr;
- struct aio_private_data *pd = NULL;
+ struct tevent_req *req;
+ struct aio_linux_state *state;
+ struct iocb *piocb;
int ret;
+ req = tevent_req_create(mem_ctx, &state, struct aio_linux_state);
+ if (req == NULL) {
+ return NULL;
+ }
if (!init_aio_linux(handle)) {
- return -1;
+ tevent_req_error(req, EIO);
+ return tevent_req_post(req, ev);
}
- pd = create_private_data(aio_ex, aiocb);
- if (pd == NULL) {
- DEBUG(10, ("aio_linux_write: Could not create private data.\n"));
- return -1;
- }
+ io_prep_pwrite(&state->event_iocb, fsp->fh->fd, discard_const(data),
+ n, offset);
+ io_set_eventfd(&state->event_iocb, event_fd);
+ state->event_iocb.data = req;
- io_prep_pwrite(pd->event_iocb,
- pd->aiocb->aio_fildes,
- discard_const(pd->aiocb->aio_buf),
- pd->aiocb->aio_nbytes,
- pd->aiocb->aio_offset);
- io_set_eventfd(pd->event_iocb, event_fd);
- /* Use the callback pointer as a private data ptr. */
- io_set_callback(pd->event_iocb, (io_callback_t)pd);
+ piocb = &state->event_iocb;
- ret = io_submit(io_ctx, 1, &pd->event_iocb);
+ ret = io_submit(io_ctx, 1, &piocb);
if (ret < 0) {
- errno = ret;
- return -1;
+ tevent_req_error(req, -ret);
+ return tevent_req_post(req, ev);
}
-
- DEBUG(10, ("aio_linux_write: requestid=%d pwrite requested "
- "of %llu bytes at offset %llu\n",
- pd->requestid,
- (unsigned long long)pd->aiocb->aio_nbytes,
- (unsigned long long)pd->aiocb->aio_offset));
-
- return 0;
+ num_busy += 1;
+ used = true;
+ return req;
}
-/************************************************************************
- Save off the error / success conditions from the io_event.
- Is idempotent (can be called multiple times given the same ioev).
-***********************************************************************/
-
-static void aio_linux_setup_returns(struct io_event *ioev)
-{
- struct aio_private_data *pd = (struct aio_private_data *)ioev->data;
-
- if (ioev->res < 0) {
- pd->ret_size = -1;
- pd->ret_errno = -ioev->res;
- } else {
- pd->ret_size = ioev->res;
- pd->ret_errno = 0;
- }
-}
-
-/************************************************************************
- Handle a single finished io.
-***********************************************************************/
-
-static void aio_linux_handle_io_finished(struct io_event *ioev)
-{
- struct aio_extra *aio_ex = NULL;
- struct aio_private_data *pd = (struct aio_private_data *)ioev->data;
-
- aio_linux_setup_returns(ioev);
-
- aio_ex = (struct aio_extra *)pd->aiocb->aio_sigevent.sigev_value.sival_ptr;
- smbd_aio_complete_aio_ex(aio_ex);
-
- DEBUG(10,("aio_linux_handle_io_finished: requestid %d completed\n",
- pd->requestid ));
- TALLOC_FREE(aio_ex);
-}
-
-/************************************************************************
- Callback when multiple IOs complete.
-***********************************************************************/
-
-static void aio_linux_handle_completion(struct event_context *event_ctx,
- struct fd_event *event,
- uint16 flags,
- void *p)
+static void aio_linux_done(struct event_context *event_ctx,
+ struct fd_event *event,
+ uint16 flags, void *private_data)
{
uint64_t num_events = 0;
- DEBUG(10, ("aio_linux_handle_completion called with flags=%d\n",
- (int)flags));
-
- if ((flags & EVENT_FD_READ) == 0) {
- return;
- }
+ DEBUG(10, ("aio_linux_done called with flags=%d\n",
+ (int)flags));
/* Read the number of events available. */
if (sys_read(event_fd, &num_events, sizeof(num_events)) !=
@@ -341,382 +228,54 @@ static void aio_linux_handle_completion(struct event_context *event_ctx,
}
while (num_events > 0) {
- uint64_t events_to_read = MIN(num_events, aio_pending_size);
- struct timespec ts;
- int i;
+ struct timespec ts = { 0, };
+ struct io_event finished;
+ struct tevent_req *req;
+ struct aio_linux_state *state;
int ret;
- ts.tv_sec = 0;
- ts.tv_nsec = 0;
-
- ret = io_getevents(io_ctx,
- 1,
- (long)events_to_read,
- io_recv_events,
- &ts);
-
+ ret = io_getevents(io_ctx, 1, 1, &finished, &ts);
if (ret < 0) {
- errno = -ret;
- DEBUG(1, ("aio_linux_handle_completion: "
- "io_getevents error %s\n",
- strerror(errno) ));
+ DEBUG(1, ("aio_linux_done: io_getevents returned %s\n",
+ strerror(-ret)));
return;
}
-
if (ret == 0) {
- DEBUG(10, ("aio_linux_handle_completion: "
- "io_getevents returned 0\n"));
+ DEBUG(10, ("aio_linux_done: io_getvents returned "
+ "0\n"));
continue;
}
- /* ret is positive. */
- for (i = 0; i < ret; i++) {
- aio_linux_handle_io_finished(&io_recv_events[i]);
- }
-
- num_events -= ret;
- }
-}
-
-/************************************************************************
- Find the private data by aiocb.
-***********************************************************************/
+ num_busy -= 1;
-static struct aio_private_data *find_private_data_by_aiocb(SMB_STRUCT_AIOCB *aiocb)
-{
- struct aio_private_data *pd;
+ req = talloc_get_type_abort(finished.data,
+ struct tevent_req);
+ state = tevent_req_data(req, struct aio_linux_state);
- for (pd = pd_list; pd != NULL; pd = pd->next) {
- if (pd->aiocb == aiocb) {
- return pd;
+ if (finished.res < 0) {
+ state->ret = -1;
+ state->err = -finished.res;
+ } else {
+ state->ret = finished.res;
+ state->err = 0;
}
+ tevent_req_done(req);
+ num_events -= 1;
}
-
- return NULL;
}
-/************************************************************************
- Called to return the result of a completed AIO.
- Should only be called if aio_error returns something other than EINPROGRESS.
- Returns:
- Any other value - return from IO operation.
-***********************************************************************/
-
-static ssize_t aio_linux_return_fn(struct vfs_handle_struct *handle,
- struct files_struct *fsp,
- SMB_STRUCT_AIOCB *aiocb)
+static ssize_t aio_linux_recv(struct tevent_req *req, int *err)
{
- struct aio_private_data *pd = find_private_data_by_aiocb(aiocb);
-
- if (pd == NULL) {
- errno = EINVAL;
- DEBUG(0, ("aio_linux_return_fn: returning EINVAL\n"));
- return -1;
- }
+ struct aio_linux_state *state = tevent_req_data(
+ req, struct aio_linux_state);
- pd->aiocb = NULL;
-
- if (pd->cancelled) {
- errno = ECANCELED;
+ if (tevent_req_is_unix_error(req, err)) {
return -1;
}
-
- if (pd->ret_size == -1) {
- errno = pd->ret_errno;
- }
-
- return pd->ret_size;
-}
-
-/************************************************************************
- Called to check the result of an AIO.
- Returns:
- EINPROGRESS - still in progress.
- EINVAL - invalid aiocb.
- ECANCELED - request was cancelled.
- 0 - request completed successfully.
- Any other value - errno from IO operation.
-***********************************************************************/
-
-static int aio_linux_error_fn(struct vfs_handle_struct *handle,
- struct files_struct *fsp,
- SMB_STRUCT_AIOCB *aiocb)
-{
- struct aio_private_data *pd = find_private_data_by_aiocb(aiocb);
-
- if (pd == NULL) {
- return EINVAL;
- }
- if (pd->cancelled) {
- return ECANCELED;
- }
- return pd->ret_errno;
-}
-
-/************************************************************************
- Called to request the cancel of an AIO, or all of them on a specific
- fsp if aiocb == NULL.
-***********************************************************************/
-
-static int aio_linux_cancel(struct vfs_handle_struct *handle,
- struct files_struct *fsp,
- SMB_STRUCT_AIOCB *aiocb)
-{
- struct aio_private_data *pd = NULL;
-
- for (pd = pd_list; pd != NULL; pd = pd->next) {
- if (pd->aiocb == NULL) {
- continue;
- }
- if (pd->aiocb->aio_fildes != fsp->fh->fd) {
- continue;
- }
- if ((aiocb != NULL) && (pd->aiocb != aiocb)) {
- continue;
- }
-
- /*
- * We let the kernel do its job, but we discard the result when
- * it's finished. NB. Should I call io_cancel here ?
- */
-
- pd->cancelled = true;
- }
-
- return AIO_CANCELED;
-}
-
-/************************************************************************
- Callback for a previously detected job completion deferred to the main
- loop.
-***********************************************************************/
-
-static void aio_linux_handle_immediate(struct tevent_context *ctx,
- struct tevent_immediate *im,
- void *private_data)
-{
- struct io_event *ioev = (struct io_event *)private_data;
-
- aio_linux_handle_io_finished(ioev);
- TALLOC_FREE(ioev);
-}
-
-/************************************************************************
- Private data struct used in suspend completion code.
-***********************************************************************/
-
-struct suspend_private {
- int num_entries;
- int num_finished;
- const SMB_STRUCT_AIOCB * const *aiocb_array;
-};
-
-/************************************************************************
- Handle a single finished io from suspend.
-***********************************************************************/
-
-static void aio_linux_handle_suspend_io_finished(struct suspend_private *sp,
- struct io_event *ioev)
-{
- struct aio_private_data *pd = (struct aio_private_data *)ioev->data;
- struct io_event *new_ioev = NULL;
- struct tevent_immediate *im = NULL;
- int i;
-
- /* Is this a requestid with an aiocb we're interested in ? */
- for (i = 0; i < sp->num_entries; i++) {
- if (sp->aiocb_array[i] == pd->aiocb) {
- sp->num_finished++;
- /*
- * We don't call aio_linux_handle_io_finished()
- * here, but only the function that sets up the
- * return values. This allows
- * aio_linux_handle_io_finished() to be successfully
- * called from smbd/aio.c:wait_for_aio_completion()
- * once we return from here with all io's done.
- */
- aio_linux_setup_returns(ioev);
- return;
- }
- }
-
- /* Jobid completed we weren't waiting for.
- We must reshedule this as an immediate event
- on the main event context. */
- im = tevent_create_immediate(NULL);
- if (!im) {
- exit_server_cleanly("aio_linux_handle_suspend_completion: no memory");
- }
-
- new_ioev = (struct io_event *)talloc_memdup(NULL,
- ioev,
- sizeof(struct io_event));
- if (!new_ioev) {
- exit_server_cleanly("aio_linux_handle_suspend_completion: no memory");
+ if (state->ret == -1) {
+ *err = state->err;
}
-
- DEBUG(10,("aio_linux_handle_suspend_completion: "
- "re-scheduling requestid %d\n",
- pd->requestid));
-
- tevent_schedule_immediate(im,
- server_event_context(),
- aio_linux_handle_immediate,
- (void *)new_ioev);
-}
-
-/************************************************************************
- Callback when an IO completes from a suspend call.
-***********************************************************************/
-
-static void aio_linux_handle_suspend_completion(struct event_context *event_ctx,
- struct fd_event *event,
- uint16 flags,
- void *p)
-{
- struct suspend_private *sp = (struct suspend_private *)p;
- uint64_t remaining_events = sp->num_entries - sp->num_finished;
- uint64_t num_events = 0;
-
- DEBUG(10, ("aio_linux_handle_suspend_completion called with flags=%d\n",
- (int)flags));
-
- if ((flags & EVENT_FD_READ) == 0) {
- return;
- }
-
- /* Read the number of events available. */
- if (sys_read(event_fd, &num_events, sizeof(num_events)) !=
- sizeof(num_events)) {
- smb_panic("aio_linux_handle_completion: invalid read");
- }
-
- while (num_events > 0) {
- uint64_t events_to_read = MIN(num_events, remaining_events);
- struct timespec ts;
- int i;
- int ret;
-
- ts.tv_sec = 0;
- ts.tv_nsec = 0;
-
- ret = io_getevents(io_ctx,
- 1,
- (long)events_to_read,
- io_recv_events,
- &ts);
-
- if (ret < 0) {
- errno = -ret;
- DEBUG(1, ("aio_linux_handle_suspend_completion: "
- "io_getevents error %s\n",
- strerror(errno) ));
- return;
- }
-
- if (ret == 0) {
- DEBUG(10, ("aio_linux_handle_suspend_completion: "
- "io_getevents returned 0\n"));
- continue;
- }
-
- /* ret is positive. */
- for (i = 0; i < ret; i++) {
- aio_linux_handle_suspend_io_finished(sp,
- &io_recv_events[i]);
- }
-
- num_events -= ret;
- }
-}
-
-static void aio_linux_suspend_timed_out(struct tevent_context *event_ctx,
- struct tevent_timer *te,
- struct timeval now,
- void *private_data)
-{
- bool *timed_out = (bool *)private_data;
- /* Remove this timed event handler. */
- TALLOC_FREE(te);
- *timed_out = true;
-}
-
-/************************************************************************
- Called to request everything to stop until all IO is completed.
-***********************************************************************/
-
-static int aio_linux_suspend(struct vfs_handle_struct *handle,
- struct files_struct *fsp,
- const SMB_STRUCT_AIOCB * const aiocb_array[],
- int n,
- const struct timespec *timeout)
-{
- struct event_context *ev = NULL;
- struct fd_event *sock_event = NULL;
- int ret = -1;
- struct suspend_private sp;
- bool timed_out = false;
- TALLOC_CTX *frame = talloc_stackframe();
-
- /* This is a blocking call, and has to use a sub-event loop. */
- ev = event_context_init(frame);
- if (ev == NULL) {
- errno = ENOMEM;
- goto out;
- }
-
- if (timeout) {
- struct timeval tv = convert_timespec_to_timeval(*timeout);
- struct tevent_timer *te = tevent_add_timer(ev,
- frame,
- timeval_current_ofs(tv.tv_sec,
- tv.tv_usec),
- aio_linux_suspend_timed_out,
- &timed_out);
- if (!te) {
- errno = ENOMEM;
- goto out;
- }
- }
-
- ZERO_STRUCT(sp);
- sp.num_entries = n;
- sp.aiocb_array = aiocb_array;
- sp.num_finished = 0;
-
- sock_event = tevent_add_fd(ev,
- frame,
- event_fd,
- TEVENT_FD_READ,
- aio_linux_handle_suspend_completion,
- (void *)&sp);
- if (sock_event == NULL) {
- goto out;
- }
- /*
- * We're going to cheat here. We know that smbd/aio.c
- * only calls this when it's waiting for every single
- * outstanding call to finish on a close, so just wait
- * individually for each IO to complete. We don't care
- * what order they finish - only that they all do. JRA.
- */
- while (sp.num_entries != sp.num_finished) {
- if (tevent_loop_once(ev) == -1) {
- goto out;
- }
-
- if (timed_out) {
- errno = EAGAIN;
- goto out;
- }
- }
-
- ret = 0;
-
- out:
-
- TALLOC_FREE(frame);
- return ret;
+ return state->ret;
}
static int aio_linux_connect(vfs_handle_struct *handle, const char *service,
@@ -738,12 +297,10 @@ static int aio_linux_connect(vfs_handle_struct *handle, const char *service,
static struct vfs_fn_pointers vfs_aio_linux_fns = {
.connect_fn = aio_linux_connect,
- .aio_read_fn = aio_linux_read,
- .aio_write_fn = aio_linux_write,
- .aio_return_fn = aio_linux_return_fn,
- .aio_cancel_fn = aio_linux_cancel,
- .aio_error_fn = aio_linux_error_fn,
- .aio_suspend_fn = aio_linux_suspend,
+ .pread_send_fn = aio_linux_pread_send,
+ .pread_recv_fn = aio_linux_recv,
+ .pwrite_send_fn = aio_linux_pwrite_send,
+ .pwrite_recv_fn = aio_linux_recv,
};
NTSTATUS vfs_aio_linux_init(void)