From ff701ceae2409c656e1c30c00c728cfa67aeba21 Mon Sep 17 00:00:00 2001
From: Volker Lendecke
Date: Fri, 6 Jul 2012 15:33:47 +0200
Subject: s3: Add vfs_aio_posix

Signed-off-by: Jeremy Allison
---
 source3/modules/vfs_aio_posix.c | 296 ++++++++++++++++++++++++++++++++++++++++
 source3/modules/wscript_build   |  10 ++
 2 files changed, 306 insertions(+)
 create mode 100644 source3/modules/vfs_aio_posix.c

(limited to 'source3/modules')

diff --git a/source3/modules/vfs_aio_posix.c b/source3/modules/vfs_aio_posix.c
new file mode 100644
index 0000000000..97b102b5e4
--- /dev/null
+++ b/source3/modules/vfs_aio_posix.c
@@ -0,0 +1,296 @@
+/*
+ * Simulate pread_send/recv and pwrite_send/recv using posix aio
+ *
+ * Copyright (C) Volker Lendecke 2012
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "includes.h"
+#include "system/filesys.h"
+#include "system/shmem.h"
+#include "smbd/smbd.h"
+#include "smbd/globals.h"
+#include "lib/util/tevent_unix.h"
+#include <aio.h>
+
+/* The signal we'll use to signify aio done. */
+#ifndef RT_SIGNAL_AIO
+#define RT_SIGNAL_AIO (SIGRTMIN+3)
+#endif
+
+#ifndef HAVE_STRUCT_SIGEVENT_SIGEV_VALUE_SIVAL_PTR
+#ifdef HAVE_STRUCT_SIGEVENT_SIGEV_VALUE_SIGVAL_PTR
+#define sival_int sigval_int
+#define sival_ptr sigval_ptr
+#endif
+#endif
+
+/* Handler registration for RT_SIGNAL_AIO, created on first connect. */
+static struct tevent_signal *aio_signal_event = NULL;
+
+/*
+ * Per-request state shared by the pread and pwrite paths: the aiocb
+ * passed to aio_read()/aio_write() and the result handed back by
+ * aio_posix_recv().
+ */
+struct aio_posix_state {
+	struct aiocb acb;
+	ssize_t ret;
+	int err;
+};
+
+/*
+ * talloc destructor for a request whose aio is still outstanding:
+ * block in aio_suspend() on the request's aiocb, retrying on EINTR.
+ * Always returns 0.
+ *
+ * NOTE(review): the signal queued for this request still carries the
+ * tevent_req pointer; confirm it cannot reach
+ * aio_posix_signal_handler() after the request has been freed.
+ */
+static int aio_posix_state_destructor(struct aio_posix_state *s)
+{
+	int ret;
+
+	/*
+	 * We could do better here. This destructor is run when a
+	 * request is prematurely cancelled. We wait for the aio to
+	 * complete, so that we do not have to maintain aiocb structs
+	 * beyond the life of an aio_posix_state. Possible, but not
+	 * sure the effort is worth it right now.
+	 */
+
+	do {
+		const struct aiocb *a = &s->acb;
+		ret = aio_suspend(&a, 1, NULL);
+	} while ((ret == -1) && (errno == EINTR));
+
+	return 0;
+}
+
+/*
+ * Start an asynchronous read of n bytes at offset from fsp into data.
+ *
+ * On success the request completes later from
+ * aio_posix_signal_handler(). If aio_read() fails with EAGAIN the
+ * read is done synchronously with sys_pread() and the request is
+ * completed immediately; any other failure is reported as a request
+ * error. Returns NULL if tevent_req_create() fails.
+ */
+static struct tevent_req *aio_posix_pread_send(
+	struct vfs_handle_struct *handle,
+	TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+	struct files_struct *fsp, void *data, size_t n, off_t offset)
+{
+	struct tevent_req *req;
+	struct aio_posix_state *state;
+	struct aiocb *a;
+	int ret;
+
+	req = tevent_req_create(mem_ctx, &state, struct aio_posix_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	a = &state->acb;
+
+	a->aio_fildes = fsp->fh->fd;
+	a->aio_buf = data;
+	a->aio_nbytes = n;
+	a->aio_offset = offset;
+	a->aio_sigevent.sigev_notify = SIGEV_SIGNAL;
+	a->aio_sigevent.sigev_signo = RT_SIGNAL_AIO;
+	a->aio_sigevent.sigev_value.sival_ptr = req;
+
+	ret = aio_read(a);
+	if (ret == 0) {
+		talloc_set_destructor(state, aio_posix_state_destructor);
+		return req;
+	}
+
+	if (errno == EAGAIN) {
+		/*
+		 * aio overloaded, do the sync fallback
+		 */
+		state->ret = sys_pread(fsp->fh->fd, data, n, offset);
+		if (state->ret == -1) {
+			state->err = errno;
+		}
+		tevent_req_done(req);
+		return tevent_req_post(req, ev);
+	}
+
+	tevent_req_error(req, errno);
+	return tevent_req_post(req, ev);
+}
+
+/*
+ * Start an asynchronous write of n bytes from data to fsp at offset.
+ *
+ * On success the request completes later from
+ * aio_posix_signal_handler(). If aio_write() fails with EAGAIN the
+ * write is done synchronously with sys_pwrite() and the request is
+ * completed immediately; any other failure is reported as a request
+ * error. Returns NULL if tevent_req_create() fails.
+ */
+static struct tevent_req *aio_posix_pwrite_send(
+	struct vfs_handle_struct *handle,
+	TALLOC_CTX *mem_ctx, struct tevent_context *ev,
+	struct files_struct *fsp, const void *data, size_t n, off_t offset)
+{
+	struct tevent_req *req;
+	struct aio_posix_state *state;
+	struct aiocb *a;
+	int ret;
+
+	req = tevent_req_create(mem_ctx, &state, struct aio_posix_state);
+	if (req == NULL) {
+		return NULL;
+	}
+
+	a = &state->acb;
+
+	a->aio_fildes = fsp->fh->fd;
+	a->aio_buf = discard_const(data);
+	a->aio_nbytes = n;
+	a->aio_offset = offset;
+	a->aio_sigevent.sigev_notify = SIGEV_SIGNAL;
+	a->aio_sigevent.sigev_signo = RT_SIGNAL_AIO;
+	a->aio_sigevent.sigev_value.sival_ptr = req;
+
+	ret = aio_write(a);
+	if (ret == 0) {
+		talloc_set_destructor(state, aio_posix_state_destructor);
+		return req;
+	}
+
+	if (errno == EAGAIN) {
+		/*
+		 * aio overloaded, do the sync fallback
+		 */
+		state->ret = sys_pwrite(fsp->fh->fd, data, n, offset);
+		if (state->ret == -1) {
+			state->err = errno;
+		}
+		tevent_req_done(req);
+		return tevent_req_post(req, ev);
+	}
+
+	tevent_req_error(req, errno);
+	return tevent_req_post(req, ev);
+}
+
+/*
+ * tevent handler for RT_SIGNAL_AIO. Takes the request from the
+ * signal's sival_ptr and, unless aio_error() reports EINPROGRESS or
+ * ECANCELED, stores the aio_return() result and the aio_error() code
+ * in the request state and marks the request done.
+ */
+static void aio_posix_signal_handler(struct tevent_context *ev,
+				     struct tevent_signal *se,
+				     int signum, int count,
+				     void *_info, void *private_data)
+{
+	siginfo_t *info;
+	struct tevent_req *req;
+	struct aio_posix_state *state;
+	int err;
+
+	info = (siginfo_t *)_info;
+	req = talloc_get_type_abort(info->si_value.sival_ptr,
+				    struct tevent_req);
+	state = tevent_req_data(req, struct aio_posix_state);
+
+	err = aio_error(&state->acb);
+	if (err == EINPROGRESS) {
+		DEBUG(10, ("aio_posix_signal_handler: operation req %p "
+			   "still in progress\n", req));
+		return;
+	}
+	if (err == ECANCELED) {
+		DEBUG(10, ("aio_posix_signal_handler: operation req %p "
+			   "canceled\n", req));
+		return;
+	}
+
+	/*
+	 * No need to suspend for this in the destructor anymore
+	 */
+	talloc_set_destructor(state, NULL);
+
+	state->ret = aio_return(&state->acb);
+	state->err = err;
+	tevent_req_done(req);
+}
+
+/*
+ * Shared recv function for pread and pwrite. Returns -1 if
+ * tevent_req_is_unix_error() reports a failed request; otherwise
+ * copies the stored error code into *err and returns the stored
+ * result.
+ */
+static ssize_t aio_posix_recv(struct tevent_req *req, int *err)
+{
+	struct aio_posix_state *state = tevent_req_data(
+		req, struct aio_posix_state);
+
+	if (tevent_req_is_unix_error(req, err)) {
+		return -1;
+	}
+	*err = state->err;
+	return state->ret;
+}
+
+/*
+ * Connect hook: if not already done, register
+ * aio_posix_signal_handler() for RT_SIGNAL_AIO on the connection's
+ * event context, then chain to the next VFS module. Returns -1 if
+ * the handler cannot be registered.
+ */
+static int aio_posix_connect(vfs_handle_struct *handle, const char *service,
+			     const char *user)
+{
+	if (aio_signal_event == NULL) {
+		struct tevent_context *ev = handle->conn->sconn->ev_ctx;
+
+		aio_signal_event = tevent_add_signal(
+			ev, ev, RT_SIGNAL_AIO, SA_SIGINFO,
+			aio_posix_signal_handler, NULL);
+
+		if (aio_signal_event == NULL) {
+			DEBUG(1, ("tevent_add_signal failed\n"));
+			return -1;
+		}
+	}
+	return SMB_VFS_NEXT_CONNECT(handle, service, user);
+}
+
+/* VFS operations provided by this module. */
+static struct vfs_fn_pointers vfs_aio_posix_fns = {
+	.connect_fn = aio_posix_connect,
+	.pread_send_fn = aio_posix_pread_send,
+	.pread_recv_fn = aio_posix_recv,
+	.pwrite_send_fn = aio_posix_pwrite_send,
+	.pwrite_recv_fn = aio_posix_recv,
+};
+
+/* Register this module with the VFS layer under the name "aio_posix". */
+NTSTATUS vfs_aio_posix_init(void);
+NTSTATUS vfs_aio_posix_init(void)
+{
+	return smb_register_vfs(SMB_VFS_INTERFACE_VERSION,
+				"aio_posix", &vfs_aio_posix_fns);
+}
diff --git a/source3/modules/wscript_build b/source3/modules/wscript_build
index 1f3189affb..2f5088ad24 100644
--- a/source3/modules/wscript_build
+++ b/source3/modules/wscript_build
@@ -37,6 +37,7 @@ VFS_TSMSM_SRC = 'vfs_tsmsm.c'
 VFS_FILEID_SRC = 'vfs_fileid.c'
 VFS_AIO_FORK_SRC = 'vfs_aio_fork.c'
 VFS_AIO_PTHREAD_SRC = 'vfs_aio_pthread.c'
+VFS_AIO_POSIX_SRC = 'vfs_aio_posix.c'
 VFS_AIO_LINUX_SRC = 'vfs_aio_linux.c'
 VFS_PREOPEN_SRC = 'vfs_preopen.c'
 VFS_SYNCOPS_SRC = 'vfs_syncops.c'
@@ -363,6 +364,15 @@ bld.SAMBA3_MODULE('vfs_aio_pthread',
                  enabled=bld.SAMBA3_IS_ENABLED_MODULE('vfs_aio_pthread'),
                  allow_undefined_symbols=True)
 
+bld.SAMBA3_MODULE('vfs_aio_posix',
+                 subsystem='vfs',
+                 source=VFS_AIO_POSIX_SRC,
+                 deps='samba-util tevent',
+                 init_function='',
+                 internal_module=bld.SAMBA3_IS_STATIC_MODULE('vfs_aio_posix'),
+                 enabled=bld.SAMBA3_IS_ENABLED_MODULE('vfs_aio_posix'),
+                 allow_undefined_symbols=True)
+
 bld.SAMBA3_MODULE('vfs_aio_linux',
                  subsystem='vfs',
                  source=VFS_AIO_LINUX_SRC,
--
cgit