summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVolker Lendecke <vl@samba.org>2009-03-10 18:02:21 +0100
committerVolker Lendecke <vl@samba.org>2009-03-10 18:11:56 +0100
commit3d280639c4652d6cd35577e333bcd46c2517754c (patch)
tree33fc79b0908db0807deec4c700d4fedea4be085c
parent66a26a0ac27462361cb94e3ef1744d275bd95da1 (diff)
downloadsamba-3d280639c4652d6cd35577e333bcd46c2517754c.tar.gz
samba-3d280639c4652d6cd35577e333bcd46c2517754c.tar.bz2
samba-3d280639c4652d6cd35577e333bcd46c2517754c.zip
Add a vfs_preopen module to hide fs latencies
-rw-r--r--docs-xml/manpages-3/vfs_preopen.8.xml115
-rw-r--r--source3/Makefile.in5
-rw-r--r--source3/configure.in3
-rw-r--r--source3/modules/vfs_preopen.c456
4 files changed, 578 insertions, 1 deletions
diff --git a/docs-xml/manpages-3/vfs_preopen.8.xml b/docs-xml/manpages-3/vfs_preopen.8.xml
new file mode 100644
index 0000000000..a84d4720bb
--- /dev/null
+++ b/docs-xml/manpages-3/vfs_preopen.8.xml
@@ -0,0 +1,115 @@
+<?xml version="1.0" encoding="iso-8859-1"?>
+<!DOCTYPE refentry PUBLIC "-//Samba-Team//DTD DocBook V4.2-Based Variant V1.0//EN" "http://www.samba.org/samba/DTD/samba-doc">
+<refentry id="vfs_preopen.8">
+
+<refmeta>
+ <refentrytitle>vfs_preopen</refentrytitle>
+ <manvolnum>8</manvolnum>
+ <refmiscinfo class="source">Samba</refmiscinfo>
+ <refmiscinfo class="manual">System Administration tools</refmiscinfo>
+ <refmiscinfo class="version">3.3</refmiscinfo>
+</refmeta>
+
+<refnamediv>
+ <refname>vfs_preopen</refname>
+ <refpurpose>Hide read latencies for applications reading numbered files</refpurpose>
+</refnamediv>
+
+<refsynopsisdiv>
+ <cmdsynopsis>
+ <command>vfs objects = preopen</command>
+ </cmdsynopsis>
+</refsynopsisdiv>
+
+<refsect1>
+ <title>DESCRIPTION</title>
+
+ <para>This VFS module is part of the
+ <citerefentry><refentrytitle>samba</refentrytitle>
+ <manvolnum>7</manvolnum></citerefentry> suite.</para>
+
+ <para>This module assists applications that want to read numbered
+ files in sequence with very strict latency requirements. One area
+ where this happens is video streaming applications that want to read
+ one file per frame.</para>
+
+ <para>When you use this module, a number of helper processes are
+ started that speculatively open files and read a number of bytes to
+ prime the file system cache, so that later on when the real
+ application's request comes along, no disk access is necessary.</para>
+
+ <para>This module is stackable.</para>
+
+</refsect1>
+
+
+<refsect1>
+ <title>OPTIONS</title>
+
+ <variablelist>
+
+ <varlistentry>
+ <term>preopen:names = /pattern/</term>
+ <listitem>
+ <para>
+ preopen:names specifies the file name pattern which should
+ trigger the preopen helpers to do their work. We assume that
+ the files are numbered incrementally. So if your file names
+ are numbered FRAME00000.frm FRAME00001.frm and so on you would
+ list them as <command>preopen:names=/FRAME*.frm/</command>
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>preopen:num_bytes = BYTES</term>
+ <listitem>
+ <para>
+ Specifies the number of bytes the helpers should speculatively
+ read. Defaults to 1.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>preopen:helpers = NUM-PROCS</term>
+ <listitem>
+ <para>
+ Number of forked helper processes, defaults to 1.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>preopen:queuelen = NUM-FILES</term>
+ <listitem>
+ <para>
+ Number of files that should be speculatively opened. Defaults
+ to the 10 subsequent files.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ </variablelist>
+</refsect1>
+
+<refsect1>
+ <title>VERSION</title>
+ <para>This man page is correct for version 3.3 of the Samba suite.
+ </para>
+</refsect1>
+
+<refsect1>
+ <title>AUTHOR</title>
+
+ <para>The original Samba software and related utilities
+ were created by Andrew Tridgell. Samba is now developed
+ by the Samba Team as an Open Source project similar
+ to the way the Linux kernel is developed.</para>
+
+ <para>The PREOPEN VFS module was created with contributions from
+ Volker Lendecke and the developers at IBM.
+ </para>
+</refsect1>
+
+</refentry>
diff --git a/source3/Makefile.in b/source3/Makefile.in
index 6aabcf0c8d..76fd91a31e 100644
--- a/source3/Makefile.in
+++ b/source3/Makefile.in
@@ -667,6 +667,7 @@ VFS_READAHEAD_OBJ = modules/vfs_readahead.o
VFS_TSMSM_OBJ = modules/vfs_tsmsm.o
VFS_FILEID_OBJ = modules/vfs_fileid.o
VFS_AIO_FORK_OBJ = modules/vfs_aio_fork.o
+VFS_PREOPEN_OBJ = modules/vfs_preopen.o
VFS_SYNCOPS_OBJ = modules/vfs_syncops.o
VFS_ACL_XATTR_OBJ = modules/vfs_acl_xattr.o
VFS_ACL_TDB_OBJ = modules/vfs_acl_tdb.o
@@ -2567,6 +2568,10 @@ bin/aio_fork.@SHLIBEXT@: $(BINARY_PREREQS) $(VFS_AIO_FORK_OBJ)
@echo "Building plugin $@"
@$(SHLD_MODULE) $(VFS_AIO_FORK_OBJ)
+bin/preopen.@SHLIBEXT@: $(BINARY_PREREQS) $(VFS_PREOPEN_OBJ)
+ @echo "Building plugin $@"
+ @$(SHLD_MODULE) $(VFS_PREOPEN_OBJ)
+
bin/acl_xattr.@SHLIBEXT@: $(BINARY_PREREQS) $(VFS_ACL_XATTR_OBJ)
@echo "Building plugin $@"
@$(SHLD_MODULE) $(VFS_ACL_XATTR_OBJ)
diff --git a/source3/configure.in b/source3/configure.in
index e48ff34554..2af1545d58 100644
--- a/source3/configure.in
+++ b/source3/configure.in
@@ -417,7 +417,7 @@ dnl These have to be built static:
default_static_modules="pdb_smbpasswd pdb_tdbsam pdb_wbc_sam rpc_lsarpc rpc_samr rpc_winreg rpc_initshutdown rpc_dssetup rpc_wkssvc rpc_svcctl rpc_ntsvcs rpc_netlogon rpc_netdfs rpc_srvsvc rpc_spoolss2 rpc_eventlog auth_sam auth_unix auth_winbind auth_wbc auth_server auth_domain auth_builtin auth_netlogond vfs_default nss_info_template"
dnl These are preferably build shared, and static if dlopen() is not available
-default_shared_modules="vfs_recycle vfs_audit vfs_extd_audit vfs_full_audit vfs_netatalk vfs_fake_perms vfs_default_quota vfs_readonly vfs_cap vfs_expand_msdfs vfs_shadow_copy vfs_shadow_copy2 charset_CP850 charset_CP437 auth_script vfs_readahead vfs_xattr_tdb vfs_streams_xattr vfs_streams_depot vfs_acl_xattr vfs_acl_tdb vfs_smb_traffic_analyzer"
+default_shared_modules="vfs_recycle vfs_audit vfs_extd_audit vfs_full_audit vfs_netatalk vfs_fake_perms vfs_default_quota vfs_readonly vfs_cap vfs_expand_msdfs vfs_shadow_copy vfs_shadow_copy2 charset_CP850 charset_CP437 auth_script vfs_readahead vfs_xattr_tdb vfs_streams_xattr vfs_streams_depot vfs_acl_xattr vfs_acl_tdb vfs_smb_traffic_analyzer vfs_preopen"
if test "x$developer" = xyes; then
default_static_modules="$default_static_modules rpc_rpcecho"
@@ -6185,6 +6185,7 @@ SMB_MODULE(vfs_readahead, \$(VFS_READAHEAD_OBJ), "bin/readahead.$SHLIBEXT", VFS)
SMB_MODULE(vfs_tsmsm, \$(VFS_TSMSM_OBJ), "bin/tsmsm.$SHLIBEXT", VFS)
SMB_MODULE(vfs_fileid, \$(VFS_FILEID_OBJ), "bin/fileid.$SHLIBEXT", VFS)
SMB_MODULE(vfs_aio_fork, \$(VFS_AIO_FORK_OBJ), "bin/aio_fork.$SHLIBEXT", VFS)
+SMB_MODULE(vfs_preopen, \$(VFS_PREOPEN_OBJ), "bin/preopen.$SHLIBEXT", VFS)
SMB_MODULE(vfs_syncops, \$(VFS_SYNCOPS_OBJ), "bin/syncops.$SHLIBEXT", VFS)
SMB_MODULE(vfs_zfsacl, \$(VFS_ZFSACL_OBJ), "bin/zfsacl.$SHLIBEXT", VFS)
SMB_MODULE(vfs_notify_fam, \$(VFS_NOTIFY_FAM_OBJ), "bin/notify_fam.$SHLIBEXT", VFS)
diff --git a/source3/modules/vfs_preopen.c b/source3/modules/vfs_preopen.c
new file mode 100644
index 0000000000..25b9e7f3e4
--- /dev/null
+++ b/source3/modules/vfs_preopen.c
@@ -0,0 +1,456 @@
+/*
+ * Force a readahead of files by opening them and reading the first bytes
+ *
+ * Copyright (C) Volker Lendecke 2008
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "includes.h"
+
+struct preopen_state;
+
+/*
+ * Parent-side bookkeeping for one forked helper process.
+ */
+struct preopen_helper {
+ /* Back pointer to the state this helper belongs to */
+ struct preopen_state *state;
+ /* Read event registered on "fd" via event_add_fd() */
+ struct fd_event *fde;
+ /* Return value of sys_fork() for this helper */
+ pid_t pid;
+ /* Parent end of the socketpair, -1 while not open */
+ int fd;
+ /*
+  * Set after a file name was written to the helper, cleared when
+  * its one-byte reply was read; also set when the helper is destroyed
+  */
+ bool busy;
+};
+
+/*
+ * State shared by all helpers; created by preopen_init_helpers() and
+ * stored in the VFS handle by preopen_state_get().
+ */
+struct preopen_state {
+ /* Number of entries in "helpers" */
+ int num_helpers;
+ struct preopen_helper *helpers;
+
+ size_t to_read; /* How many bytes to read in children? */
+ /* Value of preopen:queuelen; added to the opened file's number */
+ int queue_max;
+
+ char *template_fname; /* Filename to be sent to children */
+ size_t number_start; /* start offset into "template_fname" */
+ int num_digits; /* How many digits is the number long? */
+
+ int fnum_sent; /* last fname sent to children */
+
+ int fnum_queue_end; /* last fname to be sent, based on
+ * last open call + preopen:queuelen
+ */
+
+ /* Parsed preopen:names list, filled in by set_namearray() */
+ name_compare_entry *preopen_names;
+};
+
+/*
+ * Shut down one helper: remove its fd event, close the parent end of
+ * the socket, then kill and reap the child. The slot is left marked
+ * busy so that preopen_queue_run() never selects it again.
+ *
+ * A slot whose fd is -1 has no open socket (it was never started
+ * successfully or has already been destroyed). For such a slot only
+ * the busy flag is set; in particular kill() is never called, because
+ * the pid of a slot without a socket is not trustworthy and
+ * kill(-1, SIGKILL) would signal every process we may signal.
+ */
+static void preopen_helper_destroy(struct preopen_helper *c)
+{
+	int status;
+
+	if (c->fd == -1) {
+		c->busy = true;
+		return;
+	}
+
+	/*
+	 * Drop the event before closing the fd, otherwise the event
+	 * context keeps polling a closed (and possibly reused)
+	 * descriptor and calls back into a dead helper.
+	 */
+	TALLOC_FREE(c->fde);
+	close(c->fd);
+	c->fd = -1;
+
+	if (c->pid > 0) {
+		kill(c->pid, SIGKILL);
+		waitpid(c->pid, &status, 0);
+	}
+	c->busy = true;
+}
+
+/*
+ * Hand out file names to idle helpers until either the queue end is
+ * reached or every helper is busy.
+ *
+ * The name sent is "template_fname" with the numeric part overwritten
+ * by fnum_sent+1, zero-padded to num_digits. The complete talloc
+ * buffer including the trailing NUL is written, which is what the
+ * helper uses to detect the end of a name.
+ */
+static void preopen_queue_run(struct preopen_state *state)
+{
+	char *pdelimiter;
+	char delimiter;
+
+	if (state->template_fname == NULL) {
+		/*
+		 * No usable template. This happens when a helper
+		 * reply arrives after preopen_open() dropped the
+		 * template because the latest name did not parse.
+		 */
+		return;
+	}
+
+	/*
+	 * snprintf() below terminates the number with a NUL byte that
+	 * overwrites the character following the digits. Remember that
+	 * character so it can be put back.
+	 */
+	pdelimiter = state->template_fname + state->number_start
+		+ state->num_digits;
+	delimiter = *pdelimiter;
+
+	while (state->fnum_sent < state->fnum_queue_end) {
+
+		ssize_t written;
+		size_t to_write;
+		int helper;
+
+		for (helper=0; helper<state->num_helpers; helper++) {
+			if (!state->helpers[helper].busy) {
+				break;
+			}
+		}
+		if (helper == state->num_helpers) {
+			/* everyone is busy */
+			return;
+		}
+
+		snprintf(state->template_fname + state->number_start,
+			 state->num_digits + 1,
+			 "%.*lu", state->num_digits,
+			 (long unsigned int)(state->fnum_sent + 1));
+		*pdelimiter = delimiter;
+
+		to_write = talloc_get_size(state->template_fname);
+		written = write_data(state->helpers[helper].fd,
+				     state->template_fname, to_write);
+		state->helpers[helper].busy = true;
+
+		if ((written < 0) || ((size_t)written != to_write)) {
+			/* short or failed write: give up on this helper */
+			preopen_helper_destroy(&state->helpers[helper]);
+		}
+		state->fnum_sent += 1;
+	}
+}
+
+/*
+ * Event callback for the parent end of a helper socket.
+ *
+ * A helper answers every file name with a single byte. Reading that
+ * byte makes the helper available again and restarts the queue. If
+ * the read fails or returns EOF, the helper is destroyed.
+ */
+static void preopen_helper_readable(struct event_context *ev,
+				    struct fd_event *fde, uint16_t flags,
+				    void *priv)
+{
+	struct preopen_helper *h = (struct preopen_helper *)priv;
+	struct preopen_state *owner = h->state;
+	char reply;
+
+	if (!(flags & EVENT_FD_READ)) {
+		return;
+	}
+
+	if (read(h->fd, &reply, 1) <= 0) {
+		preopen_helper_destroy(h);
+		return;
+	}
+
+	h->busy = false;
+	preopen_queue_run(owner);
+}
+
+/*
+ * talloc destructor for struct preopen_state: destroys every helper
+ * that still has an open socket. Always returns 0.
+ */
+static int preopen_helpers_destructor(struct preopen_state *c)
+{
+	int idx = 0;
+
+	while (idx < c->num_helpers) {
+		struct preopen_helper *h = &c->helpers[idx];
+
+		if (h->fd != -1) {
+			preopen_helper_destroy(h);
+		}
+		idx += 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Child side: serve one request.
+ *
+ * Reads a NUL-terminated file name from sock_fd into *pnamebuf
+ * (doubling the talloc buffer whenever it is full), opens the file
+ * read-only, reads up to to_read bytes into filebuf and finally writes
+ * one byte back on sock_fd. The reply byte is sent whether or not the
+ * open succeeded.
+ *
+ * Returns false if the name could not be read or the buffer could not
+ * be grown, true otherwise.
+ */
+static bool preopen_helper_open_one(int sock_fd, char **pnamebuf,
+				    size_t to_read, void *filebuf)
+{
+	char *name = *pnamebuf;
+	ssize_t have = 0;
+	ssize_t ret;
+	char ack = 0;
+	int file_fd;
+
+	do {
+		size_t bufsize = talloc_get_size(name);
+
+		ret = read(sock_fd, name + have, bufsize - have);
+		if (ret <= 0) {
+			return false;
+		}
+		have += ret;
+
+		if ((size_t)have == bufsize) {
+			/* buffer full, double it before reading on */
+			name = TALLOC_REALLOC_ARRAY(NULL, name, char,
+						    bufsize * 2);
+			if (name == NULL) {
+				return false;
+			}
+			*pnamebuf = name;
+		}
+	} while (name[have-1] != '\0');
+
+	file_fd = open(name, O_RDONLY);
+	if (file_fd != -1) {
+		/* the data itself is not needed, only the read */
+		ret = read(file_fd, filebuf, to_read);
+		close(file_fd);
+	}
+
+	ret = write(sock_fd, &ack, 1);
+	return true;
+}
+
+/*
+ * Main loop of a helper child: allocate a 1024 byte name buffer and a
+ * to_read byte scratch buffer, then serve requests arriving on fd
+ * until preopen_helper_open_one() reports failure.
+ *
+ * Always returns false.
+ */
+static bool preopen_helper(int fd, size_t to_read)
+{
+	char *name = TALLOC_ARRAY(NULL, char, 1024);
+	void *scratch;
+	bool more;
+
+	if (name == NULL) {
+		return false;
+	}
+
+	scratch = talloc_size(NULL, to_read);
+	if (scratch == NULL) {
+		TALLOC_FREE(name);
+		return false;
+	}
+
+	do {
+		more = preopen_helper_open_one(fd, &name, to_read, scratch);
+	} while (more);
+
+	TALLOC_FREE(scratch);
+	TALLOC_FREE(name);
+	return false;
+}
+
+/*
+ * Start one helper: create a socketpair, fork the child and register
+ * a read event on the parent end of the socket.
+ *
+ * h->state must be set by the caller. On success the slot has a valid
+ * pid, fd and fde and is marked idle. On any failure the slot is left
+ * with fd == -1, fde == NULL and busy == true, so that it is never
+ * selected for work and never torn down as if it were running.
+ *
+ * Returns NT_STATUS_OK on success, an NTSTATUS mapped from errno if
+ * socketpair() or fork() failed, NT_STATUS_NO_MEMORY if the event
+ * could not be added.
+ */
+static NTSTATUS preopen_init_helper(struct preopen_helper *h)
+{
+	int fdpair[2];
+	int child_status;
+	NTSTATUS status;
+
+	/* Pessimistic defaults, overwritten step by step below */
+	h->fde = NULL;
+	h->pid = -1;
+	h->fd = -1;
+	h->busy = true;
+
+	if (socketpair(AF_UNIX, SOCK_STREAM, 0, fdpair) == -1) {
+		status = map_nt_error_from_unix(errno);
+		DEBUG(10, ("socketpair() failed: %s\n", strerror(errno)));
+		return status;
+	}
+
+	h->pid = sys_fork();
+
+	if (h->pid == -1) {
+		status = map_nt_error_from_unix(errno);
+		DEBUG(10, ("fork() failed: %s\n", strerror(errno)));
+		/* don't leak the socketpair */
+		close(fdpair[0]);
+		close(fdpair[1]);
+		return status;
+	}
+
+	if (h->pid == 0) {
+		/* child: serve requests on our end until the parent goes */
+		close(fdpair[0]);
+		preopen_helper(fdpair[1], h->state->to_read);
+		exit(0);
+	}
+
+	close(fdpair[1]);
+	h->fd = fdpair[0];
+	h->fde = event_add_fd(smbd_event_context(), h->state, h->fd,
+			      EVENT_FD_READ, preopen_helper_readable, h);
+	if (h->fde == NULL) {
+		close(h->fd);
+		h->fd = -1;
+		/* the child is of no use without the event, reap it */
+		kill(h->pid, SIGKILL);
+		waitpid(h->pid, &child_status, 0);
+		h->pid = -1;
+		return NT_STATUS_NO_MEMORY;
+	}
+	h->busy = false;
+	return NT_STATUS_OK;
+}
+
+/*
+ * Allocate a preopen_state on mem_ctx and start num_helpers helper
+ * processes.
+ *
+ * to_read   - number of bytes each helper reads from a file
+ * queue_max - how many files ahead of the last open to queue
+ * presult   - receives the new state on success
+ *
+ * Every field is initialised explicitly because talloc() does not
+ * zero memory: fnum_queue_end is tested against 0 by preopen_open()
+ * before it is ever assigned, and helper slots whose start-up fails
+ * must look busy so that they are never handed work.
+ *
+ * Failure to start an individual helper is not treated as an error;
+ * the module works with whatever helpers did come up.
+ *
+ * Returns NT_STATUS_NO_MEMORY if an allocation fails, NT_STATUS_OK
+ * otherwise.
+ */
+static NTSTATUS preopen_init_helpers(TALLOC_CTX *mem_ctx, size_t to_read,
+				     int num_helpers, int queue_max,
+				     struct preopen_state **presult)
+{
+	struct preopen_state *result;
+	int i;
+
+	result = talloc(mem_ctx, struct preopen_state);
+	if (result == NULL) {
+		return NT_STATUS_NO_MEMORY;
+	}
+
+	result->num_helpers = num_helpers;
+	result->helpers = TALLOC_ARRAY(result, struct preopen_helper,
+				       num_helpers);
+	if (result->helpers == NULL) {
+		TALLOC_FREE(result);
+		return NT_STATUS_NO_MEMORY;
+	}
+
+	result->to_read = to_read;
+	result->queue_max = queue_max;
+	result->template_fname = NULL;
+	result->number_start = 0;
+	result->num_digits = 0;
+	result->fnum_sent = 0;
+	result->fnum_queue_end = 0;
+	result->preopen_names = NULL;
+
+	for (i=0; i<num_helpers; i++) {
+		result->helpers[i].state = result;
+		result->helpers[i].fde = NULL;
+		result->helpers[i].pid = -1;
+		result->helpers[i].fd = -1;
+		/* not usable until preopen_init_helper() succeeded */
+		result->helpers[i].busy = true;
+	}
+
+	/*
+	 * Install the destructor only after all slots carry fd == -1,
+	 * it looks at every slot.
+	 */
+	talloc_set_destructor(result, preopen_helpers_destructor);
+
+	for (i=0; i<num_helpers; i++) {
+		/* best effort, see above */
+		preopen_init_helper(&result->helpers[i]);
+	}
+
+	*presult = result;
+	return NT_STATUS_OK;
+}
+
+/*
+ * Free function passed to SMB_VFS_HANDLE_SET_DATA() in
+ * preopen_state_get(): releases the object *ptr refers to with
+ * TALLOC_FREE.
+ */
+static void preopen_free_helpers(void **ptr)
+{
+ TALLOC_FREE(*ptr);
+}
+
+/*
+ * Return the preopen_state attached to this VFS handle, creating it on
+ * first use.
+ *
+ * Returns NULL if "preopen:names" is not set for the share, if the
+ * helpers could not be initialised or if the names list yields no
+ * entries. Callers treat NULL as "module inactive".
+ */
+static struct preopen_state *preopen_state_get(vfs_handle_struct *handle)
+{
+ struct preopen_state *state;
+ NTSTATUS status;
+ const char *namelist;
+
+ /* Already set up by an earlier call? */
+ if (SMB_VFS_HANDLE_TEST_DATA(handle)) {
+ SMB_VFS_HANDLE_GET_DATA(handle, state, struct preopen_state,
+ return NULL);
+ return state;
+ }
+
+ namelist = lp_parm_const_string(SNUM(handle->conn), "preopen", "names",
+ NULL);
+
+ if (namelist == NULL) {
+ return NULL;
+ }
+
+ /* Defaults: read 1 byte, 1 helper, queue 10 files ahead */
+ status = preopen_init_helpers(
+ NULL,
+ lp_parm_int(SNUM(handle->conn), "preopen", "num_bytes", 1),
+ lp_parm_int(SNUM(handle->conn), "preopen", "helpers", 1),
+ lp_parm_int(SNUM(handle->conn), "preopen", "queuelen", 10),
+ &state);
+ if (!NT_STATUS_IS_OK(status)) {
+ return NULL;
+ }
+
+ set_namearray(&state->preopen_names, (char *)namelist);
+
+ if (state->preopen_names == NULL) {
+ /* Freeing the state runs its destructor, which stops the helpers */
+ TALLOC_FREE(state);
+ return NULL;
+ }
+
+ /* Hand the state to the handle; preopen_free_helpers releases it */
+ if (!SMB_VFS_HANDLE_TEST_DATA(handle)) {
+ SMB_VFS_HANDLE_SET_DATA(handle, state, preopen_free_helpers,
+ struct preopen_state, return NULL);
+ }
+
+ return state;
+}
+
+/*
+ * Locate the running number inside the last path component of fname.
+ *
+ * The number is taken to start at the first position in the last
+ * component where at least three consecutive decimal digits occur.
+ *
+ * fname       - file name, with or without directory part
+ * pnum        - receives the parsed number
+ * pstart_idx  - receives the offset of the first digit within fname
+ * pnum_digits - receives the number of digits consumed
+ *
+ * Returns false if no such digit run exists or if the number cannot be
+ * incremented without overflowing an unsigned long; the output
+ * parameters are untouched in that case.
+ */
+static bool preopen_parse_fname(const char *fname, unsigned long *pnum,
+				size_t *pstart_idx, int *pnum_digits)
+{
+	const char *p;
+	char *end = NULL;
+	unsigned long num;
+
+	p = strrchr_m(fname, '/');
+	if (p == NULL) {
+		/* no directory part, the name starts right here */
+		p = fname;
+	} else {
+		/* step over the '/' */
+		p += 1;
+	}
+
+	while (p[0] != '\0') {
+		/*
+		 * The casts matter: passing a negative char value to
+		 * isdigit() is undefined. The && chain stops at the
+		 * terminating NUL, so p[1] and p[2] are never read
+		 * beyond the end of the string.
+		 */
+		if (isdigit((unsigned char)p[0])
+		    && isdigit((unsigned char)p[1])
+		    && isdigit((unsigned char)p[2])) {
+			break;
+		}
+		p += 1;
+	}
+	if (*p == '\0') {
+		/* no digits around */
+		return false;
+	}
+
+	num = strtoul(p, &end, 10);
+
+	if (num+1 < num) {
+		/* overflow */
+		return false;
+	}
+
+	*pnum = num;
+	*pstart_idx = (p - fname);
+	*pnum_digits = (end - p);
+	return true;
+}
+
+/*
+ * VFS open hook.
+ *
+ * The open itself is always passed on to the next module and its
+ * result is returned unchanged. If the open succeeded, was read-only
+ * and the name matches preopen:names, the name becomes the new
+ * template and the helpers are asked to open the files numbered after
+ * it, up to preopen:queuelen ahead.
+ */
+static int preopen_open(vfs_handle_struct *handle, const char *fname,
+			files_struct *fsp, int flags, mode_t mode)
+{
+	struct preopen_state *state;
+	int res;
+	unsigned long num;
+
+	DEBUG(10, ("preopen_open called on %s\n", fname));
+
+	state = preopen_state_get(handle);
+	if (state == NULL) {
+		return SMB_VFS_NEXT_OPEN(handle, fname, fsp, flags, mode);
+	}
+
+	res = SMB_VFS_NEXT_OPEN(handle, fname, fsp, flags, mode);
+	if (res == -1) {
+		return -1;
+	}
+
+	/*
+	 * Only the access mode decides whether this is a read. Comparing
+	 * all of "flags" against O_RDONLY would miss every read-only
+	 * open that carries an additional flag bit.
+	 */
+	if ((flags & O_ACCMODE) != O_RDONLY) {
+		return res;
+	}
+
+	if (!is_in_path(fname, state->preopen_names, true)) {
+		DEBUG(10, ("%s does not match the preopen:names list\n",
+			   fname));
+		return res;
+	}
+
+	/* The helpers get absolute names, built from the share path */
+	TALLOC_FREE(state->template_fname);
+	state->template_fname = talloc_asprintf(
+		state, "%s/%s", fsp->conn->connectpath, fname);
+
+	if (state->template_fname == NULL) {
+		return res;
+	}
+
+	if (!preopen_parse_fname(state->template_fname, &num,
+				 &state->number_start, &state->num_digits)) {
+		TALLOC_FREE(state->template_fname);
+		return res;
+	}
+
+	if (num > state->fnum_sent) {
+		/*
+		 * Helpers were too slow, there's no point in reading
+		 * files in helpers that we already read in the
+		 * parent.
+		 */
+		state->fnum_sent = num;
+	}
+
+	if ((state->fnum_queue_end != 0) /* Something was started earlier */
+	    && (num < (state->fnum_queue_end - state->queue_max))) {
+		/*
+		 * "num" is before the queue we announced. This means
+		 * a new run is started.
+		 */
+		state->fnum_sent = num;
+	}
+
+	state->fnum_queue_end = num + state->queue_max;
+
+	preopen_queue_run(state);
+
+	return res;
+}
+
+/* VFS operations structure */
+
+/*
+ * The module hooks only the open operation, as a transparent layer;
+ * the second entry is the NOOP terminator of the table.
+ */
+static vfs_op_tuple preopen_ops[] = {
+ {SMB_VFS_OP(preopen_open), SMB_VFS_OP_OPEN,
+ SMB_VFS_LAYER_TRANSPARENT},
+ {SMB_VFS_OP(NULL), SMB_VFS_OP_NOOP,
+ SMB_VFS_LAYER_NOOP}
+};
+
+NTSTATUS vfs_preopen_init(void);
+
+/*
+ * Module entry point: registers the operations table under the name
+ * "preopen" and returns the result of the registration.
+ */
+NTSTATUS vfs_preopen_init(void)
+{
+	NTSTATUS status;
+
+	status = smb_register_vfs(SMB_VFS_INTERFACE_VERSION, "preopen",
+				  preopen_ops);
+	return status;
+}