/* * Unix SMB/CIFS implementation. * * Support for OneFS bulk directory enumeration API * * Copyright (C) Steven Danneman, 2009 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include "includes.h" #include "smbd/smbd.h" #include "onefs.h" #include "onefs_config.h" #include #include /* The OneFS filesystem provides a readdirplus() syscall, equivalent to the * NFSv3 PDU, which retrieves bulk directory listings with stat information * in a single syscall. * * This file hides this bulk interface underneath Samba's very POSIX like * opendir/readdir/telldir VFS interface. This is done to provide a * significant performance improvement when listing the contents of large * directories, which also require file meta information. i.e. a typical * Windows Explorer request. 
*/ #define RDP_RESUME_KEY_START 0x1 #define RDP_BATCH_SIZE 128 #define RDP_DIRENTRIES_SIZE ((size_t)(RDP_BATCH_SIZE * sizeof(struct dirent))) static char *rdp_direntries = NULL; static struct stat *rdp_stats = NULL; static uint64_t *rdp_cookies = NULL; struct rdp_dir_state { struct rdp_dir_state *next, *prev; SMB_STRUCT_DIR *dirp; char *direntries_cursor; /* cursor to last returned direntry in cache */ size_t stat_count; /* number of entries stored in the cache */ size_t stat_cursor; /* cursor to last returned stat in the cache */ uint64_t resume_cookie; /* cookie from the last entry returned from the cache */ }; static struct rdp_dir_state *dirstatelist = NULL; SMB_STRUCT_DIR *rdp_last_dirp = NULL; /** * Given a DIR pointer, return our internal state. * * This function also tells us whether the given DIR is the same as we saw * during the last call. Because we use a single globally allocated buffer * for readdirplus entries we must check every call into this API to see if * it's for the same directory listing, or a new one. If it's the same we can * maintain our current cached entries, otherwise we must go to the kernel. * * @return 0 on success, 1 on failure */ static int rdp_retrieve_dir_state(SMB_STRUCT_DIR *dirp, struct rdp_dir_state **dir_state, bool *same_as_last) { struct rdp_dir_state *dsp; /* Is this directory the same as the last call */ *same_as_last = (dirp == rdp_last_dirp); for(dsp = dirstatelist; dsp; dsp = dsp->next) if (dsp->dirp == dirp) { *dir_state = dsp; return 0; } /* Couldn't find existing dir_state for the given directory * pointer. */ return 1; } /** * Initialize the global readdirplus buffers. * * These same buffers are used for all calls into readdirplus. * * @return 0 on success, errno value on failure */ static int rdp_init(struct rdp_dir_state *dsp) { /* Unfortunately, there is no good way to free these buffers. If we * allocated and freed for every DIR handle performance would be * adversely affected. 
For now these buffers will be leaked and only * freed when the smbd process dies. */ if (!rdp_direntries) { rdp_direntries = SMB_MALLOC(RDP_DIRENTRIES_SIZE); if (!rdp_direntries) return ENOMEM; } if (!rdp_stats) { rdp_stats = SMB_MALLOC(RDP_BATCH_SIZE * sizeof(struct stat)); if (!rdp_stats) return ENOMEM; } if (!rdp_cookies) { rdp_cookies = SMB_MALLOC(RDP_BATCH_SIZE * sizeof(uint64_t)); if (!rdp_cookies) return ENOMEM; } dsp->direntries_cursor = rdp_direntries + RDP_DIRENTRIES_SIZE; dsp->stat_count = RDP_BATCH_SIZE; dsp->stat_cursor = RDP_BATCH_SIZE; dsp->resume_cookie = RDP_RESUME_KEY_START; return 0; } /** * Call into readdirplus() to refill our global dirent cache. * * This function also resets all cursors back to the beginning of the cache. * All stat buffers are retrieved by following symlinks. * * @return number of entries retrieved, -1 on error */ static int rdp_fill_cache(struct rdp_dir_state *dsp) { int nread, dirfd; dirfd = dirfd(dsp->dirp); if (dirfd < 0) { DEBUG(1, ("Could not retrieve fd for DIR\n")); return -1; } /* Resize the stat_count to grab as many entries as possible */ dsp->stat_count = RDP_BATCH_SIZE; DEBUG(9, ("Calling readdirplus() with DIR %p, dirfd: %d, " "resume_cookie %#llx, size_to_read: %zu, " "direntries_size: %zu, stat_count: %u\n", dsp->dirp, dirfd, dsp->resume_cookie, RDP_BATCH_SIZE, RDP_DIRENTRIES_SIZE, dsp->stat_count)); nread = readdirplus(dirfd, RDP_FOLLOW, &dsp->resume_cookie, RDP_BATCH_SIZE, rdp_direntries, RDP_DIRENTRIES_SIZE, &dsp->stat_count, rdp_stats, rdp_cookies); if (nread < 0) { DEBUG(1, ("Error calling readdirplus(): %s\n", strerror(errno))); return -1; } DEBUG(9, ("readdirplus() returned %u entries from DIR %p\n", dsp->stat_count, dsp->dirp)); dsp->direntries_cursor = rdp_direntries; dsp->stat_cursor = 0; return nread; } /** * Create a dir_state to track an open directory that we're enumerating. 
* * This utility function is globally accessible for use by other parts of the * onefs.so module to initialize a dir_state when a directory is opened through * a path other than the VFS layer. * * @return 0 on success and errno on failure * * @note: Callers of this function MUST cleanup the dir_state through a proper * call to VFS_CLOSEDIR(). */ int onefs_rdp_add_dir_state(connection_struct *conn, SMB_STRUCT_DIR *dirp) { int ret = 0; struct rdp_dir_state *dsp = NULL; /* No-op if readdirplus is disabled */ if (!lp_parm_bool(SNUM(conn), PARM_ONEFS_TYPE, PARM_USE_READDIRPLUS, PARM_USE_READDIRPLUS_DEFAULT)) { return 0; } /* Create a struct dir_state */ dsp = SMB_MALLOC_P(struct rdp_dir_state); if (!dsp) { DEBUG(0, ("Error allocating struct rdp_dir_state.\n")); return ENOMEM; } /* Initialize the dir_state structure and add it to the list */ ret = rdp_init(dsp); if (ret) { DEBUG(0, ("Error initializing readdirplus() buffers: %s\n", strerror(ret))); return ret; } /* Set the SMB_STRUCT_DIR in the dsp */ dsp->dirp = dirp; DLIST_ADD(dirstatelist, dsp); return 0; } /** * Open a directory for enumeration. * * Create a state struct to track the state of this directory for the life * of this open. 
* * @param[in] handle vfs handle given in most VFS calls * @param[in] fname filename of the directory to open * @param[in] mask unused * @param[in] attr unused * * @return DIR pointer, NULL if directory does not exist, NULL on error */ SMB_STRUCT_DIR * onefs_opendir(vfs_handle_struct *handle, const char *fname, const char *mask, uint32 attr) { int ret = 0; SMB_STRUCT_DIR *ret_dirp; /* Fallback to default system routines if readdirplus is disabled */ if (!lp_parm_bool(SNUM(handle->conn), PARM_ONEFS_TYPE, PARM_USE_READDIRPLUS, PARM_USE_READDIRPLUS_DEFAULT)) { return SMB_VFS_NEXT_OPENDIR(handle, fname, mask, attr); } /* Open the directory */ ret_dirp = SMB_VFS_NEXT_OPENDIR(handle, fname, mask, attr); if (!ret_dirp) { DEBUG(3, ("Unable to open directory: %s\n", fname)); return NULL; } /* Create the dir_state struct and add it to the list */ ret = onefs_rdp_add_dir_state(handle->conn, ret_dirp); if (ret) { DEBUG(0, ("Error adding dir_state to the list\n")); return NULL; } DEBUG(9, ("Opened handle on directory: \"%s\", DIR %p\n", fname, ret_dirp)); return ret_dirp; } /** * Retrieve one direntry and optional stat buffer from our readdir cache. * * Increment the internal resume cookie, and refresh the cache from the * kernel if necessary. * * The cache cursor tracks the last entry which was successfully returned * to a caller of onefs_readdir(). When a new entry is requested, this * function first increments the cursor, then returns that entry. 
* * @param[in] handle vfs handle given in most VFS calls * @param[in] dirp system DIR handle to retrieve direntries from * @param[in/out] sbuf optional stat buffer to fill, this can be NULL * * @return dirent structure, NULL if at the end of the directory, NULL on error */ SMB_STRUCT_DIRENT * onefs_readdir(vfs_handle_struct *handle, SMB_STRUCT_DIR *dirp, SMB_STRUCT_STAT *sbuf) { struct rdp_dir_state *dsp = NULL; SMB_STRUCT_DIRENT *ret_direntp; bool same_as_last, filled_cache = false; int ret = -1; /* Set stat invalid in-case we error out */ if (sbuf) SET_STAT_INVALID(*sbuf); /* Fallback to default system routines if readdirplus is disabled */ if (!lp_parm_bool(SNUM(handle->conn), PARM_ONEFS_TYPE, PARM_USE_READDIRPLUS, PARM_USE_READDIRPLUS_DEFAULT)) { return readdir(dirp); } /* Retrieve state based off DIR handle */ ret = rdp_retrieve_dir_state(dirp, &dsp, &same_as_last); if (ret) { DEBUG(1, ("Could not retrieve dir_state struct for " "SMB_STRUCT_DIR pointer.\n")); ret_direntp = NULL; goto end; } /* DIR is the same, current buffer and cursors are valid. * Check if there are any entries left in our current cache. */ if (same_as_last) { if (dsp->stat_cursor == dsp->stat_count - 1) { /* Cache is empty, refill from kernel */ ret = rdp_fill_cache(dsp); if (ret <= 0) { ret_direntp = NULL; goto end; } filled_cache = true; } } else { /* DIR is different from last call, reset all buffers and * cursors, and refill the global cache from the new DIR */ ret = rdp_fill_cache(dsp); if (ret <= 0) { ret_direntp = NULL; goto end; } filled_cache = true; DEBUG(8, ("Switched global rdp cache to new DIR entry.\n")); } /* If we just filled the cache we treat that action as the cursor * increment as the resume cookie used belonged to the previous * directory entry. 
If the cache has not changed we first increment * our cursor, then return the next entry */ if (!filled_cache) { dsp->direntries_cursor += ((SMB_STRUCT_DIRENT *)dsp->direntries_cursor)->d_reclen; dsp->stat_cursor++; } /* The resume_cookie stored here purposely differs based on whether we * just filled the cache. The resume cookie stored must always provide * the next direntry, in case the cache is reloaded on every * onefs_readdir() */ dsp->resume_cookie = rdp_cookies[dsp->stat_cursor]; /* Return an entry from cache */ ret_direntp = ((SMB_STRUCT_DIRENT *)dsp->direntries_cursor); if (sbuf) { struct stat onefs_sbuf; onefs_sbuf = rdp_stats[dsp->stat_cursor]; init_stat_ex_from_onefs_stat(sbuf, &onefs_sbuf); /* readdirplus() sets st_ino field to 0, if it was * unable to retrieve stat information for that * particular directory entry. */ if (sbuf->st_ex_ino == 0) SET_STAT_INVALID(*sbuf); } DEBUG(9, ("Read from DIR %p, direntry: \"%s\", resume cookie: %#llx, " "cache cursor: %zu, cache count: %zu\n", dsp->dirp, ret_direntp->d_name, dsp->resume_cookie, dsp->stat_cursor, dsp->stat_count)); /* FALLTHROUGH */ end: /* Set rdp_last_dirp at the end of every VFS call where the cache was * reloaded */ rdp_last_dirp = dirp; return ret_direntp; } /** * Set the location of the next direntry to be read via onefs_readdir(). * * This function should only pass in locations retrieved from onefs_telldir(). 
* * @param[in] handle vfs handle given in most VFS calls * @param[in] dirp system DIR handle to set offset on * @param[in] offset into the directory to resume reading from * * @return no return value */ void onefs_seekdir(vfs_handle_struct *handle, SMB_STRUCT_DIR *dirp, long offset) { struct rdp_dir_state *dsp = NULL; bool same_as_last; uint64_t resume_cookie = 0; int ret = -1; /* Fallback to default system routines if readdirplus is disabled */ if (!lp_parm_bool(SNUM(handle->conn), PARM_ONEFS_TYPE, PARM_USE_READDIRPLUS, PARM_USE_READDIRPLUS_DEFAULT)) { return seekdir(dirp, offset); } /* Validate inputs */ if (offset < 0) { DEBUG(1, ("Invalid offset %ld passed.\n", offset)); return; } /* Retrieve state based off DIR handle */ ret = rdp_retrieve_dir_state(dirp, &dsp, &same_as_last); if (ret) { DEBUG(1, ("Could not retrieve dir_state struct for " "SMB_STRUCT_DIR pointer.\n")); /* XXX: we can't return an error, should we ABORT rather than * return without actually seeking? */ return; } /* Convert offset to resume_cookie */ resume_cookie = rdp_offset31_to_cookie63(offset); DEBUG(9, ("Seek DIR %p, offset: %ld, resume_cookie: %#llx\n", dsp->dirp, offset, resume_cookie)); /* TODO: We could check if the resume_cookie is already in the cache * through a linear search. This would allow us to avoid the cost of * flushing the cache. Frequently, the seekdir offset will only be * one entry before the current cache cursor. However, usually * VFS_SEEKDIR() is only called at the end of a TRAND2_FIND read and * we'll flush the cache at the beginning of the next PDU anyway. Some * analysis should be done to see if this enhancement would provide * better performance. */ /* Set the resume cookie and indicate that the cache should be reloaded * on next read */ dsp->resume_cookie = resume_cookie; rdp_last_dirp = NULL; return; } /** * Returns the location of the next direntry to be read via onefs_readdir(). * * This value can be passed into onefs_seekdir(). 
* * @param[in] handle vfs handle given in most VFS calls * @param[in] dirp system DIR handle to set offset on * * @return offset into the directory to resume reading from */ long onefs_telldir(vfs_handle_struct *handle, SMB_STRUCT_DIR *dirp) { struct rdp_dir_state *dsp = NULL; bool same_as_last; long offset; int ret = -1; /* Fallback to default system routines if readdirplus is disabled */ if (!lp_parm_bool(SNUM(handle->conn), PARM_ONEFS_TYPE, PARM_USE_READDIRPLUS, PARM_USE_READDIRPLUS_DEFAULT)) { return sys_telldir(dirp); } /* Retrieve state based off DIR handle */ ret = rdp_retrieve_dir_state(dirp, &dsp, &same_as_last); if (ret) { DEBUG(1, ("Could not retrieve dir_state struct for " "SMB_STRUCT_DIR pointer.\n")); return -1; } /* Convert resume_cookie to offset */ offset = rdp_cookie63_to_offset31(dsp->resume_cookie); if (offset < 0) { DEBUG(1, ("Unable to convert resume_cookie: %#llx to a " "suitable 32-bit offset value. Error: %s\n", dsp->resume_cookie, strerror(errno))); return -1; } DEBUG(9, ("Seek DIR %p, offset: %ld, resume_cookie: %#llx\n", dsp->dirp, offset, dsp->resume_cookie)); return offset; } /** * Set the next direntry to be read via onefs_readdir() to the beginning of the * directory. 
* * @param[in] handle vfs handle given in most VFS calls * @param[in] dirp system DIR handle to set offset on * * @return no return value */ void onefs_rewinddir(vfs_handle_struct *handle, SMB_STRUCT_DIR *dirp) { struct rdp_dir_state *dsp = NULL; bool same_as_last; int ret = -1; /* Fallback to default system routines if readdirplus is disabled */ if (!lp_parm_bool(SNUM(handle->conn), PARM_ONEFS_TYPE, PARM_USE_READDIRPLUS, PARM_USE_READDIRPLUS_DEFAULT)) { return sys_rewinddir(dirp); } /* Retrieve state based off DIR handle */ ret = rdp_retrieve_dir_state(dirp, &dsp, &same_as_last); if (ret) { DEBUG(1, ("Could not retrieve dir_state struct for " "SMB_STRUCT_DIR pointer.\n")); return; } /* Reset location and resume key to beginning */ ret = rdp_init(dsp); if (ret) { DEBUG(0, ("Error re-initializing rdp cursors: %s\n", strerror(ret))); return; } DEBUG(9, ("Rewind DIR: %p, to resume_cookie: %#llx\n", dsp->dirp, dsp->resume_cookie)); return; } /** * Close DIR pointer and remove all state for that directory open. 
* * @param[in] handle vfs handle given in most VFS calls * @param[in] dirp system DIR handle to set offset on * * @return -1 on failure, setting errno */ int onefs_closedir(vfs_handle_struct *handle, SMB_STRUCT_DIR *dirp) { struct rdp_dir_state *dsp = NULL; bool same_as_last; int ret_val = -1; int ret = -1; /* Fallback to default system routines if readdirplus is disabled */ if (!lp_parm_bool(SNUM(handle->conn), PARM_ONEFS_TYPE, PARM_USE_READDIRPLUS, PARM_USE_READDIRPLUS_DEFAULT)) { return SMB_VFS_NEXT_CLOSEDIR(handle, dirp); } /* Retrieve state based off DIR handle */ ret = rdp_retrieve_dir_state(dirp, &dsp, &same_as_last); if (ret) { DEBUG(1, ("Could not retrieve dir_state struct for " "SMB_STRUCT_DIR pointer.\n")); errno = ENOENT; return -1; } /* Close DIR pointer */ ret_val = SMB_VFS_NEXT_CLOSEDIR(handle, dsp->dirp); DEBUG(9, ("Closed handle on DIR %p\n", dsp->dirp)); /* Tear down state struct */ DLIST_REMOVE(dirstatelist, dsp); SAFE_FREE(dsp); /* Set lastp to NULL, as cache is no longer valid */ rdp_last_dirp = NULL; return ret_val; } /** * Initialize cache data at the beginning of every SMB search operation * * Since filesystem operations, such as delete files or meta data * updates can occur to files in the directory we're searching * between FIND_FIRST and FIND_NEXT calls we must refresh the cache * from the kernel on every new search SMB. * * @param[in] handle vfs handle given in most VFS calls * @param[in] dirp system DIR handle for the current search * * @return nothing */ void onefs_init_search_op(vfs_handle_struct *handle, SMB_STRUCT_DIR *dirp) { /* Setting the rdp_last_dirp to NULL will cause the next readdir * operation to refill the cache. */ rdp_last_dirp = NULL; return; }