/* * Unix SMB/CIFS implementation. * * OneFS shadow copy implementation that utilizes the file system's native * snapshot support. This file does all of the heavy lifting. * * Copyright (C) Dave Richards, 2007 * Copyright (C) Tim Prouty, 2009 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, see <http://www.gnu.org/licenses/>. */ #include <ifs/ifs_syscalls.h> #include <sys/types.h> #include <sys/isi_enc.h> #include <sys/module.h> #include <sys/stat.h> #include <sys/syscall.h> #include <sys/time.h> #include <dirent.h> #include <errno.h> #include <fcntl.h> #include <limits.h> #include <search.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #include "onefs_shadow_copy.h" /* Copied from ../include/proto.h */ void become_root(void); void unbecome_root(void); #define SNAPSHOT_DIRECTORY ".snapshot" #define MAX_VERSIONS 64 /** * A snapshot object. * * During snapshot enumeration, snapshots are represented by snapshot objects * and are stored in a snapshot set. The snapshot object represents one * snapshot within the set. An important thing to note about the set is that * the key of the snapshot object is the tv_sec component of the is_time * member. What this means is that we only store one snapshot for each * second. If multiple snapshots were created within the same second, we'll * keep the earliest one and ignore the rest. Thus, not all snapshots are * necessarily retained. */ struct osc_snapshot { char * is_name; struct timespec is_time; struct osc_snapshot * is_next; }; /** * A snapshot context object. * * Snapshot contexts are used to pass information throughout the snapshot * enumeration routines. As a result, snapshot contexts are stored on the * stack and are both created and destroyed within a single API function. */ struct osc_snapshot_ctx { void * osc_set; struct timespec osc_mtime; }; /** * A directory context. * * Directory contexts are the underlying data structured used to enumerate * snapshot versions. An opendir()-, readdir()- and closedir()-like interface * is provided that utilizes directory contexts. At the API level, directory * contexts are passed around as void pointers. Directory contexts are * allocated on the heap and their lifetime is dictated by the calling * routine. */ struct osc_directory_ctx { size_t idc_pos; size_t idc_len; size_t idc_size; char ** idc_version; }; /** * Return a file descriptor to the STF names directory. * * Opens the STF names directory and returns a file descriptor to it. * Subsequent calls return the same value (avoiding the need to re-open the * directory repeatedly). Caveat caller: don't close the file descriptor or * you will be shot! */ static int osc_get_names_directory_fd(void) { static int fd = -1; if (fd == -1) { become_root(); fd = pctl2_lin_open(STF_NAMES_LIN, HEAD_SNAPID, O_RDONLY); unbecome_root(); } return fd; } /** * Compare two time values. * * Accepts two struct timespecs and compares the tv_sec components of these * values. It returns -1 if the first value preceeds the second, 0 if they * are equal and +1 if the first values succeeds the second. */ static int osc_time_compare(const struct timespec *tsp1, const struct timespec *tsp2) { return (tsp1->tv_sec < tsp2->tv_sec) ? -1 : (tsp1->tv_sec > tsp2->tv_sec) ? +1 : 0; } /** * Compare two timespec values. * * Compares two timespec values. It returns -1 if the first value preceeds * the second, 0 if they are equal and +1 if the first values succeeds the * second. */ static int osc_timespec_compare(const struct timespec *tsp1, const struct timespec *tsp2) { return (tsp1->tv_sec < tsp2->tv_sec) ? -1 : (tsp1->tv_sec > tsp2->tv_sec) ? +1 : (tsp1->tv_nsec < tsp2->tv_nsec) ? -1 : (tsp1->tv_nsec > tsp2->tv_nsec) ? +1 : 0; } /** * Determine whether a timespec value is zero. * * Return 1 if the struct timespec provided is zero and 0 otherwise. */ static int osc_timespec_is_zero(const struct timespec *tsp) { return (tsp->tv_sec == 0) && (tsp->tv_nsec == 0); } /** * Create a snapshot object. * * Allocates and initializes a new snapshot object. In addition to allocating * space for the snapshot object itself, space is allocated for the snapshot * name. Both the name and time are then copied to the new object. */ static struct osc_snapshot * osc_snapshot_create(const char *name, const struct timespec *tsp) { struct osc_snapshot *isp; isp = malloc(sizeof *isp); if (isp == NULL) goto out; isp->is_name = malloc(strlen(name) + 1); if (isp->is_name == NULL) { free(isp); isp = NULL; goto out; } strcpy(isp->is_name, name); isp->is_time = *tsp; isp->is_next = NULL; out: return isp; } /** * Destroy a snapshot object. * * Frees both the name and the snapshot object itself. Appropriate NULL * checking is performed because counting on free to do so is immoral. */ static void osc_snapshot_destroy(struct osc_snapshot *isp) { if (isp != NULL) { if (isp->is_name != NULL) free(isp->is_name); free(isp); } } /** * Destroy all snapshots in the snapshot list. * * Calls osc_snapshot_destroy() on each snapshot in the list. */ static void osc_snapshot_destroy_list(struct osc_snapshot *isp) { struct osc_snapshot *tmp; while (isp != NULL) { tmp = isp; isp = isp->is_next; osc_snapshot_destroy(tmp); } } /** * Compare two snapshot objects. * * Compare two snapshot objects. It is really just a wrapper for * osc_time_compare(), which compare the time value of the two snapshots. * N.B. time value in this context refers only to the tv_sec component. */ static int osc_snapshot_compare(const void *vp1, const void *vp2) { const struct osc_snapshot *isp1 = vp1; const struct osc_snapshot *isp2 = vp2; return -osc_time_compare(&isp1->is_time, &isp2->is_time); } /** * Insert a snapshot into the snapshot set. * * Inserts a new snapshot into the snapshot set. The key for snapshots is * their creation time (it's actually the seconds portion of the creation * time). If a duplicate snapshot is found in the set, the new snapshot is * added to a linked list of snapshots for that second. */ static void osc_snapshot_insert(struct osc_snapshot_ctx *oscp, const char *name, const struct timespec *tsp, int *errorp) { struct osc_snapshot *isp1; struct osc_snapshot **ispp; isp1 = osc_snapshot_create(name, tsp); if (isp1 == NULL) { *errorp = 1; return; } ispp = tsearch(isp1, &oscp->osc_set, osc_snapshot_compare); if (ispp != NULL) { struct osc_snapshot *isp2 = *ispp; /* If this is the only snapshot for this second, we're done. */ if (isp2 == isp1) return; /* Collision: add the new snapshot to the list. */ isp1->is_next = isp2->is_next; isp2->is_next = isp1; } else *errorp = 1; } /** * Process the next snapshot. * * Called for (almost) every entry in a .snapshot directory, ("." and ".." are * ignored in osc_process_snapshot_directory()). All other entries are passed * to osc_process_snapshot(), however. These entries can fall into one of two * categories: snapshot names and snapshot aliases. We only care about * snapshot names (as aliases are just redundant entries). Once it verifies * that name represents a valid snapshot name, it calls fstat() to get the * creation time of the snapshot and then calls osc_snapshot_insert() to add * this entry to the snapshot set. */ static void osc_process_snapshot(struct osc_snapshot_ctx *oscp, const char *name, int *errorp) { int fd; struct stf_stat stf_stat; struct stat stbuf; fd = osc_get_names_directory_fd(); if (fd == -1) goto out; fd = enc_openat(fd, name, ENC_DEFAULT, O_RDONLY); if (fd == -1) goto out; memset(&stf_stat, 0, sizeof stf_stat); if (ifs_snap_stat(fd, &stf_stat) == -1) goto out; if (stf_stat.sf_type != SF_STF) goto out; if (fstat(fd, &stbuf) == -1) goto out; osc_snapshot_insert(oscp, name, &stbuf.st_birthtimespec, errorp); out: if (fd != -1) close(fd); } /** * Process a snapshot directory. * * Opens the snapshot directory and calls osc_process_snapshot() for each * entry. (Well ok, "." and ".." are ignored.) The goal here is to add all * snapshots in the directory to the snapshot set. */ static void osc_process_snapshot_directory(struct osc_snapshot_ctx *oscp, int *errorp) { int fd; struct stat stbuf; DIR *dirp; struct dirent *dp; fd = osc_get_names_directory_fd(); if (fd == -1) goto out; if (fstat(fd, &stbuf) == -1) goto out; dirp = opendir(SNAPSHOT_DIRECTORY); if (dirp == NULL) goto out; for (;;) { dp = readdir(dirp); if (dp == NULL) break; if (dp->d_name[0] == '.' && (dp->d_name[1] == '\0' || (dp->d_name[1] == '.' && dp->d_name[2] == '\0'))) continue; osc_process_snapshot(oscp, dp->d_name, errorp); if (*errorp) break; } closedir(dirp); if (!*errorp) oscp->osc_mtime = stbuf.st_mtimespec; out: return; } /** * Initialize a snapshot context object. * * Clears all members of the context object. */ static void osc_snapshot_ctx_init(struct osc_snapshot_ctx *oscp) { memset(oscp, 0, sizeof *oscp); } /** * Desoy a snapshot context object. * * Frees all snapshots associated with the snapshot context and then calls * osc_snapshot_ctx_init() to re-initialize the context object. */ static void osc_snapshot_ctx_clean(struct osc_snapshot_ctx *oscp) { struct osc_snapshot *tmp; while (oscp->osc_set != NULL) { tmp = *(void **)oscp->osc_set; tdelete(tmp, &oscp->osc_set, osc_snapshot_compare); osc_snapshot_destroy_list(tmp); } osc_snapshot_ctx_init(oscp); } /** * Return the "global" snapshot context. * * We maintain a single open snapshot context. Return a pointer to it. */ static struct osc_snapshot_ctx * osc_get_snapshot_ctx(void) { static struct osc_snapshot_ctx osc = { 0, { 0, 0 } }; return &osc; } /** * Determine whether a snapshot context is still valid. * * "Valid" in this context means "reusable". We can re-use a previous * snapshot context iff we successfully built a previous snapshot context * and no snapshots have been created or deleted since we did so. * A "names" directory exists within our snapshot * implementation in which all snapshot names are entered. Each time a * snapshot is created or deleted, an entry must be added or removed. * When this happens the modification time on the "names" directory * changes. Therefore, a snapshot context is valid iff the context * pointer is non-NULL, the cached modification time is non-zero * (zero means uninitialized), and the modification time of the "names" * directory matches the cached value. */ static int osc_snapshot_ctx_is_valid(struct osc_snapshot_ctx *oscp) { int fd; struct stat stbuf; if (oscp == NULL) return 0; if (osc_timespec_is_zero(&oscp->osc_mtime)) return 0; fd = osc_get_names_directory_fd(); if (fd == -1) return 0; if (fstat(fd, &stbuf) == -1) return 0; if (osc_timespec_compare(&oscp->osc_mtime, &stbuf.st_mtimespec) != 0) return 0; return 1; } /** * Create and initialize a directory context. * * Allocates a directory context from the heap and initializes it. */ static struct osc_directory_ctx * osc_directory_ctx_create(void) { struct osc_directory_ctx *idcp; idcp = malloc(sizeof *idcp); if (idcp != NULL) memset(idcp, 0, sizeof *idcp); return idcp; } /** * Destroy a directory context. * * Frees any versions associated with the directory context and then frees the * context itself. */ static void osc_directory_ctx_destroy(struct osc_directory_ctx *idcp) { int i; if (idcp == NULL) return; for (i = 0; i < idcp->idc_len; i++) free(idcp->idc_version[i]); free(idcp); } /** * Expand the size of a directory context's version list. * * If osc_directory_ctx_append_version() detects that the version list is too * small to accomodate a new version string, it called * osc_directory_ctx_expand_version_list() to expand the version list. */ static void osc_directory_ctx_expand_version_list(struct osc_snapshot_ctx *oscp, struct osc_directory_ctx *idcp, int *errorp) { size_t size; char **cpp; size = idcp->idc_size * 2 ?: 1; cpp = realloc(idcp->idc_version, size * sizeof (char *)); if (cpp == NULL) { *errorp = 1; return; } idcp->idc_size = size; idcp->idc_version = cpp; } /** * Append a new version to a directory context. * * Appends a snapshot version to the * directory context's version list. */ static void osc_directory_ctx_append_version(struct osc_snapshot_ctx *oscp, struct osc_directory_ctx *idcp, const struct timespec *tsp, int *errorp) { char *cp; struct tm *tmp; char text[64]; if (idcp->idc_len >= MAX_VERSIONS) return; if (idcp->idc_len >= idcp->idc_size) { osc_directory_ctx_expand_version_list(oscp, idcp, errorp); if (*errorp) return; } tmp = gmtime(&tsp->tv_sec); if (tmp == NULL) { *errorp = 1; return; } snprintf(text, sizeof text, "@GMT-%04u.%02u.%02u-%02u.%02u.%02u", tmp->tm_year + 1900, tmp->tm_mon + 1, tmp->tm_mday, tmp->tm_hour, tmp->tm_min, tmp->tm_sec); cp = malloc(strlen(text) + 1); if (cp == NULL) { *errorp = 1; return; } strcpy(cp, text); idcp->idc_version[idcp->idc_len++] = cp; } /** * Make a directory context from a snapshot context. * * Once a snapshot context has been completely filled-in, * osc_make_directory_ctx() is used to build a directory context from it. The * idea here is to create version for each snapshot in the snapshot set. */ static void osc_make_directory_ctx(struct osc_snapshot_ctx *oscp, struct osc_directory_ctx *idcp, int *errorp) { static void walk(const void *vp, VISIT v, int level) { const struct osc_snapshot *isp; if ((v != postorder && v != leaf) || *errorp) return; isp = *(const struct osc_snapshot **)(u_long)vp; osc_directory_ctx_append_version(oscp, idcp, &isp->is_time, errorp); } twalk(oscp->osc_set, walk); } /** * Open a version directory. * * Opens a version directory. What this really means is that * osc_version_opendir() returns a handle to a directory context, which can be * used to retrieve version strings. */ void * osc_version_opendir(void) { int error = 0; struct osc_directory_ctx *idcp; struct osc_snapshot_ctx *oscp; idcp = osc_directory_ctx_create(); if (idcp == NULL) goto error_out; oscp = osc_get_snapshot_ctx(); if (!osc_snapshot_ctx_is_valid(oscp)) { osc_snapshot_ctx_clean(oscp); osc_process_snapshot_directory(oscp, &error); if (error) goto error_out; } osc_make_directory_ctx(oscp, idcp, &error); if (error) goto error_out; goto out; error_out: if (idcp != NULL) { osc_directory_ctx_destroy(idcp); idcp = NULL; } out: return (void *)idcp; } /** * Read the next version directory entry. * * Returns the name of the next version in the version directory, or NULL if * we're at the end of the directory. What this really does is return the * next version from the version list stored in the directory context. */ char * osc_version_readdir(void *vp) { struct osc_directory_ctx *idcp = vp; if (idcp == NULL) return NULL; if (idcp->idc_pos >= idcp->idc_len) return NULL; return idcp->idc_version[idcp->idc_pos++]; } /** * Close the version directory. * * Destroys the underlying directory context. */ void osc_version_closedir(void *vp) { struct osc_directory_ctx *idcp = vp; if (idcp != NULL) osc_directory_ctx_destroy(idcp); } /** * Canonicalize a path. * * Converts paths of the form @GMT-.. to paths of the form ../.snapshot/.. * It's not the prettiest routine I've ever written, but what the heck? */ char * osc_canonicalize_path(const char *path, char *snap_component) { int error = 0; struct osc_snapshot_ctx *oscp; struct tm tm; int n; struct osc_snapshot is; struct osc_snapshot **ispp; struct osc_snapshot *isp; char *cpath = NULL; char *cpath2 = NULL; const char *snap_component_orig = snap_component; struct stat sb; oscp = osc_get_snapshot_ctx(); if (!osc_snapshot_ctx_is_valid(oscp)) { osc_snapshot_ctx_clean(oscp); osc_process_snapshot_directory(oscp, &error); if (error) goto out; } memset(&tm, 0, sizeof tm); n = sscanf(snap_component, "@GMT-%4u.%2u.%2u-%2u.%2u.%2u", &tm.tm_year, &tm.tm_mon, &tm.tm_mday, &tm.tm_hour, &tm.tm_min, &tm.tm_sec); if (n != 6) goto out; tm.tm_year -= 1900; tm.tm_mon -= 1; is.is_name = NULL; is.is_time.tv_sec = timegm(&tm); is.is_time.tv_nsec = 0; ispp = tfind(&is, &oscp->osc_set, osc_snapshot_compare); if (ispp == NULL) goto out; isp = *ispp; /* Determine the path after "@GMT-..." */ while (*snap_component != '/' && *snap_component != '\0') snap_component++; while (*snap_component == '/') snap_component++; cpath = malloc(strlen(SNAPSHOT_DIRECTORY) + strlen(isp->is_name) + strlen(path) + 3); if (cpath == NULL) goto out; /* * Use the first snapshot that has a successful stat for the requested * path. */ while (true) { sprintf(cpath, "%s/%s", SNAPSHOT_DIRECTORY, isp->is_name); /* Append path before "@GMT-..." */ if (snap_component_orig != path) { strcat(cpath, "/"); strncat(cpath, path, snap_component_orig - path); } /* Append path after "@GMT-..." */ if (*snap_component != '\0') { strcat(cpath, "/"); strcat(cpath, snap_component); } /* If there is a valid snapshot for this file, we're done. */ if (stat(cpath, &sb) == 0) break; /* Try the next snapshot. If this was the last one, give up. */ isp = isp->is_next; if (isp == NULL) break; /* If the realloc fails, give up. */ cpath2 = realloc(cpath, strlen(SNAPSHOT_DIRECTORY) + strlen(isp->is_name) + strlen(path) + 3); if (cpath2 == NULL) break; cpath = cpath2; } out: return cpath; }