diff options
author | Andrew Tridgell <tridge@samba.org> | 2007-04-05 03:51:49 +0000 |
---|---|---|
committer | Gerald (Jerry) Carter <jerry@samba.org> | 2007-10-10 14:49:48 -0500 |
commit | 66156220ebf5dc212e9aa86015b7301d1b665b50 (patch) | |
tree | 08bebff7187077845d1a81ae37d14d1bbf10cd3b | |
parent | 7196090bfd73ab64fd0095ce3ed612bc7978a810 (diff) | |
download | samba-66156220ebf5dc212e9aa86015b7301d1b665b50.tar.gz samba-66156220ebf5dc212e9aa86015b7301d1b665b50.tar.bz2 samba-66156220ebf5dc212e9aa86015b7301d1b665b50.zip |
r22082: merged the ctdb changes from bzr
added opendb ctdb backend from ronnie
(This used to be commit b0da25cb79f860bfa14ba7a8419c7996d936292b)
-rw-r--r-- | source4/cluster/ctdb/common/ctdb_call.c | 93 | ||||
-rw-r--r-- | source4/cluster/ctdb/common/ctdb_ltdb.c | 42 | ||||
-rw-r--r-- | source4/cluster/ctdb/config.mk | 4 | ||||
-rw-r--r-- | source4/cluster/ctdb/include/ctdb.h | 6 | ||||
-rw-r--r-- | source4/cluster/ctdb/include/ctdb_private.h | 3 | ||||
-rw-r--r-- | source4/cluster/ctdb/opendb_ctdb.c | 611 | ||||
-rw-r--r-- | source4/ntvfs/common/config.mk | 2 | ||||
-rw-r--r-- | source4/ntvfs/common/opendb.c | 6 | ||||
-rw-r--r-- | source4/ntvfs/common/opendb.h | 2 |
9 files changed, 753 insertions, 16 deletions
diff --git a/source4/cluster/ctdb/common/ctdb_call.c b/source4/cluster/ctdb/common/ctdb_call.c index 75355f7ae7..0b4195140c 100644 --- a/source4/cluster/ctdb/common/ctdb_call.c +++ b/source4/cluster/ctdb/common/ctdb_call.c @@ -189,7 +189,7 @@ static void ctdb_call_send_dmaster(struct ctdb_db_context *ctdb_db, r->hdr.srcnode = ctdb->vnn; r->hdr.reqid = c->hdr.reqid; r->db_id = c->db_id; - r->dmaster = header->laccessor; + r->dmaster = c->hdr.srcnode; r->keylen = key->dsize; r->datalen = data->dsize; memcpy(&r->data[0], key->dptr, key->dsize); @@ -239,7 +239,7 @@ void ctdb_request_dmaster(struct ctdb_context *ctdb, struct ctdb_req_header *hdr ctdb_send_error(ctdb, hdr, ret, "Unknown database in request. db_id==0x%08x",c->db_id); return; } - + /* fetch the current record */ ret = ctdb_ltdb_fetch(ctdb_db, key, &header, &data2); if (ret != 0) { @@ -329,9 +329,12 @@ void ctdb_request_call(struct ctdb_context *ctdb, struct ctdb_req_header *hdr) } /* if this nodes has done enough consecutive calls on the same record - then give them the record */ - if (header.laccessor == c->hdr.srcnode && - header.lacount >= ctdb->max_lacount) { + then give them the record + or if the node requested an immediate migration + */ + if ( (header.laccessor == c->hdr.srcnode + && header.lacount >= ctdb->max_lacount) + || c->flags&CTDB_IMMEDIATE_MIGRATION ) { ctdb_call_send_dmaster(ctdb_db, c, &header, &call.key, &data); talloc_free(data.dptr); return; @@ -373,6 +376,7 @@ struct ctdb_call_state { struct ctdb_call call; int redirect_count; struct ctdb_ltdb_header header; + void *fetch_private; }; @@ -419,7 +423,6 @@ void ctdb_reply_dmaster(struct ctdb_context *ctdb, struct ctdb_req_header *hdr) } ctdb_db = state->ctdb_db; - data.dptr = c->data; data.dsize = c->datalen; @@ -578,6 +581,7 @@ struct ctdb_call_state *ctdb_call_send(struct ctdb_db_context *ctdb_db, struct c state->c->hdr.srcnode = ctdb->vnn; /* this limits us to 16k outstanding messages - not unreasonable */ state->c->hdr.reqid = idr_get_new(ctdb->idr, state, 0xFFFF); + state->c->flags = call->flags; state->c->db_id = ctdb_db->db_id; state->c->callid = call->call_id; state->c->keylen = call->key.dsize; @@ -604,6 +608,13 @@ struct ctdb_call_state *ctdb_call_send(struct ctdb_db_context *ctdb_db, struct c } + +struct ctdb_record_handle { + struct ctdb_db_context *ctdb_db; + TDB_DATA key; + TDB_DATA *data; +}; + /* make a remote ctdb call - async recv. @@ -612,6 +623,8 @@ struct ctdb_call_state *ctdb_call_send(struct ctdb_db_context *ctdb_db, struct c */ int ctdb_call_recv(struct ctdb_call_state *state, struct ctdb_call *call) { + struct ctdb_record_handle *rec; + while (state->state < CTDB_CALL_DONE) { event_loop_once(state->node->ctdb->ev); } @@ -620,6 +633,18 @@ int ctdb_call_recv(struct ctdb_call_state *state, struct ctdb_call *call) talloc_free(state); return -1; } + + rec = state->fetch_private; + + /* ugly hack to manage forced migration */ + if (rec != NULL) { + rec->data->dptr = talloc_memdup(rec, state->call.reply_data.dptr, + state->call.reply_data.dsize); + rec->data->dsize = state->call.reply_data.dsize; + talloc_free(state); + return 0; + } + if (state->call.reply_data.dsize) { call->reply_data.dptr = talloc_memdup(state->node->ctdb, state->call.reply_data.dptr, @@ -643,3 +668,59 @@ int ctdb_call(struct ctdb_db_context *ctdb_db, struct ctdb_call *call) state = ctdb_call_send(ctdb_db, call); return ctdb_call_recv(state, call); } + + + + + + +struct ctdb_record_handle *ctdb_fetch_lock(struct ctdb_db_context *ctdb_db, TALLOC_CTX *mem_ctx, + TDB_DATA key, TDB_DATA *data) +{ + struct ctdb_call call; + struct ctdb_record_handle *rec; + struct ctdb_call_state *state; + int ret; + + ZERO_STRUCT(call); + call.call_id = CTDB_FETCH_FUNC; + call.key = key; + call.flags = CTDB_IMMEDIATE_MIGRATION; + + rec = talloc(mem_ctx, struct ctdb_record_handle); + CTDB_NO_MEMORY_NULL(ctdb_db->ctdb, rec); + + rec->ctdb_db = ctdb_db; + rec->key = key; + rec->key.dptr = talloc_memdup(rec, key.dptr, key.dsize); + rec->data = data; + + state = ctdb_call_send(ctdb_db, &call); + state->fetch_private = rec; + + ret = ctdb_call_recv(state, &call); + if (ret != 0) { + talloc_free(rec); + return NULL; + } + + return rec; +} + + +int ctdb_record_store(struct ctdb_record_handle *rec, TDB_DATA data) +{ + int ret; + struct ctdb_ltdb_header header; + + /* should be avoided if possible hang header off rec ? */ + ret = ctdb_ltdb_fetch(rec->ctdb_db, rec->key, &header, NULL); + if (ret) { + ctdb_set_error(rec->ctdb_db->ctdb, "Fetch of locally held record failed"); + return ret; + } + + ret = ctdb_ltdb_store(rec->ctdb_db, rec->key, &header, data); + + return ret; +} diff --git a/source4/cluster/ctdb/common/ctdb_ltdb.c b/source4/cluster/ctdb/common/ctdb_ltdb.c index 189816229f..ceedb6c5ce 100644 --- a/source4/cluster/ctdb/common/ctdb_ltdb.c +++ b/source4/cluster/ctdb/common/ctdb_ltdb.c @@ -41,6 +41,17 @@ struct ctdb_db_context *ctdb_db_handle(struct ctdb_context *ctdb, const char *na return NULL; } + +/* + this is the dummy null procedure that all databases support +*/ +static int ctdb_fetch_func(struct ctdb_call_info *call) +{ + call->reply_data = &call->record_data; + return 0; +} + + /* attach to a specific database */ @@ -49,6 +60,7 @@ struct ctdb_db_context *ctdb_attach(struct ctdb_context *ctdb, const char *name, { struct ctdb_db_context *ctdb_db, *tmp_db; TDB_DATA data; + int ret; ctdb_db = talloc_zero(ctdb, struct ctdb_db_context); CTDB_NO_MEMORY_NULL(ctdb, ctdb_db); @@ -80,7 +92,18 @@ struct ctdb_db_context *ctdb_attach(struct ctdb_context *ctdb, const char *name, return NULL; } + + /* + all databases support the "fetch" function. we need this in order to do forced migration of records + */ + ret = ctdb_set_call(ctdb_db, ctdb_fetch_func, CTDB_FETCH_FUNC); + if (ret != 0) { + talloc_free(ctdb_db); + return NULL; + } + DLIST_ADD(ctdb->db_list, ctdb_db); + return ctdb_db; } @@ -124,18 +147,25 @@ int ctdb_ltdb_fetch(struct ctdb_db_context *ctdb_db, /* return an initial header */ free(rec.dptr); ltdb_initial_header(ctdb_db, key, header); - data->dptr = NULL; - data->dsize = 0; + if (data) { + data->dptr = NULL; + data->dsize = 0; + } return 0; } *header = *(struct ctdb_ltdb_header *)rec.dptr; - data->dsize = rec.dsize - sizeof(struct ctdb_ltdb_header); - data->dptr = talloc_memdup(ctdb_db, sizeof(struct ctdb_ltdb_header)+rec.dptr, - data->dsize); + if (data) { + data->dsize = rec.dsize - sizeof(struct ctdb_ltdb_header); + data->dptr = talloc_memdup(ctdb_db, sizeof(struct ctdb_ltdb_header)+rec.dptr, + data->dsize); + } + free(rec.dptr); - CTDB_NO_MEMORY(ctdb, data->dptr); + if (data) { + CTDB_NO_MEMORY(ctdb, data->dptr); + } return 0; } diff --git a/source4/cluster/ctdb/config.mk b/source4/cluster/ctdb/config.mk index 89ac7aa7f4..893bd9f136 100644 --- a/source4/cluster/ctdb/config.mk +++ b/source4/cluster/ctdb/config.mk @@ -3,6 +3,10 @@ OBJ_FILES = brlock_ctdb.o ################## +[SUBSYSTEM::opendb_ctdb] +OBJ_FILES = opendb_ctdb.o + +################## [SUBSYSTEM::ctdb_tcp] OBJ_FILES = \ tcp/tcp_init.o \ diff --git a/source4/cluster/ctdb/include/ctdb.h b/source4/cluster/ctdb/include/ctdb.h index 51eeaebedb..9049314401 100644 --- a/source4/cluster/ctdb/include/ctdb.h +++ b/source4/cluster/ctdb/include/ctdb.h @@ -21,12 +21,14 @@ #ifndef _CTDB_H #define _CTDB_H +#define CTDB_IMMEDIATE_MIGRATION 0x00000001 struct ctdb_call { int call_id; TDB_DATA key; TDB_DATA call_data; TDB_DATA reply_data; uint32_t status; + uint32_t flags; }; /* @@ -148,6 +150,7 @@ int ctdb_set_message_handler(struct ctdb_context *ctdb, ctdb_message_fn_t handle int ctdb_send_message(struct ctdb_context *ctdb, uint32_t vnn, uint32_t srvid, TDB_DATA data); + /* fetch and lock a ctdb record. Underneath this will force the dmaster for the record to be moved to the local node. @@ -155,7 +158,7 @@ int ctdb_send_message(struct ctdb_context *ctdb, uint32_t vnn, The lock is released when is talloc_free() is called on the returned ctdb_record_handle. */ -struct ctdb_record_handle *ctdb_fetch_lock(struct ctdb_db_context *ctdb_db, TDB_DATA key, TDB_DATA *data); +struct ctdb_record_handle *ctdb_fetch_lock(struct ctdb_db_context *ctdb_db, TALLOC_CTX *mem_ctx, TDB_DATA key, TDB_DATA *data); /* change the data in a record held with a ctdb_record_handle @@ -163,4 +166,5 @@ struct ctdb_record_handle *ctdb_fetch_lock(struct ctdb_db_context *ctdb_db, TDB_ */ int ctdb_record_store(struct ctdb_record_handle *rec, TDB_DATA data); + #endif diff --git a/source4/cluster/ctdb/include/ctdb_private.h b/source4/cluster/ctdb/include/ctdb_private.h index 1e2244ff7d..66a9a6e244 100644 --- a/source4/cluster/ctdb/include/ctdb_private.h +++ b/source4/cluster/ctdb/include/ctdb_private.h @@ -23,6 +23,8 @@ #include "ctdb.h" + +#define CTDB_FETCH_FUNC 0xf0000001 /* an installed ctdb remote call */ @@ -166,6 +168,7 @@ struct ctdb_req_header { struct ctdb_req_call { struct ctdb_req_header hdr; + uint32_t flags; uint32_t db_id; uint32_t callid; uint32_t keylen; diff --git a/source4/cluster/ctdb/opendb_ctdb.c b/source4/cluster/ctdb/opendb_ctdb.c new file mode 100644 index 0000000000..e12bf3260b --- /dev/null +++ b/source4/cluster/ctdb/opendb_ctdb.c @@ -0,0 +1,611 @@ +/* + Unix SMB/CIFS implementation. + + Copyright (C) Andrew Tridgell 2004 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +/* + this is the open files database, ctdb backend. It implements shared + storage of what files are open between server instances, and + implements the rules of shared access to files. + + The caller needs to provide a file_key, which specifies what file + they are talking about. This needs to be a unique key across all + filesystems, and is usually implemented in terms of a device/inode + pair. + + Before any operations can be performed the caller needs to establish + a lock on the record associated with file_key. That is done by + calling odb_lock(). The caller releases this lock by calling + talloc_free() on the returned handle. + + All other operations on a record are done by passing the odb_lock() + handle back to this module. The handle contains internal + information about what file_key is being operated on. +*/ + +#include "includes.h" +#include "system/filesys.h" +#include "lib/tdb/include/tdb.h" +#include "messaging/messaging.h" +#include "db_wrap.h" +#include "lib/messaging/irpc.h" +#include "librpc/gen_ndr/ndr_opendb.h" +#include "ntvfs/ntvfs.h" +#include "ntvfs/common/ntvfs_common.h" +#include "cluster/cluster.h" +#include "include/ctdb.h" + +struct odb_context { + struct ctdb_context *ctdb; + struct ctdb_db_context *ctdb_db; + struct ntvfs_context *ntvfs_ctx; + BOOL oplocks; +}; + +/* + an odb lock handle. You must obtain one of these using odb_lock() before doing + any other operations. +*/ +struct odb_lock { + struct odb_context *odb; + struct ctdb_record_handle *rec; + TDB_DATA key; + TDB_DATA data; +}; + +/* + Open up the openfiles.tdb database. Close it down using + talloc_free(). We need the messaging_ctx to allow for pending open + notifications. +*/ +static struct odb_context *odb_ctdb_init(TALLOC_CTX *mem_ctx, + struct ntvfs_context *ntvfs_ctx) +{ + struct odb_context *odb; + struct ctdb_context *ctdb = talloc_get_type(cluster_backend_handle(), + struct ctdb_context); + + odb = talloc(mem_ctx, struct odb_context); + if (odb == NULL) { + return NULL; + } + + odb->ctdb = ctdb; + odb->ctdb_db = ctdb_db_handle(ctdb, "opendb"); + if (!odb->ctdb_db) { + DEBUG(0,("Failed to get attached ctdb db handle for opendb\n")); + talloc_free(odb); + return NULL; + } + + odb->ntvfs_ctx = ntvfs_ctx; + + /* leave oplocks disabled by default until the code is working */ + odb->oplocks = lp_parm_bool(-1, "opendb", "oplocks", False); + + return odb; +} + +/* + get a lock on a entry in the odb. This call returns a lock handle, + which the caller should unlock using talloc_free(). +*/ +static struct odb_lock *odb_ctdb_lock(TALLOC_CTX *mem_ctx, + struct odb_context *odb, DATA_BLOB *file_key) +{ + struct odb_lock *lck; + + lck = talloc(mem_ctx, struct odb_lock); + if (lck == NULL) { + return NULL; + } + + lck->odb = talloc_reference(lck, odb); + lck->key.dptr = talloc_memdup(lck, file_key->data, file_key->length); + lck->key.dsize = file_key->length; + if (lck->key.dptr == NULL) { + talloc_free(lck); + return NULL; + } + + lck->rec = ctdb_fetch_lock(odb->ctdb_db, (TALLOC_CTX *)lck, lck->key, &lck->data); + if (!lck->rec) { + talloc_free(lck); + return NULL; + } + + return lck; +} + +/* + determine if two odb_entry structures conflict + + return NT_STATUS_OK on no conflict +*/ +static NTSTATUS share_conflict(struct opendb_entry *e1, struct opendb_entry *e2) +{ + /* if either open involves no read.write or delete access then + it can't conflict */ + if (!(e1->access_mask & (SEC_FILE_WRITE_DATA | + SEC_FILE_APPEND_DATA | + SEC_FILE_READ_DATA | + SEC_FILE_EXECUTE | + SEC_STD_DELETE))) { + return NT_STATUS_OK; + } + if (!(e2->access_mask & (SEC_FILE_WRITE_DATA | + SEC_FILE_APPEND_DATA | + SEC_FILE_READ_DATA | + SEC_FILE_EXECUTE | + SEC_STD_DELETE))) { + return NT_STATUS_OK; + } + + /* data IO access masks. This is skipped if the two open handles + are on different streams (as in that case the masks don't + interact) */ + if (e1->stream_id != e2->stream_id) { + return NT_STATUS_OK; + } + +#define CHECK_MASK(am, right, sa, share) \ + if (((am) & (right)) && !((sa) & (share))) return NT_STATUS_SHARING_VIOLATION + + CHECK_MASK(e1->access_mask, SEC_FILE_WRITE_DATA | SEC_FILE_APPEND_DATA, + e2->share_access, NTCREATEX_SHARE_ACCESS_WRITE); + CHECK_MASK(e2->access_mask, SEC_FILE_WRITE_DATA | SEC_FILE_APPEND_DATA, + e1->share_access, NTCREATEX_SHARE_ACCESS_WRITE); + + CHECK_MASK(e1->access_mask, SEC_FILE_READ_DATA | SEC_FILE_EXECUTE, + e2->share_access, NTCREATEX_SHARE_ACCESS_READ); + CHECK_MASK(e2->access_mask, SEC_FILE_READ_DATA | SEC_FILE_EXECUTE, + e1->share_access, NTCREATEX_SHARE_ACCESS_READ); + + CHECK_MASK(e1->access_mask, SEC_STD_DELETE, + e2->share_access, NTCREATEX_SHARE_ACCESS_DELETE); + CHECK_MASK(e2->access_mask, SEC_STD_DELETE, + e1->share_access, NTCREATEX_SHARE_ACCESS_DELETE); + + return NT_STATUS_OK; +} + +/* + pull a record, translating from the db format to the opendb_file structure defined + in opendb.idl +*/ +static NTSTATUS odb_pull_record(struct odb_lock *lck, struct opendb_file *file) +{ + TDB_DATA dbuf; + DATA_BLOB blob; + NTSTATUS status; + + dbuf = lck->data; + + blob.data = dbuf.dptr; + blob.length = dbuf.dsize; + + status = ndr_pull_struct_blob(&blob, lck, file, (ndr_pull_flags_fn_t)ndr_pull_opendb_file); + + return status; +} + +/* + push a record, translating from the opendb_file structure defined in opendb.idl +*/ +static NTSTATUS odb_push_record(struct odb_lock *lck, struct opendb_file *file) +{ + TDB_DATA dbuf; + DATA_BLOB blob; + NTSTATUS status; + int ret; + + if (!file->num_entries) { + dbuf.dptr = NULL; + dbuf.dsize = 0; + ctdb_record_store(lck->rec, dbuf); + } + + status = ndr_push_struct_blob(&blob, lck, file, (ndr_push_flags_fn_t)ndr_push_opendb_file); + NT_STATUS_NOT_OK_RETURN(status); + + dbuf.dptr = blob.data; + dbuf.dsize = blob.length; + + ret = ctdb_record_store(lck->rec, dbuf); + data_blob_free(&blob); + if (ret != 0) { + return NT_STATUS_INTERNAL_DB_CORRUPTION; + } + + return NT_STATUS_OK; +} + +/* + send an oplock break to a client +*/ +static NTSTATUS odb_oplock_break_send(struct odb_context *odb, struct opendb_entry *e) +{ + /* tell the server handling this open file about the need to send the client + a break */ + return messaging_send_ptr(odb->ntvfs_ctx->msg_ctx, e->server, + MSG_NTVFS_OPLOCK_BREAK, e->file_handle); +} + +/* + register an open file in the open files database. This implements the share_access + rules + + Note that the path is only used by the delete on close logic, not + for comparing with other filenames +*/ +static NTSTATUS odb_ctdb_open_file(struct odb_lock *lck, void *file_handle, + uint32_t stream_id, uint32_t share_access, + uint32_t access_mask, BOOL delete_on_close, + const char *path, + uint32_t oplock_level, uint32_t *oplock_granted) +{ + struct odb_context *odb = lck->odb; + struct opendb_entry e; + int i; + struct opendb_file file; + NTSTATUS status; + + if (odb->oplocks == False) { + oplock_level = OPLOCK_NONE; + } + + status = odb_pull_record(lck, &file); + if (NT_STATUS_EQUAL(status, NT_STATUS_OBJECT_NAME_NOT_FOUND)) { + /* initialise a blank structure */ + ZERO_STRUCT(file); + file.path = path; + } else { + NT_STATUS_NOT_OK_RETURN(status); + } + + /* see if it conflicts */ + e.server = odb->ntvfs_ctx->server_id; + e.file_handle = file_handle; + e.stream_id = stream_id; + e.share_access = share_access; + e.access_mask = access_mask; + e.delete_on_close = delete_on_close; + e.oplock_level = OPLOCK_NONE; + + /* see if anyone has an oplock, which we need to break */ + for (i=0;i<file.num_entries;i++) { + if (file.entries[i].oplock_level == OPLOCK_BATCH) { + /* a batch oplock caches close calls, which + means the client application might have + already closed the file. We have to allow + this close to propogate by sending a oplock + break request and suspending this call + until the break is acknowledged or the file + is closed */ + odb_oplock_break_send(odb, &file.entries[i]); + return NT_STATUS_OPLOCK_NOT_GRANTED; + } + } + + if (file.delete_on_close || + (file.num_entries != 0 && delete_on_close)) { + /* while delete on close is set, no new opens are allowed */ + return NT_STATUS_DELETE_PENDING; + } + + /* check for sharing violations */ + for (i=0;i<file.num_entries;i++) { + status = share_conflict(&file.entries[i], &e); + NT_STATUS_NOT_OK_RETURN(status); + } + + /* we now know the open could succeed, but we need to check + for any exclusive oplocks. We can't grant a second open + till these are broken. Note that we check for batch oplocks + before checking for sharing violations, and check for + exclusive oplocks afterwards. */ + for (i=0;i<file.num_entries;i++) { + if (file.entries[i].oplock_level == OPLOCK_EXCLUSIVE) { + odb_oplock_break_send(odb, &file.entries[i]); + return NT_STATUS_OPLOCK_NOT_GRANTED; + } + } + + /* + possibly grant an exclusive or batch oplock if this is the only client + with the file open. We don't yet grant levelII oplocks. + */ + if (oplock_granted != NULL) { + if ((oplock_level == OPLOCK_BATCH || + oplock_level == OPLOCK_EXCLUSIVE) && + file.num_entries == 0) { + (*oplock_granted) = oplock_level; + } else { + (*oplock_granted) = OPLOCK_NONE; + } + e.oplock_level = (*oplock_granted); + } + + /* it doesn't conflict, so add it to the end */ + file.entries = talloc_realloc(lck, file.entries, struct opendb_entry, + file.num_entries+1); + NT_STATUS_HAVE_NO_MEMORY(file.entries); + + file.entries[file.num_entries] = e; + file.num_entries++; + + return odb_push_record(lck, &file); +} + + +/* + register a pending open file in the open files database +*/ +static NTSTATUS odb_ctdb_open_file_pending(struct odb_lock *lck, void *private) +{ + struct odb_context *odb = lck->odb; + struct opendb_file file; + NTSTATUS status; + + status = odb_pull_record(lck, &file); + NT_STATUS_NOT_OK_RETURN(status); + + file.pending = talloc_realloc(lck, file.pending, struct opendb_pending, + file.num_pending+1); + NT_STATUS_HAVE_NO_MEMORY(file.pending); + + file.pending[file.num_pending].server = odb->ntvfs_ctx->server_id; + file.pending[file.num_pending].notify_ptr = private; + + file.num_pending++; + + return odb_push_record(lck, &file); +} + + +/* + remove a opendb entry +*/ +static NTSTATUS odb_ctdb_close_file(struct odb_lock *lck, void *file_handle) +{ + struct odb_context *odb = lck->odb; + struct opendb_file file; + int i; + NTSTATUS status; + + status = odb_pull_record(lck, &file); + NT_STATUS_NOT_OK_RETURN(status); + + /* find the entry, and delete it */ + for (i=0;i<file.num_entries;i++) { + if (file_handle == file.entries[i].file_handle && + cluster_id_equal(&odb->ntvfs_ctx->server_id, &file.entries[i].server)) { + if (file.entries[i].delete_on_close) { + file.delete_on_close = True; + } + if (i < file.num_entries-1) { + memmove(file.entries+i, file.entries+i+1, + (file.num_entries - (i+1)) * + sizeof(struct opendb_entry)); + } + break; + } + } + + if (i == file.num_entries) { + return NT_STATUS_UNSUCCESSFUL; + } + + /* send any pending notifications, removing them once sent */ + for (i=0;i<file.num_pending;i++) { + messaging_send_ptr(odb->ntvfs_ctx->msg_ctx, file.pending[i].server, + MSG_PVFS_RETRY_OPEN, + file.pending[i].notify_ptr); + } + file.num_pending = 0; + + file.num_entries--; + + return odb_push_record(lck, &file); +} + + +/* + remove a pending opendb entry +*/ +static NTSTATUS odb_ctdb_remove_pending(struct odb_lock *lck, void *private) +{ + struct odb_context *odb = lck->odb; + int i; + NTSTATUS status; + struct opendb_file file; + + status = odb_pull_record(lck, &file); + NT_STATUS_NOT_OK_RETURN(status); + + /* find the entry, and delete it */ + for (i=0;i<file.num_pending;i++) { + if (private == file.pending[i].notify_ptr && + cluster_id_equal(&odb->ntvfs_ctx->server_id, &file.pending[i].server)) { + if (i < file.num_pending-1) { + memmove(file.pending+i, file.pending+i+1, + (file.num_pending - (i+1)) * + sizeof(struct opendb_pending)); + } + break; + } + } + + if (i == file.num_pending) { + return NT_STATUS_UNSUCCESSFUL; + } + + file.num_pending--; + + return odb_push_record(lck, &file); +} + + +/* + rename the path in a open file +*/ +static NTSTATUS odb_ctdb_rename(struct odb_lock *lck, const char *path) +{ + struct opendb_file file; + NTSTATUS status; + + status = odb_pull_record(lck, &file); + if (NT_STATUS_EQUAL(NT_STATUS_OBJECT_NAME_NOT_FOUND, status)) { + /* not having the record at all is OK */ + return NT_STATUS_OK; + } + NT_STATUS_NOT_OK_RETURN(status); + + file.path = path; + return odb_push_record(lck, &file); +} + +/* + update delete on close flag on an open file +*/ +static NTSTATUS odb_ctdb_set_delete_on_close(struct odb_lock *lck, BOOL del_on_close) +{ + NTSTATUS status; + struct opendb_file file; + + status = odb_pull_record(lck, &file); + NT_STATUS_NOT_OK_RETURN(status); + + file.delete_on_close = del_on_close; + + return odb_push_record(lck, &file); +} + +/* + return the current value of the delete_on_close bit, and how many + people still have the file open +*/ +static NTSTATUS odb_ctdb_get_delete_on_close(struct odb_context *odb, + DATA_BLOB *key, BOOL *del_on_close, + int *open_count, char **path) +{ + NTSTATUS status; + struct opendb_file file; + struct odb_lock *lck; + + lck = odb_lock(odb, odb, key); + NT_STATUS_HAVE_NO_MEMORY(lck); + + status = odb_pull_record(lck, &file); + if (NT_STATUS_EQUAL(NT_STATUS_OBJECT_NAME_NOT_FOUND, status)) { + talloc_free(lck); + (*del_on_close) = False; + return NT_STATUS_OK; + } + if (!NT_STATUS_IS_OK(status)) { + talloc_free(lck); + return status; + } + + (*del_on_close) = file.delete_on_close; + if (open_count != NULL) { + (*open_count) = file.num_entries; + } + if (path != NULL) { + *path = talloc_strdup(odb, file.path); + NT_STATUS_HAVE_NO_MEMORY(*path); + if (file.num_entries == 1 && file.entries[0].delete_on_close) { + (*del_on_close) = True; + } + } + + talloc_free(lck); + + return NT_STATUS_OK; +} + + +/* + determine if a file can be opened with the given share_access, + create_options and access_mask +*/ +static NTSTATUS odb_ctdb_can_open(struct odb_lock *lck, + uint32_t share_access, uint32_t create_options, + uint32_t access_mask) +{ + struct odb_context *odb = lck->odb; + NTSTATUS status; + struct opendb_file file; + struct opendb_entry e; + int i; + + status = odb_pull_record(lck, &file); + if (NT_STATUS_EQUAL(status, NT_STATUS_OBJECT_NAME_NOT_FOUND)) { + return NT_STATUS_OK; + } + NT_STATUS_NOT_OK_RETURN(status); + + if ((create_options & NTCREATEX_OPTIONS_DELETE_ON_CLOSE) && + file.num_entries != 0) { + return NT_STATUS_SHARING_VIOLATION; + } + + if (file.delete_on_close) { + return NT_STATUS_DELETE_PENDING; + } + + e.server = odb->ntvfs_ctx->server_id; + e.file_handle = NULL; + e.stream_id = 0; + e.share_access = share_access; + e.access_mask = access_mask; + + for (i=0;i<file.num_entries;i++) { + status = share_conflict(&file.entries[i], &e); + if (!NT_STATUS_IS_OK(status)) { + /* note that we discard the error code + here. We do this as unless we are actually + doing an open (which comes via a different + function), we need to return a sharing + violation */ + return NT_STATUS_SHARING_VIOLATION; + } + } + + return NT_STATUS_OK; +} + + +static const struct opendb_ops opendb_ctdb_ops = { + .odb_init = odb_ctdb_init, + .odb_lock = odb_ctdb_lock, + .odb_open_file = odb_ctdb_open_file, + .odb_open_file_pending = odb_ctdb_open_file_pending, + .odb_close_file = odb_ctdb_close_file, + .odb_remove_pending = odb_ctdb_remove_pending, + .odb_rename = odb_ctdb_rename, + .odb_set_delete_on_close = odb_ctdb_set_delete_on_close, + .odb_get_delete_on_close = odb_ctdb_get_delete_on_close, + .odb_can_open = odb_ctdb_can_open +}; + + +void odb_ctdb_init_ops(void) +{ + odb_set_ops(&opendb_ctdb_ops); +} diff --git a/source4/ntvfs/common/config.mk b/source4/ntvfs/common/config.mk index 665e8e255b..5c744f9c90 100644 --- a/source4/ntvfs/common/config.mk +++ b/source4/ntvfs/common/config.mk @@ -10,6 +10,6 @@ OBJ_FILES = \ opendb_tdb.o \ notify.o PUBLIC_DEPENDENCIES = NDR_OPENDB NDR_NOTIFY sys_notify share -PRIVATE_DEPENDENCIES = brlock_ctdb +PRIVATE_DEPENDENCIES = brlock_ctdb opendb_ctdb # End LIBRARY ntvfs_common ################################################ diff --git a/source4/ntvfs/common/opendb.c b/source4/ntvfs/common/opendb.c index c17b819c04..ea27efdf3b 100644 --- a/source4/ntvfs/common/opendb.c +++ b/source4/ntvfs/common/opendb.c @@ -62,7 +62,11 @@ _PUBLIC_ struct odb_context *odb_init(TALLOC_CTX *mem_ctx, struct ntvfs_context *ntvfs_ctx) { if (ops == NULL) { - odb_tdb_init_ops(); + if (lp_parm_bool(-1, "ctdb", "opendb", False)) { + odb_ctdb_init_ops(); + } else { + odb_tdb_init_ops(); + } } return ops->odb_init(mem_ctx, ntvfs_ctx); } diff --git a/source4/ntvfs/common/opendb.h b/source4/ntvfs/common/opendb.h index 9d6c18c471..85bb678d77 100644 --- a/source4/ntvfs/common/opendb.h +++ b/source4/ntvfs/common/opendb.h @@ -46,4 +46,4 @@ struct opendb_ops { void odb_set_ops(const struct opendb_ops *new_ops); void odb_tdb_init_ops(void); - +void odb_ctdb_init_ops(void); |