diff options
author | Alexander Bokovoy <ab@samba.org> | 2008-01-16 12:09:48 +0300 |
---|---|---|
committer | Alexander Bokovoy <ab@samba.org> | 2008-01-16 12:09:48 +0300 |
commit | 68694369fc96354452979b07425f3f48c4f73bbe (patch) | |
tree | 6f4f9870ad48babe85c26af0b86d143e446a0cfe /source3/lib | |
parent | f426949e8861b3a97d321516edbcbd771b7d6273 (diff) | |
download | samba-68694369fc96354452979b07425f3f48c4f73bbe.tar.gz samba-68694369fc96354452979b07425f3f48c4f73bbe.tar.bz2 samba-68694369fc96354452979b07425f3f48c4f73bbe.zip |
Merge CTDB-related fixes from samba-ctdb 3.0 branch (http://samba.org/~tridge/3_0-ctdb)
Signed-off-by: Alexander Bokovoy <ab@samba.org>(This used to be commit 0c8e23afbbb2d081fc23908bafcad04650bfacea)
Diffstat (limited to 'source3/lib')
-rw-r--r-- | source3/lib/conn_tdb.c | 2 | ||||
-rw-r--r-- | source3/lib/ctdbd_conn.c | 36 | ||||
-rw-r--r-- | source3/lib/dbwrap.c | 12 | ||||
-rw-r--r-- | source3/lib/dbwrap_ctdb.c | 178 | ||||
-rw-r--r-- | source3/lib/dbwrap_file.c | 5 | ||||
-rw-r--r-- | source3/lib/dbwrap_tdb.c | 21 | ||||
-rw-r--r-- | source3/lib/dbwrap_util.c | 90 | ||||
-rw-r--r-- | source3/lib/messages_ctdbd.c | 4 | ||||
-rw-r--r-- | source3/lib/util.c | 13 |
9 files changed, 276 insertions, 85 deletions
diff --git a/source3/lib/conn_tdb.c b/source3/lib/conn_tdb.c index dd0a354a85..22d85c873d 100644 --- a/source3/lib/conn_tdb.c +++ b/source3/lib/conn_tdb.c @@ -34,7 +34,7 @@ static struct db_context *connections_db_ctx(bool rw) } else { db_ctx = db_open(NULL, lock_path("connections.tdb"), 0, - TDB_DEFAULT, O_RDONLY, 0); + TDB_CLEAR_IF_FIRST|TDB_DEFAULT, O_RDONLY, 0); } return db_ctx; diff --git a/source3/lib/ctdbd_conn.c b/source3/lib/ctdbd_conn.c index 899bbcfcce..18e9879601 100644 --- a/source3/lib/ctdbd_conn.c +++ b/source3/lib/ctdbd_conn.c @@ -1203,6 +1203,42 @@ NTSTATUS ctdbd_register_reconfigure(struct ctdbd_connection *conn) return register_with_ctdbd(conn, CTDB_SRVID_RECONFIGURE); } +/* + persstent store. Used when we update a record in a persistent database + */ +NTSTATUS ctdbd_persistent_store(struct ctdbd_connection *conn, uint32_t db_id, TDB_DATA key, TDB_DATA data) +{ + int cstatus=0; + struct ctdb_rec_data *rec; + TDB_DATA recdata; + size_t length; + NTSTATUS status; + + length = offsetof(struct ctdb_rec_data, data) + key.dsize + data.dsize; + + rec = (struct ctdb_rec_data *)talloc_size(conn, length); + NT_STATUS_HAVE_NO_MEMORY(rec); + + rec->length = length; + rec->reqid = db_id; + rec->keylen = key.dsize; + rec->datalen= data.dsize; + memcpy(&rec->data[0], key.dptr, key.dsize); + memcpy(&rec->data[key.dsize], data.dptr, data.dsize); + + recdata.dptr = (uint8_t *)rec; + recdata.dsize = length; + + status = ctdbd_control(conn, CTDB_CURRENT_NODE, + CTDB_CONTROL_PERSISTENT_STORE, + 0, recdata, NULL, NULL, &cstatus); + if (cstatus != 0) { + return NT_STATUS_INTERNAL_DB_CORRUPTION; + } + return status; +} + + #else NTSTATUS ctdbd_init_connection(TALLOC_CTX *mem_ctx, diff --git a/source3/lib/dbwrap.c b/source3/lib/dbwrap.c index 4e16d18682..001424a6c0 100644 --- a/source3/lib/dbwrap.c +++ b/source3/lib/dbwrap.c @@ -20,7 +20,9 @@ */ #include "includes.h" - +#ifdef CLUSTER_SUPPORT +#include "ctdb_private.h" +#endif /* * Fall back using fetch_locked if no genuine fetch operation is provided */ @@ -46,10 +48,16 @@ struct db_context *db_open(TALLOC_CTX *mem_ctx, int open_flags, mode_t mode) { struct db_context *result = NULL; +#ifdef CLUSTER_SUPPORT + const char *sockname = lp_ctdbd_socket(); +#endif #ifdef CLUSTER_SUPPORT + if(!sockname || !*sockname) { + sockname = CTDB_PATH; + } - if (lp_clustering()) { + if (lp_clustering() && socket_exist(sockname)) { const char *partname; /* ctdb only wants the file part of the name */ partname = strrchr(name, '/'); diff --git a/source3/lib/dbwrap_ctdb.c b/source3/lib/dbwrap_ctdb.c index 90d0b260c5..f497f871d2 100644 --- a/source3/lib/dbwrap_ctdb.c +++ b/source3/lib/dbwrap_ctdb.c @@ -18,16 +18,14 @@ */ #include "includes.h" - #ifdef CLUSTER_SUPPORT - #include "ctdb.h" #include "ctdb_private.h" +#include "ctdbd_conn.h" struct db_ctdb_ctx { struct tdb_wrap *wtdb; uint32 db_id; - struct ctdbd_connection *conn; }; struct db_ctdb_rec { @@ -35,8 +33,6 @@ struct db_ctdb_rec { struct ctdb_ltdb_header header; }; -static struct ctdbd_connection *db_ctdbd_conn(struct db_ctdb_ctx *ctx); - static NTSTATUS db_ctdb_store(struct db_record *rec, TDB_DATA data, int flag) { struct db_ctdb_rec *crec = talloc_get_type_abort( @@ -60,6 +56,42 @@ static NTSTATUS db_ctdb_store(struct db_record *rec, TDB_DATA data, int flag) return (ret == 0) ? NT_STATUS_OK : NT_STATUS_INTERNAL_DB_CORRUPTION; } + +/* for persistent databases the store is a bit different. We have to + ask the ctdb daemon to push the record to all nodes after the + store */ +static NTSTATUS db_ctdb_store_persistent(struct db_record *rec, TDB_DATA data, int flag) +{ + struct db_ctdb_rec *crec = talloc_get_type_abort( + rec->private_data, struct db_ctdb_rec); + TDB_DATA cdata; + int ret; + NTSTATUS status; + + cdata.dsize = sizeof(crec->header) + data.dsize; + + if (!(cdata.dptr = SMB_MALLOC_ARRAY(uint8, cdata.dsize))) { + return NT_STATUS_NO_MEMORY; + } + + crec->header.rsn++; + + memcpy(cdata.dptr, &crec->header, sizeof(crec->header)); + memcpy(cdata.dptr + sizeof(crec->header), data.dptr, data.dsize); + + ret = tdb_store(crec->ctdb_ctx->wtdb->tdb, rec->key, cdata, TDB_REPLACE); + status = (ret == 0) ? NT_STATUS_OK : NT_STATUS_INTERNAL_DB_CORRUPTION; + + /* now tell ctdbd to update this record on all other nodes */ + if (NT_STATUS_IS_OK(status)) { + status = ctdbd_persistent_store(messaging_ctdbd_connection(), crec->ctdb_ctx->db_id, rec->key, cdata); + } + + SAFE_FREE(cdata.dptr); + + return status; +} + static NTSTATUS db_ctdb_delete(struct db_record *rec) { struct db_ctdb_rec *crec = talloc_get_type_abort( @@ -110,6 +142,7 @@ static struct db_record *db_ctdb_fetch_locked(struct db_context *db, struct db_ctdb_rec *crec; NTSTATUS status; TDB_DATA ctdb_data; + int migrate_attempts = 0; if (!(result = talloc(mem_ctx, struct db_record))) { DEBUG(0, ("talloc failed\n")); @@ -153,7 +186,11 @@ again: return NULL; } - result->store = db_ctdb_store; + if (db->persistent) { + result->store = db_ctdb_store_persistent; + } else { + result->store = db_ctdb_store; + } result->delete_rec = db_ctdb_delete; talloc_set_destructor(result, db_ctdb_record_destr); @@ -175,12 +212,14 @@ again: tdb_chainunlock(ctx->wtdb->tdb, key); talloc_set_destructor(result, NULL); + migrate_attempts += 1; + DEBUG(10, ("ctdb_data.dptr = %p, dmaster = %u (%u)\n", ctdb_data.dptr, ctdb_data.dptr ? ((struct ctdb_ltdb_header *)ctdb_data.dptr)->dmaster : -1, get_my_vnn())); - status = ctdbd_migrate(db_ctdbd_conn(ctx), ctx->db_id, key); + status = ctdbd_migrate(messaging_ctdbd_connection(),ctx->db_id, key); if (!NT_STATUS_IS_OK(status)) { DEBUG(5, ("ctdb_migrate failed: %s\n", nt_errstr(status))); @@ -191,6 +230,11 @@ again: goto again; } + if (migrate_attempts > 10) { + DEBUG(0, ("db_ctdb_fetch_locked needed %d attempts\n", + migrate_attempts)); + } + memcpy(&crec->header, ctdb_data.dptr, sizeof(crec->header)); result->value.dsize = ctdb_data.dsize - sizeof(crec->header); @@ -226,10 +270,12 @@ static int db_ctdb_fetch(struct db_context *db, TALLOC_CTX *mem_ctx, /* * See if we have a valid record and we are the dmaster. If so, we can * take the shortcut and just return it. + * we bypass the dmaster check for persistent databases */ if ((ctdb_data.dptr != NULL) && (ctdb_data.dsize >= sizeof(struct ctdb_ltdb_header)) && - ((struct ctdb_ltdb_header *)ctdb_data.dptr)->dmaster == get_my_vnn()) { + (db->persistent || + ((struct ctdb_ltdb_header *)ctdb_data.dptr)->dmaster == get_my_vnn())) { /* we are the dmaster - avoid the ctdb protocol op */ data->dsize = ctdb_data.dsize - sizeof(struct ctdb_ltdb_header); @@ -254,8 +300,7 @@ static int db_ctdb_fetch(struct db_context *db, TALLOC_CTX *mem_ctx, SAFE_FREE(ctdb_data.dptr); /* we weren't able to get it locally - ask ctdb to fetch it for us */ - status = ctdbd_fetch(db_ctdbd_conn(ctx), ctx->db_id, key, mem_ctx, - data); + status = ctdbd_fetch(messaging_ctdbd_connection(),ctx->db_id, key, mem_ctx, data); if (!NT_STATUS_IS_OK(status)) { DEBUG(5, ("ctdbd_fetch failed: %s\n", nt_errstr(status))); return -1; @@ -283,6 +328,22 @@ static void traverse_callback(TDB_DATA key, TDB_DATA data, void *private_data) talloc_free(tmp_ctx); } +static int traverse_persistent_callback(TDB_CONTEXT *tdb, TDB_DATA kbuf, TDB_DATA dbuf, + void *private_data) +{ + struct traverse_state *state = (struct traverse_state *)private_data; + struct db_record *rec; + TALLOC_CTX *tmp_ctx = talloc_new(state->db); + int ret = 0; + /* we have to give them a locked record to prevent races */ + rec = db_ctdb_fetch_locked(state->db, tmp_ctx, kbuf); + if (rec && rec->value.dsize > 0) { + ret = state->fn(rec, state->private_data); + } + talloc_free(tmp_ctx); + return ret; +} + static int db_ctdb_traverse(struct db_context *db, int (*fn)(struct db_record *rec, void *private_data), @@ -296,6 +357,13 @@ static int db_ctdb_traverse(struct db_context *db, state.fn = fn; state.private_data = private_data; + if (db->persistent) { + /* for persistent databases we don't need to do a ctdb traverse, + we can do a faster local traverse */ + return tdb_traverse(ctx->wtdb->tdb, traverse_persistent_callback, &state); + } + + ctdbd_traverse(ctx->db_id, traverse_callback, &state); return 0; } @@ -322,6 +390,27 @@ static void traverse_read_callback(TDB_DATA key, TDB_DATA data, void *private_da state->fn(&rec, state->private_data); } +static int traverse_persistent_callback_read(TDB_CONTEXT *tdb, TDB_DATA kbuf, TDB_DATA dbuf, + void *private_data) +{ + struct traverse_state *state = (struct traverse_state *)private_data; + struct db_record rec; + rec.key = kbuf; + rec.value = dbuf; + rec.store = db_ctdb_store_deny; + rec.delete_rec = db_ctdb_delete_deny; + rec.private_data = state->db; + + if (rec.value.dsize <= sizeof(struct ctdb_ltdb_header)) { + /* a deleted record */ + return 0; + } + rec.value.dsize -= sizeof(struct ctdb_ltdb_header); + rec.value.dptr += sizeof(struct ctdb_ltdb_header); + + return state->fn(&rec, state->private_data); +} + static int db_ctdb_traverse_read(struct db_context *db, int (*fn)(struct db_record *rec, void *private_data), @@ -335,6 +424,12 @@ static int db_ctdb_traverse_read(struct db_context *db, state.fn = fn; state.private_data = private_data; + if (db->persistent) { + /* for persistent databases we don't need to do a ctdb traverse, + we can do a faster local traverse */ + return tdb_traverse_read(ctx->wtdb->tdb, traverse_persistent_callback_read, &state); + } + ctdbd_traverse(ctx->db_id, traverse_read_callback, &state); return 0; } @@ -346,41 +441,6 @@ static int db_ctdb_get_seqnum(struct db_context *db) return tdb_get_seqnum(ctx->wtdb->tdb); } -/* - * Get the ctdbd connection for a database. If possible, re-use the messaging - * ctdbd connection - */ -static struct ctdbd_connection *db_ctdbd_conn(struct db_ctdb_ctx *ctx) -{ - struct ctdbd_connection *result; - - result = messaging_ctdbd_connection(); - - if (result != NULL) { - - if (ctx->conn == NULL) { - /* - * Someone has initialized messaging since we - * initialized our own connection, we don't need it - * anymore. - */ - TALLOC_FREE(ctx->conn); - } - - return result; - } - - if (ctx->conn == NULL) { - NTSTATUS status; - status = ctdbd_init_connection(ctx, &ctx->conn); - if (!NT_STATUS_IS_OK(status)) { - return NULL; - } - set_my_vnn(ctdbd_vnn(ctx->conn)); - } - - return ctx->conn; -} struct db_context *db_open_ctdb(TALLOC_CTX *mem_ctx, const char *name, @@ -390,7 +450,6 @@ struct db_context *db_open_ctdb(TALLOC_CTX *mem_ctx, struct db_context *result; struct db_ctdb_ctx *db_ctdb; char *db_path; - NTSTATUS status; if (!lp_clustering()) { DEBUG(10, ("Clustering disabled -- no ctdb\n")); @@ -409,20 +468,15 @@ struct db_context *db_open_ctdb(TALLOC_CTX *mem_ctx, return NULL; } - db_ctdb->conn = NULL; - - status = ctdbd_db_attach(db_ctdbd_conn(db_ctdb), name, - &db_ctdb->db_id, tdb_flags); - - if (!NT_STATUS_IS_OK(status)) { - DEBUG(0, ("ctdbd_db_attach failed for %s: %s\n", name, - nt_errstr(status))); + if (!NT_STATUS_IS_OK(ctdbd_db_attach(messaging_ctdbd_connection(),name, &db_ctdb->db_id, tdb_flags))) { + DEBUG(0, ("ctdbd_db_attach failed for %s\n", name)); TALLOC_FREE(result); return NULL; } - db_path = ctdbd_dbpath(db_ctdbd_conn(db_ctdb), db_ctdb, - db_ctdb->db_id); + db_path = ctdbd_dbpath(messaging_ctdbd_connection(), db_ctdb, db_ctdb->db_id); + + result->persistent = ((tdb_flags & TDB_CLEAR_IF_FIRST) == 0); /* only pass through specific flags */ tdb_flags &= TDB_SEQNUM; @@ -447,16 +501,4 @@ struct db_context *db_open_ctdb(TALLOC_CTX *mem_ctx, return result; } - -#else - -struct db_context *db_open_ctdb(TALLOC_CTX *mem_ctx, - const char *name, - int hash_size, int tdb_flags, - int open_flags, mode_t mode) -{ - DEBUG(0, ("no clustering compiled in\n")); - return NULL; -} - #endif diff --git a/source3/lib/dbwrap_file.c b/source3/lib/dbwrap_file.c index 5b41f5941b..e3779de1e4 100644 --- a/source3/lib/dbwrap_file.c +++ b/source3/lib/dbwrap_file.c @@ -17,10 +17,6 @@ along with this program. If not, see <http://www.gnu.org/licenses/>. */ -/* - * Be aware that this is just sample code that has not seen too much testing - */ - #include "includes.h" struct db_file_ctx { @@ -367,6 +363,7 @@ struct db_context *db_open_file(TALLOC_CTX *mem_ctx, result->fetch_locked = db_file_fetch_locked; result->traverse = db_file_traverse; result->traverse_read = db_file_traverse; + result->persistent = ((tdb_flags & TDB_CLEAR_IF_FIRST) == 0); ctx->locked_record = NULL; if (!(ctx->dirname = talloc_strdup(ctx, name))) { diff --git a/source3/lib/dbwrap_tdb.c b/source3/lib/dbwrap_tdb.c index 710e45de6b..e87ceb428f 100644 --- a/source3/lib/dbwrap_tdb.c +++ b/source3/lib/dbwrap_tdb.c @@ -31,6 +31,11 @@ static int db_tdb_record_destr(struct db_record* data) struct db_tdb_ctx *ctx = talloc_get_type_abort(data->private_data, struct db_tdb_ctx); + /* This hex_encode() call allocates memory on data context. By way how current + __talloc_free() code works, it is OK to allocate in the destructor as + the children of data will be freed after call to the destructor and this + new 'child' will be caught and freed correctly. + */ DEBUG(10, (DEBUGLEVEL > 10 ? "Unlocking key %s\n" : "Unlocking key %.20s\n", hex_encode(data, (unsigned char *)data->key.dptr, @@ -88,8 +93,9 @@ static struct db_record *db_tdb_fetch_locked(struct db_context *db, struct tdb_fetch_locked_state state; int res; - if (DEBUGLEVEL >= 10) { - char *keystr = hex_encode(NULL, key.dptr, key.dsize); + /* Do not accidently allocate/deallocate w/o need when debug level is lower than needed */ + if(DEBUGLEVEL >= 10) { + char *keystr = hex_encode(NULL, (unsigned char*)key.dptr, key.dsize); DEBUG(10, (DEBUGLEVEL > 10 ? "Locking key %s\n" : "Locking key %.20s\n", keystr)); @@ -191,15 +197,9 @@ static NTSTATUS db_tdb_delete(struct db_record *rec) { struct db_tdb_ctx *ctx = talloc_get_type_abort(rec->private_data, struct db_tdb_ctx); - int res; - - res = tdb_delete(ctx->wtdb->tdb, rec->key); - if (res == 0) { - return NT_STATUS_OK; - } - - return map_nt_error_from_tdb(tdb_error(ctx->wtdb->tdb)); + return (tdb_delete(ctx->wtdb->tdb, rec->key) == 0) ? + NT_STATUS_OK : NT_STATUS_UNSUCCESSFUL; } struct db_tdb_traverse_ctx { @@ -318,6 +318,7 @@ struct db_context *db_open_tdb(TALLOC_CTX *mem_ctx, result->traverse = db_tdb_traverse; result->traverse_read = db_tdb_traverse_read; result->get_seqnum = db_tdb_get_seqnum; + result->persistent = ((tdb_flags & TDB_CLEAR_IF_FIRST) == 0); return result; fail: diff --git a/source3/lib/dbwrap_util.c b/source3/lib/dbwrap_util.c new file mode 100644 index 0000000000..002d572019 --- /dev/null +++ b/source3/lib/dbwrap_util.c @@ -0,0 +1,90 @@ +/* + Unix SMB/CIFS implementation. + Utility functions for the dbwrap API + Copyright (C) Volker Lendecke 2007 + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. +*/ + +#include "includes.h" + +int32_t dbwrap_fetch_int32(struct db_context *db, const char *keystr) +{ + TDB_DATA dbuf; + int32 ret; + + if (db->fetch(db, NULL, string_term_tdb_data(keystr), &dbuf) != 0) { + return -1; + } + + if ((dbuf.dptr == NULL) || (dbuf.dsize != sizeof(int32_t))) { + TALLOC_FREE(dbuf.dptr); + return -1; + } + + ret = IVAL(dbuf.dptr, 0); + TALLOC_FREE(dbuf.dptr); + return ret; +} + +int dbwrap_store_int32(struct db_context *db, const char *keystr, int32_t v) +{ + struct db_record *rec; + int32 v_store; + NTSTATUS status; + + rec = db->fetch_locked(db, NULL, string_term_tdb_data(keystr)); + if (rec == NULL) { + return -1; + } + + SIVAL(&v_store, 0, v); + + status = rec->store(rec, make_tdb_data((const uint8 *)&v_store, + sizeof(v_store)), + TDB_REPLACE); + TALLOC_FREE(rec); + return NT_STATUS_IS_OK(status) ? 0 : -1; +} + +uint32_t dbwrap_change_uint32_atomic(struct db_context *db, const char *keystr, + uint32_t *oldval, uint32_t change_val) +{ + struct db_record *rec; + uint32 val = -1; + TDB_DATA data; + + if (!(rec = db->fetch_locked(db, NULL, + string_term_tdb_data(keystr)))) { + return -1; + } + + if ((rec->value.dptr != NULL) + && (rec->value.dsize == sizeof(val))) { + val = IVAL(rec->value.dptr, 0); + } + + val += change_val; + + data.dsize = sizeof(val); + data.dptr = (uint8 *)&val; + + rec->store(rec, data, TDB_REPLACE); + + TALLOC_FREE(rec); + + return 0; +} + diff --git a/source3/lib/messages_ctdbd.c b/source3/lib/messages_ctdbd.c index 6e9b934a75..f1a02e6af9 100644 --- a/source3/lib/messages_ctdbd.c +++ b/source3/lib/messages_ctdbd.c @@ -22,6 +22,10 @@ #ifdef CLUSTER_SUPPORT #include "librpc/gen_ndr/messaging.h" +#include "ctdb.h" +#include "ctdb_private.h" +#include "ctdbd_conn.h" + struct messaging_ctdbd_context { struct ctdbd_connection *conn; diff --git a/source3/lib/util.c b/source3/lib/util.c index 0653fc9d3f..bc3eaa8d5e 100644 --- a/source3/lib/util.c +++ b/source3/lib/util.c @@ -506,6 +506,19 @@ bool file_exist(const char *fname,SMB_STRUCT_STAT *sbuf) } /******************************************************************* + Check if a unix domain socket exists - call vfs_file_exist for samba files. +********************************************************************/ + +bool socket_exist(const char *fname) +{ + SMB_STRUCT_STAT st; + if (sys_stat(fname,&st) != 0) + return(False); + + return S_ISSOCK(st.st_mode); +} + +/******************************************************************* Check a files mod time. ********************************************************************/ |